Skip to content

Commit

Permalink
Fixes the names and tags issue for atlas
Browse files Browse the repository at this point in the history
  • Loading branch information
verdan committed Aug 8, 2019
1 parent 9914c8b commit 1f9620e
Show file tree
Hide file tree
Showing 13 changed files with 308 additions and 301 deletions.
1 change: 1 addition & 0 deletions MANIFEST.in
@@ -0,0 +1 @@
include requirements.txt
10 changes: 10 additions & 0 deletions README.md
Expand Up @@ -47,6 +47,16 @@ $ docker run -p 5000:5000 amundsendev/amundsen-metadata
$ curl -v http://localhost:5000/healthcheck
```

## Instructions to start the service from Docker image with gunicorn (production use case)
Note that the command below uses gunicorn's default configuration. Please visit the [Gunicorn homepage](https://gunicorn.org/ "Gunicorn") for more information.
```bash
$ docker pull amundsendev/amundsen-metadata:latest
$ docker run -p 5000:5000 amundsendev/amundsen-metadata gunicorn --bind 0.0.0.0:5000 metadata_service.metadata_wsgi

-- In a different terminal, verify that the response is HTTP/1.0 200 OK
$ curl -v http://localhost:5000/healthcheck
```

## Production environment
By default, Flask comes with the Werkzeug web server, which is intended for development. For production environments, use a production-grade web server such as [Gunicorn](https://gunicorn.org/ "Gunicorn").

Expand Down
5 changes: 0 additions & 5 deletions metadata_service/config.py
Expand Up @@ -34,11 +34,6 @@ class Config:
# The relationalAttribute name of Atlas Entity that identifies the database entity.
ATLAS_DB_ATTRIBUTE = 'db'

# FixMe: Once GUIDs are in place, then change it to 'name' (display name)
# Display name of Atlas Entities that we use for amundsen project.
# Atlas uses qualifiedName as indexed attribute. but also supports 'name' attribute.
ATLAS_NAME_ATTRIBUTE = 'qualifiedName'


class LocalConfig(Config):
DEBUG = False
Expand Down
28 changes: 13 additions & 15 deletions metadata_service/entity/popular_table.py
@@ -1,21 +1,19 @@
from typing import Optional

import attr
from marshmallow_annotations.ext.attrs import AttrsSchema


@attr.s(auto_attribs=True, kw_only=True)
class PopularTable:
database: str = attr.ib()
cluster: str = attr.ib()
schema: str = attr.ib()
name: str = attr.ib()
description: Optional[str] = None

def __init__(self, *,
database: str,
cluster: str,
schema: str,
name: str,
description: Optional[str] = None) -> None:
self.database = database
self.cluster = cluster
self.schema = schema
self.name = name
self.description = description

def __repr__(self) -> str:
return """Table(database={!r}, cluster={!r}, schema={!r}, name={!r}, description={!r})"""\
.format(self.database, self.cluster,
self.schema, self.name, self.description)
class PopularTableSchema(AttrsSchema):
class Meta:
target = PopularTable
register_as_scheme = True
287 changes: 144 additions & 143 deletions metadata_service/entity/table_detail.py
@@ -1,175 +1,176 @@
from typing import Iterable, Optional
from typing import List, Optional

import attr
from marshmallow_annotations.ext.attrs import AttrsSchema


@attr.s(auto_attribs=True, kw_only=True)
class User:
def __init__(self, *,
email: str,
first_name: str =None,
last_name: str =None) -> None:
self.email = email
self.first_name = first_name
self.last_name = last_name
email: str = attr.ib()
first_name: Optional[str] = None
last_name: Optional[str] = None


def __repr__(self) -> str:
return 'User(email={!r}, first_name={!r}, last_name={!r})'.format(self.email, self.first_name, self.last_name)
class UserSchema(AttrsSchema):
class Meta:
target = User
register_as_scheme = True


@attr.s(auto_attribs=True, kw_only=True)
class Reader:
def __init__(self, *,
user: User,
read_count: int) -> None:
self.user = user
self.read_count = read_count
user: User = attr.ib()
read_count: int = attr.ib()

def __repr__(self) -> str:
return 'Reader(user={!r}, read_count={!r})'.format(self.user, self.read_count)

class ReaderSchema(AttrsSchema):
class Meta:
target = Reader
register_as_scheme = True


@attr.s(auto_attribs=True, kw_only=True)
class Tag:
def __init__(self, *,
tag_type: str,
tag_name: str) -> None:
self.tag_name = tag_name
self.tag_type = tag_type
tag_type: str = attr.ib()
tag_name: str = attr.ib()


def __repr__(self) -> str:
return 'Tag(tag_name={!r}, tag_type={!r})'.format(self.tag_name,
self.tag_type)
class TagSchema(AttrsSchema):
class Meta:
target = Tag
register_as_scheme = True


@attr.s(auto_attribs=True, kw_only=True)
class Watermark:
def __init__(self, *,
watermark_type: str =None,
partition_key: str =None,
partition_value: str =None,
create_time: str =None) -> None:
self.watermark_type = watermark_type
self.partition_key = partition_key
self.partition_value = partition_value
self.create_time = create_time

def __repr__(self) -> str:
return 'Watermark(watermark_type={!r}, ' \
'partition_key={!r}, ' \
'partition_value={!r}, ' \
'create_time={!r}))'.format(self.watermark_type,
self.partition_key,
self.partition_value,
self.create_time)
watermark_type: Optional[str] = None
partition_key: Optional[str] = None
partition_value: Optional[str] = None
create_time: Optional[str] = None


class WatermarkSchema(AttrsSchema):
class Meta:
target = Watermark
register_as_scheme = True


@attr.s(auto_attribs=True, kw_only=True)
class Statistics:
def __init__(self, *,
stat_type: str,
stat_val: str =None,
start_epoch: int =None,
end_epoch: int =None) -> None:
self.stat_type = stat_type
self.stat_val = stat_val
self.start_epoch = start_epoch
self.end_epoch = end_epoch

def __repr__(self) -> str:
return 'Statistics(stat_type={!r}, ' \
'stat_val={!r},' \
'start_epoch={!r},' \
'end_epoch={!r})'.format(self.stat_type,
self.stat_val,
self.start_epoch,
self.end_epoch)
stat_type: str = attr.ib()
stat_val: Optional[str] = None
start_epoch: Optional[int] = None
end_epoch: Optional[int] = None


class StatisticsSchema(AttrsSchema):
class Meta:
target = Statistics
register_as_scheme = True


@attr.s(auto_attribs=True, kw_only=True)
class Column:
def __init__(self, *,
name: str,
description: Optional[str],
col_type: str,
sort_order: int,
stats: Iterable[Statistics] =()) -> None:
self.name = name
self.description = description
self.col_type = col_type
self.sort_order = sort_order
self.stats = stats

def __repr__(self) -> str:
return 'Column(name={!r}, description={!r}, col_type={!r}, sort_order={!r}, stats={!r})'\
.format(self.name,
self.description,
self.col_type,
self.sort_order,
self.stats)
name: str = attr.ib()
description: Optional[str] = None
col_type: str = attr.ib()
sort_order: int = attr.ib()
stats: List[Statistics] = []


class ColumnSchema(AttrsSchema):
class Meta:
target = Column
register_as_scheme = True


@attr.s(auto_attribs=True, kw_only=True)
class PiiDetail:
    """Two optional string fields holding PII details.

    Both fields default to ``None``; the generated constructor is
    keyword-only.
    """

    pii_semantic_type: Optional[str] = attr.ib(default=None)
    pii_forget_type: Optional[str] = attr.ib(default=None)


class PiiDetailSchema(AttrsSchema):
    """Schema whose ``Meta`` targets the ``PiiDetail`` attrs class."""

    class Meta:
        target = PiiDetail
        # NOTE(review): presumably registers this schema as the converter for
        # PiiDetail in marshmallow-annotations -- confirm against its docs.
        register_as_scheme = True


@attr.s(auto_attribs=True, kw_only=True)
class ForeignKey:
    """Two optional string fields: a table URI and a column name.

    Both fields default to ``None``; the generated constructor is
    keyword-only.
    """

    table_uri: Optional[str] = attr.ib(default=None)
    column_name: Optional[str] = attr.ib(default=None)


class ForeignKeySchema(AttrsSchema):
    """Schema whose ``Meta`` targets the ``ForeignKey`` attrs class."""

    class Meta:
        target = ForeignKey
        # NOTE(review): presumably registers this schema as the converter for
        # ForeignKey in marshmallow-annotations -- confirm against its docs.
        register_as_scheme = True


@attr.s(auto_attribs=True, kw_only=True)
class DataDetail:
    """Two optional string fields: a semantic type and a semantic reference.

    Both fields default to ``None``; the generated constructor is
    keyword-only.
    """

    data_semantic_type: Optional[str] = None
    data_semantic_reference: Optional[str] = None


class DataDetailSchema(AttrsSchema):
    """Schema whose ``Meta`` targets the ``DataDetail`` attrs class."""

    class Meta:
        target = DataDetail
        # NOTE(review): presumably registers this schema as the converter for
        # DataDetail in marshmallow-annotations -- confirm against its docs.
        register_as_scheme = True


# Backward-compatible alias: the class was originally published under this
# misspelled name, so existing imports keep working.
DataDetailchema = DataDetailSchema


@attr.s(auto_attribs=True, kw_only=True)
class Application:
def __init__(self, *,
application_url: str,
description: str,
id: str,
name: str) -> None:
self.application_url = application_url
self.description = description
self.name = name
self.id = id
application_url: str = attr.ib()
description: str = attr.ib()
id: str = attr.ib()
name: str = attr.ib()


def __repr__(self) -> str:
return 'Application(application_url={!r}, description={!r}, name={!r}, id={!r})'\
.format(self.application_url, self.description, self.name, self.id)
class ApplicationSchema(AttrsSchema):
class Meta:
target = Application
register_as_scheme = True


@attr.s(auto_attribs=True, kw_only=True)
class Source:
def __init__(self, *,
source_type: str,
source: str) -> None:
self.source_type = source_type
self.source = source
source_type: str = attr.ib()
source: str = attr.ib()


class SourceSchema(AttrsSchema):
class Meta:
target = Source
register_as_scheme = True


def __repr__(self) -> str:
return 'Source(source_type={!r}, ' \
'source={!r})'.format(self.source_type,
self.source)
# Temporary workaround to satisfy mypy. Once
# https://github.com/python/mypy/issues/6136 is resolved, replace this with
# `attr.converters.default_if_none(default=False)`.
def default_if_none(arg: Optional[bool]) -> bool:
    """Return ``arg`` when it is truthy, otherwise ``False`` (covers ``None``)."""
    if arg:
        return arg
    return False


@attr.s(auto_attribs=True, kw_only=True)
class Table:
def __init__(self, *,
database: str,
cluster: str,
schema: str,
name: str,
tags: Iterable[Tag] =(),
table_readers: Iterable[Reader] = (),
description: Optional[str] = None,
columns: Iterable[Column],
owners: Iterable[User] = (),
watermarks: Iterable[Watermark] = (),
table_writer: Optional[Application] = None,
last_updated_timestamp: Optional[int],
source: Optional[Source] = None,
is_view: Optional[bool] = None,
) -> None:

self.database = database
self.cluster = cluster
self.schema = schema
self.name = name
self.tags = tags
self.table_readers = table_readers
self.description = description
self.columns = columns
self.owners = owners
self.watermarks = watermarks
self.table_writer = table_writer
self.last_updated_timestamp = last_updated_timestamp
self.source = source
self.is_view = is_view or False

def __repr__(self) -> str:
return """Table(database={!r}, cluster={!r}, schema={!r}, name={!r}, tags={!r}, table_readers={!r},
description={!r}, columns={!r}, owners={!r}, watermarks={!r}, table_writer={!r},
last_updated_timestamp={!r}, source={!r}, is_view={!r})"""\
.format(self.database, self.cluster,
self.schema, self.name, self.tags,
self.table_readers, self.description,
self.columns, self.owners, self.watermarks,
self.table_writer, self.last_updated_timestamp,
self.source, self.is_view)
database: str = attr.ib()
cluster: str = attr.ib()
schema: str = attr.ib()
name: str = attr.ib()
columns: List[Column] = attr.ib()
tags: List[Tag] = []
table_readers: List[Reader] = []
description: Optional[str] = None
owners: List[User] = []
watermarks: List[Watermark] = []
table_writer: Optional[Application] = None
last_updated_timestamp: Optional[int] = None
source: Optional[Source] = None
is_view: Optional[bool] = attr.ib(default=None, converter=default_if_none)


class TableSchema(AttrsSchema):
class Meta:
target = Table
register_as_scheme = True

0 comments on commit 1f9620e

Please sign in to comment.