Skip to content

Commit

Permalink
feat: Make DescriptionMetadata inherit from GraphSerializable (#461)
Browse files Browse the repository at this point in the history
* make descriptionmetadata inherit from GraphSerializable

Signed-off-by: Junda Yang <youngyjd@gmail.com>

* fix mypy

Signed-off-by: Junda Yang <youngyjd@gmail.com>

* address comment

Signed-off-by: Junda Yang <youngyjd@gmail.com>

* bump up databuilder version

Signed-off-by: Junda Yang <youngyjd@gmail.com>
  • Loading branch information
youngyjd committed Mar 25, 2021
1 parent 4bb4452 commit 7f095fb
Show file tree
Hide file tree
Showing 3 changed files with 187 additions and 11 deletions.
72 changes: 62 additions & 10 deletions databuilder/models/table_metadata.py
Expand Up @@ -117,7 +117,7 @@ def _create_record_iterator(self) -> Iterator[RDSModel]:


# TODO: this should inherit from ProgrammaticDescription in amundsen-common
class DescriptionMetadata:
class DescriptionMetadata(GraphSerializable):
DESCRIPTION_NODE_LABEL = DESCRIPTION_NODE_LABEL_VAL
PROGRAMMATIC_DESCRIPTION_NODE_LABEL = 'Programmatic_Description'
DESCRIPTION_KEY_FORMAT = '{description}'
Expand All @@ -132,7 +132,10 @@ class DescriptionMetadata:

def __init__(self,
text: Optional[str],
source: str = DEFAULT_SOURCE
source: str = DEFAULT_SOURCE,
description_key: Optional[str] = None,
start_label: Optional[str] = None, # Table, Column, Schema
start_key: Optional[str] = None,
):
"""
:param source: The unique source of what is populating this description.
Expand All @@ -146,17 +149,28 @@ def __init__(self,
else:
self.label = self.PROGRAMMATIC_DESCRIPTION_NODE_LABEL

self.start_label = start_label
self.start_key = start_key
self.description_key = description_key or self.get_description_default_key(start_key)

self._node_iter = self._create_node_iterator()
self._relation_iter = self._create_relation_iterator()

@staticmethod
def create_description_metadata(text: Union[None, str],
                                source: Optional[str] = DEFAULT_SOURCE,
                                description_key: Optional[str] = None,
                                start_label: Optional[str] = None,  # Table, Column, Schema
                                start_key: Optional[str] = None,
                                ) -> Optional['DescriptionMetadata']:
    """
    Build a DescriptionMetadata, or nothing when there is no description text.

    :param text: The description text. When None, no object is created.
    :param source: The source populating this description. A falsy value
        (None or '') is replaced by DescriptionMetadata.DEFAULT_SOURCE.
    :param description_key: Forwarded unchanged to the constructor.
    :param start_label: Forwarded unchanged to the constructor.
    :param start_key: Forwarded unchanged to the constructor.
    :return: The new DescriptionMetadata, or None when text is None.
    """
    # We do not want to create a node if there is no description text!
    if text is None:
        return None
    return DescriptionMetadata(text=text,
                               source=source or DescriptionMetadata.DEFAULT_SOURCE,
                               description_key=description_key,
                               start_label=start_label,
                               start_key=start_key)

def get_description_id(self) -> str:
Expand All @@ -165,8 +179,8 @@ def get_description_id(self) -> str:
else:
return "_" + self.source + "_description"

def __repr__(self) -> str:
    """Return a debug representation showing the source and the text."""
    return 'DescriptionMetadata({!r}, {!r})'.format(self.source, self.text)
def get_description_default_key(self, start_key: Optional[str]) -> Optional[str]:
    """
    Derive the default key of this description from the key of the node
    it is attached to.

    :param start_key: Key of the node the description belongs to.
    :return: '<start_key>/<description id>', or None when start_key is
        None or empty.
    """
    if not start_key:
        return None
    return '{}/{}'.format(start_key, self.get_description_id())

def get_node(self, node_key: str) -> GraphNode:
node = GraphNode(
Expand All @@ -179,7 +193,11 @@ def get_node(self, node_key: str) -> GraphNode:
)
return node

def get_relation(self, start_node: str, start_key: Any, end_key: Any) -> GraphRelationship:
def get_relation(self,
start_node: str,
start_key: str,
end_key: str,
) -> GraphRelationship:
relationship = GraphRelationship(
start_label=start_node,
start_key=start_key,
Expand All @@ -191,6 +209,40 @@ def get_relation(self, start_node: str, start_key: Any, end_key: Any) -> GraphRe
)
return relationship

def create_next_node(self) -> Optional[GraphNode]:
    """
    Return the next node from this object's node iterator.

    :return: The next item of self._node_iter, or None once the iterator
        is exhausted.
    """
    return next(self._node_iter, None)

def create_next_relation(self) -> Optional[GraphRelationship]:
    """
    Return the next relationship from this object's relation iterator.

    :return: The next item of self._relation_iter, or None once the
        iterator is exhausted.
    """
    return next(self._relation_iter, None)

def _create_node_iterator(self) -> Iterator[GraphNode]:
    """
    Lazily yield the single node for this description.

    Because this is a generator, the validation below only runs when the
    iterator is first advanced, not when this method is called.

    :raises Exception: on first iteration, when self.description_key is
        None or empty.
    """
    node_key = self.description_key
    if node_key:
        yield self.get_node(node_key)
        return
    raise Exception('Required description node key cannot be None')

def _create_relation_iterator(self) -> Iterator[GraphRelationship]:
    """
    Lazily yield the single relationship that links the start node to
    this description.

    Because this is a generator, the validation below only runs when the
    iterator is first advanced, not when this method is called.

    :raises Exception: on first iteration, when start_label, start_key or
        description_key is None or empty.
    """
    # Checked in this order; the first missing attribute decides the message.
    required_attributes = (
        ('start_label', 'Required relation start node label cannot be None'),
        ('start_key', 'Required relation start key cannot be None'),
        ('description_key', 'Required relation end key cannot be None'),
    )
    for attribute, message in required_attributes:
        if not getattr(self, attribute):
            raise Exception(message)

    yield self.get_relation(
        start_node=self.start_label,
        start_key=self.start_key,
        end_key=self.description_key,
    )

def __repr__(self) -> str:
    """Return a debug representation showing the source and the text."""
    return 'DescriptionMetadata({!r}, {!r})'.format(self.source, self.text)


class ColumnMetadata:
COLUMN_NODE_LABEL = 'Column'
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Expand Up @@ -4,7 +4,7 @@

from setuptools import find_packages, setup

__version__ = '4.2.0'
__version__ = '4.2.1'


requirements = [
Expand Down
124 changes: 124 additions & 0 deletions tests/unit/models/test_description_metadata.py
@@ -0,0 +1,124 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

import unittest

from databuilder.models.table_metadata import DescriptionMetadata
from databuilder.serializers import neo4_serializer


class TestDescriptionMetadata(unittest.TestCase):
    """Unit tests for the node and relation serialization of DescriptionMetadata."""

    @staticmethod
    def _drain_nodes(description_metadata: DescriptionMetadata) -> list:
        """Serialize every node still pending on the metadata, in order."""
        serialized = []
        node_row = description_metadata.next_node()
        while node_row:
            serialized.append(neo4_serializer.serialize_node(node_row))
            node_row = description_metadata.next_node()
        return serialized

    @staticmethod
    def _drain_relations(description_metadata: DescriptionMetadata) -> list:
        """Serialize every relationship still pending on the metadata, in order."""
        serialized = []
        relation_row = description_metadata.next_relation()
        while relation_row:
            serialized.append(neo4_serializer.serialize_relationship(relation_row))
            relation_row = description_metadata.next_relation()
        return serialized

    def test_raise_exception_when_missing_data(self) -> None:
        # Missing description node key: neither description_key nor start_key given.
        # Objects are built outside the guarded call so only next_*() is under test.
        without_key = DescriptionMetadata(text='test_text')
        with self.assertRaises(Exception):
            without_key.next_node()
        # Either an explicit key or a start key is enough to produce a node.
        DescriptionMetadata(text='test_text', description_key='test_key').next_node()
        DescriptionMetadata(text='test_text', start_key='start_key').next_node()

        # Missing relation start label.
        without_start_label = DescriptionMetadata(text='test_text', start_key='start_key')
        with self.assertRaises(Exception):
            without_start_label.next_relation()
        DescriptionMetadata(text='test_text', start_key='test_key', start_label='Table').next_relation()

        # Missing relation start key.
        without_start_key = DescriptionMetadata(text='test_text',
                                                description_key='test_key',
                                                start_label='Table')
        with self.assertRaises(Exception):
            without_start_key.next_relation()

    def test_serialize_table_description_metadata(self) -> None:
        description_metadata = DescriptionMetadata(
            text='test text 1',
            start_label='Table',
            start_key='test_start_key'
        )

        expected_nodes = [
            {'description': 'test text 1', 'KEY': 'test_start_key/_description',
             'LABEL': 'Description', 'description_source': 'description'},
        ]
        self.assertEqual(self._drain_nodes(description_metadata), expected_nodes)

        expected_relations = [
            {'START_KEY': 'test_start_key', 'START_LABEL': 'Table', 'END_KEY': 'test_start_key/_description',
             'END_LABEL': 'Description', 'TYPE': 'DESCRIPTION', 'REVERSE_TYPE': 'DESCRIPTION_OF'}
        ]
        self.assertEqual(self._drain_relations(description_metadata), expected_relations)

    def test_serialize_column_description_metadata(self) -> None:
        description_metadata = DescriptionMetadata(
            text='test text 2',
            start_label='Column',
            start_key='test_start_key',
            description_key='customized_key'
        )

        expected_nodes = [
            {'description': 'test text 2', 'KEY': 'customized_key',
             'LABEL': 'Description', 'description_source': 'description'},
        ]
        self.assertEqual(self._drain_nodes(description_metadata), expected_nodes)

        expected_relations = [
            {'START_KEY': 'test_start_key', 'START_LABEL': 'Column', 'END_KEY': 'customized_key',
             'END_LABEL': 'Description', 'TYPE': 'DESCRIPTION', 'REVERSE_TYPE': 'DESCRIPTION_OF'}
        ]
        self.assertEqual(self._drain_relations(description_metadata), expected_relations)

    def test_serialize_column_with_source_description_metadata(self) -> None:
        description_metadata = DescriptionMetadata(
            text='test text 3',
            start_label='Column',
            start_key='test_start_key',
            description_key='customized_key',
            source='external',
        )

        expected_nodes = [
            {'description': 'test text 3', 'KEY': 'customized_key',
             'LABEL': 'Programmatic_Description', 'description_source': 'external'},
        ]
        self.assertEqual(self._drain_nodes(description_metadata), expected_nodes)

        expected_relations = [
            {'START_KEY': 'test_start_key', 'START_LABEL': 'Column', 'END_KEY': 'customized_key',
             'END_LABEL': 'Programmatic_Description', 'TYPE': 'DESCRIPTION', 'REVERSE_TYPE': 'DESCRIPTION_OF'}
        ]
        self.assertEqual(self._drain_relations(description_metadata), expected_relations)

0 comments on commit 7f095fb

Please sign in to comment.