Skip to content

Commit

Permalink
fix(ingest): fix metadata for custom python packages (#9391)
Browse files Browse the repository at this point in the history
  • Loading branch information
hsheth2 committed Dec 8, 2023
1 parent 0e40d38 commit d52f030
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 29 deletions.
17 changes: 13 additions & 4 deletions docs/modeling/extending-the-metadata-model.md
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ to deploy during development. This will allow Datahub to read and write your new
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

<Tabs>
<Tabs queryString="python-custom-models">
<TabItem value="local" label="Local CLI" default>

If you're purely using the custom models locally, you can use a local development-mode install of the DataHub CLI.
Expand All @@ -273,12 +273,21 @@ If you want to use your custom models beyond your local machine without forking
This package should be installed alongside the base `acryl-datahub` package, and its metadata models will take precedence over the default ones.

```bash
cd metadata-ingestion
../gradlew customPackageGenerate -Ppackage_name=my-company-datahub-models -Ppackage_version="0.0.1"
$ cd metadata-ingestion
$ ../gradlew customPackageGenerate -Ppackage_name=my-company-datahub-models -Ppackage_version="0.0.1"
<bunch of log lines>
Successfully built my-company-datahub-models-0.0.1.tar.gz and my_company_datahub_models-0.0.1-py3-none-any.whl

Generated package at custom-package/my-company-datahub-models
This package should be installed alongside the main acryl-datahub package.

Install the custom package locally with `pip install custom-package/my-company-datahub-models`
To enable others to use it, share the file at custom-package/my-company-datahub-models/dist/*.whl and have them install it with `pip install <wheel file>.whl`
Alternatively, publish it to PyPI with `twine upload custom-package/my-company-datahub-models/dist/*`
```

This will generate some Python build artifacts, which you can distribute within your team or publish to PyPI.
The command output will contain additional details and exact CLI commands you can use.
The command output contains additional details and exact CLI commands you can use.

</TabItem>
</Tabs>
Expand Down
29 changes: 4 additions & 25 deletions metadata-ingestion/scripts/avro_codegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,34 +252,12 @@ def annotate_aspects(aspects: List[dict], schema_class_file: Path) -> None:
schema_classes_lines = schema_class_file.read_text().splitlines()
line_lookup_table = {line: i for i, line in enumerate(schema_classes_lines)}

# Create the Aspect class.
# We ensure that it cannot be instantiated directly, as
# per https://stackoverflow.com/a/7989101/5004662.
# Import the _Aspect class.
schema_classes_lines[
line_lookup_table["__SCHEMAS: Dict[str, RecordSchema] = {}"]
] += """
class _Aspect(DictWrapper):
ASPECT_NAME: ClassVar[str] = None # type: ignore
ASPECT_TYPE: ClassVar[str] = "default"
ASPECT_INFO: ClassVar[dict] = None # type: ignore
def __init__(self):
if type(self) is _Aspect:
raise TypeError("_Aspect is an abstract class, and cannot be instantiated directly.")
super().__init__()
@classmethod
def get_aspect_name(cls) -> str:
return cls.ASPECT_NAME # type: ignore
@classmethod
def get_aspect_type(cls) -> str:
return cls.ASPECT_TYPE
@classmethod
def get_aspect_info(cls) -> dict:
return cls.ASPECT_INFO
from datahub._codegen.aspect import _Aspect
"""

for aspect in aspects:
Expand Down Expand Up @@ -776,6 +754,7 @@ def generate(
import importlib
from typing import TYPE_CHECKING
from datahub._codegen.aspect import _Aspect
from datahub.utilities.docs_build import IS_SPHINX_BUILD
from datahub.utilities._custom_package_loader import get_custom_models_package
Expand All @@ -785,7 +764,7 @@ def generate(
from ._schema_classes import *
# Required explicitly because __all__ doesn't include _ prefixed names.
from ._schema_classes import _Aspect, __SCHEMA_TYPES
from ._schema_classes import __SCHEMA_TYPES
if IS_SPHINX_BUILD:
# Set __module__ to the current module so that Sphinx will document the
Expand Down
7 changes: 7 additions & 0 deletions metadata-ingestion/scripts/custom_package_codegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ def generate(
"""
)

(src_path / "py.typed").write_text("")

(package_path / "setup.py").write_text(
f"""{autogen_header}
from setuptools import setup
Expand All @@ -87,6 +89,11 @@ def generate(
"avro-gen3=={_avrogen_version}",
"acryl-datahub",
],
package_data={{
"{python_package_name}": ["py.typed"],
"{python_package_name}.models": ["schema.avsc"],
"{python_package_name}.models.schemas": ["*.avsc"],
}},
entry_points={{
"datahub.custom_packages": [
"models={python_package_name}.models.schema_classes",
Expand Down
Empty file.
36 changes: 36 additions & 0 deletions metadata-ingestion/src/datahub/_codegen/aspect.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from typing import ClassVar

from avrogen.dict_wrapper import DictWrapper


class _Aspect(DictWrapper):
    """Abstract parent class shared by every aspect type.

    Every codegened type derives from DictWrapper, whether directly or
    through an intermediate class. Types that represent aspects derive
    directly from _Aspect.
    """

    # Class-level metadata. The None values are placeholders; presumably the
    # generated subclasses override them -- confirm against the codegen script.
    ASPECT_NAME: ClassVar[str] = None  # type: ignore
    ASPECT_TYPE: ClassVar[str] = "default"
    ASPECT_INFO: ClassVar[dict] = None  # type: ignore

    def __init__(self):
        """Initialize the wrapper, refusing direct instantiation of _Aspect.

        Raises:
            TypeError: if the object being created is exactly an _Aspect
                rather than an instance of a subclass.
        """
        # Exact-type comparison (not isinstance) so that subclasses remain
        # instantiable; see https://stackoverflow.com/a/7989101/5004662.
        is_base_class = type(self) is _Aspect
        if is_base_class:
            raise TypeError(
                "_Aspect is an abstract class, and cannot be instantiated directly."
            )

        super().__init__()

    @classmethod
    def get_aspect_name(cls) -> str:
        """Return the class-level ASPECT_NAME."""
        return cls.ASPECT_NAME  # type: ignore

    @classmethod
    def get_aspect_type(cls) -> str:
        """Return the class-level ASPECT_TYPE ("default" unless overridden)."""
        return cls.ASPECT_TYPE

    @classmethod
    def get_aspect_info(cls) -> dict:
        """Return the class-level ASPECT_INFO dict."""
        return cls.ASPECT_INFO

0 comments on commit d52f030

Please sign in to comment.