In [30]:
import os
import time
from dotenv import load_dotenv
from typing import List
from datahub.emitter.mcp import MetadataChangeProposalWrapper
import datahub.metadata.schema_classes as models
import datahub.emitter.mce_builder as builder
from datahub.emitter.rest_emitter import DatahubRestEmitter
# Pull settings from a local .env file into the process environment.
# (Copy .env_example to a file called .env and put your own values in it.)
load_dotenv()

# Connection settings; each one falls back to the default shown when the
# environment variable is not set.
# NOTE: `datashub_gms_server` keeps its existing spelling because the emitter
# cell refers to it by this name.
datashub_gms_server = os.environ.get('DATAHUB_GMS_SERVER', '')
datahub_token = os.environ.get('DATAHUB_TOKEN', '')
datahub_actor = os.environ.get('DATAHUB_ACTOR', 'urn:li:corpuser:admin')

# Write everything to DEV to keep PROD clean until you know what you are doing.
datahub_env = 'DEV'

### Create emitter

We must first create a `DatahubRestEmitter` object, which we will use to emit our `MetadataChangeProposalWrapper` change proposals.

In [31]:
# Build the REST emitter that the following cells call `emit_mcp` on.
# The configured actor URN is sent along in the 'X-DataHub-Actor' header.
emitter = DatahubRestEmitter(
    gms_server=datashub_gms_server,
    token=datahub_token,
    extra_headers={
        'X-DataHub-Actor': datahub_actor,
    },
)

### Create some tags

Let's define a dictionary of tags with some keys and values defined for each tag.

In [32]:
# Tag name -> attributes of that tag (currently just a description).
tags = {
    'healthy': {'description': 'This resource is healthy'},
    'failing': {'description': 'This resource is failing'},
    'production': {'description': 'This resource is considered production grade'},
    'dev': {'description': 'This resource is considered development grade'},
}

# Emit one `tagProperties` aspect per tag.
for tag, tag_attrs in tags.items():

    tag_properties = models.TagPropertiesClass(
        name=tag,
        description=tag_attrs.get('description'),
    )

    mcpw = MetadataChangeProposalWrapper(
        entityType="tag",
        changeType=models.ChangeTypeClass.UPSERT,
        entityUrn=builder.make_tag_urn(tag),
        aspectName="tagProperties",
        aspect=tag_properties,
    )
    emitter.emit_mcp(mcp=mcpw)

![tags](https://raw.githubusercontent.com/andrewm4894/learn-datahub/main/images/tags.png)

### Create some glossary terms

Let's create some glossary terms we can associate with various entities.

In [33]:
# create some glossary terms

# Term name -> its definition and source.
# `termSource` is expected to be 'INTERNAL' or 'EXTERNAL'; the previous value
# 'INTERNEAL' was a misspelling of 'INTERNAL'.
glossary_terms = {
    'active user': {'definition': 'A user who has logged in in last 30d.', 'source': 'INTERNAL'},
    'inactive user': {'definition': 'A user who has not logged in in last 90d.', 'source': 'INTERNAL'},
}

# create each term by emitting its `glossaryTermInfo` aspect
for glossary_term in glossary_terms:

    mcpw = MetadataChangeProposalWrapper(
        entityType="glossaryTerm",
        changeType=models.ChangeTypeClass.UPSERT,
        # the term name is used verbatim as the URN id; later cells build the
        # same URN string when attaching terms to entities
        entityUrn=f'urn:li:glossaryTerm:{glossary_term}',
        aspectName="glossaryTermInfo",
        aspect=models.GlossaryTermInfoClass(
            definition=glossary_terms[glossary_term].get('definition'),
            termSource=glossary_terms[glossary_term].get('source')
        )
    )
    emitter.emit_mcp(mcp=mcpw)

![glossary_terms](https://raw.githubusercontent.com/andrewm4894/learn-datahub/main/images/glossary_terms.png)

### Create some users

In [34]:
# User name -> profile attributes for the `corpUserInfo` aspect.
users = {
    'Joe Bloggs': {'display_name': 'Joe Bloggs', 'email': 'joe.bloggs@fake.com', 'active': True},
    'Dummy User': {'display_name': 'Dummy User', 'email': 'dummy.user@fake.com', 'active': True},
}

# Emit one `corpUserInfo` aspect per user; the dictionary key is the username
# the user URN is built from.
for user, user_attrs in users.items():

    user_info = models.CorpUserInfoClass(
        displayName=user_attrs.get('display_name'),
        email=user_attrs.get('email'),
        active=user_attrs.get('active'),
    )

    mcpw = MetadataChangeProposalWrapper(
        entityType="corpUser",
        changeType=models.ChangeTypeClass.UPSERT,
        entityUrn=builder.make_user_urn(username=user),
        aspectName="corpUserInfo",
        aspect=user_info,
    )
    emitter.emit_mcp(mcp=mcpw)

![user](https://raw.githubusercontent.com/andrewm4894/learn-datahub/main/images/user.png)

### Create some datasets

Let's create 3 datasets that we will manually string together via upstream lineages later in the notebook.

In [35]:
# create some datasets
dataset_platform = 'bigquery'

# note: only 'description' and 'url' are emitted in this cell. The other
# attributes ('tags', 'owners', 'glossary terms', 'properties',
# 'upstream datasets', 'links') are read by later cells.
# The keys of 'links' become the link descriptions, so the 'dataub' typo in
# those labels has been corrected to 'datahub'.

datasets = {
    'project_a.dataset_a.table_1': {
        'description': 'my great dataset 1',
        'url': 'https://netdata.cloud/',
        'platform': dataset_platform,
        'env': datahub_env,
        'tags': ['healthy', 'production'],
        'owners': ['Joe Bloggs', 'Dummy User'],
        'glossary terms': ['active user'],
        'properties': {'foo': 'bar', 'key': 'value'},
        'upstream datasets': [],
        'links': {'datahub': 'https://datahubproject.io/', 'datahub demo': 'https://demo.datahubproject.io/'}
    },
    'project_a.dataset_a.table_2': {
        'description': 'my great dataset 2',
        'url': 'https://netdata.cloud/',
        'platform': dataset_platform,
        'env': datahub_env,
        'tags': ['failing', 'dev'],
        'owners': ['Dummy User'],
        'glossary terms': ['active user'],
        'properties': {'foo': 'bar', 'key': 'value'},
        'upstream datasets': ['project_a.dataset_a.table_1'],
        'links': {'datahub': 'https://datahubproject.io/', 'datahub demo': 'https://demo.datahubproject.io/'}
    },
    'project_a.dataset_a.table_3': {
        'description': 'my great dataset 3',
        'url': 'https://netdata.cloud/',
        'platform': dataset_platform,
        'env': datahub_env,
        'tags': ['failing', 'production'],
        'owners': ['Joe Bloggs'],
        'glossary terms': ['inactive user'],
        'properties': {'foo': 'bar', 'key': 'value'},
        'upstream datasets': ['project_a.dataset_a.table_2'],
        'links': {'datahub': 'https://datahubproject.io/', 'datahub demo': 'https://demo.datahubproject.io/'}
    },
}

# make each dataset by emitting its `datasetProperties` aspect
for dataset in datasets:

    mcpw = MetadataChangeProposalWrapper(
        entityType="dataset",
        changeType=models.ChangeTypeClass.UPSERT,
        entityUrn=builder.make_dataset_urn(
            platform=datasets[dataset].get('platform'),
            name=dataset,
            env=datasets[dataset].get('env')
        ),
        aspectName="datasetProperties",
        aspect=models.DatasetPropertiesClass(
            description=datasets[dataset].get('description'),
            externalUrl=datasets[dataset].get('url')
        )
    )
    emitter.emit_mcp(mcp=mcpw)

![dataset](https://raw.githubusercontent.com/andrewm4894/learn-datahub/main/images/dataset.png)

### Create some charts

In [36]:
# create some charts
chart_platform = 'datastudio'

# note: only 'title', 'description' and 'inputs' are emitted in this cell.
# 'tags', 'owners' and 'glossary terms' are read by later cells.

charts = {
    'chart_1': {
        'title': 'chart 1',
        'description': 'lovely chart 1',
        'url': 'https://netdata.cloud/',
        'platform': chart_platform,
        'tags': ['healthy', 'production'],
        'owners': ['Joe Bloggs', 'Dummy User'],
        'glossary terms': ['active user'],
        'properties': {'foo': 'bar', 'key': 'value'},
        'inputs': ['project_a.dataset_a.table_1']
    },
    'chart_2': {
        'title': 'chart 2',
        'description': 'my great chart 2',
        'url': 'https://netdata.cloud/',
        'platform': chart_platform,
        'tags': ['failing', 'dev'],
        'owners': ['Dummy User'],
        'glossary terms': ['active user'],
        'properties': {'foo': 'bar', 'key': 'value'},
        'inputs': ['project_a.dataset_a.table_2']
    },
    'chart_3': {
        'title': 'chart 3',
        'description': 'my great chart 3',
        'url': 'https://netdata.cloud/',
        'platform': chart_platform,
        'tags': ['failing', 'production'],
        'owners': ['Joe Bloggs'],
        'glossary terms': ['inactive user'],
        'properties': {'foo': 'bar', 'key': 'value'},
        'inputs': ['project_a.dataset_a.table_3']
    },
}

# make each chart by emitting its `chartInfo` aspect
for chart in charts:

    # DataHub audit stamps are expressed in milliseconds since the epoch,
    # so convert from the seconds returned by time.time().
    now_ms = int(time.time() * 1000)

    mcpw = MetadataChangeProposalWrapper(
        entityType="chart",
        changeType=models.ChangeTypeClass.UPSERT,
        entityUrn=builder.make_chart_urn(
            platform=charts[chart].get('platform'),
            name=chart
        ),
        aspectName="chartInfo",
        aspect=models.ChartInfoClass(
            title=charts[chart].get('title'),
            description=charts[chart].get('description'),
            lastModified=models.ChangeAuditStampsClass(
                created=models.AuditStampClass(
                    time=now_ms,
                    actor=datahub_actor
                )
            ),
            # each input names a key of `datasets`; its platform and env are
            # looked up there so the URN matches the dataset created earlier
            # (`input_name` rather than `input`, to avoid shadowing the builtin)
            inputs=[
                builder.make_dataset_urn(
                    platform=datasets[input_name].get('platform'),
                    name=input_name,
                    env=datasets[input_name].get('env')
                )
                for input_name in charts[chart].get('inputs', [])
            ]
        )
    )
    emitter.emit_mcp(mcp=mcpw)

![charts](https://raw.githubusercontent.com/andrewm4894/learn-datahub/main/images/charts.png)

![dataset_chart_dashboard_lineage](https://raw.githubusercontent.com/andrewm4894/learn-datahub/main/images/dataset_chart_dashboard_lineage.png)

### Create some dashboards

In [37]:
# create some dashboards
dashboard_platform = 'datastudio'

# note: only 'title', 'description' and 'charts' are emitted in this cell.
# 'tags', 'owners' and 'glossary terms' are read by later cells.

dashboards = {
    'dashboard_1': {
        'title': 'dashboard 1',
        'description': 'lovely dashboard 1',
        'charts': ['chart_1'],
        'url': 'https://netdata.cloud/',
        'platform': dashboard_platform,
        'tags': ['healthy', 'production'],
        'owners': ['Joe Bloggs', 'Dummy User'],
        'glossary terms': ['active user'],
        'properties': {'foo': 'bar', 'key': 'value'}
    },
    'dashboard_2': {
        'title': 'dashboard 2',
        'description': 'my great dashboard 2',
        'charts': ['chart_2'],
        'url': 'https://netdata.cloud/',
        'platform': dashboard_platform,
        'tags': ['failing', 'dev'],
        'owners': ['Dummy User'],
        'glossary terms': ['active user'],
        'properties': {'foo': 'bar', 'key': 'value'}
    },
    'dashboard_3': {
        'title': 'dashboard 3',
        'description': 'my great dashboard 3',
        'charts': ['chart_3'],
        'url': 'https://netdata.cloud/',
        'platform': dashboard_platform,
        'tags': ['failing', 'production'],
        'owners': ['Joe Bloggs'],
        'glossary terms': ['inactive user'],
        'properties': {'foo': 'bar', 'key': 'value'}
    },
}

# make each dashboard by emitting its `dashboardInfo` aspect
for dashboard in dashboards:

    # DataHub audit stamps are expressed in milliseconds since the epoch,
    # so convert from the seconds returned by time.time().
    now_ms = int(time.time() * 1000)

    mcpw = MetadataChangeProposalWrapper(
        entityType="dashboard",
        changeType=models.ChangeTypeClass.UPSERT,
        # the dashboard's own platform is used for its URN, matching how the
        # later tag/owner/glossary cells address the same dashboard
        entityUrn=builder.make_dashboard_urn(
            platform=dashboards[dashboard].get('platform'),
            name=dashboard
        ),
        aspectName="dashboardInfo",
        aspect=models.DashboardInfoClass(
            title=dashboards[dashboard].get('title'),
            description=dashboards[dashboard].get('description'),
            # each entry names a key of `charts`; its platform is looked up
            # there so the URN matches the chart created earlier
            charts=[
                builder.make_chart_urn(
                    platform=charts[chart_name].get('platform'),
                    name=chart_name
                )
                for chart_name in dashboards[dashboard].get('charts', [])
            ],
            lastModified=models.ChangeAuditStampsClass(
                created=models.AuditStampClass(
                    time=now_ms,
                    actor=datahub_actor
                )
            )
        )
    )
    emitter.emit_mcp(mcp=mcpw)

![dashboards](https://raw.githubusercontent.com/andrewm4894/learn-datahub/main/images/dashboards.png)

### Add tags to datasets

Let's use the tags we defined for each dataset.

In [38]:
# Attach the tags listed under each dataset's 'tags' key as a `globalTags` aspect.
for dataset in datasets:

    dataset_urn = builder.make_dataset_urn(
        platform=datasets[dataset].get('platform'),
        name=dataset,
        env=datasets[dataset].get('env'),
    )

    # one association per tag name; a missing 'tags' key yields an empty list
    tag_associations = [
        models.TagAssociationClass(tag=builder.make_tag_urn(tag))
        for tag in datasets[dataset].get('tags', [])
    ]

    mcpw = MetadataChangeProposalWrapper(
        entityType="dataset",
        changeType=models.ChangeTypeClass.UPSERT,
        entityUrn=dataset_urn,
        aspectName="globalTags",
        aspect=models.GlobalTagsClass(tags=tag_associations),
    )
    emitter.emit_mcp(mcp=mcpw)

### Add tags to charts

In [39]:
# Attach the tags listed under each chart's 'tags' key as a `globalTags` aspect.
for chart in charts:

    chart_urn = builder.make_chart_urn(
        platform=charts[chart].get('platform'),
        name=chart,
    )

    # one association per tag name; a missing 'tags' key yields an empty list
    tag_associations = [
        models.TagAssociationClass(tag=builder.make_tag_urn(tag))
        for tag in charts[chart].get('tags', [])
    ]

    mcpw = MetadataChangeProposalWrapper(
        entityType="chart",
        changeType=models.ChangeTypeClass.UPSERT,
        entityUrn=chart_urn,
        aspectName="globalTags",
        aspect=models.GlobalTagsClass(tags=tag_associations),
    )
    emitter.emit_mcp(mcp=mcpw)

### Add tags to dashboards

In [40]:
# Attach the tags listed under each dashboard's 'tags' key as a `globalTags` aspect.
for dashboard in dashboards:

    dashboard_urn = builder.make_dashboard_urn(
        platform=dashboards[dashboard].get('platform'),
        name=dashboard,
    )

    # one association per tag name; a missing 'tags' key yields an empty list
    tag_associations = [
        models.TagAssociationClass(tag=builder.make_tag_urn(tag))
        for tag in dashboards[dashboard].get('tags', [])
    ]

    mcpw = MetadataChangeProposalWrapper(
        entityType="dashboard",
        changeType=models.ChangeTypeClass.UPSERT,
        entityUrn=dashboard_urn,
        aspectName="globalTags",
        aspect=models.GlobalTagsClass(tags=tag_associations),
    )
    emitter.emit_mcp(mcp=mcpw)

![dataset_tags](https://raw.githubusercontent.com/andrewm4894/learn-datahub/main/images/dataset_tags.png)

### Add owners to datasets

In [41]:
# Record the users listed under each dataset's 'owners' key in an `ownership` aspect.
for dataset in datasets:

    dataset_urn = builder.make_dataset_urn(
        platform=datasets[dataset].get('platform'),
        name=dataset,
        env=datasets[dataset].get('env'),
    )

    # every listed user is registered with the 'DATAOWNER' ownership type
    dataset_owners = [
        models.OwnerClass(owner=builder.make_user_urn(owner), type='DATAOWNER')
        for owner in datasets[dataset].get('owners', [])
    ]

    mcpw = MetadataChangeProposalWrapper(
        entityType="dataset",
        changeType=models.ChangeTypeClass.UPSERT,
        entityUrn=dataset_urn,
        aspectName="ownership",
        aspect=models.OwnershipClass(owners=dataset_owners),
    )
    emitter.emit_mcp(mcp=mcpw)

### Add owners to charts

In [42]:
# Record the users listed under each chart's 'owners' key in an `ownership` aspect.
for chart in charts:

    chart_urn = builder.make_chart_urn(
        platform=charts[chart].get('platform'),
        name=chart,
    )

    # every listed user is registered with the 'DATAOWNER' ownership type
    chart_owners = [
        models.OwnerClass(owner=builder.make_user_urn(owner), type='DATAOWNER')
        for owner in charts[chart].get('owners', [])
    ]

    mcpw = MetadataChangeProposalWrapper(
        entityType="chart",
        changeType=models.ChangeTypeClass.UPSERT,
        entityUrn=chart_urn,
        aspectName="ownership",
        aspect=models.OwnershipClass(owners=chart_owners),
    )
    emitter.emit_mcp(mcp=mcpw)

### Add owners to dashboards

In [43]:
# for each dashboard
for dashboard in dashboards:

    # add owners
    mcpw = MetadataChangeProposalWrapper(
        "dashboard",
        models.ChangeTypeClass.UPSERT,
        entityUrn=builder.make_dashboard_urn(
            platform=dashboards[dashboard].get('platform'), 
            name=dashboard
            ),
        aspectName="ownership",
        aspect=models.OwnershipClass(
            owners=[
                models.OwnerClass(builder.make_user_urn(owner), type='DATAOWNER') 
                for owner in dashboards[dashboard].get('owners', [])
                ]
            )
    )
    emitter.emit_mcp(mcp=mcpw)

![dataset_owners](https://raw.githubusercontent.com/andrewm4894/learn-datahub/main/images/dataset_owners.png)

### Add glossary terms to datasets

In [44]:
# add glossary terms for each dataset
for dataset in datasets:

    # DataHub audit stamps are expressed in milliseconds since the epoch,
    # so convert from the seconds returned by time.time().
    now_ms = int(time.time() * 1000)

    # add glossary terms listed under the dataset's 'glossary terms' key
    mcpw = MetadataChangeProposalWrapper(
        entityType="dataset",
        changeType=models.ChangeTypeClass.UPSERT,
        entityUrn=builder.make_dataset_urn(
            platform=datasets[dataset].get('platform'),
            name=dataset,
            env=datasets[dataset].get('env')
        ),
        aspectName="glossaryTerms",
        aspect=models.GlossaryTermsClass(
            # the URN is built from the term name exactly as in the cell that
            # created the glossary terms
            terms=[
                models.GlossaryTermAssociationClass(urn=f'urn:li:glossaryTerm:{term}')
                for term in datasets[dataset].get('glossary terms', [])
            ],
            auditStamp=models.AuditStampClass(time=now_ms, actor=datahub_actor)
        )
    )
    emitter.emit_mcp(mcp=mcpw)


### Add glossary terms to charts

In [45]:
# add glossary terms for each chart
for chart in charts:

    # DataHub audit stamps are expressed in milliseconds since the epoch,
    # so convert from the seconds returned by time.time().
    now_ms = int(time.time() * 1000)

    # add glossary terms listed under the chart's 'glossary terms' key
    mcpw = MetadataChangeProposalWrapper(
        entityType="chart",
        changeType=models.ChangeTypeClass.UPSERT,
        entityUrn=builder.make_chart_urn(
            platform=charts[chart].get('platform'),
            name=chart
        ),
        aspectName="glossaryTerms",
        aspect=models.GlossaryTermsClass(
            # the URN is built from the term name exactly as in the cell that
            # created the glossary terms
            terms=[
                models.GlossaryTermAssociationClass(urn=f'urn:li:glossaryTerm:{term}')
                for term in charts[chart].get('glossary terms', [])
            ],
            auditStamp=models.AuditStampClass(time=now_ms, actor=datahub_actor)
        )
    )
    emitter.emit_mcp(mcp=mcpw)

### Add glossary terms to dashboards

In [46]:
# add glossary terms for each dashboard
for dashboard in dashboards:

    # DataHub audit stamps are expressed in milliseconds since the epoch,
    # so convert from the seconds returned by time.time().
    now_ms = int(time.time() * 1000)

    # add glossary terms listed under the dashboard's 'glossary terms' key
    mcpw = MetadataChangeProposalWrapper(
        entityType="dashboard",
        changeType=models.ChangeTypeClass.UPSERT,
        entityUrn=builder.make_dashboard_urn(
            platform=dashboards[dashboard].get('platform'),
            name=dashboard
        ),
        aspectName="glossaryTerms",
        aspect=models.GlossaryTermsClass(
            # the URN is built from the term name exactly as in the cell that
            # created the glossary terms
            terms=[
                models.GlossaryTermAssociationClass(urn=f'urn:li:glossaryTerm:{term}')
                for term in dashboards[dashboard].get('glossary terms', [])
            ],
            auditStamp=models.AuditStampClass(time=now_ms, actor=datahub_actor)
        )
    )
    emitter.emit_mcp(mcp=mcpw)

![dataset_glossary_terms](https://raw.githubusercontent.com/andrewm4894/learn-datahub/main/images/dataset_glossary_terms.png)

### Add properties to datasets

In [47]:
# for each dataset
for dataset in datasets:

    # add the custom properties
    # An UPSERT replaces the whole `datasetProperties` aspect, so the
    # description and external URL written when the dataset was created are
    # sent again here; otherwise this emit would wipe them.
    mcpw = MetadataChangeProposalWrapper(
        entityType="dataset",
        changeType=models.ChangeTypeClass.UPSERT,
        entityUrn=builder.make_dataset_urn(
            platform=datasets[dataset].get('platform'),
            name=dataset,
            env=datasets[dataset].get('env')
        ),
        aspectName="datasetProperties",
        aspect=models.DatasetPropertiesClass(
            description=datasets[dataset].get('description'),
            externalUrl=datasets[dataset].get('url'),
            customProperties=datasets[dataset].get('properties', {})
        )
    )
    emitter.emit_mcp(mcp=mcpw)

![dataset_properties](https://raw.githubusercontent.com/andrewm4894/learn-datahub/main/images/dataset_properties.png)

### Add upstream lineages to datasets

In [48]:
# Emit an `upstreamLineage` aspect for every dataset, built from the names
# listed under its 'upstream datasets' key (an empty list when there are none).
for dataset in datasets:

    downstream_urn = builder.make_dataset_urn(
        platform=datasets[dataset].get('platform'),
        name=dataset,
        env=datasets[dataset].get('env'),
    )

    # each upstream name is itself a key of `datasets`; its own platform and
    # env are looked up there to build the upstream URN
    upstream_links = [
        models.UpstreamClass(
            dataset=builder.make_dataset_urn(
                platform=datasets[upstream_name].get('platform'),
                name=upstream_name,
                env=datasets[upstream_name].get('env'),
            ),
            type=models.DatasetLineageTypeClass.TRANSFORMED,
        )
        for upstream_name in datasets[dataset].get('upstream datasets', [])
    ]

    mcpw = MetadataChangeProposalWrapper(
        entityType="dataset",
        changeType=models.ChangeTypeClass.UPSERT,
        entityUrn=downstream_urn,
        aspectName="upstreamLineage",
        aspect=models.UpstreamLineageClass(upstreams=upstream_links),
    )
    emitter.emit_mcp(mcp=mcpw)

![dataset_lineage](https://raw.githubusercontent.com/andrewm4894/learn-datahub/main/images/dataset_lineage.png)

### Add links to datasets

In [49]:
# Attach the links listed under each dataset's 'links' key as an
# `institutionalMemory` aspect.
for dataset in datasets:

    dataset_urn = builder.make_dataset_urn(
        platform=datasets[dataset].get('platform'),
        name=dataset,
        env=datasets[dataset].get('env'),
    )

    # 'links' maps a label to a URL; the label becomes the link description.
    # NOTE(review): createStamp uses time=0 rather than the current time -
    # confirm this is intentional.
    link_elements = [
        models.InstitutionalMemoryMetadataClass(
            url=link_url,
            description=link,
            createStamp=models.AuditStampClass(time=0, actor=datahub_actor),
        )
        for link, link_url in datasets[dataset].get('links', {}).items()
    ]

    mcpw = MetadataChangeProposalWrapper(
        entityType="dataset",
        changeType=models.ChangeTypeClass.UPSERT,
        entityUrn=dataset_urn,
        aspectName="institutionalMemory",
        aspect=models.InstitutionalMemoryClass(elements=link_elements),
    )
    emitter.emit_mcp(mcp=mcpw)

![dataset_links](https://raw.githubusercontent.com/andrewm4894/learn-datahub/main/images/dataset_links.png)