# Metadata Manager

## Config

In [1]:
# Root metadata folder; handed to setup_manager as `root`.
metadata_folder = '../metadata'
# Repo location; handed to setup_manager as `repo` and used as the address
# of the `store` MetadataLocation.
metadata_repo_location = '../metadata/metadata'
# Stash folder; handed to setup_manager as `stash` and used as the address
# of the `stash` MetadataLocation.
metadata_stash_folder = '../metadata/stash'
# Passed to MetadataManager as `load_workspace`.
load_workspace_from = 'store' # 'stash' or 'store'

## Load and validate

In [2]:
# Setup and check
from rich import print, inspect

from metadata_lib.schemas import Namespace, Schema, System, Data_entity, Pipeline
from metadata_lib.manage import (
    MetadataLocation, MetadataStructure, MetadataManager
)
from metadata_lib.build import flatten_instance
import pendulum
from metadata_lib.validate import get_next_free_id
from metadata_lib.storage_adapters import LocalFilesystem

from deps.mm_setup import setup_manager

import pandas as pd
# Pandas settings
# Do not truncate cell contents
pd.set_option('display.max_colwidth', None)
# Do not truncate nr of columns shown
pd.set_option('display.max_columns', None)
# Do not truncate nr of rows
pd.set_option('display.max_rows', None)

# Check the data structures at the configured locations before loading
valid = setup_manager(
    root=metadata_folder,
    repo=metadata_repo_location,
    stash=metadata_stash_folder
)

# Two blank lines precede the status banner in either outcome
print()
print()

if not valid:
    print('[bold bright_red]** Invalid datastructures. Metadata Manager can not be loaded **[/bold bright_red]')
else:
    print('[bold bright_green]** Datastructures are valid. Loading Metadata Manager **[/bold bright_green]')
    # Store (repo) and stash use the same local-filesystem settings and only
    # differ in their address; the store is built first, then the stash.
    store, stash = (
        MetadataLocation(
            storage_type='localstorage',
            Storage_Adapter=LocalFilesystem,
            address=location_address,
            entities_to_load=None,
            is_git_repo=False
        )
        for location_address in (metadata_repo_location, metadata_stash_folder)
    )
    # `mm` is only defined when the structures are valid; every later cell
    # in this notebook relies on it.
    mm = MetadataManager(
        store=store,
        stash=stash,
        load_current=True,
        load_workspace=load_workspace_from
    )

***

## Save

### Stash

In [None]:
mm.stash_workspace()

### Store (Repo)

In [None]:
mm.store_workspace()

***

***

## ** Namespaces **

### View

In [None]:
mm.workspace.view['namespaces']

***

### Add New

#### - Create new namespace

In [None]:
# Build a new Namespace; its id is requested from get_next_free_id for the
# 'namespaces' collection of the current workspace.
# Take ONE timestamp: two separate pendulum.now() calls differ by a few
# microseconds, which makes a brand-new entity look already modified.
creation_time = pendulum.now(tz='Europe/Amsterdam')

new_namespace = Namespace(
    id=get_next_free_id(mm.workspace.by_id, 'namespaces'),
    unid=None,
    name='department_X',
    description='Mysterious department',
    created=creation_time,
    modified=creation_time
)
# Show the entity for inspection before it is added to the workspace
print(new_namespace)

##### -- Template

In [None]:
# Template: build a new Namespace; its id is requested from
# get_next_free_id for the 'namespaces' collection of the current workspace.
# Take ONE timestamp: two separate pendulum.now() calls differ by a few
# microseconds, which makes a brand-new entity look already modified.
creation_time = pendulum.now(tz='Europe/Amsterdam')

new_namespace = Namespace(
    id=get_next_free_id(mm.workspace.by_id, 'namespaces'),
    unid=None,
    name='department_X',
    description='Mysterious department',
    created=creation_time,
    modified=creation_time
)
# Show the entity for inspection before it is added to the workspace
print(new_namespace)

#### - Validate new namespace and add it to workspace

In [None]:
mm.add_new_entity(new_namespace)

#### - [Save](#Save)

***

### Edit existing

#### - Get the namespace you want to edit

In [None]:
namespace_UNID = 'UNID'

edited_namespace = mm.get_entity_by_unid(
    entity_type='namespaces',
    unid=namespace_UNID
)
print(edited_namespace)

##### -- Template

In [None]:
namespace_UNID = 'UNID'

edited_namespace = mm.get_entity_by_unid(
    entity_type='namespaces',
    unid=namespace_UNID
)
print(edited_namespace)

#### - Edit the namespace

In [None]:
edited_namespace.name = 'NEW NAME'
edited_namespace.description = 'NEW DESCRIPTION'

print(edited_namespace)

##### -- Template

In [None]:
edited_namespace.name = 'NEW NAME'
edited_namespace.description = 'NEW DESCRIPTION'

print(edited_namespace)

#### - Validate the namespace and add it to workspace structure

In [None]:
mm.update_entity(edited_namespace)

#### - [Save](#Save)

***

### Delete

#### - Specify UNID and inspect namespace

In [None]:
namespace_UNID_to_delete = 'UNID'

namespace_to_delete = mm.get_entity_by_unid(
    entity_type='namespaces',
    unid=namespace_UNID_to_delete
)
print(namespace_to_delete)

##### -- Template

In [None]:
namespace_UNID_to_delete = 'UNID'

namespace_to_delete = mm.get_entity_by_unid(
    entity_type='namespaces',
    unid=namespace_UNID_to_delete
)
print(namespace_to_delete)

#### - Validate removal of the namespace and remove it from workspace

In [None]:
report = mm.delete_entity(
    entity_type='namespaces',
    unid=namespace_UNID_to_delete
)
print(report)

##### -- Template

In [None]:
report = mm.delete_entity(
    entity_type='namespaces',
    unid=namespace_UNID_to_delete
)
print(report)

#### - [Save](#Save)

***

## ** Schemas **

### View

In [None]:
mm.workspace.view['schemas']

***

### Add New

#### - Create new schema

In [None]:
# Build a new Schema; its id is requested from get_next_free_id for the
# 'schemas' collection of the current workspace.
# Take ONE timestamp: two separate pendulum.now() calls differ by a few
# microseconds, which makes a brand-new entity look already modified.
creation_time = pendulum.now(tz='Europe/Amsterdam')

new_schema = Schema(
    id=get_next_free_id(mm.workspace.by_id, 'schemas'),
    unid=None,
    namespace='dataplatform',
    name='test',
    description=None,
    type='bigquery',
    version=1,
    entity_schema=[{'mode': 'REQUIRED', 'name': 'nummer', 'type': 'STRING'}],
    created=creation_time,
    modified=creation_time
)
# Show the entity for inspection before it is added to the workspace
print(new_schema)

##### -- Template

In [None]:
# Template: build a new Schema; its id is requested from get_next_free_id
# for the 'schemas' collection of the current workspace.
# Take ONE timestamp: two separate pendulum.now() calls differ by a few
# microseconds, which makes a brand-new entity look already modified.
creation_time = pendulum.now(tz='Europe/Amsterdam')

new_schema = Schema(
    id=get_next_free_id(mm.workspace.by_id, 'schemas'),
    unid=None,
    namespace='dataplatform',
    name='test',
    description=None,
    type='bigquery',
    version=1,
    entity_schema=[{'mode': 'REQUIRED', 'name': 'nummer', 'type': 'STRING'}],
    created=creation_time,
    modified=creation_time
)
# Show the entity for inspection before it is added to the workspace
print(new_schema)

#### - Validate new schema and add it to workspace

In [None]:
mm.add_new_entity(new_schema)

#### - [Save](#Save)

***

### Edit existing

#### - Get the schema to edit

In [None]:
schema_UNID = 'UNID'

edited_schema = mm.get_entity_by_unid(
    entity_type='schemas',
    unid=schema_UNID
)
print(edited_schema)

##### -- Template

In [None]:
schema_UNID = 'UNID'

edited_schema = mm.get_entity_by_unid(
    entity_type='schemas',
    unid=schema_UNID
)
print(edited_schema)

#### - Edit the schema

In [None]:
edited_schema.namespace = 'NEW NAMESPACE UNID'
edited_schema.name = 'NEW NAME'
edited_schema.description = 'NEW DESCRIPTION' # None
edited_schema.type = 'bigquery' # 'avro'
edited_schema.version = edited_schema.version + 1
edited_schema.entity_schema = [
    {'mode': 'REQUIRED', 'name': 'nummer', 'type': 'STRING'}
]

print(edited_schema)

##### -- Template

In [None]:
edited_schema.namespace = 'NEW NAMESPACE UNID'
edited_schema.name = 'NEW NAME'
edited_schema.description = 'NEW DESCRIPTION' # None
edited_schema.type = 'bigquery' # 'avro'
edited_schema.version = edited_schema.version + 1
edited_schema.entity_schema = [
    {'mode': 'REQUIRED', 'name': 'nummer', 'type': 'STRING'}
]

print(edited_schema)

#### - Validate the schema and add it to workspace structure

In [None]:
mm.update_entity(edited_schema)

#### - [Save](#Save)

***

### Delete

#### - Specify UNID and inspect schema

In [None]:
schema_UNID_to_delete = 'UNID'

schema_to_delete = mm.get_entity_by_unid(
    entity_type='schemas',
    unid=schema_UNID_to_delete
)
print(schema_to_delete)

##### -- Template

In [None]:
schema_UNID_to_delete = 'UNID'

schema_to_delete = mm.get_entity_by_unid(
    entity_type='schemas',
    unid=schema_UNID_to_delete
)
print(schema_to_delete)

#### - Validate removal of the schema and remove it from workspace

In [None]:
report = mm.delete_entity(
    entity_type='schemas',
    unid=schema_UNID_to_delete
)
print(report)

##### -- Template

In [None]:
report = mm.delete_entity(
    entity_type='schemas',
    unid=schema_UNID_to_delete
)
print(report)

#### - [Save](#Save)

***

## ** Systems **

### View

In [None]:
mm.workspace.view['systems']

***

### Add New

#### - Create new system

In [None]:
# Build a new System; its id is requested from get_next_free_id for the
# 'systems' collection of the current workspace.
# Take ONE timestamp: two separate pendulum.now() calls differ by a few
# microseconds, which makes a brand-new entity look already modified.
creation_time = pendulum.now(tz='Europe/Amsterdam')

new_system = System(
    id=get_next_free_id(mm.workspace.by_id, 'systems'),
    unid=None,
    namespace='dataplatform',
    name='super_system',
    description='It does everything',
    type='external',
    config={'url': 'www.supersystem.com'},
    created=creation_time,
    modified=creation_time
)
# Show the entity for inspection before it is added to the workspace
print(new_system)

##### -- Template

In [None]:
# Template: build a new System; its id is requested from get_next_free_id
# for the 'systems' collection of the current workspace.
# Take ONE timestamp: two separate pendulum.now() calls differ by a few
# microseconds, which makes a brand-new entity look already modified.
creation_time = pendulum.now(tz='Europe/Amsterdam')

new_system = System(
    id=get_next_free_id(mm.workspace.by_id, 'systems'),
    unid=None,
    namespace='dataplatform',
    name='super_system',
    description='It does everything',
    type='external',
    config={'url': 'www.supersystem.com'},
    created=creation_time,
    modified=creation_time
)
# Show the entity for inspection before it is added to the workspace
print(new_system)

#### - Validate new system and add it to workspace

In [None]:
mm.add_new_entity(new_system)

#### - [Save](#Save)

***

### Edit existing

#### - Get the system to edit

In [None]:
system_UNID = 'UNID'

edited_system = mm.get_entity_by_unid(
    entity_type='systems',
    unid=system_UNID
)
print(edited_system)

##### -- Template

In [None]:
system_UNID = 'UNID'

edited_system = mm.get_entity_by_unid(
    entity_type='systems',
    unid=system_UNID
)
print(edited_system)

#### - Edit the system

In [None]:
edited_system.namespace = 'NEW NAMESPACE UNID'
edited_system.name = 'NEW NAME'
edited_system.description = 'NEW DESCRIPTION' # None
edited_system.type = 'external' # 'internal', 'platform'
edited_system.config = {'key': 'value'}

print(edited_system)

##### -- Template

In [None]:
edited_system.namespace = 'NEW NAMESPACE UNID'
edited_system.name = 'NEW NAME'
edited_system.description = 'NEW DESCRIPTION' # None
edited_system.type = 'external' # 'internal', 'platform'
edited_system.config = {'key': 'value'}

print(edited_system)

#### - Validate the system and add it to workspace structure

In [None]:
mm.update_entity(edited_system)

#### - [Save](#Save)

***

### Delete

#### - Specify UNID and inspect system

In [None]:
system_UNID_to_delete = 'UNID'

system_to_delete = mm.get_entity_by_unid(
    entity_type='systems',
    unid=system_UNID_to_delete
)
print(system_to_delete)

##### -- Template

In [None]:
system_UNID_to_delete = 'UNID'

system_to_delete = mm.get_entity_by_unid(
    entity_type='systems',
    unid=system_UNID_to_delete
)
print(system_to_delete)

#### - Validate removal of the system and remove it from workspace

In [None]:
report = mm.delete_entity(
    entity_type='systems',
    unid=system_UNID_to_delete
)
print(report)

##### -- Template

In [None]:
report = mm.delete_entity(
    entity_type='systems',
    unid=system_UNID_to_delete
)
print(report)

#### - [Save](#Save)

***

## ** Data Entities **

### View

In [None]:
mm.workspace.view['data_entities']

***

### Add New

#### - Create new data entity

In [None]:
# Build a new Data_entity; its id is requested from get_next_free_id for the
# 'data_entities' collection of the current workspace.
# Take ONE timestamp: two separate pendulum.now() calls differ by a few
# microseconds, which makes a brand-new entity look already modified.
creation_time = pendulum.now(tz='Europe/Amsterdam')

new_data_entity = Data_entity(
    id=get_next_free_id(mm.workspace.by_id, 'data_entities'),
    unid=None,
    namespace='dataplatform',
    # NOTE(review): 'dataplatform.supersystem' does not match the example
    # system name 'super_system' used in the Systems section — confirm the
    # UNID of the system this entity should point to.
    system='dataplatform.supersystem',
    name='tickets',
    description='A nice api endpoint',
    type='datasource',
    interface='api_rest',
    entity_schema='test.1',
    checks=[],
    config={'table': 'tickets'},
    created=creation_time,
    modified=creation_time
)
# Show the entity for inspection before it is added to the workspace
print(new_data_entity)

##### -- Template

In [None]:
# Template: build a new Data_entity; its id is requested from
# get_next_free_id for the 'data_entities' collection of the workspace.
# Take ONE timestamp: two separate pendulum.now() calls differ by a few
# microseconds, which makes a brand-new entity look already modified.
creation_time = pendulum.now(tz='Europe/Amsterdam')

new_data_entity = Data_entity(
    id=get_next_free_id(mm.workspace.by_id, 'data_entities'),
    unid=None,
    namespace='dataplatform',
    # NOTE(review): 'dataplatform.supersystem' does not match the example
    # system name 'super_system' used in the Systems section — confirm the
    # UNID of the system this entity should point to.
    system='dataplatform.supersystem',
    name='tickets',
    description='A nice api endpoint',
    type='datasource',
    interface='api_rest',
    entity_schema='test.1',
    checks=[],
    config={'table': 'tickets'},
    created=creation_time,
    modified=creation_time
)
# Show the entity for inspection before it is added to the workspace
print(new_data_entity)

#### - Validate new data entity and add it to workspace

In [None]:
mm.add_new_entity(new_data_entity)

#### - [Save](#Save)

***

### Edit existing

#### - Get the data_entity to edit

In [None]:
data_entity_UNID = 'UNID'

edited_data_entity = mm.get_entity_by_unid(
    entity_type='data_entities',
    unid=data_entity_UNID
)
print(edited_data_entity)

##### -- Template

In [None]:
data_entity_UNID = 'UNID'

edited_data_entity = mm.get_entity_by_unid(
    entity_type='data_entities',
    unid=data_entity_UNID
)
print(edited_data_entity)

#### - Edit the data_entity

In [None]:
edited_data_entity.namespace = 'NEW NAMESPACE UNID'
edited_data_entity.system = 'NEW SYSTEM UNID'
edited_data_entity.name = 'NEW NAME'
edited_data_entity.description = 'NEW DESCRIPTION' # None
edited_data_entity.type = 'datasource'
edited_data_entity.interface = 'api_rest'
edited_data_entity.entity_schema = 'NEW SCHEMA UNID'
edited_data_entity.checks=[]
edited_data_entity.config={'key': 'value'} # None

print(edited_data_entity)

##### -- Template

In [None]:
edited_data_entity.namespace = 'NEW NAMESPACE UNID'
edited_data_entity.system = 'NEW SYSTEM UNID'
edited_data_entity.name = 'NEW NAME'
edited_data_entity.description = 'NEW DESCRIPTION' # None
edited_data_entity.type = 'datasource'
edited_data_entity.interface = 'api_rest'
edited_data_entity.entity_schema = 'NEW SCHEMA UNID'
edited_data_entity.checks=[]
edited_data_entity.config={'key': 'value'} # None

print(edited_data_entity)

#### - Validate the data_entity and add it to workspace structure

In [None]:
mm.update_entity(edited_data_entity)

#### - [Save](#Save)

***

### Delete

#### - Specify UNID and inspect data_entity

In [None]:
data_entity_UNID_to_delete = 'UNID'

data_entity_to_delete = mm.get_entity_by_unid(
    entity_type='data_entities',
    unid=data_entity_UNID_to_delete
)
print(data_entity_to_delete)

##### -- Template

In [None]:
data_entity_UNID_to_delete = 'UNID'

data_entity_to_delete = mm.get_entity_by_unid(
    entity_type='data_entities',
    unid=data_entity_UNID_to_delete
)
print(data_entity_to_delete)

#### - Validate removal of the data_entity and remove it from workspace

In [None]:
report = mm.delete_entity(
    entity_type='data_entities',
    unid=data_entity_UNID_to_delete
)
print(report)

##### -- Template

In [None]:
report = mm.delete_entity(
    entity_type='data_entities',
    unid=data_entity_UNID_to_delete
)
print(report)

#### - [Save](#Save)

***

## ** Pipelines **

### View

In [None]:
mm.workspace.view['pipelines']

***

### Add New

#### - Create new pipeline

In [None]:
# Build a new Pipeline; its id is requested from get_next_free_id for the
# 'pipelines' collection of the current workspace.
# Take ONE timestamp: two separate pendulum.now() calls differ by a few
# microseconds, which makes a brand-new entity look already modified.
creation_time = pendulum.now(tz='Europe/Amsterdam')

new_pipeline = Pipeline(
    id=get_next_free_id(mm.workspace.by_id, 'pipelines'),
    unid=None,
    namespace='dataplatform',
    name='testest',
    description='Ingest pipeline for supersystem',
    enabled=True,
    version=1,
    scope='single',
    type='ingest',
    velocity='batch',
    # One dict per input/output pairing; both sides are lists of UNIDs
    input_output=[
        {
            'input': ['dataplatform.dataplatform.hayabusa_sshfs.my_table.dataset'],
            'output': ['dataplatform.dataplatform.hayabusa_sshfs.your_table.dataset'],
        }
    ],
    config={'apiversion': 'beta'},
    created=creation_time,
    modified=creation_time
)
# Show the entity for inspection before it is added to the workspace
print(new_pipeline)

##### -- Template

In [None]:
# Template: build a new Pipeline; its id is requested from get_next_free_id
# for the 'pipelines' collection of the current workspace.
# Take ONE timestamp: two separate pendulum.now() calls differ by a few
# microseconds, which makes a brand-new entity look already modified.
creation_time = pendulum.now(tz='Europe/Amsterdam')

new_pipeline = Pipeline(
    id=get_next_free_id(mm.workspace.by_id, 'pipelines'),
    unid=None,
    namespace='dataplatform',
    name='testest',
    description='Ingest pipeline for supersystem',
    enabled=True,
    version=1,
    scope='single',
    type='ingest',
    velocity='batch',
    # One dict per input/output pairing; both sides are lists of UNIDs
    input_output=[
        {
            'input': ['dataplatform.dataplatform.hayabusa_sshfs.my_table.dataset'],
            'output': ['dataplatform.dataplatform.hayabusa_sshfs.your_table.dataset'],
        }
    ],
    config={'apiversion': 'beta'},
    created=creation_time,
    modified=creation_time
)
# Show the entity for inspection before it is added to the workspace
print(new_pipeline)

#### - Validate new pipeline and add it to workspace

In [None]:
mm.add_new_entity(new_pipeline)

#### - [Save](#Save)

***

### Edit existing

#### - Get the pipeline to edit

In [None]:
pipeline_UNID = 'UNID'

edited_pipeline = mm.get_entity_by_unid(
    entity_type='pipelines',
    unid=pipeline_UNID
)
print(edited_pipeline)

##### -- Template

In [None]:
pipeline_UNID = 'UNID'

edited_pipeline = mm.get_entity_by_unid(
    entity_type='pipelines',
    unid=pipeline_UNID
)
print(edited_pipeline)

#### - Edit the pipeline

In [None]:
# Overwrite the fields of the pipeline fetched in the previous step.
# Trailing comments list alternative values for the field on that line.
edited_pipeline.namespace = 'NEW NAMESPACE UNID'
edited_pipeline.name = 'NEW NAME'
edited_pipeline.description = 'NEW DESCRIPTION'  # None
edited_pipeline.enabled = True  # False
# Bump the version relative to the value the pipeline currently has
edited_pipeline.version = edited_pipeline.version + 1
edited_pipeline.scope = 'compound'  # 'single'
edited_pipeline.type = 'ingest'  # 'transform', 'delivery'
edited_pipeline.velocity = 'batch'  # 'streaming'
# Inputs and outputs are given as lists of UNIDs, matching the form used
# when a pipeline is created in this notebook.
# NOTE(review): confirm against the Pipeline schema whether bare strings
# are accepted as well.
edited_pipeline.input_output = [
    {'input': ['ENTITY_UNID_INPUT'], 'output': ['ENTITY_UNID_OUTPUT']}
]  # {'input': ['ENTITY_UNID_INPUT_1', 'ENTITY_UNID_INPUT_2'], 'output': ['ENTITY_UNID_OUTPUT_1', 'ENTITY_UNID_OUTPUT_2']}
edited_pipeline.config = {'key': 'value'}  # None

# Show the edited entity for inspection before it is validated
print(edited_pipeline)

##### -- Template

In [None]:
# Template: overwrite the fields of the pipeline fetched in the previous
# step. Trailing comments list alternative values for the field on that line.
edited_pipeline.namespace = 'NEW NAMESPACE UNID'
edited_pipeline.name = 'NEW NAME'
edited_pipeline.description = 'NEW DESCRIPTION'  # None
edited_pipeline.enabled = True  # False
# Bump the version relative to the value the pipeline currently has
edited_pipeline.version = edited_pipeline.version + 1
edited_pipeline.scope = 'compound'  # 'single'
edited_pipeline.type = 'ingest'  # 'transform', 'delivery'
edited_pipeline.velocity = 'batch'  # 'streaming'
# Inputs and outputs are given as lists of UNIDs, matching the form used
# when a pipeline is created in this notebook.
# NOTE(review): confirm against the Pipeline schema whether bare strings
# are accepted as well.
edited_pipeline.input_output = [
    {'input': ['ENTITY_UNID_INPUT'], 'output': ['ENTITY_UNID_OUTPUT']}
]  # {'input': ['ENTITY_UNID_INPUT_1', 'ENTITY_UNID_INPUT_2'], 'output': ['ENTITY_UNID_OUTPUT_1', 'ENTITY_UNID_OUTPUT_2']}
edited_pipeline.config = {'key': 'value'}  # None

# Show the edited entity for inspection before it is validated
print(edited_pipeline)

#### - Validate the pipeline and add it to workspace structure

In [None]:
mm.update_entity(edited_pipeline)

#### - [Save](#Save)

***

### Delete

#### - Specify UNID and inspect the pipeline

In [None]:
pipeline_UNID_to_delete = 'UNID'

pipeline_to_delete = mm.get_entity_by_unid(
    entity_type='pipelines',
    unid=pipeline_UNID_to_delete
)
print(pipeline_to_delete)

##### -- Template

In [None]:
pipeline_UNID_to_delete = 'UNID'

pipeline_to_delete = mm.get_entity_by_unid(
    entity_type='pipelines',
    unid=pipeline_UNID_to_delete
)
print(pipeline_to_delete)

#### - Validate removal of the pipeline and remove it from workspace

In [None]:
report = mm.delete_entity(
    entity_type='pipelines',
    unid=pipeline_UNID_to_delete
)
print(report)

##### -- Template

In [None]:
report = mm.delete_entity(
    entity_type='pipelines',
    unid=pipeline_UNID_to_delete
)
print(report)

#### - [Save](#Save)

***

## ** Config for DAGs and scripts **

#### - View pipelines

In [None]:
mm.workspace.view['pipelines']

#### - Complete DAG_CONFIG

In [None]:
print(mm.workspace.dag_config)

#### - Specific pipeline DAG_CONFIG

In [None]:
unid_to_view = 'dataplatform.example_pipeline.ingest.1'
print(mm.workspace.dag_config[unid_to_view])