# Defining Custom Types in Microsoft Purview

In [None]:
%pip install msal

## Authentication using MSAL

To authenticate with the Microsoft Authentication Library (MSAL) using the device code flow, you need the tenant ID of the Purview instance and a client ID. When `initiate_device_flow` is called, the returned flow (which includes the `user_code`) is printed; follow the URL it contains and enter the code to authenticate.

In [None]:
from msal import PublicClientApplication, SerializableTokenCache
import requests

# Tenant ID for the Purview instance; it is also used to build the API base URL
# in the request wrapper further down.
tenant_id = "<tenant_id>"

# Token cache handed to the MSAL application. It is never serialized in this
# notebook, so nothing is persisted by the code shown here.
cache = SerializableTokenCache()

app = PublicClientApplication(
    client_id = "<client_id>",
    authority = f"https://login.microsoftonline.com/{tenant_id}",
    token_cache = cache
)

flow = app.initiate_device_flow(["<scope_to_authenticate>"])

# MSAL reports a failed device-flow initiation through the returned dict
# (no "user_code" key) instead of raising, so fail fast with the details here
# rather than printing an error payload as if it were a valid flow.
if "user_code" not in flow:
    raise ValueError(f"Failed to create device flow: {flow}")

print(flow) # Print to display code for authentication

### Create a bearer token

In [None]:
# Blocks until the user completes the device-code sign-in started above.
token = app.acquire_token_by_device_flow(flow)

# MSAL returns an error payload (without "access_token") instead of raising
# when sign-in fails or times out. Surface that here, where the cause is
# obvious, rather than as a KeyError on the first API call.
if "access_token" not in token:
    raise RuntimeError(
        "Device flow authentication failed: "
        f"{token.get('error')}: {token.get('error_description')}"
    )


def get_auth_header(scope = None):
    """Build the HTTP ``Authorization`` header from the module-level ``token``.

    Args:
        scope: Unused. Kept so existing calls that pass a scope keep working.

    Returns:
        dict: ``{"Authorization": "Bearer <access token>"}``.
    """
    return {"Authorization": f"Bearer {token['access_token']}"}

### Wrapper function for Purview API requests

In [None]:
def request(
    method,
    api,
    body=None,
    json=None,
    api_version="api-version=2023-09-01",
    timeout=None
):
    """Send an authenticated request to the Purview API.

    The URL is built from the module-level ``tenant_id``, the ``api`` path and
    the ``api_version`` query fragment. The bearer token header comes from
    ``get_auth_header()``.

    Args:
        method: HTTP verb, e.g. ``"GET"``, ``"POST"`` or ``"DELETE"``.
        api: Path (optionally with its own query string) appended to the base URL.
        body: Raw request body, forwarded to ``requests`` as ``data``.
        json: JSON-serializable payload, forwarded to ``requests`` as ``json``.
        api_version: Query-string fragment appended to every request.
        timeout: Optional timeout forwarded to ``requests``. The default
            ``None`` keeps the previous behaviour of waiting indefinitely.

    Returns:
        requests.Response: The response object; the status is not checked here.
    """
    base = f"https://{tenant_id}-api.purview-service.microsoft.com"
    # Use "&" when the path already carries a query string, "?" otherwise.
    separator = "&" if "?" in api else "?"
    return requests.request(
        method,
        f"{base}{api}{separator}{api_version}",
        data=body,
        json=json,
        headers=get_auth_header(),
        timeout=timeout,
    )

## Defining custom types

In this notebook we will create custom types to model a table with a tabular schema. This schema will contain a set of column entities linked to the table.

### Useful properties when working with entities and relationships

A list of some of the most relevant properties for this notebook.

* attributeDefs - list of [AtlasAttributeDef](https://learn.microsoft.com/en-us/rest/api/purview/datamapdataplane/type/bulk-create?view=rest-purview-datamapdataplane-2023-09-01&tabs=HTTP#atlasattributedef) objects defining type attributes
* category - the type of object being defined e.g. 'ENTITY' or 'RELATIONSHIP'
* guid - unique identifier
* options - additional type options
* typeName - the type of the object being created e.g. databricks_table
* subTypes - list of types which inherit from this
* superTypes - list of types from which this type inherits e.g. Asset, Purview_Table

Other docs links:
* [AtlasEntityDef](https://learn.microsoft.com/en-us/rest/api/purview/datamapdataplane/type/bulk-create?view=rest-purview-datamapdataplane-2023-09-01&tabs=HTTP#atlasentitydef)
* [available categories](https://atlas.apache.org/api/v2/json_TypeCategory.html)


### Define custom schema type

In [49]:
import json

# Entity typedef for the schema: a container asset (inherits from Asset) with
# one optional string attribute, "comment".
custom_schema_type = dict(
    category="ENTITY",
    version=1,
    name="test_entity_schema",
    description="Test schema",
    typeVersion="1.0",
    options=dict(
        # The extension definition is passed as a JSON-encoded string.
        purviewEntityExtDef=json.dumps({
            "isContainer": True,
            "friendlyName": "Test Schema",
            "search": {
                "searchBehavior": "StandardSearch",
                "standardSearch": {
                    "taxonomy": {
                        "source": "Attributes",
                        "attributes": ["name"],
                        "assetTaxonomy": ["Schema"],
                        "customizeTaxonomyMapping": {"Schema": "name"},
                    },
                    "browse": {
                        "source": "QualifiedName",
                        "browseHierarchy": [
                            {
                                "entityTypeName": "test_entity_schema",
                                "isPath": False,
                                "isRequired": True,
                                "componentName": "Schema",
                            },
                        ],
                    },
                },
            },
        }),
    ),
    attributeDefs=[
        dict(
            name="comment",
            typeName="string",
            isOptional=True,
            cardinality="SINGLE",
            valuesMinCount=0,
            valuesMaxCount=1,
            isUnique=False,
            isIndexable=False,
            includeInNotification=False,
        ),
    ],
    superTypes=["Asset"],
    subTypes=[],
)

### Define custom table type

The code below defines a data-storing entity with a tabular schema. To accomplish this, we inherit from DataSet (the default type to inherit from when creating a type that stores data) and Purview_Table.

An important property to note here is _schemaElementsAttribute_ within _options_. _schemaElementsAttribute_ impacts what is displayed in the schema tab within Purview. Here we are setting it to point at a relationship of columns.

In [50]:
import json

# Entity typedef for the table. It inherits from DataSet and Purview_Table and
# declares no attributes of its own.
custom_table_type = dict(
    category="ENTITY",
    version=1,
    name="test_entity_table",
    description="Custom data source",
    typeVersion="1.0",
    options=dict(
        # "columns" is the name given to the table end of the
        # test_entity_table_columns relationship defined later in the notebook.
        schemaElementsAttribute="columns",
        # The extension definition is passed as a JSON-encoded string.
        purviewEntityExtDef=json.dumps({
            "compact": {
                "isHostForCompact": True,
                "relationshipsToCompact": ["test_entity_table_columns"],
            },
            "isContainer": False,
            "friendlyName": "Test Table",
            "search": {
                "searchBehavior": "StandardSearch",
                "standardSearch": {
                    "taxonomy": {
                        "source": "Attributes",
                        "attributes": ["schemaName"],
                        "assetTaxonomy": ["Schema"],
                        "customizeTaxonomyMapping": {"Schema": "schemaName"},
                    },
                    "browse": {
                        "source": "QualifiedName",
                        "browseHierarchy": [
                            {
                                "entityTypeName": "test_entity_schema",
                                "isPath": False,
                                "isRequired": True,
                                "componentName": "Schema",
                            },
                            {
                                "entityTypeName": "test_entity_table",
                                "isPath": False,
                                "isRequired": True,
                                "componentName": "Table",
                            },
                        ],
                    },
                },
            },
        }),
    ),
    attributeDefs=[],
    superTypes=["DataSet", "Purview_Table"],
    subTypes=[],
    relationshipAttributeDefs=[],
)

### Define custom column type

Our column type also stores data, so we inherit from the DataSet type here as well. However, we don't want a schema for the column so we omit the Purview_Table. We also set a few attribute definitions for information we want to store for each column.

Take note of _schemaAttributes_ within the _options_ property. This again affects what is shown in the table's schema tab: setting this property tells Purview to populate the schema tab with the listed attributes, in this instance the dataType attribute.

In [39]:
import json

# Entity typedef for a column. It inherits from DataSet only and carries three
# optional, single-valued attributes: dataType, isNullable and comment.
custom_column_type = dict(
    category="ENTITY",
    version=1,
    name="test_entity_table_column",
    description="Custom column",
    typeVersion="1.0",
    options=dict(
        # The extension definition is passed as a JSON-encoded string.
        purviewEntityExtDef=json.dumps({
            "isContainer": False,
            "friendlyName": "Test Column",
            "search": {
                "searchBehavior": "RelevantSearch",
                "relevantSearch": {
                    "relevantRelationships": ["test_entity_table_columns"],
                    "relevantAttributes": ["dataType"],
                },
            },
        }),
        # Passed as a string, exactly as written.
        schemaAttributes="['dataType']",
    ),
    # The attribute definitions differ only in name and type.
    attributeDefs=[
        {
            "name": attribute_name,
            "typeName": attribute_type,
            "isOptional": True,
            "cardinality": "SINGLE",
            "valuesMinCount": 0,
            "valuesMaxCount": 1,
            "isUnique": False,
            "isIndexable": False,
            "includeInNotification": False,
        }
        for attribute_name, attribute_type in (
            ("dataType", "string"),
            ("isNullable", "boolean"),
            ("comment", "string"),
        )
    ],
    superTypes=["DataSet"],
    subTypes=[],
)

### Define a relationship between the table and column types

To create our table entity, we want the table to be a container of columns. The same cell also defines a second relationship that makes a schema a container of tables.

In [40]:
# Relationship typedef linking a table (container end, attribute "columns") to
# its columns (attribute "table").
custom_relationship = dict(
    category="RELATIONSHIP",
    version=1,
    name="test_entity_table_columns",
    description="1:N test_entity_table:test_entity_table_column relationship",
    typeVersion="1.0",
    attributeDefs=[],
    relationshipCategory="COMPOSITION",
    propagateTags="NONE",
    endDef1=dict(
        type="test_entity_table",
        name="columns",
        isContainer=True,
        cardinality="SET",
        isLegacyAttribute=False,
    ),
    endDef2=dict(
        type="test_entity_table_column",
        name="table",
        isContainer=False,
        cardinality="SINGLE",
        isLegacyAttribute=False,
    ),
)


# Relationship typedef linking a schema (container end, attribute "tables") to
# its tables (attribute "testSchema").
custom_schema_relationship = dict(
    category="RELATIONSHIP",
    version=1,
    name="test_entity_schema_tables",
    description="test_entity_schema_tables",
    typeVersion="1.0",
    lastModifiedTS="1",
    attributeDefs=[],
    relationshipCategory="COMPOSITION",
    propagateTags="NONE",
    endDef1=dict(
        type="test_entity_schema",
        name="tables",
        isContainer=True,
        cardinality="SET",
        isLegacyAttribute=False,
    ),
    endDef2=dict(
        type="test_entity_table",
        name="testSchema",
        isContainer=False,
        cardinality="SINGLE",
        isLegacyAttribute=False,
    ),
)

### Create entity typedefs

Call the Purview API to create the types defined above.

In [None]:
# Register the three entity typedefs and both relationship typedefs in one call.
typedefs_payload = {
    "entityDefs": [custom_table_type, custom_column_type, custom_schema_type],
    "relationshipDefs": [custom_relationship, custom_schema_relationship],
}
response = request("POST", "/datamap/api/atlas/v2/types/typedefs", json=typedefs_payload)

# Show the parsed response body as the cell output.
response.json()

### Create entity templates

In [68]:
def create_schema_template(name, desc, qualified_name, tables):
    """Build the entity payload for a ``test_entity_schema``.

    Args:
        name: Value for the ``name`` attribute.
        desc: Value for the ``description`` attribute.
        qualified_name: Value for the ``qualifiedName`` attribute.
        tables: Value for the ``tables`` attribute, stored as given.

    Returns:
        dict: Entity with ``typeName``, ``status``, ``version`` and ``attributes``.
    """
    attributes = dict(
        name=name,
        description=desc,
        qualifiedName=qualified_name,
        tables=tables,
    )
    return dict(
        typeName="test_entity_schema",
        status="ACTIVE",
        version=1,
        attributes=attributes,
    )


def create_table_template(name, desc, qualified_name, schema_name = "none"):
    """Build the entity payload for a ``test_entity_table`` without columns.

    Args:
        name: Value for the ``name`` attribute.
        desc: Value for the ``description`` attribute.
        qualified_name: Value for the ``qualifiedName`` attribute.
        schema_name: Value for the ``schemaName`` attribute; defaults to ``"none"``.

    Returns:
        dict: Entity with ``typeName``, ``status``, ``version`` and ``attributes``.
    """
    attributes = dict(
        name=name,
        description=desc,
        qualifiedName=qualified_name,
        schemaName=schema_name,
    )
    return dict(
        typeName="test_entity_table",
        status="ACTIVE",
        version=1,
        attributes=attributes,
    )


def create_table_template_with_cols(name, desc, qualified_name, cols, schema_name = "none"):
    """Build the entity payload for a ``test_entity_table`` that references its columns.

    Args:
        name: Value for the ``name`` attribute.
        desc: Value for the ``description`` attribute.
        qualified_name: Value for the ``qualifiedName`` attribute.
        cols: Value for the ``columns`` attribute, stored as given.
        schema_name: Value for the ``schemaName`` attribute; defaults to ``"none"``.

    Returns:
        dict: Entity with ``typeName``, ``status``, ``version`` and ``attributes``.
    """
    return {
        "typeName": "test_entity_table",
        "status": "ACTIVE",
        "version": 1,
        "attributes": {
            "name": name,
            "description": desc,
            "qualifiedName": qualified_name,
            # schema_name used to be accepted but silently dropped; emit it the
            # same way create_table_template does.
            "schemaName": schema_name,
            "columns": cols
        }
    }

def create_column_template(id, name, desc, qualified_name, data_type, nullable, comment):
    """Build the entity payload for a ``test_entity_table_column``.

    The formatted ``id`` is appended to both ``name`` and ``qualified_name``.

    Args:
        id: Suffix appended to the name and the qualified name.
        name: Prefix for the ``name`` attribute.
        desc: Value for the ``description`` attribute.
        qualified_name: Prefix for the ``qualifiedName`` attribute.
        data_type: Value for the ``dataType`` attribute.
        nullable: Value for the ``isNullable`` attribute.
        comment: Value for the ``comment`` attribute.

    Returns:
        dict: Entity with ``typeName``, ``status``, ``version`` and ``attributes``.
    """
    suffix = f"{id}"
    attributes = {
        "name": name + suffix,
        "description": desc,
        "qualifiedName": qualified_name + suffix,
        "dataType": data_type,
        "isNullable": nullable,
        "comment": comment,
    }
    template = {
        "typeName": "test_entity_table_column",
        "status": "ACTIVE",
        "version": 1,
    }
    template["attributes"] = attributes
    return template

def create_table_col_relationship(table_guid, column_guid):
    """Build a ``test_entity_table_columns`` relationship payload.

    Args:
        table_guid: Guid placed on ``end1`` (the table end).
        column_guid: Guid placed on ``end2`` (the column end).

    Returns:
        dict: Relationship with ``typeName``, ``end1`` and ``end2``.
    """
    table_end = dict(guid=table_guid, typeName="test_entity_table")
    column_end = dict(guid=column_guid, typeName="test_entity_table_column")
    return dict(
        typeName="test_entity_table_columns",
        end1=table_end,
        end2=column_end,
    )

## Create table with some columns

### Method 1

In [None]:
table = [create_table_template("Test table", "This is a test table", "https:some/qualified/name")]
cols = [
    create_column_template(idx, "Test Column ", "This is a test column", "https:some/table/column", "STRING", True, "Hi")
    for idx in range(5)
]

entities_to_create = table + cols

# Guids of everything created, grouped by kind, for the cells that follow.
guid_map = dict(tables=[], columns=[], relationships=[])

# Create the table and the columns, one request per entity.
for entity in entities_to_create:
    create_entity_res = request("POST", "/datamap/api/atlas/v2/entity", json={"entity": entity}).json()

    print(create_entity_res)

    # File each created entity's guid under its kind.
    for created in create_entity_res["mutatedEntities"]["CREATE"]:
        created_type = created["typeName"]
        if created_type == "test_entity_table":
            guid_map["tables"].append(created["guid"])
        elif created_type == "test_entity_table_column":
            guid_map["columns"].append(created["guid"])

print(guid_map)

#### Link the columns to the table

In [None]:
# Attach every created column to the first created table.
for column_guid in guid_map["columns"]:
    relationship_payload = create_table_col_relationship(guid_map["tables"][0], column_guid)

    # Create the relationship and record its guid.
    relationship_res = request("POST", "/datamap/api/atlas/v2/relationship", json=relationship_payload).json()
    guid_map["relationships"].append(relationship_res["guid"])


print(guid_map)

### Method 2

In [None]:
# NOTE: `table` is assigned here but not used by the rest of this cell; the
# table that is actually posted is built further down with its columns.
table = create_table_template("Test table", "This is a test table", "https:some/qualified/name")
cols = [
    create_column_template(idx, "Test Column ", "This is a test column", "https:some/table/column", "STRING", True, "Hi")
    for idx in range(2)
]

guid_map = dict(tables=[], columns=[], relationships=[])

# Create the columns first and remember the guid returned for each one.
for col in cols:
    create_entity_res = request("POST", "/datamap/api/atlas/v2/entity", json={"entity": col}).json()
    created_columns = create_entity_res["mutatedEntities"]["CREATE"]
    guid_map["columns"].append(created_columns[0]["guid"])

# Add the returned guid to each column template before passing it to the table.
for col, column_guid in zip(cols, guid_map["columns"]):
    col["guid"] = column_guid

# Create the table with the columns passed in its "columns" attribute.
table_with_cols = create_table_template_with_cols("Test table", "This is a test table", "https:some/qualified/name", cols)
create_entity_res = request("POST", "/datamap/api/atlas/v2/entity", json={"entity": table_with_cols}).json()


guid_map["tables"].append(create_entity_res["mutatedEntities"]["CREATE"][0]["guid"])


print(guid_map)

### Method 3

In [None]:
table = create_table_template("Test Table", "This is a test table", "https:some/qualified/name", "test_schema")
cols = [
    create_column_template(idx, "Test Column ", "This is a test column", "https:some/table/column", "STRING", True, "Hi")
    for idx in range(2)
]
# NOTE: `schema` is assigned here but not posted; the schema sent to the API is
# built again at the end of this cell.
schema = create_schema_template("test_schema", "This is a test schema", "https:some/qualified/name", [table])

guid_map = dict(tables=[], columns=[], relationships=[])

# Create the columns first and remember the guid returned for each one.
for col in cols:
    create_entity_res = request("POST", "/datamap/api/atlas/v2/entity", json={"entity": col}).json()
    created_columns = create_entity_res["mutatedEntities"]["CREATE"]
    guid_map["columns"].append(created_columns[0]["guid"])

# Add the returned guid to each column template before passing it to the table.
for col, column_guid in zip(cols, guid_map["columns"]):
    col["guid"] = column_guid

# Create the table with the columns passed in its "columns" attribute.
table_with_cols = create_table_template_with_cols("Test Table", "This is a test table", "https:some/qualified/name", cols)
create_entity_res = request("POST", "/datamap/api/atlas/v2/entity", json={"entity": table_with_cols}).json()

created_table_guid = create_entity_res["mutatedEntities"]["CREATE"][0]["guid"]
guid_map["tables"].append(created_table_guid)


print(guid_map)

# Give the column-less table template the new guid so the schema can reference it.
table["guid"] = created_table_guid

# Create the schema that contains the table.
schema_entity = create_schema_template("Test Schema", "This is a test schema", "https:some/qualified/name2", [table])
create_entity_res = request("POST", "/datamap/api/atlas/v2/entity", json={"entity": schema_entity}).json()

create_entity_res

### Move assets to collection

In [103]:
# Move the created tables and columns into a collection.
# NOTE(review): the collection ID in the URL is hard-coded; presumably it must
# be replaced with one from your own Purview account. Confirm before running.
entity_guids_to_move = guid_map["tables"] + guid_map["columns"]
move_table_response = request(
    "POST",
    "/datamap/api/entity/moveTo?collectionId=ql7jd6",
    json=dict(entityGuids=entity_guids_to_move),
)

### Cleanup table

In [None]:
# Delete the first table recorded in guid_map. The response is the cell output.
table_guid = guid_map["tables"][0]
request("DELETE", f"/datamap/api/atlas/v2/entity/guid/{table_guid}")

### Delete type definitions

In [None]:
# Delete both relationship typedefs created by this notebook.
# test_entity_schema_tables was previously left behind. It references
# test_entity_table, and Atlas refuses to delete an entity typedef that still
# has relationship typedefs attached, so relationships have to go first.
relationship_typedef_delete_responses = [
    request("DELETE", f"/datamap/api/atlas/v2/types/typedef/name/{relationship_typedef_name}")
    for relationship_typedef_name in ("test_entity_table_columns", "test_entity_schema_tables")
]

# Show one response per deleted relationship typedef.
relationship_typedef_delete_responses

In [None]:
# Delete the column entity typedef. The response is the cell output.
request("DELETE", "/datamap/api/atlas/v2/types/typedef/name/test_entity_table_column")

In [None]:
# Delete the remaining entity typedefs created by this notebook.
# test_entity_schema was registered together with the table and column types
# but was never removed by the original cleanup.
entity_typedef_delete_responses = [
    request("DELETE", f"/datamap/api/atlas/v2/types/typedef/name/{entity_typedef_name}")
    for entity_typedef_name in ("test_entity_table", "test_entity_schema")
]

# Show one response per deleted entity typedef.
entity_typedef_delete_responses

In [None]:
# Fetch a single entity by guid and show the parsed body.
# NOTE(review): the guid is hard-coded and presumably specific to the author's
# Purview account. Replace it with one of your own (e.g. from guid_map).
request("GET", "/datamap/api/atlas/v2/entity/guid/5635a6e0-3968-4065-95c6-862f516b2352").json()

In [None]:
"databricks_schema_tables"

# Look up a typedef by name and show the parsed body.
request("GET", "/datamap/api/atlas/v2/types/typedef/name/databricks_schema_tables").json()
