In [None]:
%pip install msal

### Authentication using MSAL

In [1]:
from msal import PublicClientApplication, SerializableTokenCache
import requests

bjss_tenant_id = "a64cb840-fecf-45ea-a7e2-a7526e51be02"

cache = SerializableTokenCache()

app = PublicClientApplication(
    client_id = "04b07795-8ddb-461a-bbee-02f9e1bf7b46",
    authority = f"https://login.microsoftonline.com/{bjss_tenant_id}",
    token_cache = cache
)

flow = app.initiate_device_flow(["73c2949e-da2d-457a-9607-fcc665198967/.default"])

print(flow) # Print to display code for authentication

token = app.acquire_token_by_device_flow(flow)

def get_auth_header(scope = None):
    return dict(
        Authorization="Bearer {token}".format(
            token=token["access_token"]
        )
    )


# REST api request wrapper
def request(
    method,
    api,
    body=None,
    json=None,
    api_version="api-version=2023-09-01"
):
    base = f"https://{bjss_tenant_id}-api.purview-service.microsoft.com"
    if "?" in api:
        return requests.request(method, f"{base}{api}&{api_version}", data=body, json=json, headers=get_auth_header())
    else:
        return requests.request(method, f"{base}{api}?{api_version}", data=body, json=json, headers=get_auth_header())

{'user_code': 'DAD8EEJZT', 'device_code': 'DAQABIQEAAADW6jl31mB3T7ugrWTT8pFedKgiRPisRb_0FjAm2yFN5xBPWDWm7ASVntjtEQM7s2de_q9tGPv0PFf3N8SaJ1-ofVAUudfpnJJQShEEHiH2FYPXCSDQwoC9avnIKIgYCV9rsNrr11neMQbflTolWElNGVC7cYrP6DoScbNc1oF_uDpPpP-rOQi18Ns5doaCac4gAA', 'verification_uri': 'https://microsoft.com/devicelogin', 'expires_in': 900, 'interval': 5, 'message': 'To sign in, use a web browser to open the page https://microsoft.com/devicelogin and enter the code DAD8EEJZT to authenticate.', 'expires_at': 1734623960.945635, '_correlation_id': '802a8bd7-77c7-4466-b21e-e825be948aa2'}


### Define custom schema type

In [49]:
import json

custom_schema_type = {
    "category": "ENTITY",
    "version": 1,
    "name": "test_entity_schema",
    "description": "Test schema",
    "typeVersion": "1.0",
    "options": {
        'purviewEntityExtDef': json.dumps({
            "isContainer":True,
            "friendlyName":"Test Schema",
            "search": {
                "searchBehavior":"StandardSearch",
                "standardSearch": {
                    "taxonomy": {
                        "source":"Attributes",
                        "attributes": ["name"],
                        "assetTaxonomy":["Schema"],
                        "customizeTaxonomyMapping": {
                            "Schema":"name"
                        }
                    },
                    "browse": {
                    "source":"QualifiedName",
                    "browseHierarchy": [
                        {
                            "entityTypeName":"test_entity_schema",
                            "isPath":False,
                            "isRequired":True,
                            "componentName":"Schema"
                        }
                    ]
                    }
                }
            }
        })
    },
    "attributeDefs": [
        {
            'name': 'comment',
            'typeName': 'string',
            'isOptional': True,
            'cardinality': 'SINGLE',
            'valuesMinCount': 0,
            'valuesMaxCount': 1,
            'isUnique': False,
            'isIndexable': False,
            'includeInNotification': False
        }
    ],
    "superTypes": ["Asset"],
    "subTypes": [],
    
}

### Define custom table type

In [50]:
import json

custom_table_type = {
    "category": "ENTITY",
    "version": 1,
    "name": "test_entity_table",
    "description": "Custom data source",
    "typeVersion": "1.0",
    "options": {
        "schemaElementsAttribute": "columns",
        'purviewEntityExtDef': json.dumps({
            "compact": {
                "isHostForCompact":True,
                "relationshipsToCompact":["test_entity_table_columns"]
            },
            "isContainer":False,
            "friendlyName":"Test Table",
            "search": {
                "searchBehavior":"StandardSearch",
                "standardSearch": {
                    "taxonomy": {
                        "source":"Attributes",
                        "attributes": ["schemaName"],
                        "assetTaxonomy": ["Schema"],
                        "customizeTaxonomyMapping": {
                            "Schema":"schemaName"
                        }
                    },
                    "browse": {
                        "source":"QualifiedName",
                        "browseHierarchy": [
                            {
                                "entityTypeName":"test_entity_schema",
                                "isPath":False,
                                "isRequired":True,
                                "componentName":"Schema"
                            }, {
                                "entityTypeName":"test_entity_table",
                                "isPath":False,
                                "isRequired":True,
                                "componentName":"Table"
                            }
                        ]
                    }
                }
            }
        })
    },
    "attributeDefs": [
        {
            'name': 'comment',
            'typeName': 'string',
            'isOptional': True,
            'cardinality': 'SINGLE',
            'valuesMinCount': 0,
            'valuesMaxCount': 1,
            'isUnique': False,
            'isIndexable': False,
            'includeInNotification': False
        }
   ],
    "superTypes": [
        "DataSet",
        "Purview_Table"
    ],
    "subTypes": [],
    "relationshipAttributeDefs": []
}

### Define custom column type

In [39]:
import json

custom_column_type = {
    "category": "ENTITY",
    "version": 1,
    "name": "test_entity_table_column",
    "description": "Custom column",
    "typeVersion": "1.0",
    "options": {
        'purviewEntityExtDef': json.dumps({
            "isContainer":False,
            "friendlyName":"Test Column",
            "search": {
                "searchBehavior":"RelevantSearch",
                "relevantSearch": {
                    "relevantRelationships": ["test_entity_table_columns"],
                    "relevantAttributes": ["dataType"]
                }
            }
        }),
        "schemaAttributes": "[\"dataType\"]"
    },
    "attributeDefs": [
        {
            'name': 'dataType',
            'typeName': 'string',
            'isOptional': True,
            'cardinality': 'SINGLE',
            'valuesMinCount': 0,
            'valuesMaxCount': 1,
            'isUnique': False,
            'isIndexable': False,
            'includeInNotification': False
        }, {
            'name': 'isNullable',
            'typeName': 'boolean',
            'isOptional': True,
            'cardinality': 'SINGLE',
            'valuesMinCount': 0,
            'valuesMaxCount': 1,
            'isUnique': False,
            'isIndexable': False,
            'includeInNotification': False
        }, {
            'name': 'comment',
            'typeName': 'string',
            'isOptional': True,
            'cardinality': 'SINGLE',
            'valuesMinCount': 0,
            'valuesMaxCount': 1,
            'isUnique': False,
            'isIndexable': False,
            'includeInNotification': False
        }
    ],
    "superTypes": ["DataSet"],
    "subTypes": []
}

### Define 1:N relationship between table and column

In [40]:
custom_relationship = {
    "category": "RELATIONSHIP",
    "version": 1,
    "name": "test_entity_table_columns",
    "description": "1:N test_entity_table to test_entity_table_column relationship",
    "typeVersion": "1.0",
    "attributeDefs": [],
    "relationshipCategory": "COMPOSITION",
    "propagateTags": "NONE",
    "endDef1": {
        "type": "test_entity_table",
        "name": "columns",
        "isContainer": True,
        "cardinality": "SET",
        "isLegacyAttribute": False
    },
    "endDef2": {
        "type": "test_entity_table_column",
        "name": "table",
        "isContainer": False,
        "cardinality": "SINGLE",
        "isLegacyAttribute": False
    }
}


custom_schema_relationship = {
    'category': 'RELATIONSHIP',
    'version': 1,
    'name': 'test_entity_schema_tables',
    'description': 'test_entity_schema_tables',
    'typeVersion': '1.0',
    'lastModifiedTS': '1',
    'attributeDefs': [],
    'relationshipCategory': 'COMPOSITION',
    'propagateTags': 'NONE',
    'endDef1': {
        'type': 'test_entity_schema',
        'name': 'tables',
        'isContainer': True,
        'cardinality': 'SET',
        'isLegacyAttribute': False
    },
    'endDef2': {
        'type': 'test_entity_table',
        'name': 'testSchema',
        'isContainer': False,
        'cardinality': 'SINGLE',
        'isLegacyAttribute': False
    }
  }

### Create entity typedefs (don't run if types already exist, instead run the update section)

In [61]:
response = request(
    "POST",
    "/datamap/api/atlas/v2/types/typedefs",
    json = {
        "entityDefs": [custom_table_type, custom_column_type, custom_schema_type],
        "relationshipDefs": [custom_relationship, custom_schema_relationship]
    }
)

response.json()

{'enumDefs': [],
 'structDefs': [],
 'classificationDefs': [],
 'entityDefs': [{'category': 'ENTITY',
   'guid': 'd0a98aba-f562-b7d5-52ca-d5ec5b737354',
   'createdBy': '2bf90d31-e183-4dc6-bd56-0aa3f1117d1a',
   'updatedBy': '2bf90d31-e183-4dc6-bd56-0aa3f1117d1a',
   'createTime': 1734626859311,
   'updateTime': 1734626859311,
   'version': 1,
   'name': 'test_entity_table',
   'description': 'Custom data source',
   'typeVersion': '1.0',
   'options': {'schemaElementsAttribute': 'columns',
    'purviewEntityExtDef': '{"compact": {"isHostForCompact": true, "relationshipsToCompact": ["test_entity_table_columns"]}, "isContainer": false, "friendlyName": "Test Table", "search": {"searchBehavior": "StandardSearch", "standardSearch": {"taxonomy": {"source": "Attributes", "attributes": ["schemaName"], "assetTaxonomy": ["Schema"], "customizeTaxonomyMapping": {"Schema": "schemaName"}}, "browse": {"source": "QualifiedName", "browseHierarchy": [{"entityTypeName": "test_entity_schema", "isPath": f

### Update

In [None]:
response = request(
    "PUT",
    "/datamap/api/atlas/v2/types/typedefs",
    json = {
        "entityDefs": [custom_table_type, custom_column_type],
        "relationshipDefs": [custom_relationship]
    }
)

### Create entity templates

In [68]:
def create_schema_template(name, desc, qualified_name, tables):
    return {
        "typeName": "test_entity_schema",
        "status": "ACTIVE",
        "version": 1,
        "attributes": {
            "name": name,
            "description": desc,
            "qualifiedName": qualified_name,
            "tables": tables,
        }
    }


def create_table_template(name, desc, qualified_name, schema_name = "none"):
    return {
        "typeName": "test_entity_table",
        "status": "ACTIVE",
        "version": 1,
        "attributes": {
            "name": name,
            "description": desc,
            "qualifiedName": qualified_name,
            "schemaName": schema_name
        }
    }


def create_table_template_with_cols(name, desc, qualified_name, cols, schema_name = "none"):
    return {
        "typeName": "test_entity_table",
        "status": "ACTIVE",
        "version": 1,
        "attributes": {
            "name": name,
            "description": desc,
            "qualifiedName": qualified_name,
            "columns": cols
        }
    }

def create_column_template(id, name, desc, qualified_name, data_type, nullable, comment):
    return {
        "typeName": "test_entity_table_column",
        "status": "ACTIVE",
        "version": 1,
        "attributes": {
            "name": name + f"{id}",
            "description": desc,
            "qualifiedName": qualified_name +f"{id}",
            "dataType": data_type,
            "isNullable": nullable,
            "comment": comment
        }
    }

def create_table_col_relationship(table_guid, column_guid):
    return {
        "typeName": "test_entity_table_columns",
        "end1": {
            "guid": table_guid,
            "typeName": "test_entity_table"
        },
        "end2": {
            "guid": column_guid,
            "typeName": "test_entity_table_column"
        }
    }

### Create table with some columns

In [124]:
table = [create_table_template("Test table", "This is a test table", "https:some/qualified/name")]
cols = [create_column_template(x, "Test Column ", "This is a test column", "https:some/table/column", "STRING", True, "Hi") for x in range(0, 5)]

entities_to_create = table + cols


guid_map = {
    "tables": [],
    "columns": [],
    "relationships": []
}

# Bulk create table and columns
for entity in entities_to_create:
    create_entity_res = request(
        "POST",
        "/datamap/api/atlas/v2/entity",
        json={
            "entity": entity
        }
    ).json()

    print(create_entity_res)

    for entity in create_entity_res["mutatedEntities"]["CREATE"]:
        if entity["typeName"] == "test_entity_table":
            guid_map["tables"] += [entity["guid"]]
        elif entity["typeName"] == "test_entity_table_column":
            guid_map["columns"] += [entity["guid"]]

print(guid_map)

{'mutatedEntities': {'CREATE': [{'typeName': 'test_entity_table', 'attributes': {'qualifiedName': 'https:some/qualified/name', 'name': 'Test table', 'description': 'This is a test table'}, 'lastModifiedTS': '1', 'guid': 'c2fdb3e8-dbc5-450f-b3b0-94b2e9c5eacd', 'status': 'ACTIVE', 'displayText': 'Test table', 'classificationNames': [], 'classifications': [], 'meaningNames': [], 'meanings': [], 'isIncomplete': False, 'labels': [], 'isIndexed': True}]}, 'guidAssignments': {'-475679062274': 'c2fdb3e8-dbc5-450f-b3b0-94b2e9c5eacd'}}
{'mutatedEntities': {'CREATE': [{'typeName': 'test_entity_table_column', 'attributes': {'qualifiedName': 'https:some/table/column0', 'name': 'Test Column 0', 'description': 'This is a test column'}, 'lastModifiedTS': '1', 'guid': 'cae3cd4a-b678-44d5-8280-817fe2a592e1', 'status': 'ACTIVE', 'displayText': 'Test Column 0', 'classificationNames': [], 'classifications': [], 'meaningNames': [], 'meanings': [], 'isIncomplete': False, 'labels': [], 'isIndexed': True}]}, '

In [141]:
table = create_table_template("Test table", "This is a test table", "https:some/qualified/name")
cols = [create_column_template(x, "Test Column ", "This is a test column", "https:some/table/column", "STRING", True, "Hi") for x in range(0, 2)]

guid_map = {
    "tables": [],
    "columns": [],
    "relationships": []
}

# create the columns
for col in cols:
    create_entity_res = request(
        "POST",
        "/datamap/api/atlas/v2/entity",
        json={
            "entity": col
        }
    ).json()
    guid_map["columns"].append(create_entity_res["mutatedEntities"]["CREATE"][0]["guid"])

for idx, col in enumerate(cols):
    col["guid"] = guid_map["columns"][idx]

# create table
create_entity_res = request(
    "POST",
    "/datamap/api/atlas/v2/entity",
    json={
        "entity": create_table_template_with_cols("Test table", "This is a test table", "https:some/qualified/name", cols)
    }
).json()


guid_map["tables"].append(create_entity_res["mutatedEntities"]["CREATE"][0]["guid"])


print(guid_map)

{'tables': ['ddea228b-4a18-4b3a-a166-5c4c83e7ec97'], 'columns': ['3459f507-2b78-45e2-a2a5-e834fe08b116', 'bc84fa00-b56b-4ffb-bb44-d4bf3104bfcb'], 'relationships': []}


In [69]:
table = create_table_template("Test Table", "This is a test table", "https:some/qualified/name", "test_schema")
cols = [create_column_template(x, "Test Column ", "This is a test column", "https:some/table/column", "STRING", True, "Hi") for x in range(0, 2)]
schema = create_schema_template("test_schema", "This is a test schema", "https:some/qualified/name", [table])

guid_map = {
    "tables": [],
    "columns": [],
    "relationships": []
}

# create the columns
for col in cols:
    create_entity_res = request(
        "POST",
        "/datamap/api/atlas/v2/entity",
        json={
            "entity": col
        }
    ).json()
    guid_map["columns"].append(create_entity_res["mutatedEntities"]["CREATE"][0]["guid"])

for idx, col in enumerate(cols):
    col["guid"] = guid_map["columns"][idx]

# create table
create_entity_res = request(
    "POST",
    "/datamap/api/atlas/v2/entity",
    json={
        "entity": create_table_template_with_cols("Test Table", "This is a test table", "https:some/qualified/name", cols)
    }
).json()


guid_map["tables"].append(create_entity_res["mutatedEntities"]["CREATE"][0]["guid"])


print(guid_map)

table["guid"] = create_entity_res["mutatedEntities"]["CREATE"][0]["guid"]
# create schema
create_entity_res = request(
    "POST",
    "/datamap/api/atlas/v2/entity",
    json={
        "entity": create_schema_template("Test Schema", "This is a test schema", "https:some/qualified/name2", [table])
    }
).json()

{'tables': ['8f0dd6a6-2b76-49aa-944f-dcae08e065a3'], 'columns': ['8167b33b-8415-42ca-ab42-2d3f46ffb19a', 'b4d4f99f-8eb1-423f-bd67-a626c5352000'], 'relationships': []}


In [64]:
print(create_entity_res)

{'mutatedEntities': {'CREATE': [{'typeName': 'test_entity_schema', 'attributes': {'qualifiedName': 'https:some/qualified/name2', 'name': 'Test Schema', 'description': 'This is a test schema'}, 'lastModifiedTS': '1', 'guid': '5635a6e0-3968-4065-95c6-862f516b2352', 'status': 'ACTIVE', 'displayText': 'Test Schema', 'classificationNames': [], 'classifications': [], 'meaningNames': [], 'meanings': [], 'isIncomplete': False, 'labels': [], 'isIndexed': True}]}, 'guidAssignments': {'-187345692944': '5635a6e0-3968-4065-95c6-862f516b2352'}}


In [24]:
create_relationships_res = request(
        "POST",
        "/datamap/api/atlas/v2/relationship",
        json={
        "typeName": "test_entity_schema_tables",
        "end1": {
            "guid": "f6399943-904f-4b7d-9860-93f363fdae18",
            "typeName": "test_entity_table"
        },
        "end2": {
            "guid": "1e823718-6d5b-4758-b5e9-dfbf6187b96a",
            "typeName": "test_entity_table_column"
        }
    }).json()

print(create_relationships_res)

{'requestId': '99fac26c-e1e8-4c0a-9100-abb06ef69cb4', 'errorCode': 'ATLAS-409-00-004', 'errorMessage': 'relationship test_entity_schema_tables already exists between entities 1e823718-6d5b-4758-b5e9-dfbf6187b96a and f6399943-904f-4b7d-9860-93f363fdae18'}


### Link the columns to the table

In [102]:

for column_guid in guid_map["columns"]:
    table_col_relationship = create_table_col_relationship(guid_map["tables"][0], column_guid)

    # Create relationship
    create_relationships_res = request(
        "POST",
        "/datamap/api/atlas/v2/relationship",
        json=table_col_relationship
    ).json()

    guid_map["relationships"] += [create_relationships_res["guid"]]


print(guid_map)

{'tables': ['73479018-c455-49f7-883d-69ee219f073d'], 'columns': ['0554a132-7af6-442f-ac3a-f031989ca1a8', '6e37644b-b889-451b-8bb1-e8009882fc3f', 'b9e1cdb5-bd9b-4c4e-9d89-440f9baf165f', 'dadb22bc-4900-4598-a988-a20232805c7d', 'ce4cb4f1-6480-4b1e-9267-e2f21578283d'], 'relationships': ['afb4c1e9-cfde-4d85-9e51-2245eba35cc5', '82632511-e577-49d3-b435-6ab4f90e46a7', 'e385901a-dcfe-473b-9c06-3e1ca8c406d6', 'c537ff82-0109-45d0-a2ee-607d3235bf74', '5a6320a2-07aa-44d7-ab56-3401a7fbda63']}


### Move assets to collection

In [103]:
move_table_response = request(
    "POST",
    "/datamap/api/entity/moveTo?collectionId=ql7jd6",
    json={
        "entityGuids": guid_map["tables"] + guid_map["columns"]
    }
)

### Cleanup table

In [19]:
table_guid = guid_map["tables"][0]
request(
    "DELETE",
    f"/datamap/api/atlas/v2/entity/guid/{table_guid}"
)

<Response [200]>

### Delete type definitions

In [None]:
request(
    "DELETE",
    "/datamap/api/atlas/v2/types/typedef/name/test_entity_table_columns"
)


<Response [204]>

In [52]:
request(
    "DELETE",
    "/datamap/api/atlas/v2/types/typedef/name/test_entity_table_column"
)

<Response [204]>

In [60]:
request(
    "DELETE",
    "/datamap/api/atlas/v2/types/typedef/name/test_entity_table"
)

<Response [204]>

In [66]:
request(
    "GET",
    "/datamap/api/atlas/v2/entity/guid/5635a6e0-3968-4065-95c6-862f516b2352"
).json()

{'referredEntities': {'dd1a023d-ddc5-48a2-968c-ad8e6c4601dc': {'typeName': 'test_entity_table_column',
   'attributes': {'owner': None,
    'replicatedTo': None,
    'userDescription': None,
    'replicatedFrom': None,
    'qualifiedName': 'https:some/table/column1',
    'displayName': None,
    'dataType': 'STRING',
    'isNullable': True,
    'name': 'Test Column 1',
    'description': 'This is a test column',
    'comment': 'Hi'},
   'lastModifiedTS': '1',
   'guid': 'dd1a023d-ddc5-48a2-968c-ad8e6c4601dc',
   'isIncomplete': False,
   'provenanceType': 0,
   'status': 'ACTIVE',
   'createdBy': '2bf90d31-e183-4dc6-bd56-0aa3f1117d1a',
   'updatedBy': '2bf90d31-e183-4dc6-bd56-0aa3f1117d1a',
   'createTime': 1734626871777,
   'updateTime': 1734626871777,
   'version': 1,
   'isIndexed': True,
   'relationshipAttributes': {'inputToProcesses': [],
    'schema': [],
    'sources': [],
    'sinks': [],
    'attachedSchema': [],
    'meanings': [],
    'table': {'guid': '44e01100-01f9-43fc-a

In [147]:
"databricks_schema_tables"

request(
    "GET",
    "/datamap/api/atlas/v2/types/typedef/name/databricks_schema_tables"
).json()


{'category': 'RELATIONSHIP',
 'guid': 'ee25cbe9-486b-d9c5-071e-9ec9de190f6b',
 'createdBy': 'admin',
 'updatedBy': 'admin',
 'createTime': 1733374762879,
 'updateTime': 1733374762879,
 'version': 1,
 'name': 'databricks_schema_tables',
 'description': 'databricks_schema_tables',
 'typeVersion': '1.0',
 'serviceType': 'Databricks UC',
 'lastModifiedTS': '1',
 'attributeDefs': [],
 'relationshipCategory': 'COMPOSITION',
 'propagateTags': 'NONE',
 'endDef1': {'type': 'databricks_schema',
  'name': 'tables',
  'isContainer': True,
  'cardinality': 'SET',
  'isLegacyAttribute': False},
 'endDef2': {'type': 'databricks_table',
  'name': 'dbSchema',
  'isContainer': False,
  'cardinality': 'SINGLE',
  'isLegacyAttribute': False}}