In [45]:
import json
import os
from urllib.parse import urlencode

import ipywidgets as widgets
import pandas as pd
import requests
from requests.auth import HTTPBasicAuth

In [46]:
# store a value under a key (helper for building lookup tables)
def x(l, k, v):
    """Perform ``l[k] = v`` in place and return ``None``."""
    l[k] = v

In [47]:
# selection widget
def w(w, o, n):
    """Create a selection widget, display it and return it.

    w -- widget factory; called with ``options``, ``description`` and ``layout`` keywords
    o -- iterable of options, offered in sorted order
    n -- text passed as the widget description
    """
    choices = sorted(o)
    layout = widgets.Layout(width='40%')
    created = w(options=choices, description=n, layout=layout)
    display(created)
    return created

In [48]:
# is the custom attribute used by the given dictionary category?
def isCustomAttributeValid(attribute, category):
    """Tell whether a custom attribute is bound to a dictionary category.

    Returns True when at least one element of
    ``attribute["rep"]["glossaryBindings"]`` has a ``category`` equal to
    ``category``. Any exception raised while reading that structure (for
    example a missing ``rep`` or ``glossaryBindings``) yields False.
    """
    hits = 0
    try:
        bindings = attribute.get("rep").get("glossaryBindings")
        # every binding is inspected (no early exit), so a malformed binding
        # anywhere in the list makes the whole check return False
        for binding in bindings:
            if binding.get("category") == category:
                hits += 1
    except Exception:
        return False
    return hits > 0

In [49]:
# get all custom attributes defined in the dictionary
def getattributes(signavio):
    """GET ``<host>/p/meta`` and return the decoded JSON body.

    ``signavio`` is a dict read for ``host``, ``authToken``, ``jsessionId``
    and ``lbrouteId``; the token goes into the ``x-signavio-id`` header and
    the two ids are sent as cookies.
    """
    response = requests.get(
        f"{signavio.get('host')}/p/meta",
        headers={"x-signavio-id": signavio.get("authToken"), "accept": "application/json"},
        cookies={"JSESSIONID": signavio.get("jsessionId"), "LBROUTEID": signavio.get("lbrouteId")},
    )
    return response.json()

In [50]:
# get all dictionary categories found in the dictionary
def getcategories(signavio):
    """GET ``<host>/p/glossarycategory?allCategories=true`` and return the decoded JSON body.

    ``signavio`` is a dict read for ``host``, ``authToken``, ``jsessionId``
    and ``lbrouteId``; the token goes into the ``x-signavio-id`` header and
    the two ids are sent as cookies.
    """
    response = requests.get(
        f"{signavio.get('host')}/p/glossarycategory?allCategories=true",
        headers={"x-signavio-id": signavio.get("authToken"), "accept": "application/json"},
        cookies={"JSESSIONID": signavio.get("jsessionId"), "LBROUTEID": signavio.get("lbrouteId")},
    )
    return response.json()

In [51]:
# add a dictionary entry with a given name and category
def addEntry(signavio, category, uuidAttribute, typeAttribute, hrefAttribute, collibra, asset):
    """Create a Signavio dictionary entry from a Collibra asset.

    Parameters
    ----------
    signavio : dict
        Read for ``host``, ``authToken``, ``jsessionId`` and ``lbrouteId``.
    category : dict
        The last path segment of its ``href`` is sent as ``category``.
    uuidAttribute, typeAttribute, hrefAttribute : dict
        Custom attributes; ``rep.id`` of each becomes a key of ``metaDataValues``
        holding the asset id, the asset type id and a link to the asset.
    collibra : dict
        Read for ``host``, the base of the asset link.
    asset : dict
        Read for ``id``, ``name``, ``assetType[0].assetTypeId`` and, when the
        ``description`` key is present, ``description[0].descriptionAttributeValue``.

    Returns
    -------
    The decoded JSON body of the POST response.
    """
    url = f"{signavio.get('host')}/p/glossary"

    headers = {"x-signavio-id": signavio.get("authToken"), "accept":"application/json", 'content-type': 'application/x-www-form-urlencoded'}

    cookies = {"JSESSIONID": signavio.get("jsessionId"), "LBROUTEID": signavio.get("lbrouteId")}

    # json.dumps escapes quotes and backslashes, which hand-assembled JSON text would not
    metaDataValues = json.dumps({
        str(uuidAttribute.get('rep').get('id')): str(asset.get('id')),
        str(typeAttribute.get('rep').get('id')): str(asset.get('assetType')[0].get('assetTypeId')),
        str(hrefAttribute.get('rep').get('id')): {"label": "", "url": f"{collibra.get('host')}/asset/{asset.get('id')}"},
    })

    description = asset.get('description')[0].get('descriptionAttributeValue') if 'description' in asset else ''

    # urlencode percent-encodes every value, so '&', '=', '%' or '+' inside a
    # name or description can no longer split or corrupt the form fields
    payload = urlencode({
        "title": asset.get('name'),
        "category": category.get('href').split('/')[-1],
        "description": description,
        "metaDataValues": metaDataValues,
    })

    response = requests.post(url, headers=headers, cookies=cookies, data=payload)

    return response.json()

In [52]:
# update a dictionary entry with a given name and category
def updateEntry(signavio, category, uuidAttribute, typeAttribute, hrefAttribute, collibra, asset):
    """Update the Signavio dictionary entry that a Collibra asset points to.

    The entry id is the last path segment of ``asset["href"][0]["hrefAttributeValue"]``;
    the request is a PUT to ``<host>/p/glossary/<entry id>/info``.

    Parameters
    ----------
    signavio : dict
        Read for ``host``, ``authToken``, ``jsessionId`` and ``lbrouteId``.
    category : dict
        The last path segment of its ``href`` is sent as ``category``.
    uuidAttribute, typeAttribute, hrefAttribute : dict
        Custom attributes; ``rep.id`` of each becomes a key of ``metaDataValues``
        holding the asset id, the asset type id and a link to the asset.
    collibra : dict
        Read for ``host``, the base of the asset link.
    asset : dict
        Read for ``id``, ``name``, ``href``, ``assetType[0].assetTypeId`` and, when
        the ``description`` key is present, ``description[0].descriptionAttributeValue``.

    Returns
    -------
    The decoded JSON body of the PUT response.
    """
    entryId = asset.get('href')[0].get('hrefAttributeValue').split('/')[-1]

    url = f"{signavio.get('host')}/p/glossary/{entryId}/info"

    headers = {"x-signavio-id": signavio.get("authToken"), "accept":"application/json", 'content-type': 'application/x-www-form-urlencoded'}

    cookies = {"JSESSIONID": signavio.get("jsessionId"), "LBROUTEID": signavio.get("lbrouteId")}

    # json.dumps escapes quotes and backslashes, which hand-assembled JSON text would not
    metaDataValues = json.dumps({
        str(uuidAttribute.get('rep').get('id')): str(asset.get('id')),
        str(typeAttribute.get('rep').get('id')): str(asset.get('assetType')[0].get('assetTypeId')),
        str(hrefAttribute.get('rep').get('id')): {"label": "", "url": f"{collibra.get('host')}/asset/{asset.get('id')}"},
    })

    description = asset.get('description')[0].get('descriptionAttributeValue') if 'description' in asset else ''

    # urlencode percent-encodes every value, so '&', '=', '%' or '+' inside a
    # name or description can no longer split or corrupt the form fields
    payload = urlencode({
        "title": asset.get('name'),
        "category": category.get('href').split('/')[-1],
        "description": description,
        "metaDataValues": metaDataValues,
    })

    response = requests.put(url, headers=headers, cookies=cookies, data=payload)

    return response.json()

In [53]:
# get assets of type and status from a given community
def getAssets(collibra, communities, assetTypes, statuses, hrefAttributeType):
    """Export the matching Collibra assets through the output module.

    Parameters
    ----------
    collibra : dict
        Read for ``session`` (its ``post(url, json=...)`` is called) and
        ``endpoint`` (base URL of the REST API).
    communities, assetTypes, statuses : iterable of dict
        The ``id`` of each element feeds the corresponding ``IN`` filter.
    hrefAttributeType
        Attribute type id used as ``labelId`` of the exported ``href`` attribute.

    Returns
    -------
    The value stored under ``view`` -> ``Assets`` in the JSON response.
    """
    communityIds = [community.get("id") for community in communities]
    assetTypeIds = [assetType.get("id") for assetType in assetTypes]
    statusIds = [status.get("id") for status in statuses]

    viewConfig = {
        "ViewConfig": {
            "maxCountLimit": "-1",
            "Resources": {
                "Asset": {
                    "name": "Assets",
                    "Signifier": {"name": "name"},
                    "Id": {"name": "id"},
                    "AssetType": {
                        "name": "assetType",
                        "Id": {"name": "assetTypeId"}
                    },
                    "Status": {
                        "name": "assetStatus",
                        "Id": {"name": "assetStatusId"}
                    },
                    "Domain": {
                        "name": "assetDomain",
                        "Id": {"name": "assetDomainId"},
                        "Community": {
                            "name": "assetCommunity",
                            "Id": {"name": "assetCommunityId"}
                        }
                    },
                    "StringAttribute": [
                        {
                            "name": "description",
                            # NOTE(review): presumably the id of the built-in Description attribute type; confirm
                            "labelId": "00000000-0000-0000-0000-000000003114",
                            "Id": {"name": "descriptionAttributeId"},
                            "LongExpression": {"name": "descriptionAttributeValue"}
                        },
                        {
                            "name": "href",
                            "labelId": hrefAttributeType,
                            "Id": {"name": "hrefAttributeId"},
                            "LongExpression": {"name": "hrefAttributeValue"}
                        }
                    ],
                    "Filter": {
                        # all three IN clauses carry their id list under "values"
                        "AND": [
                            {
                                "Field": {
                                    "name": "assetCommunityId",
                                    "operator": "IN",
                                    "values": communityIds,
                                    "descendants": "true"
                                }
                            },
                            {
                                "Field": {
                                    "name": "assetTypeId",
                                    "operator": "IN",
                                    "values": assetTypeIds
                                }
                            },
                            {
                                "Field": {
                                    "name": "assetStatusId",
                                    "operator": "IN",
                                    "values": statusIds
                                }
                            }
                        ]
                    }
                }
            }
        }
    }

    response = collibra.get("session").post(f"{collibra.get('endpoint')}/outputModule/export/json?validationEnabled=false", json=viewConfig)

    return response.json().get("view").get("Assets")

In [54]:
# collibra credentials: COLLIBRA_USERNAME / COLLIBRA_PASSWORD from the environment take
# precedence, so real secrets need not be typed into the notebook; the placeholders are the fallback
collibra = {
    "host": "https://print.collibra.com",
    "username": os.environ.get("COLLIBRA_USERNAME", "[username]"),
    "password": os.environ.get("COLLIBRA_PASSWORD", "[password]"),
}

# base URL of the REST API, derived from the host
collibra["endpoint"] = f"{collibra['host']}/rest/2.0"

In [55]:
# connect to collibra: one shared session that sends basic-auth credentials with each request
collibra.update(session=requests.Session())

collibra.get("session").auth = HTTPBasicAuth(
    collibra.get("username"),
    collibra.get("password"),
)

In [56]:
# get collibra asset types, indexed by name
response = collibra.get("session").get(f"{collibra.get('endpoint')}/assetTypes")

# surface a failed call as an HTTP error instead of a KeyError on "results"
response.raise_for_status()

# built directly as a dict (a later duplicate name still wins), without assigning to `_`
assetTypes = {assetType.get("name"): assetType for assetType in response.json()["results"]}

In [57]:
# get collibra attribute types, indexed by name
response = collibra.get("session").get(f"{collibra.get('endpoint')}/attributeTypes")

# surface a failed call as an HTTP error instead of a KeyError on "results"
response.raise_for_status()

# built directly as a dict (a later duplicate name still wins), without assigning to `_`
attributeTypes = {attributeType.get("name"): attributeType for attributeType in response.json()["results"]}

In [58]:
# get collibra statuses, indexed by name
response = collibra.get("session").get(f"{collibra.get('endpoint')}/statuses")

# surface a failed call as an HTTP error instead of a KeyError on "results"
response.raise_for_status()

# built directly as a dict (a later duplicate name still wins), without assigning to `_`
statuses = {status.get("name"): status for status in response.json()["results"]}

In [59]:
# get collibra communities, indexed by name
response = collibra.get("session").get(f"{collibra.get('endpoint')}/communities")

# surface a failed call as an HTTP error instead of a KeyError on "results"
response.raise_for_status()

# built directly as a dict (a later duplicate name still wins), without assigning to `_`
communities = {community.get("name"): community for community in response.json()["results"]}

In [60]:
# choose the collibra communities to query (multi-select over the community names)
widget = w(widgets.SelectMultiple, [str(name) for name in communities], 'Communities')

SelectMultiple(description='Communities', layout=Layout(width='40%'), options=('Airflow', 'Amazon', 'Asia', 'A…

In [61]:
# map each selected community name back to its collibra record
communitiesToQuery = list(map(communities.get, widget.value))

In [62]:
# choose the collibra asset types to list (multi-select over the asset type names)
widget = w(widgets.SelectMultiple, [str(name) for name in assetTypes], 'Asset Types')

SelectMultiple(description='Asset Types', layout=Layout(width='40%'), options=('ADLS Container', 'ADLS File Sy…

In [63]:
# map each selected asset type name back to its collibra record
assetTypesToList = list(map(assetTypes.get, widget.value))

In [64]:
# choose the collibra statuses to include (multi-select over the status names)
widget = w(widgets.SelectMultiple, [str(name) for name in statuses], 'Status Types')

SelectMultiple(description='Status Types', layout=Layout(width='40%'), options=('Accepted', 'Access Granted', …

In [65]:
# map each selected status name back to its collibra record
statusesToGet = list(map(statuses.get, widget.value))

In [66]:
# choose the collibra attribute type that holds the href of the dictionary entry
widget = w(widgets.Dropdown, [str(name) for name in attributeTypes], 'Attribute Type')

Dropdown(description='Attribute Type', layout=Layout(width='40%'), options=('1st Decile', '1st Percentile', '1…

In [67]:
# look up the attribute type record for the name currently selected in `widget`;
# `widget` is reassigned by every selection cell, so run this before the next one
attributeTypeToSet = attributeTypes.get(widget.value)

In [68]:
# signavio credentials: SIGNAVIO_USERNAME / SIGNAVIO_PASSWORD from the environment take
# precedence, so real secrets need not be typed into the notebook; the previous values are the fallback
signavio = {
    "host": "https://editor.signavio.com",
    "tenant": "93ab506a8d87439f9fbb680fdbc95d4b",
    "username": os.environ.get("SIGNAVIO_USERNAME", "antonio.castelo@collibra.com"),
    "password": os.environ.get("SIGNAVIO_PASSWORD", "[password]"),
}

In [69]:
# connect to signavio: log in once, then keep the token and cookies that later requests send
url = f"{signavio.get('host')}/p/login"

data = {"name": signavio.get("username"), "password": signavio.get("password"), "tenant": signavio.get("tenant"), "tokenonly": "true"}

request = requests.post(url, data)

# stop on a rejected login instead of storing the error body as the auth token
request.raise_for_status()

# the response body is the token itself
authToken = request.content.decode("utf-8")

jsessionId = request.cookies.get("JSESSIONID")

lbrouteId = request.cookies.get("LBROUTEID")

# extend the dict rather than replace it: username/password stay available, so this cell can be re-run
signavio = {**signavio, "authToken": authToken, "jsessionId": jsessionId, "lbrouteId": lbrouteId}

In [70]:
# get all signavio dictionary categories, indexed by name
# only items whose "rel" is "cat" are kept; built directly as a dict, without assigning to `_`
categories = {
    category.get("rep").get("name"): category
    for category in getcategories(signavio)
    if category.get("rel") == "cat"
}

In [71]:
# choose the signavio dictionary category the assets are mapped to
widget = w(widgets.Dropdown, [str(name) for name in categories], 'Category')

Dropdown(description='Category', layout=Layout(width='40%'), options=('Activities', 'Controls', 'Data Concepts…

In [72]:
# look up the category record for the name currently selected in `widget`;
# `widget` is reassigned by every selection cell, so run this before the next one
categoryToUpdate = categories.get(widget.value)

In [32]:
# get the custom attributes used by the signavio category, indexed by name
# the category id is the last path segment of its href; built directly as a dict, without assigning to `_`
attributes = {
    attribute.get("rep").get("name"): attribute
    for attribute in getattributes(signavio)
    if isCustomAttributeValid(attribute, categoryToUpdate.get("href").split("/")[-1])
}

In [33]:
# choose the custom attribute that will hold the collibra asset id
widget = w(widgets.Dropdown, [str(name) for name in attributes], 'Asset Uuid')

Dropdown(description='Asset Uuid', layout=Layout(width='40%'), options=('href', 'id', 'typeId'), value='href')

In [34]:
# look up the custom attribute for the name currently selected in `widget`;
# `widget` is reassigned by every selection cell, so run this before the next one
uuidAttributeToSet = attributes.get(widget.value)

In [35]:
# choose the custom attribute that will hold the collibra asset type id
widget = w(widgets.Dropdown, [str(name) for name in attributes], 'Type Uuid')

Dropdown(description='Type Uuid', layout=Layout(width='40%'), options=('href', 'id', 'typeId'), value='href')

In [36]:
# look up the custom attribute for the name currently selected in `widget`;
# `widget` is reassigned by every selection cell, so run this before the next one
typeAttributeToSet = attributes.get(widget.value)

In [38]:
# choose the custom attribute that will hold the link back to the collibra asset
widget = w(widgets.Dropdown, [str(name) for name in attributes], 'Asset href')

Dropdown(description='Asset href', layout=Layout(width='40%'), options=('href', 'id', 'typeId'), value='href')

In [39]:
# look up the custom attribute for the name currently selected in `widget`;
# `widget` is reassigned by the selection cells, so run this right after choosing
hrefAttributeToSet = attributes.get(widget.value)

In [40]:
# get all collibra assets of the chosen asset types and statuses within the chosen communities,
# split by whether the asset already carries an href attribute
assetsToCreate = []

assetsToUpdate = []

# a plain loop does the routing, so no throwaway list is built and `_` is not assigned
for asset in getAssets(collibra, communitiesToQuery, assetTypesToList, statusesToGet, attributeTypeToSet.get("id")):
    if "href" in asset:
        assetsToUpdate.append(asset)
    else:
        assetsToCreate.append(asset)

In [41]:
# create a dictionary entry for every asset without an href attribute, then list the ones that failed
entriesCreated = [
    addEntry(signavio, categoryToUpdate, uuidAttributeToSet, typeAttributeToSet, hrefAttributeToSet, collibra, asset)
    for asset in assetsToCreate
]

# a response carrying an 'errors' key counts as a failure
entriesThatFailed = list(filter(lambda entry: 'errors' in entry, entriesCreated))

df = pd.DataFrame(
    [[entry.get("categoryNames"), entry.get("title"), entry.get("message")] for entry in entriesThatFailed],
    columns=['category', 'title', 'error'],
)

# only show the table when something failed
if not df.empty:
    display(df)

In [42]:
# update collibra, when the href attribute set is found, build payload with all href attributes to add
def p(entry, uuidAttribute, attributeType):
    """Build one collibra attribute payload from a created dictionary entry.

    entry         -- dictionary entry; read for ``rep.id`` and ``rep.metaDataValues``
    uuidAttribute -- key of ``metaDataValues`` whose value becomes ``assetId``
    attributeType -- value passed through as ``typeId``
    """
    rep = entry.get("rep")
    return {
        "assetId": rep.get("metaDataValues").get(uuidAttribute),
        "typeId": attributeType,
        "value": f"dictionary/entry/{rep.get('id')}",
    }

# one href attribute per dictionary entry that was actually created (its response carries "rep")
payload = [p(entry, uuidAttributeToSet.get("rep").get("id"), attributeTypeToSet.get("id")) for entry in entriesCreated if "rep" in entry]

response = collibra.get("session").post(f"{collibra.get('endpoint')}/attributes/bulk", json=payload)

# a failed write-back must not pass silently: assets left without an href are
# treated as new by the asset split and would be created again on the next run
response.raise_for_status()

In [43]:
# update the dictionary entry of every asset that already has an href attribute, then list the ones that failed
entriesUpdated = [
    updateEntry(signavio, categoryToUpdate, uuidAttributeToSet, typeAttributeToSet, hrefAttributeToSet, collibra, asset)
    for asset in assetsToUpdate
]

# a response carrying an 'errors' key counts as a failure
entriesThatFailed = list(filter(lambda entry: 'errors' in entry, entriesUpdated))

df = pd.DataFrame(
    [[entry.get("categoryNames"), entry.get("title"), entry.get("message")] for entry in entriesThatFailed],
    columns=['category', 'title', 'error'],
)

# only show the table when something failed
if not df.empty:
    display(df)

In [15]:
# def getDictionaryItems(signavio):
#     try:                  
#         url = f"{signavio.get('host')}/p/glossary?q=*&category=19e32344735c43978b3e510c211fe069"

#         headers = {"x-signavio-id": signavio.get("authToken"), "accept":"application/json"}

#         cookies = {"JSESSIONID": signavio.get("jsessionId"), "LBROUTEID": signavio.get("lbrouteId")}

#         request = requests.get(url, headers=headers, cookies=cookies)

#         return request.json()
        
#     except Exception as e:
#         return e
    

# def deleteDictionaryEntry(signavio, item):
#     try:         
#         url = f"{signavio.get('host')}/p{item}"

#         headers = {"x-signavio-id": signavio.get("authToken"), "accept":"application/json", 'content-type': 'application/x-www-form-urlencoded'}

#         cookies = {"JSESSIONID": signavio.get("jsessionId"), "LBROUTEID": signavio.get("lbrouteId")}

#         request = requests.delete(url, headers=headers, cookies=cookies)

#         return request.json()
        
#     except Exception as e:
#         return e    



# items = [deleteDictionaryEntry(signavio, i.get("href")) for i in getDictionaryItems(signavio) if i.get("rel")=="gitem"]