# Define Business Vocabulary


This notebook implements the section *"Define Business Vocabulary"* of the [Level 4 PoX - Knowledge Catalog Tutorial](https://cp4d-outcomes.techzone.ibm.com/l4-pox/knowledge-catalog) and uses the [Watson Data API](https://cloud.ibm.com/apidocs/watson-data-api).

## Authorisation - mandatory customization

Before executing the next cell, review the variables `LOCAL_DIR_PREFIX` and `FILE_CREDENTIALS` and adjust them to your environment. If they are set incorrectly, the CSV files containing the artifact definitions, or the credentials file (if you use one), will not be found.

In [None]:
import json
import requests # type: ignore
import time

# Prefix prepended to the relative CSV / credentials file paths used in this notebook.
LOCAL_DIR_PREFIX = ""

# This assignment is active and overrides the empty prefix above. Keep it if you cloned
# the repository and the notebook runs locally on your laptop; comment it out otherwise.
LOCAL_DIR_PREFIX = "../../"

# Keep the empty value to use the credentials hardcoded as class variables of `credentials`.
FILE_CREDENTIALS = ""

# Uncomment the next two lines to read the credentials from the file ikcapikey.json instead.
# FILE_CREDENTIALS = "python/ikcapikey.json"
# FILE_CREDENTIALS = LOCAL_DIR_PREFIX + FILE_CREDENTIALS

class credentials :
    """Connection settings for a Cloud Pak for Data server plus bearer-token retrieval.

    The class attributes are the defaults used when no credentials file is given;
    a successfully loaded credentials file overrides them on the instance.
    """

    # Path of the credentials file that was loaded successfully ("" if none).
    file_credentials = ""
    # Base URL of the server; request suffixes are appended to it by urlRequest().
    url_server = "https://cpd-cpd.apps.6645c6d6ca5b92001e29286f.cloud.techzone.ibm.com"
    username = "admin"
    # NOTE(review): an API key is hardcoded in source here; prefer the credentials
    # file and rotate this key if the environment is still live.
    apikey = "SfpLD0yMQFh4xpdOrgPuTK9AdBtEVEqF1gK2HSlw"
    # Bearer token stored by get_bearer_token(); "" until a token was obtained.
    access_token = ""

    def __init__(self, file_credentials):
        """Load the connection settings from a JSON file, if one is given.

        Args:
            file_credentials: Path to a JSON file with the keys "url_server",
                "username" and "api_key", or "" to keep the class-level defaults.

        A file that cannot be read, is not valid JSON, or lacks one of the keys
        is reported on stdout and the class-level defaults stay in effect.
        """
        if file_credentials == "" :
            return

        try :
            with open(file_credentials) as f :
                data = json.load(f)
            # Read every key before assigning anything, so an incomplete file
            # cannot leave the instance with a mix of file values and defaults.
            url_server = data["url_server"]
            username = data["username"]
            apikey = data["api_key"]
        except (OSError, ValueError, KeyError, TypeError) as err :
            # ValueError covers invalid JSON; TypeError covers a non-object JSON root.
            print("Error with the file ", file_credentials)
            print(err)
            return

        self.url_server = url_server
        self.username = username
        self.apikey = apikey
        self.file_credentials = file_credentials

    def urlRequest(self, urlSuffix):
        """Return the full request URL: the server base URL followed by urlSuffix."""
        return self.url_server + urlSuffix

    def get_bearer_token(self):
        """Request a bearer token with the API key and store it in self.access_token.

        Returns:
            The value of self.access_token after the attempt: the new token on
            success, otherwise the previously stored value ("" by default).
            Failures are reported on stdout rather than raised.
        """
        # Get a bearer token with the API key - Cloud Pak for Data SaaS
        # url = "https://iam.cloud.ibm.com/identity/token"
        # headers = {"Content-Type" : "application/x-www-form-urlencoded"}
        # data = "grant_type=urn:ibm:params:oauth:grant-type:apikey&apikey=" + apikey
        # r = requests.post(url, headers=headers, data=data)
        # access_token = r.json()["access_token"]

        # Get a bearer token with the API key - Cloud Pak for Data Software
        urlSuffix = "/icp4d-api/v1/authorize"
        headers = {'Accept': 'application/json', 'Content-type': 'application/json'}
        data = {"username" : self.username, "api_key" : self.apikey}
        r = requests.post(self.urlRequest(urlSuffix), headers=headers, data=json.dumps(data))

        if r.status_code != 200:
            print("Error with the request. Code: ", r.status_code)
            print(r.text)
        else :
            try:
                self.access_token = r.json()["token"]
            except (KeyError, ValueError):
                # ValueError: the body of the 200 response was not valid JSON.
                print("Error with the token. Code: ", r.status_code)
                print("Hint: check the credentials file ", self.file_credentials)
                print(r.text)

        # Return a string on every path so callers can always build the
        # "Bearer " + token header (an empty token signals failure).
        return self.access_token

# Build the connection from the configured credentials and fetch the bearer token
# that the later cells send in their Authorization header.
myconn = credentials(FILE_CREDENTIALS)
access_token = myconn.get_bearer_token()

# Stop here with a clear message when no token was obtained; otherwise the later
# cells fail when they build the '"Bearer " + access_token' header or send it.
if not access_token :
    raise RuntimeError("No bearer token obtained - check the server URL and the credentials")


## Define the Business Vocabulary


Now you can work through the tasks one by one, as described in the PoX instructions.

### 1. Create Categories
   

In [None]:
print("---- Import Categories from CSV----")

IMPORT_CSV_FILE = "artifacts/governance-categories.csv"
IMPORT_CSV_FILE = LOCAL_DIR_PREFIX + IMPORT_CSV_FILE

urlSuffix='/v3/governance_artifact_types/category/import?merge_option=all'
headers = {"accept": "application/json", "Authorization" : "Bearer " + access_token}

# Upload the CSV as a multipart form; the context manager closes the file once
# the request has been sent.
with open(IMPORT_CSV_FILE, 'rb') as csv_file :
    files = {'file': (IMPORT_CSV_FILE, csv_file, 'text/csv')}
    r = requests.post(myconn.urlRequest(urlSuffix), headers=headers, files=files)

if r.status_code == 200 :
    # The import completed within the request itself.
    status = r.json()["status"]
    print("Import finished. Status = ", status)
    print(r.text)
elif r.status_code == 202 :
    # The import was accepted and continues in the background: poll its status
    # until it is no longer IN_PROGRESS.
    process_id = r.json()["process_id"]
    print(f"----- Import process started: {process_id} ----- ")
    print("----- Entering wait loop ------")
    urlSuffix='/v3/governance_artifact_types/import/status/' + process_id
    headers = {"accept": "application/json", "Authorization" : "Bearer " + access_token}
    while True :
        r = requests.get(myconn.urlRequest(urlSuffix), headers=headers)
        if r.status_code != 200 :
            print("Error with the request. Code: ", r.status_code)
            print(r.text)
            # Stop polling: an error response is not assumed to carry a "status" field.
            break
        status = r.json()["status"]
        if status != "IN_PROGRESS" :
            print("Import finished. Status = ", status)
            break
        print ("Import in progess, please wait")
        time.sleep(5)
else :
    print("Error with the request. Code: ", r.status_code)
    print(r.text)


### 2. Update Classifications

#### 2.a. Change the definitions 

In [None]:
print("---- Update Classifications from CSV----")

IMPORT_CSV_FILE = "artifacts/governance-classifications.csv"
IMPORT_CSV_FILE = LOCAL_DIR_PREFIX + IMPORT_CSV_FILE

urlSuffix='/v3/governance_artifact_types/classification/import?merge_option=specified'
headers = {"accept": "application/json", "Authorization" : "Bearer " + access_token}

# Reset first, so that a failed import never leaves the workflow id of an
# earlier cell in place for the publish cell to pick up.
workflow_id = None

# The context manager closes the CSV file once the upload request has been sent.
with open(IMPORT_CSV_FILE, 'rb') as csv_file :
    files = {'file': (IMPORT_CSV_FILE, csv_file, 'text/csv')}
    r = requests.post(myconn.urlRequest(urlSuffix), headers=headers, files=files)

if r.status_code == 200 :
    # The import completed within the request itself.
    status = r.json()["status"]
    print("Import finished. Status = ", status)
    print(r.text)
    workflow_id = r.json().get("workflow_id")

elif r.status_code == 202 :
    # The import continues in the background: poll until it leaves IN_PROGRESS.
    process_id = r.json()["process_id"]
    print(f"----- Import process started: {process_id} ----- ")
    print("----- Entering wait loop ------")
    urlSuffix='/v3/governance_artifact_types/import/status/' + process_id
    headers = {"accept": "application/json", "Authorization" : "Bearer " + access_token}

    while True :
        r = requests.get(myconn.urlRequest(urlSuffix), headers=headers)
        if r.status_code != 200 :
            print("Error with the request. Code: ", r.status_code)
            print(r.text)
            break
        status = r.json()["status"]
        if status != "IN_PROGRESS" :
            print("Import finished. Status = ", status)
            workflow_id = r.json().get("workflow_id")
            break
        print ("Import in progess, please wait")
        time.sleep(5)
else :
    print("Error with the request. Code: ", r.status_code)
    print(r.text)

# The workflow id is only read from a successful response; error responses are
# not assumed to contain one.
if workflow_id is None :
    print("No workflow_id available - nothing to publish for this import")


#### 2.b. Publish the definitions

Before executing this cell, you may want to check the "Task Inbox" in Cloud Pak for Data if you are not sure what will be published.

In [None]:
# Reset so that a task id left over from an earlier cell is never published by mistake.
task_id = None

if not workflow_id :
    print("No workflow id available - run the import cell first")
else :
    # Fetch the workflow together with its user tasks to find the task to complete.
    urlSuffix='/v3/workflows/' + workflow_id + '?include_user_tasks=true'
    headers = {"accept": "application/json", "Authorization" : "Bearer " + access_token}
    r = requests.get(myconn.urlRequest(urlSuffix), headers=headers)

    if r.status_code == 200 :
        user_tasks = r.json()["entity"]["user_tasks"]
        for i in user_tasks :
            # No break: when several tasks match, the last one is used.
            if i["metadata"]["workflow_id"] == workflow_id :
                task_id = i["metadata"]["task_id"]
        if task_id is None :
            print("No user task found for workflow ", workflow_id)
    else :
        print("Error in retrieving the workflow, Code = ", r.status_code)
        print(r.text)

if task_id is not None :
    # Complete the user task with the "#publish" action.
    urlSuffix='/v3/workflow_user_tasks/' + task_id + '/actions'
    headers = {"accept": "application/json", "Authorization" : "Bearer " + access_token}
    payload = {'action': 'complete', 'form_properties': [{'id': 'action', 'value': '#publish'}]}
    r = requests.post(myconn.urlRequest(urlSuffix), headers=headers, json=payload)

    if r.status_code == 202 or r.status_code == 204 :
        print("Publish Successful, Code = ", r.status_code)
    else :
        print("Error in publishing artifacts, Code = ", r.status_code)
        print(r.text)

### 3. Create Data Classes

#### 3.a. Add new Data Classes

In [None]:
print("---- Create Data Classes from CSV----")

IMPORT_CSV_FILE = "artifacts/governance-data-classes.csv"
IMPORT_CSV_FILE = LOCAL_DIR_PREFIX + IMPORT_CSV_FILE

urlSuffix='/v3/governance_artifact_types/data_class/import?merge_option=all'
headers = {"accept": "application/json", "Authorization" : "Bearer " + access_token}

# Reset first, so that a failed import never leaves the workflow id of an
# earlier cell in place for the publish cell to pick up.
workflow_id = None

# The context manager closes the CSV file once the upload request has been sent.
with open(IMPORT_CSV_FILE, 'rb') as csv_file :
    files = {'file': (IMPORT_CSV_FILE, csv_file, 'text/csv')}
    r = requests.post(myconn.urlRequest(urlSuffix), headers=headers, files=files)

if r.status_code == 200 :
    # The import completed within the request itself.
    status = r.json()["status"]
    print("Import finished. Status = ", status)
    print(r.text)
    workflow_id = r.json().get("workflow_id")

elif r.status_code == 202 :
    # The import continues in the background: poll until it leaves IN_PROGRESS.
    process_id = r.json()["process_id"]
    print(f"----- Import process started: {process_id} ----- ")
    print("----- Entering wait loop ------")
    urlSuffix='/v3/governance_artifact_types/import/status/' + process_id
    headers = {"accept": "application/json", "Authorization" : "Bearer " + access_token}

    while True :
        r = requests.get(myconn.urlRequest(urlSuffix), headers=headers)
        if r.status_code != 200 :
            print("Error with the request. Code: ", r.status_code)
            print(r.text)
            break
        status = r.json()["status"]
        if status != "IN_PROGRESS" :
            print("Import finished. Status = ", status)
            workflow_id = r.json().get("workflow_id")
            break
        print ("Import in progess, please wait")
        time.sleep(5)
else :
    print("Error with the request. Code: ", r.status_code)
    print(r.text)

# The workflow id is only read from a successful response; error responses are
# not assumed to contain one.
if workflow_id is None :
    print("No workflow_id available - nothing to publish for this import")

#### 3.b. Publish the changes

In [None]:
# Reset so that a task id left over from an earlier cell is never published by mistake.
task_id = None

if not workflow_id :
    print("No workflow id available - run the import cell first")
else :
    # Fetch the workflow together with its user tasks to find the task to complete.
    urlSuffix='/v3/workflows/' + workflow_id + '?include_user_tasks=true'
    headers = {"accept": "application/json", "Authorization" : "Bearer " + access_token}
    r = requests.get(myconn.urlRequest(urlSuffix), headers=headers)

    if r.status_code == 200 :
        user_tasks = r.json()["entity"]["user_tasks"]
        for i in user_tasks :
            # No break: when several tasks match, the last one is used.
            if i["metadata"]["workflow_id"] == workflow_id :
                task_id = i["metadata"]["task_id"]
        if task_id is None :
            print("No user task found for workflow ", workflow_id)
    else :
        print("Error in retrieving the workflow, Code = ", r.status_code)
        print(r.text)

if task_id is not None :
    # Complete the user task with the "#publish" action.
    urlSuffix='/v3/workflow_user_tasks/' + task_id + '/actions'
    headers = {"accept": "application/json", "Authorization" : "Bearer " + access_token}
    payload = {'action': 'complete', 'form_properties': [{'id': 'action', 'value': '#publish'}]}
    r = requests.post(myconn.urlRequest(urlSuffix), headers=headers, json=payload)

    if r.status_code == 202 or r.status_code == 204 :
        print("Publish Successful, Code = ", r.status_code)
    else :
        print("Error in publishing artifacts, Code = ", r.status_code)
        print(r.text)


### 4. Create Business Terms

#### 4.a. Add new Business Terms

In [None]:
print("---- Create Business Terms from CSV----")

IMPORT_CSV_FILE = "artifacts/governance-business-terms.csv"
IMPORT_CSV_FILE = LOCAL_DIR_PREFIX + IMPORT_CSV_FILE

urlSuffix='/v3/governance_artifact_types/glossary_term/import?merge_option=all'
headers = {"accept": "application/json", "Authorization" : "Bearer " + access_token}

# Reset first, so that a failed import never leaves the workflow id of an
# earlier cell in place for the publish cell to pick up.
workflow_id = None

# The context manager closes the CSV file once the upload request has been sent.
with open(IMPORT_CSV_FILE, 'rb') as csv_file :
    files = {'file': (IMPORT_CSV_FILE, csv_file, 'text/csv')}
    r = requests.post(myconn.urlRequest(urlSuffix), headers=headers, files=files)

if r.status_code == 200 :
    # The import completed within the request itself.
    status = r.json()["status"]
    print("Import finished. Status = ", status)
    workflow_id = r.json().get("workflow_id")

elif r.status_code == 202 :
    # The import continues in the background: poll until it leaves IN_PROGRESS.
    process_id = r.json()["process_id"]
    print(f"----- Import process started: {process_id} ----- ")
    print("----- Entering wait loop ------")
    urlSuffix='/v3/governance_artifact_types/import/status/' + process_id
    headers = {"accept": "application/json", "Authorization" : "Bearer " + access_token}

    while True :
        r = requests.get(myconn.urlRequest(urlSuffix), headers=headers)
        if r.status_code != 200 :
            print("Error with the request. Code: ", r.status_code)
            print(r.text)
            break
        status = r.json()["status"]
        if status != "IN_PROGRESS" :
            print("Import finished. Status = ", status)
            workflow_id = r.json().get("workflow_id")
            break
        print ("Import in progess, please wait")
        time.sleep(5)
else :
    print("Error with the request. Code: ", r.status_code)
    print(r.text)

# The workflow id is only read from a successful response; error responses are
# not assumed to contain one.
if workflow_id is None :
    print("No workflow_id available - nothing to publish for this import")

#### 4.b. Publish the changes


In [None]:
# Reset so that a task id left over from an earlier cell is never published by mistake.
task_id = None

if not workflow_id :
    print("No workflow id available - run the import cell first")
else :
    # Fetch the workflow together with its user tasks to find the task to complete.
    urlSuffix='/v3/workflows/' + workflow_id + '?include_user_tasks=true'
    headers = {"accept": "application/json", "Authorization" : "Bearer " + access_token}
    r = requests.get(myconn.urlRequest(urlSuffix), headers=headers)

    if r.status_code == 200 :
        user_tasks = r.json()["entity"]["user_tasks"]
        for i in user_tasks :
            # No break: when several tasks match, the last one is used.
            if i["metadata"]["workflow_id"] == workflow_id :
                task_id = i["metadata"]["task_id"]
        if task_id is None :
            print("No user task found for workflow ", workflow_id)
    else :
        print("Error in retrieving the workflow, Code = ", r.status_code)
        print(r.text)

if task_id is not None :
    # Complete the user task with the "#publish" action.
    urlSuffix='/v3/workflow_user_tasks/' + task_id + '/actions'
    headers = {"accept": "application/json", "Authorization" : "Bearer " + access_token}
    payload = {'action': 'complete', 'form_properties': [{'id': 'action', 'value': '#publish'}]}
    r = requests.post(myconn.urlRequest(urlSuffix), headers=headers, json=payload)

    if r.status_code == 202 or r.status_code == 204 :
        print("Publish Successful, Code = ", r.status_code)
    else :
        print("Error in publishing artifacts, Code = ", r.status_code)
        print(r.text)


### 5. Create Reference Data

#### 5.a Add new Reference Data

In [None]:
print("---- Create Reference Data from CSV----")

IMPORT_CSV_FILE = "artifacts/governance-reference-data.csv"
IMPORT_CSV_FILE = LOCAL_DIR_PREFIX + IMPORT_CSV_FILE

urlSuffix='/v3/governance_artifact_types/reference_data/import?merge_option=all'
headers = {"accept": "application/json", "Authorization" : "Bearer " + access_token}

# Reset first, so that a failed import never leaves the workflow id of an
# earlier cell in place for the publish cell to pick up.
workflow_id = None

# The context manager closes the CSV file once the upload request has been sent.
with open(IMPORT_CSV_FILE, 'rb') as csv_file :
    files = {'file': (IMPORT_CSV_FILE, csv_file, 'text/csv')}
    r = requests.post(myconn.urlRequest(urlSuffix), headers=headers, files=files)

if r.status_code == 200 :
    # The import completed within the request itself.
    status = r.json()["status"]
    print("Import finished. Status = ", status)
    workflow_id = r.json().get("workflow_id")

elif r.status_code == 202 :
    # The import continues in the background: poll until it leaves IN_PROGRESS.
    process_id = r.json()["process_id"]
    print(f"----- Import process started: {process_id} ----- ")
    print("----- Entering wait loop ------")
    urlSuffix='/v3/governance_artifact_types/import/status/' + process_id
    headers = {"accept": "application/json", "Authorization" : "Bearer " + access_token}

    while True :
        r = requests.get(myconn.urlRequest(urlSuffix), headers=headers)
        if r.status_code != 200 :
            print("Error with the request. Code: ", r.status_code)
            print(r.text)
            break
        status = r.json()["status"]
        if status != "IN_PROGRESS" :
            print("Import finished. Status = ", status)
            workflow_id = r.json().get("workflow_id")
            break
        print ("Import in progess, please wait")
        time.sleep(5)
else :
    print("Error with the request. Code: ", r.status_code)
    print(r.text)

# The workflow id is only read from a successful response; error responses are
# not assumed to contain one.
if workflow_id is None :
    print("No workflow_id available - nothing to publish for this import")

#### 5.b Publish the changes


In [None]:
# Reset so that a task id left over from an earlier cell is never published by mistake.
task_id = None

if not workflow_id :
    print("No workflow id available - run the import cell first")
else :
    # Fetch the workflow together with its user tasks to find the task to complete.
    urlSuffix='/v3/workflows/' + workflow_id + '?include_user_tasks=true'
    headers = {"accept": "application/json", "Authorization" : "Bearer " + access_token}
    r = requests.get(myconn.urlRequest(urlSuffix), headers=headers)

    if r.status_code == 200 :
        user_tasks = r.json()["entity"]["user_tasks"]
        for i in user_tasks :
            # No break: when several tasks match, the last one is used.
            if i["metadata"]["workflow_id"] == workflow_id :
                task_id = i["metadata"]["task_id"]
        if task_id is None :
            print("No user task found for workflow ", workflow_id)
    else :
        print("Error in retrieving the workflow, Code = ", r.status_code)
        print(r.text)

if task_id is not None :
    # Complete the user task with the "#publish" action.
    urlSuffix='/v3/workflow_user_tasks/' + task_id + '/actions'
    headers = {"accept": "application/json", "Authorization" : "Bearer " + access_token}
    payload = {'action': 'complete', 'form_properties': [{'id': 'action', 'value': '#publish'}]}
    r = requests.post(myconn.urlRequest(urlSuffix), headers=headers, json=payload)

    if r.status_code == 202 or r.status_code == 204 :
        print("Publish Successful, Code = ", r.status_code)
    else :
        print("Error in publishing artifacts, Code = ", r.status_code)
        print(r.text)

### 6. Load Department Lookup Data


#### 6.a Add the lookup data

In [None]:
print("---- Load Department Lookup Data from CSV----")

IMPORT_CSV_FILE = "artifacts/governance-reference-department.csv"
IMPORT_CSV_FILE = LOCAL_DIR_PREFIX + IMPORT_CSV_FILE

artifact_id = None
version_id = None
# Reset so that a failed or skipped import never leaves the workflow id of an
# earlier cell in place for the publish cell to pick up.
workflow_id = None

# List the published reference data sets and pick the one named "Department Lookup".
urlSuffix='/v3/governance_artifact_types/reference_data?workflow_status=published&limit=200'
headers = {"accept": "application/json" ,"Authorization" : "Bearer " + access_token}

r = requests.get(myconn.urlRequest(urlSuffix), headers=headers)

if r.status_code == 200 :
    for i in r.json()["resources"] :
        if i["name"] == "Department Lookup" :
            artifact_id = i["artifact_id"]
            version_id = i["version_id"]
            print("artifact_id = ", artifact_id, " version_id = " , version_id)
            break
else :
    print("Error in retrieving reference data artifacts, Code = ", r.status_code)
    print(r.text)

if artifact_id is None or version_id is None:
    print("Department Lookup not found")
else :
    urlSuffix='/v4/reference_data_sets/' + artifact_id + '/versions/' + version_id + '/value_imports'
    headers = {"Authorization" : "Bearer " + access_token }
    import_parameters = {
        "artifact_id_mode": False,
        "code": "DEPARTMENT_CODE",
        "first_row_header": True,
        "import_relationships_only": False,
        "skip_workflow_if_possible": False,
        "trim_white_spaces": True,
        "value": "DEPARTMENT_EN",
        "value_conflicts": "OVERWRITE"
    }

    # The context manager closes the CSV file once the upload request has been sent.
    with open(IMPORT_CSV_FILE,'rb') as csv_file :
        files={
            'import_csv_file'   : ('import_csv_file', csv_file ),
            # Serialize as JSON: str() of a dict yields Python syntax
            # (single quotes, True/False), which is not valid JSON.
            'import_parameters' : (None, json.dumps(import_parameters))
        }
        r = requests.post(myconn.urlRequest(urlSuffix), headers=headers, files=files)

    if r.status_code == 202 :
        import_id = r.json()["import_info"]["import_id"]
        print(f"----- Import process started: {import_id} ----- ")
        print("----- Entering wait loop ------")

        # Taken from the response of the import request, before `r` is reused for polling.
        workflow_id = r.json().get("workflow_id")
        if workflow_id is None :
            print("No workflow_id returned by the import request")

        urlSuffix='/v4/reference_data_sets/' + artifact_id + '/versions/' + version_id + '/value_imports/' + import_id
        headers = {"accept": "application/json", "Authorization" : "Bearer " + access_token}

        while True :
            r = requests.get(myconn.urlRequest(urlSuffix), headers=headers)
            if r.status_code != 200 :
                print("Error with the request. Code: ", r.status_code)
                print(r.text)
                break
            status = r.json()["import_info"]["import_state"]
            if status != "IN_PROGRESS" :
                # Report the import state itself, and only when polling succeeded.
                print("Import finished. Status = ", status)
                break
            print ("Import in progess, please wait")
            time.sleep(5)
    else :
        print("Error with the request. Code: ", r.status_code)
        print(r.text)

#### 6.b. Publish the changes

In [None]:
# Reset so that a task id left over from an earlier cell is never published by mistake.
task_id = None

if not workflow_id :
    print("No workflow id available - run the import cell first")
else :
    # Fetch the workflow together with its user tasks to find the task to complete.
    urlSuffix='/v3/workflows/' + workflow_id + '?include_user_tasks=true'
    headers = {"accept": "application/json", "Authorization" : "Bearer " + access_token}
    r = requests.get(myconn.urlRequest(urlSuffix), headers=headers)

    if r.status_code == 200 :
        user_tasks = r.json()["entity"]["user_tasks"]
        for i in user_tasks :
            # No break: when several tasks match, the last one is used.
            if i["metadata"]["workflow_id"] == workflow_id :
                task_id = i["metadata"]["task_id"]
        if task_id is None :
            print("No user task found for workflow ", workflow_id)
    else :
        print("Error in retrieving the workflow, Code = ", r.status_code)
        print(r.text)

if task_id is not None :
    # Complete the user task with the "#publish" action.
    urlSuffix='/v3/workflow_user_tasks/' + task_id + '/actions'
    headers = {"accept": "application/json", "Authorization" : "Bearer " + access_token}
    payload = {'action': 'complete', 'form_properties': [{'id': 'action', 'value': '#publish'}]}
    r = requests.post(myconn.urlRequest(urlSuffix), headers=headers, json=payload)

    if r.status_code == 202 or r.status_code == 204 :
        print("Publish Successful, Code = ", r.status_code)
    else :
        print("Error in publishing artifacts, Code = ", r.status_code)
        print(r.text)

### 7. Load Position Lookup Data

#### 7.a. Add the new data

In [None]:

print("---- Load Position Lookup Data from CSV----")

IMPORT_CSV_FILE = "artifacts/governance-reference-position.csv"
IMPORT_CSV_FILE = LOCAL_DIR_PREFIX + IMPORT_CSV_FILE

artifact_id = None
version_id = None
# Reset so that a failed or skipped import never leaves the workflow id of an
# earlier cell in place for the publish cell to pick up.
workflow_id = None

# List the published reference data sets and pick the one named "Position Lookup".
urlSuffix='/v3/governance_artifact_types/reference_data?workflow_status=published&limit=200'
headers = {"accept": "application/json" ,"Authorization" : "Bearer " + access_token}

r = requests.get(myconn.urlRequest(urlSuffix), headers=headers)

if r.status_code == 200 :
    for i in r.json()["resources"] :
        if i["name"] == "Position Lookup" :
            artifact_id = i["artifact_id"]
            version_id = i["version_id"]
            print("artifact_id = ", artifact_id, " version_id = " , version_id)
            break
else :
    print("Error in retrieving reference data artifacts, Code = ", r.status_code)
    print(r.text)

if artifact_id is None or version_id is None:
    print("Position Lookup not found")
else :
    urlSuffix='/v4/reference_data_sets/' + artifact_id + '/versions/' + version_id + '/value_imports'
    headers = {"Authorization" : "Bearer " + access_token }
    import_parameters = {
        "artifact_id_mode": False,
        "code": "POSITION_CODE",
        "first_row_header": True,
        "import_relationships_only": False,
        "skip_workflow_if_possible": False,
        "trim_white_spaces": True,
        "value": "POSITION_EN",
        "value_conflicts": "OVERWRITE"
    }

    # The context manager closes the CSV file once the upload request has been sent.
    with open(IMPORT_CSV_FILE,'rb') as csv_file :
        files={
            'import_csv_file'   : ('import_csv_file', csv_file ),
            # Serialize as JSON: str() of a dict yields Python syntax
            # (single quotes, True/False), which is not valid JSON.
            'import_parameters' : (None, json.dumps(import_parameters))
        }
        r = requests.post(myconn.urlRequest(urlSuffix), headers=headers, files=files)

    if r.status_code == 202 :
        import_id = r.json()["import_info"]["import_id"]
        print(f"----- Import process started: {import_id} ----- ")
        print("----- Entering wait loop ------")

        # Taken from the response of the import request, before `r` is reused for polling.
        workflow_id = r.json().get("workflow_id")
        if workflow_id is None :
            print("No workflow_id returned by the import request")

        urlSuffix='/v4/reference_data_sets/' + artifact_id + '/versions/' + version_id + '/value_imports/' + import_id
        headers = {"accept": "application/json", "Authorization" : "Bearer " + access_token}

        while True :
            r = requests.get(myconn.urlRequest(urlSuffix), headers=headers)
            if r.status_code != 200 :
                print("Error with the request. Code: ", r.status_code)
                print(r.text)
                break
            status = r.json()["import_info"]["import_state"]
            if status != "IN_PROGRESS" :
                # Report the import state itself, and only when polling succeeded.
                print("Import finished. Status = ", status)
                break
            print ("Import in progess, please wait")
            time.sleep(5)
    else :
        print("Error with the request. Code: ", r.status_code)
        print(r.text)

#### 7.b. Publish the draft

In [None]:
# Reset so that a task id left over from an earlier cell is never published by mistake.
task_id = None

if not workflow_id :
    print("No workflow id available - run the import cell first")
else :
    # Fetch the workflow together with its user tasks to find the task to complete.
    urlSuffix='/v3/workflows/' + workflow_id + '?include_user_tasks=true'
    headers = {"accept": "application/json", "Authorization" : "Bearer " + access_token}
    r = requests.get(myconn.urlRequest(urlSuffix), headers=headers)

    if r.status_code == 200 :
        user_tasks = r.json()["entity"]["user_tasks"]
        for i in user_tasks :
            # No break: when several tasks match, the last one is used.
            if i["metadata"]["workflow_id"] == workflow_id :
                task_id = i["metadata"]["task_id"]
        if task_id is None :
            print("No user task found for workflow ", workflow_id)
    else :
        print("Error in retrieving the workflow, Code = ", r.status_code)
        print(r.text)

if task_id is not None :
    # Complete the user task with the "#publish" action.
    urlSuffix='/v3/workflow_user_tasks/' + task_id + '/actions'
    headers = {"accept": "application/json", "Authorization" : "Bearer " + access_token}
    payload = {'action': 'complete', 'form_properties': [{'id': 'action', 'value': '#publish'}]}
    r = requests.post(myconn.urlRequest(urlSuffix), headers=headers, json=payload)

    if r.status_code == 202 or r.status_code == 204 :
        print("Publish Successful, Code = ", r.status_code)
    else :
        print("Error in publishing artifacts, Code = ", r.status_code)
        print(r.text)