## Create persons



In [1]:
# import relevant packages
from getpass import getpass
import requests
import os
import json
import glob
import pandas as pd
import openMINDS
import openMINDS.version_manager

In [None]:
# Place the script in the same folder as the Excel file or define the location of the files
cwd = os.getcwd()
answer = input("Is this where your files are stored: " + cwd + "? yes (y) or no (n) " ).strip().lower()

# Keep asking until the answer is usable, so that fpath is always defined below
while answer not in ("y", "n"):
    answer = input("Please answer yes (y) or no (n): ").strip().lower()

if answer == "y":
    fpath = cwd
else:
    fpath = input("Please define you path: ")

# Load information for the persons
# The workbook is read from fpath (not cwd) so that a user-defined location is honoured;
# the person information is expected on the sheet named 'P'.
person_file = input("What is name of the file with the person information? ")
personList = pd.read_excel(os.path.join(fpath, person_file + '.xlsx'), sheet_name = 'P')

# Prefix of instance identifiers in the Knowledge Graph
kg_prefix = "https://kg.ebrains.eu/api/instances/"

# Folder (next to the input file) in which the created instances are stored
output_path = os.path.join(fpath, "instances_" + person_file)

### Create instances for persons

To create instances that conform to openMINDS, we use the openMINDS Python package.

In [2]:
# Initialise the local copy of openMINDS
# NOTE(review): judging by the cell output, init() checks for a local openMINDS
# directory and for updates - confirm against the package documentation.
openMINDS.version_manager.init()
# Presumably selects the openMINDS schema version ('v3') used for the instances - TODO confirm
openMINDS.version_manager.version_selection('v3')
# Helper object; it is used further down to create the collections that hold the instances
helper = openMINDS.Helper()

openMINDS directory exists
Checking for updates


In [5]:
# Example call: add a contact information instance (identified by its email) to a collection.
# As the cell output shows, the call returns the identifier of the new instance as a URL string.
# NOTE(review): `mycol` is not defined at notebook level in the cells shown here; it has to be
# created first, e.g. with `mycol = helper.create_collection()`.
mycol.add_core_contactInformation(email="openMINDS@ebrains.eu")

'https://localhost/contactInformation/af204e80-faa4-11ec-adb2-50eb718d929d'

In [None]:
# Function to create person instances including email address and orcid instances (if available)
def createInstances(df):
    """
    Create and save an openMINDS person instance for every row of ``df``.

    For each person a new collection is created, the person instance is
    added to it and, where an email address and/or ORCID is given, a contact
    information and/or ORCID instance is added and linked to the person.
    Every collection is saved to ``output_path``.

    Parameters
    ----------
    df : pandas DataFrame
        DataFrame with one row per person and the columns ``firstName``,
        ``lastName``, ``email`` and ``ORCID``. Empty ``email`` and ``ORCID``
        cells are allowed.

    Returns
    -------
    data : pandas DataFrame
        Overview of all information and newly created instances, one row
        per person. The ``*_uuid`` columns hold the last path segment of the
        instance identifier, or None when no such instance was created.

    """

    def _uuid(instance_id):
        # Identifiers are URL strings; the UUID is their last path segment.
        # Persons without email/ORCID have no such instance (None).
        return None if instance_id is None else instance_id.split("/")[-1]

    columns = ["firstName", "lastName", "person_uuid",
               "email", "email_uuid", "orcid", "orcid_uuid"]
    rows = []

    # Iterate over the column values directly, so that the function does not
    # depend on the DataFrame having a default 0..n-1 index.
    for first, last, email, orcid in zip(df.firstName, df.lastName, df.email, df.ORCID):

        print(f"Creating person {first} {last}\n")

        # initiate the collection into which you will store all metadata instances
        mycol = helper.create_collection()

        # create a metadata instance for the openMINDS Person schema
        person_id = mycol.add_core_person(givenName=first)

        # add more metadata to a created instance
        mycol.get(person_id).familyName = last

        # if an email address exists, make the contact information and then add to person instance
        email_id = None if pd.isnull(email) else mycol.add_core_contactInformation(email=email)
        mycol.get(person_id).contactInformation = email_id

        # if orcid exists, make orcid and then add to person instance
        orcid_id = None if pd.isnull(orcid) else mycol.add_core_ORCID(identifier=orcid)
        mycol.get(person_id).digitalIdentifier = orcid_id

        # Collect plain dicts and build the DataFrame once at the end:
        # DataFrame.append was removed in pandas 2.0.
        rows.append({"firstName": first,
                     "lastName": last,
                     "person_uuid": _uuid(person_id),
                     "email": email,
                     "email_uuid": _uuid(email_id),
                     "orcid": orcid,
                     "orcid_uuid": _uuid(orcid_id)})

        mycol.save(os.path.join(output_path, ""))

    return pd.DataFrame(rows, columns=columns)


Run the cell below to create the instances and save an overview file in the output folder

In [None]:
# Create instances and save them
data = createInstances(personList)

# A DataFrame has no unambiguous truth value (`if data:` raises ValueError),
# so check explicitly whether any person was created.
if not data.empty:
    # make sure the output folder exists before the overview file is written
    os.makedirs(output_path, exist_ok=True)
    savedInstances = os.path.join(output_path, "createdPersons.csv")
    data.to_csv(savedInstances, index = False, header=True)

### Authentication

To be able to upload the newly created instances directly to the Knowledge Graph editor via the API, you need an access token. To request a token, follow this link: https://nexus-iam.humanbrainproject.org/v0/oauth2/authorize or copy your token from the Knowledge Graph Editor (if you have access).

In [None]:
# Function to upload the instances to the KGE
def upload(instances_fnames, token, space_name):
    """
    Post locally stored JSON instances to the Knowledge Graph editor (KGE).

    All files are read first; then the identifiers and two type names of
    every instance are rewritten; finally each instance is posted on its own.

    Parameters
    ----------
    instances_fnames : List
        Paths of the JSON files that hold the instances to upload
    token : string
        Authorisation token, sent as Bearer token in the request header
    space_name : string
        Space the instances are uploaded to, e.g. "dataset", "common", etc.

    Returns
    -------
    response : dictionary
        Maps the UUID of every instance to the response object of its
        POST request

    """

    def _last_segment(identifier):
        # the UUID is whatever follows the final "/"
        return identifier.split("/")[-1]

    def _read_json(path):
        with open(path, 'r') as handle:
            return json.load(handle)

    headers = {
        "accept": "*/*",
        "Authorization": "Bearer " + token,
        "Content-Type": "application/json",
    }

    # The placeholder takes the UUID; the query string selects the space
    url_template = "https://core.kg.ebrains.eu/v3-beta/instances/{}?space=" + space_name
    id_prefix = "https://kg.ebrains.eu/api/instances/"

    # Type-name endings written by the openMINDS package and their corrected capitalisation
    type_fixes = (("Servicelink", "ServiceLink"), ("Url", "URL"))

    # Feedback printed for the status codes that are handled explicitly
    status_notes = {
        200: "OK!",
        409: "Instance already exists",
        401: "Token not valid, authorisation not successful",
    }

    payloads = [_read_json(path) for path in instances_fnames]

    # Point the identifiers to the KG namespace and correct the type names
    for payload in payloads:
        payload["@id"] = id_prefix + _last_segment(payload["@id"])
        if "openDataIn" in payload:
            linked = payload["openDataIn"][0]
            linked["@id"] = id_prefix + _last_segment(linked["@id"])
        for wrong_ending, corrected in type_fixes:
            if payload["@type"].endswith(wrong_ending):
                segments = payload["@type"].split("/")[:-1]
                payload["@type"] = "/".join(segments + [corrected])

    # Upload to the KGE
    print("\nUploading instances now:\n")

    total = len(payloads)
    response = {}
    for position, payload in enumerate(payloads, start=1):
        print(f"Posting instance {position}/{total}")
        uuid = _last_segment(payload["@id"])
        result = requests.post(url_template.format(uuid), json=payload, headers=headers)
        response[uuid] = result
        if result.status_code in status_notes:
            print(result, status_notes[result.status_code])
        else:
            print(result)

    return response