# Install and Import required modules

In [1]:
pip install prov prov-db-connector requests pydot pydot-ng graphviz OpenSpecimenAPIconnector

Collecting prov
  Downloading prov-2.0.0-py3-none-any.whl (421 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m421.5/421.5 KB[0m [31m18.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting prov-db-connector
  Downloading prov-db-connector-0.5.1.tar.gz (46 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.6/46.6 KB[0m [31m15.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting pydot
  Downloading pydot-1.4.2-py2.py3-none-any.whl (21 kB)
Collecting pydot-ng
  Downloading pydot_ng-2.0.0-py2.py3-none-any.whl (20 kB)
Collecting graphviz
  Downloading graphviz-0.20-py3-none-any.whl (46 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.0/47.0 KB[0m [31m13.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting OpenSpecimenAPIconnector
  Downloading OpenSpecimenAPIconnector-0.9.3-py3-none-any.whl (86 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.4/86.4 KB[0m [31m6.4 MB/s

### OpenSpecimenAPI Connector dependencies

In [2]:
#pip install xlsxwriter openpyxl

Note: you may need to restart the kernel to use updated packages.


## Imports

In [2]:
import datetime
import json
import logging
import os
import uuid
from pprint import pprint

import OpenSpecimenAPIconnector as OSconn
import OpenSpecimenAPIconnector.os_core as os_core
import OpenSpecimenAPIconnector.os_util as os_util
import pandas as pd
from prov.model import ProvDocument
from provdbconnector import ProvDb
from provdbconnector.db_adapters.neo4j.neo4jadapter import Neo4jAdapter

# URL and authentication for OpenSpecimen and Neo4j

In [3]:
# Connection settings for OpenSpecimen and Neo4j.
# Every value can be overridden through an environment variable so that real
# credentials never have to be stored in the notebook; the fallbacks are the
# demo defaults and must not be used against a production system.
os_baseurl = os.environ.get("OPENSPECIMEN_URL", 'http://openspecimen:8080/openspecimen/rest/ng')
loginname = os.environ.get("OPENSPECIMEN_USER", "admin")
password = os.environ.get("OPENSPECIMEN_PASSWORD", "Login@123")
os_auth = (loginname, password)
neo4j_auth = {
    "user_name": os.environ.get("NEO4J_USER", "neo4j"),
    "user_password": os.environ.get("NEO4J_PASSWORD", "admin"),
    "host": os.environ.get("NEO4J_HOST", "pap-neo4j:7687"),
}

# Namespaces

In [4]:
# Building blocks of the external IDs written to the provenance store:
# "<instance>-<kind>-<id>".
NAMESPACE_OS_INSTANCE = 'ExampleInstance'
NAMESPACE_OS_ACTION = 'OS-ACTION'
NAMESPACE_OS_AGENT = 'OS-AGENT'
NAMESPACE_OS_SPECIMEN = 'SPECIMEN'

# PROV namespace prefixes of the biobank; the matching URIs are registered
# wherever a ProvDocument is created.
NAMESPACE_ACTIONS = 'ns_biobank_activity'
NAMESPACE_AGENTS = 'ns_biobank_agent'
NAMESPACE_BUNDLES = 'ns_biobank_bundle'
NAMESPACE_DATETIME = 'ns_biobank_datetime'
NAMESPACE_ENITIES = 'ns_biobank_specimens'
NAMESPACE_OTHER = 'ns_biobank_other_attributes'

# Prefix of the sending organisation.
NAMESPACE_SENDER = 'ns_pathology'

# Prefixes of external vocabularies.
NAMESPACE_COMMON_MODEL = 'cpm'
NAMESPACE_DCT = 'dct'

# Helper Functions

In [23]:
def get_all_cps():
    """Return whatever the global ``os_cp`` client reports as all collection protocols."""
    collection_protocols = os_cp.get_all_collection_protocols()
    return collection_protocols


def get_all_cprs(params):
    """Return the participant matches for ``params`` from the global ``os_participant`` client.

    ``params`` is forwarded unchanged as the ``params`` keyword argument of
    ``os_participant.get_participant_matches``.
    """
    matches = os_participant.get_participant_matches(params=params)
    return matches


def create_dummy_data(num_primary_samples=1, derivates_per_samples=1, aliqutes_per_derivate=1, num_agents=1):
    """Build a synthetic PROV document with a sample -> derivate -> aliquot hierarchy.

    Each agent gets one derivation activity and one aliquotation activity.
    Primary sample ``ps`` is processed by the activities of agent
    ``ps % num_agents``, so ``num_agents`` must be at least 1 whenever
    derivates are generated.

    :param num_primary_samples: number of primary sample entities.
    :param derivates_per_samples: derivates created from every primary sample.
    :param aliqutes_per_derivate: aliquots created from every derivate.
    :param num_agents: number of agents (and activity pairs) to create.
    :return: the populated ``ProvDocument``.
    """
    doc = ProvDocument()
    doc.add_namespace(NAMESPACE_ENITIES, "http://biobank_URI/specimens/")
    doc.add_namespace(NAMESPACE_AGENTS, "http://biobank_URI/personal/")
    doc.add_namespace(NAMESPACE_ACTIONS, "http://biobank_URI/SOPs/")
    doc.add_namespace(NAMESPACE_BUNDLES, "http://biobank_URI/bundle/")
    doc.add_namespace("snomed", 'http://purl.bioontology.org/ontology/SNOMEDCT')

    # One derivation and one aliquotation activity per agent.
    derivations = []
    aliqoutations = []
    for agent_no in range(num_agents):
        derivation = f"{NAMESPACE_ACTIONS}:DERIVATION_BY_AGENT_{agent_no}"
        derivations.append(derivation)
        aliqoutation = f"{NAMESPACE_ACTIONS}:ALIQUTATION_BY_AGENT_{agent_no}"
        aliqoutations.append(aliqoutation)
        doc.activity(derivation)
        doc.activity(aliqoutation)
        agent = f"{NAMESPACE_AGENTS}:AGENT_{agent_no}"
        # Declare the agent explicitly so the associations below refer to a
        # typed prov:Agent record instead of an undeclared identifier.
        doc.agent(agent)
        doc.association(activity=derivation, agent=agent)
        doc.association(activity=aliqoutation, agent=agent)

    # Entities: every level is declared before the relations that mention it.
    for ps in range(num_primary_samples):
        ps_entity = f"{NAMESPACE_ENITIES}:PrimarySample_{ps}"
        doc.entity(ps_entity)
        for d in range(derivates_per_samples):
            derivation = derivations[ps % num_agents]
            derivate_entity = f"{NAMESPACE_ENITIES}:Derivate_{ps}.{d}"
            doc.entity(derivate_entity)
            doc.wasGeneratedBy(derivate_entity, derivation)
            doc.used(derivation, ps_entity)
            doc.wasDerivedFrom(derivate_entity, ps_entity)
            for aliquot_no in range(aliqutes_per_derivate):
                aliqoutation = aliqoutations[ps % num_agents]
                aliquote_entity = f"{NAMESPACE_ENITIES}:Aliquote_{ps}.{d}.{aliquot_no}"
                doc.entity(aliquote_entity)
                doc.wasGeneratedBy(aliquote_entity, aliqoutation)
                doc.used(aliqoutation, derivate_entity)
                doc.wasDerivedFrom(aliquote_entity, derivate_entity)

    return doc

def _namespaces_from_record(record):
    """Return the prefix -> URI map stored in ``record.metadata['namespaces']``.

    The metadata value may be a dict, a JSON string, or a list of JSON strings
    (merged in order); anything else yields an empty map.
    """
    raw = record.metadata['namespaces']
    if isinstance(raw, dict):
        return dict(raw)
    if isinstance(raw, str):
        return json.loads(raw)
    merged = {}
    if isinstance(raw, list):
        for item in raw:
            merged.update(json.loads(item))
    return merged


# TODO use identifier directly (already includes namespace)
def get_prov_for_sample(prov_api, identifier, bundleName, bundle_to_fill=None, namespace_dict=None,
                        prov_type='prov:Entity', namespace='http://biobank_URI/specimens/', exclude_Bundles=True,
                        max_steps=1, stop_at_entity=None, getAgents=False):
    """Collect the provenance neighbourhood of one node into a bundle.

    Queries ``prov_api._adapter.get_records_tail`` (depth 1) for the node
    ``namespace + identifier``, copies the returned records into the bundle and
    recurses along derivation records towards the entities that were used.

    :param prov_api: object exposing ``_adapter.get_records_tail`` and ``_parse_record``.
    :param identifier: local part of the node identifier (without prefix/URI).
    :param bundleName: name of the bundle created when ``bundle_to_fill`` is None.
    :param bundle_to_fill: existing bundle to extend; a new document and bundle
        are created when omitted.
    :param namespace_dict: prefix -> URI map. It is registered on a newly created
        document and used to translate URIs back to prefixes. When it is not a
        dict, the map is read from the metadata of the first processed record.
    :param prov_type: ``prov_type`` metadata value of the start node.
    :param namespace: namespace URI that is prepended to ``identifier``.
    :param exclude_Bundles: skip records whose ``prov:type`` attribute is ``prov:Bundle``.
    :param max_steps: remaining derivation hops; at 0 derivations are no longer followed.
    :param stop_at_entity: identifier at which derivations are no longer followed.
    :param getAgents: when True, add the agents of every neighbouring activity
        together with the association that links them.
    :return: the filled bundle.
    """
    if bundle_to_fill is None:
        doc = ProvDocument()
        if isinstance(namespace_dict, dict):
            for key, value in namespace_dict.items():
                doc.add_namespace(key, value)
        bundle_to_fill = doc.bundle(f"{NAMESPACE_BUNDLES}:{bundleName}")

    print(f"all neighbors for {namespace}{identifier}")
    bundle_to_fill.entity(f"{namespace}{identifier}")

    rec_list = prov_api._adapter.get_records_tail(metadata_dict={'prov_type': f'{prov_type}',
                                                                 'identifier': f'{namespace}{identifier}'}, depth=1)
    if len(rec_list) == 0:
        return bundle_to_fill

    nss = namespace_dict

    for record in rec_list:
        if record.metadata['prov_type'] == 'prov:Entity':
            continue
        # TODO check if prov_type correct
        if exclude_Bundles and 'prov:type' in record.attributes and record.attributes['prov:type'] == 'prov:Bundle':
            continue
        if not isinstance(nss, dict):
            # No usable map was supplied: fall back to the namespaces stored with the record.
            nss = _namespaces_from_record(record)
        inv_nss = {v: k for k, v in nss.items()}

        # Only derivations that produced the current node lead further back in its history.
        if record.metadata['prov_type'] == 'prov:Derivation' and record.attributes[
                'prov:generatedEntity'] != f"{inv_nss[namespace]}:{identifier}":
            continue
        if (max_steps == 0 or identifier == stop_at_entity) and record.metadata['prov_type'] == 'prov:Derivation':
            continue

        if getAgents and record.metadata['prov_type'] == 'prov:Activity':
            activity_id, ns = get_record_id_and_namspace(record)
            if activity_id == identifier:
                continue
            # NOTE(review): on this path the activity record itself is not parsed into the
            # bundle, only its agents and associations - confirm that this is intended.
            act_neigh_rec_list = prov_api._adapter.get_records_tail(
                metadata_dict={'prov_type': f'{record.metadata["prov_type"]}',
                               'identifier': f'{ns}{activity_id}'},
                depth=1)
            agent_recs = filter_records_list(act_neigh_rec_list, metadata_dict={'prov_type': 'prov:Agent'})
            for agent_rec in agent_recs:
                agent_id, agent_ns = get_record_id_and_namspace(agent_rec)
                assoc_recs = filter_records_list(act_neigh_rec_list,
                                                 attribute_dict={'prov:agent': f'{inv_nss[agent_ns]}:{agent_id}',
                                                                 'prov:activity': f'{inv_nss[ns]}:{activity_id}'},
                                                 metadata_dict={'prov_type': 'prov:Association'})
                if len(assoc_recs) > 1:
                    logging.warning(
                        f"More than 1 Association between Agent {inv_nss[agent_ns]}:{agent_id} and Activity {inv_nss[ns]}:{activity_id}: using only the first!")
                if assoc_recs:
                    prov_api._parse_record(bundle_to_fill, assoc_recs[0])

                prov_api._parse_record(bundle_to_fill, agent_rec)

            continue

        prov_api._parse_record(bundle_to_fill, record)
        if record.metadata['prov_type'] == 'prov:Derivation':
            # TODO how much sense does it make to use the identifier_original namespace if it
            # always has to be mapped back anyway? Maybe take it from the identifier directly.
            record_id, record_ns = get_record_id_and_namspace(record, id_to_clean=record.attributes['prov:usedEntity'])
            if record_id == identifier:
                continue
            # Follow the derivation one step towards the entity that was used.
            get_prov_for_sample(prov_api=prov_api,
                                identifier=record_id,
                                bundleName=bundleName,
                                bundle_to_fill=bundle_to_fill,
                                prov_type='prov:Entity',
                                namespace=record_ns,
                                namespace_dict=nss,
                                max_steps=max_steps - 1,
                                stop_at_entity=stop_at_entity,
                                getAgents=getAgents)
    return bundle_to_fill


def get_known_agents():
    """Return the records the global ``prov_api`` adapter finds for ``prov_type == 'prov:Agent'``."""
    agent_metadata = {'prov_type': 'prov:Agent'}
    return prov_api._adapter.get_records_by_filter(metadata_dict=agent_metadata)


def get_record_id_and_namspace(record, id_to_clean=None):
    """Split a prefixed identifier into its local part and its namespace URI.

    The prefix -> URI map is read from ``record.metadata['namespaces']``, which
    may be a dict, a JSON string, or a list of JSON strings.

    :param record: object with a ``metadata`` mapping.
    :param id_to_clean: prefixed identifier to split; when it is not a string,
        ``record.metadata['identifier_original']`` is split instead.
    :return: tuple ``(local_id, namespace_uri)``.
    :raises ValueError: if the identifier starts with none of the known prefixes.
    """
    raw_namespaces = record.metadata['namespaces']
    if isinstance(raw_namespaces, dict):
        nss = raw_namespaces
    elif isinstance(raw_namespaces, list):
        nss = {k: v for d in raw_namespaces for k, v in json.loads(d).items()}
    elif isinstance(raw_namespaces, str):
        nss = json.loads(raw_namespaces)
    else:
        nss = {}

    if isinstance(id_to_clean, str):
        rec_id = id_to_clean
    else:
        rec_id = record.metadata['identifier_original']

    # Match the prefix only at the start of the identifier: a substring test would let a
    # short prefix (e.g. "agent") hit inside a longer one (e.g. "ns_biobank_agent").
    for prefix, uri in nss.items():
        marker = f'{prefix}:'
        if rec_id.startswith(marker):
            return rec_id[len(marker):], uri

    raise ValueError(f"identifier {rec_id!r} does not start with any known namespace prefix")

def filter_records_list(record_list, attribute_dict=None, metadata_dict=None):
    """Return the records that match every given attribute and metadata value.

    :param record_list: iterable of objects with ``attributes`` and ``metadata`` mappings.
    :param attribute_dict: key/value pairs that must all be present in ``record.attributes``;
        ignored unless it is a dict.
    :param metadata_dict: key/value pairs that must all be present in ``record.metadata``;
        ignored unless it is a dict.
    :return: new list with the matching records in their original order.
    """
    wanted_attributes = attribute_dict if isinstance(attribute_dict, dict) else {}
    wanted_metadata = metadata_dict if isinstance(metadata_dict, dict) else {}

    def _keep(record):
        attributes_ok = not any(
            key not in record.attributes.keys() or record.attributes[key] != value
            for key, value in wanted_attributes.items()
        )
        metadata_ok = not any(
            key not in record.metadata.keys() or record.metadata[key] != value
            for key, value in wanted_metadata.items()
        )
        return attributes_ok and metadata_ok

    return [record for record in record_list if _keep(record)]


def get_latest_instance_for_extid(external_id):
    """Return the local identifier of the newest PROV entity with the given external ID.

    Queries the global ``prov_api`` adapter, keeps only ``prov:Entity`` records and
    picks the one with the greatest ``ns_biobank_datetime:inXSDDateTime`` attribute.

    :param external_id: value of the ``ns_biobank_other_attributes:externalID`` attribute.
    :return: ``identifier_original`` of the newest entity without its namespace prefix.
    :raises IndexError: if no entity with this external ID exists.
    """
    filter_attributes = {
        'ns_biobank_other_attributes:externalID': external_id,
    }
    # TODO figure out why metadata filter for prov_type has no effect
    filter_metadata = {
        'prov_type': 'prov:Entity'
    }

    records = prov_api._adapter.get_records_by_filter(attributes_dict=filter_attributes,
                                                      metadata_dict=filter_metadata)
    # Filter again locally because the metadata filter above is not reliable (see TODO).
    entities = [rec for rec in records if rec.metadata['prov_type'] == 'prov:Entity']
    if not entities:
        raise IndexError(f"no prov:Entity with externalID {external_id!r} found")

    latest = max(entities, key=lambda rec: rec.attributes['ns_biobank_datetime:inXSDDateTime'])
    identifier_string = latest.metadata['identifier_original']
    # Drop everything up to and including the first ':' (the namespace prefix).
    return identifier_string[identifier_string.find(':') + 1:]


def get_agent_provid(external_id, known_agents):
    """Return the PROV identifier to use for the agent with ``external_id``.

    :param external_id: external ID of the agent to look up.
    :param known_agents: iterable of agent records with ``attributes`` and
        ``metadata`` mappings.
    :return: ``metadata['identifier']`` of the first known agent whose
        ``ns_biobank_other_attributes:externalID`` attribute equals ``external_id``;
        otherwise a new ``<NAMESPACE_AGENTS>:<uuid4>`` identifier.
    """
    external_id_key = 'ns_biobank_other_attributes:externalID'

    # check if agent already exists; agents stored without an external ID are skipped
    for agent in known_agents:
        if external_id_key in agent.attributes and agent.attributes[external_id_key] == external_id:
            return agent.metadata['identifier']

    # if agent does not exist, create new provid
    return f"{NAMESPACE_AGENTS}:{str(uuid.uuid4())}"


def create_blank_prov_doc():
    """Return a new ``ProvDocument`` with all biobank namespaces registered."""
    namespace_uris = (
        (NAMESPACE_ENITIES, "http://biobank_URI/specimens/"),
        (NAMESPACE_OTHER, "http://biobank_URI/other_attributes/"),
        (NAMESPACE_DATETIME, "http://www.w3.org/TR/owl-time"),
        (NAMESPACE_AGENTS, "http://biobank_URI/personal/"),
        (NAMESPACE_ACTIONS, "http://biobank_URI/SOPs/"),
        (NAMESPACE_BUNDLES, "http://biobank_URI/bundle/"),
        ("snomed", 'http://purl.bioontology.org/ontology/SNOMEDCT'),
    )
    blank_doc = ProvDocument()
    for prefix, uri in namespace_uris:
        blank_doc.add_namespace(prefix, uri)
    return blank_doc


def create_event_df(specimen_events):
    """Flatten specimen events into one row per event record, sorted by timestamp.

    :param specimen_events: iterable of event dicts with the keys ``name``, ``id``
        and ``records``; every record provides ``recordId``, ``user['id']`` and
        ``fieldValues``.
    :return: DataFrame with the columns ``EventType``, ``eventId``, ``recordId``,
        ``UserId`` and ``TimeStamp``, sorted by ``TimeStamp``. The columns are
        present even when there are no events.
    """
    columns = ['EventType', 'eventId', 'recordId', 'UserId', 'TimeStamp']
    event_rows = [
        {
            'EventType': event['name'],
            'eventId': event['id'],
            'recordId': record['recordId'],
            'UserId': record['user']['id'],
            # NOTE(review): assumes the second field value always holds the event time - confirm.
            'TimeStamp': record['fieldValues'][1]['value'],
        }
        for event in specimen_events
        for record in event['records']
    ]
    # Naming the columns explicitly keeps the sort below valid for an empty event list.
    event_df = pd.DataFrame(event_rows, columns=columns)
    return event_df.sort_values(by=['TimeStamp'])


def _read_event_fields(row, wanted):
    """Fetch the form record behind ``row`` and extract the requested fields.

    ``wanted`` maps a form field name to the key of the field entry to read
    (``'displayValue'`` or ``'value'``). Every requested name is present in the
    result and defaults to ``''`` when the form has no such field. The ``user``
    field is always extracted, as ``"<displayValue>-<value>"``.
    """
    event_details = os_form.get_form_details(row['eventId'], row['recordId'], True)
    values = dict.fromkeys(wanted, '')
    values['user'] = ''
    for detail in event_details['fields']:
        name = detail['name']
        if name == 'user':
            values['user'] = f"{detail['displayValue']}-{detail['value']}"
        elif name in wanted:
            values[name] = detail[wanted[name]]
    return values


def _write_event_provenance(row, user, activity_details, specimen_ids, derive_from_parent=True):
    """Build and save the PROV document for one specimen event.

    The document holds one activity, one agent and one new entity per item of
    ``specimen_ids``. With ``derive_from_parent`` the activity additionally uses
    and invalidates the global ``parent_entity_id`` and every new entity is
    derived from it. Afterwards ``parent_entity_id`` points to the first new entity.

    :param row: event row providing ``EventType``, ``recordId`` and ``TimeStamp``.
    :param user: user part of the agent's external ID.
    :param activity_details: extra activity attributes (name -> value), stored as
        strings under the ``NAMESPACE_OTHER`` prefix in the given order.
    :param specimen_ids: OpenSpecimen IDs to create entities for.
    :param derive_from_parent: link the new entities to the previous one.
    """
    global parent_entity_id
    doc = create_blank_prov_doc()
    # TimeStamp is divided by 1000 before conversion, i.e. it is treated as milliseconds.
    timestamp = datetime.datetime.fromtimestamp(int(row['TimeStamp']) / 1000).isoformat()

    # ACTION
    action = f"{NAMESPACE_ACTIONS}:{str(uuid.uuid4())}"
    action_other_attributes = {
        f"{NAMESPACE_OTHER}:externalID": f"{NAMESPACE_OS_INSTANCE}-{NAMESPACE_OS_ACTION}-{row['EventType']}-{row['recordId']}",
        f"{NAMESPACE_OTHER}:type": f"{row['EventType']}",
    }
    for name, value in activity_details.items():
        action_other_attributes[f"{NAMESPACE_OTHER}:{name}"] = f"{value}"
    action_other_attributes[f"{NAMESPACE_DATETIME}:inXSDDateTime"] = timestamp
    doc.activity(action, other_attributes=action_other_attributes)

    # AGENT
    agent_other_attributes = {
        f"{NAMESPACE_OTHER}:externalID": f"{NAMESPACE_OS_INSTANCE}-{NAMESPACE_OS_AGENT}-{user}"
    }
    agent = get_agent_provid(agent_other_attributes[f"{NAMESPACE_OTHER}:externalID"], known_agents)
    doc.agent(agent, other_attributes=agent_other_attributes)

    # ENTITIES
    entities = []
    for os_specimen_id in specimen_ids:
        entity = f"{NAMESPACE_ENITIES}:{str(uuid.uuid4())}"
        entity_other_attributes = {
            f"{NAMESPACE_OTHER}:externalID": f"{NAMESPACE_OS_INSTANCE}-{NAMESPACE_OS_SPECIMEN}-{os_specimen_id}",
            f"{NAMESPACE_DATETIME}:inXSDDateTime": timestamp,
        }
        doc.entity(entity, other_attributes=entity_other_attributes)
        entities.append(entity)

    # RELATIONS
    doc.association(activity=action, agent=agent)
    for entity in entities:
        doc.wasGeneratedBy(entity, action)
    if derive_from_parent:
        doc.used(action, parent_entity_id)
        doc.wasInvalidatedBy(parent_entity_id, action)
        for entity in entities:
            doc.wasDerivedFrom(entity, parent_entity_id)

    # SAVE DOC
    # The chain pointer is advanced before saving, so it moves on even when the save fails.
    parent_entity_id = entities[0]
    try:
        prov_api.save_document(doc)
    except Exception as e:
        print(f"\n ## Error while saving doc. {e}")


def prov_specimencreationevent(row, specimen_id):
    """Save the provenance of a collection event; its entity starts the specimen's chain.

    Reads the ``procedure`` and ``user`` form fields and sets the global
    ``parent_entity_id`` to the new entity. No link to a previous entity is written.
    """
    print('working on SpecimenCollectionEvent')
    fields = _read_event_fields(row, {'procedure': 'displayValue'})
    _write_event_provenance(row, fields['user'],
                            {'procedure': fields['procedure']},
                            [specimen_id], derive_from_parent=False)


def prov_specimenreceivedevent(row, specimen_id):
    """Save the provenance of a received event, derived from the global ``parent_entity_id``.

    Reads the ``quality`` and ``user`` form fields and advances ``parent_entity_id``.
    """
    print('working on SpecimenReceivedEvent')
    fields = _read_event_fields(row, {'quality': 'displayValue'})
    _write_event_provenance(row, fields['user'],
                            {'quality': fields['quality']},
                            [specimen_id])


def prov_specimentransferevent(row, specimen_id):
    """Save the provenance of a transfer event, derived from the global ``parent_entity_id``.

    Reads the ``fromContainer``, ``toContainer`` and ``user`` form fields and
    advances ``parent_entity_id``.
    """
    print('working on SpecimenTransferEvent')
    fields = _read_event_fields(row, {'fromContainer': 'displayValue', 'toContainer': 'displayValue'})
    _write_event_provenance(row, fields['user'],
                            {'from_container': fields['fromContainer'],
                             'to_container': fields['toContainer']},
                            [specimen_id])


def prov_specimenshipmentshippedevent(row, specimen_id):
    """Save the provenance of a shipment-shipped event, derived from the global ``parent_entity_id``.

    Reads the ``name`` (shipment), ``sendingSite`` and ``user`` form fields and
    advances ``parent_entity_id``.
    """
    print('working on SpecimenShipmentShippedEvent')
    fields = _read_event_fields(row, {'name': 'value', 'sendingSite': 'displayValue'})
    _write_event_provenance(row, fields['user'],
                            {'shipment': fields['name'],
                             'sending_site': fields['sendingSite']},
                            [specimen_id])


def prov_specimenshipmentreceivedevent(row, specimen_id):
    """Save the provenance of a shipment-received event, derived from the global ``parent_entity_id``.

    Reads the ``name`` (shipment), ``receivingSite``, ``receivedQuality`` and
    ``user`` form fields and advances ``parent_entity_id``.
    """
    print('working on SpecimenShipmentReceivedEvent')
    fields = _read_event_fields(row, {'name': 'value',
                                      'receivingSite': 'displayValue',
                                      'receivedQuality': 'displayValue'})
    _write_event_provenance(row, fields['user'],
                            {'shipment': fields['name'],
                             'receiving_site': fields['receivingSite'],
                             'received_quality': fields['receivedQuality']},
                            [specimen_id])


def prov_specimenchildrenevent(row, specimen_id):
    """Save the provenance of a children event, derived from the global ``parent_entity_id``.

    Two entities are written: a new instance of the parent specimen and the first
    child listed in the ``children`` form field (an empty ID is used when no child
    is listed). Also reads the ``lineage`` and ``user`` fields.
    """
    print('working on SpecimenChildrenEvent')
    fields = _read_event_fields(row, {'lineage': 'value', 'children': 'value'})
    children = fields['children']
    child_specimen_id = children[0]['id'] if children else ''
    # TODO new parent_entity_id is the leftover from the old parent
    _write_event_provenance(row, fields['user'],
                            {'lineage': fields['lineage']},
                            [specimen_id, child_specimen_id])


# LogIn and Initialize Modules

In [5]:
# Register the OpenSpecimen REST URL and credentials with the connector's config manager.
# NOTE(review): presumably the os_core/os_util clients created below pick these up - confirm.
OSconn.config_manager.set_login(url=os_baseurl, auth=os_auth)
# Provenance store handle using the Neo4j adapter; the helper functions access it as the global `prov_api`.
prov_api = ProvDb(adapter=Neo4jAdapter, auth_info=neo4j_auth)

# OpenSpecimen API clients. The helper functions look up os_participant, os_cp and
# os_form as globals, so this cell has to run before any of them is called.
os_specimen_core = os_core.specimen()
os_participant = os_core.participant()
os_specimen_util = os_util.specimen_util()
os_specimen_events = os_core.specimen_event()
os_cp = os_core.collection_protocol()
os_form = os_core.form()

## Some tests

In [12]:
#OSconn.config_manager.get_url()
#from OpenSpecimenAPIconnector import config_manager
#config_manager.get_url()
#OSconn.specimen()
#os_specimen_core = os_core.specimen()

# Write Provenance for single specimen

In [None]:
specimen_id = 2
parent_entity_id = ''

# Event type -> function that writes the provenance for that kind of event.
# Event types without an entry are skipped.
event_handlers = {
    'SpecimenCollectionEvent': prov_specimencreationevent,
    'SpecimenReceivedEvent': prov_specimenreceivedevent,
    'SpecimenTransferEvent': prov_specimentransferevent,
    'SpecimenShipmentShippedEvent': prov_specimenshipmentshippedevent,
    'SpecimenShipmentReceivedEvent': prov_specimenshipmentreceivedevent,
    'SpecimenChildrenEvent': prov_specimenchildrenevent,
}

specimen_events = os_specimen_events.get_specimen_events(specimen_id)
event_df = create_event_df(specimen_events)

for index, row in event_df.iterrows():
    # Refreshed on every iteration because the previous event may have stored a new agent.
    known_agents = get_known_agents()
    handler = event_handlers.get(row['EventType'])
    if handler is not None:
        handler(row, specimen_id)

# Get Provenance Bundle for single specimen

In [26]:
    # Resolve the external specimen ID to the local identifier of its newest PROV entity.
    prov_id = get_latest_instance_for_extid('ExampleInstance-SPECIMEN-2')


    # Prefix -> URI map: get_prov_for_sample registers it on the new document and
    # uses it to translate namespace URIs back to prefixes.
    namespace_dict = {NAMESPACE_ENITIES: "http://biobank_URI/specimens/",
                      NAMESPACE_AGENTS: "http://biobank_URI/personal/",
                      NAMESPACE_ACTIONS: "http://biobank_URI/SOPs/",
                      NAMESPACE_BUNDLES: "http://biobank_URI/bundle/",
                      "snomed": 'http://purl.bioontology.org/ontology/SNOMEDCT'
                      }


    # Walk back through at most 50 derivation steps and include the agents of each activity.
    # NOTE(review): stop_at_entity='' presumably never equals a real identifier, so only
    # max_steps limits the traversal - confirm.
    bundle = get_prov_for_sample(prov_api=prov_api,
                                      identifier=prov_id,
                                      bundleName="TestBundle",
                                      namespace_dict=namespace_dict,
                                      max_steps=50,
                                      stop_at_entity='',
                                      getAgents=True)



    print(f"Prov.ID of the entitiy of interest: {prov_id}")
    # bundle_id = prov_api.save_bundle(bundle)
    # This would save the bundle as an entity in the Neo4j database
    # pprint renders the PROV-N text as a wrapped tuple of string literals (see the cell output).
    pprint(bundle.get_provn())

all neighbors for http://biobank_URI/specimens/cc848e6e-4410-4f58-8ed3-b8c6d30158e4
all neighbors for http://biobank_URI/specimens/66dfbf30-1ead-493b-80f2-79b859285a94
all neighbors for http://biobank_URI/specimens/5529e0cb-a415-4fd5-bf14-a7f1753cad4e
all neighbors for http://biobank_URI/specimens/f8daa610-9ac5-4a53-87ee-341d988aff79
all neighbors for http://biobank_URI/specimens/d6c23777-f3a8-4b0a-9745-5b5eb7d385f0
all neighbors for http://biobank_URI/specimens/d0c57951-901f-4287-bb29-24fbc821bc94
all neighbors for http://biobank_URI/specimens/b06bf5be-b80c-480e-809c-5cad817709db
Prov.ID of the entitiy of interest: cc848e6e-4410-4f58-8ed3-b8c6d30158e4
('bundle ns_biobank_bundle:TestBundle\n'
 '  prefix ns_biobank_activity <http://biobank_URI/SOPs/>\n'
 '  prefix ns_biobank_agent <http://biobank_URI/personal/>\n'
 '  prefix ns_biobank_other_attributes <http://biobank_URI/other_attributes/>\n'
 '  prefix ns_biobank_specimens <http://biobank_URI/specimens/>\n'
 '  \n'
 '  entity(ns_bioba

# Other Tests - EJ

In [16]:
# Caution: these helpers modify `doc` in place and also return it, so callers may see changes they do not expect.
def _create_association(doc, mem1, mem2):
    doc.association(activity=mem1, agent=mem2)
    return doc

def _create_wasGeneratedBy(doc, mem1, mem2):
    doc.wasGeneratedBy(mem1, mem2)
    return doc

def _derivedFrom(doc, mem1, mem2):
    doc.wasDerivedFrom(mem1,mem2)
    return doc

def _used(doc, mem1, mem2):
    doc.used(mem1,mem2)
    return doc
def ex_os_prov_to_neo():
    """
    Build the example provenance document used by the test cells.

    The example has two processing steps: ``specimen_01`` is aliquoted
    ("aliquotize", agent "Patio") into ``aliquot_01``, which is then turned
    into ``derivative_01`` ("derivate", agent "Maxio").  Each step is recorded
    in its own bundle (``bundle1`` / ``bundle2``).  The same agents,
    activities, entities and relations are additionally recorded directly on
    the document, outside the bundles.

    Relies on the notebook-level prefixes ``NAMESPACE_ENITIES``,
    ``NAMESPACE_AGENTS``, ``NAMESPACE_ACTIONS`` and ``NAMESPACE_BUNDLES`` and
    on the ``_create_association`` / ``_create_wasGeneratedBy`` / ``_used`` /
    ``_derivedFrom`` helpers.

    :return: the populated ``ProvDocument``
    """
    def qualified(prefix, local_name):
        # Identifiers are handed to prov as "prefix:localname" strings.
        return "{}:{}".format(prefix, local_name)

    aliquotize = qualified(NAMESPACE_ACTIONS, "aliquotize")
    derivate = qualified(NAMESPACE_ACTIONS, "derivate")
    patio = qualified(NAMESPACE_AGENTS, "Patio")
    maxio = qualified(NAMESPACE_AGENTS, "Maxio")
    specimen = qualified(NAMESPACE_ENITIES, "specimen_01")
    aliquot = qualified(NAMESPACE_ENITIES, "aliquot_01")
    derivative = qualified(NAMESPACE_ENITIES, "derivative_01")

    doc = ProvDocument()
    # The trailing '/' on the URIs matters: the URI is concatenated with the
    # local part of each node to form its full identifier.
    # NOTE(review): the "snomed" URI has no trailing '/'.
    registered_namespaces = (
        (NAMESPACE_ENITIES, "http://biobank_URI/specimens/"),
        (NAMESPACE_AGENTS, "http://biobank_URI/personal/"),
        (NAMESPACE_ACTIONS, "http://biobank_URI/SOPs/"),
        (NAMESPACE_BUNDLES, "http://biobank_URI/bundle/"),
        ("snomed", 'http://purl.bioontology.org/ontology/SNOMEDCT'),
    )
    for prefix, uri in registered_namespaces:
        doc.add_namespace(prefix, uri)

    # One bundle per processing step (no enclosing "bundle0" is created).
    aliquot_bundle = doc.bundle(qualified(NAMESPACE_BUNDLES, "bundle1"))
    derive_bundle = doc.bundle(qualified(NAMESPACE_BUNDLES, "bundle2"))

    # Elements declared directly on the document.
    for agent_id in (patio, maxio):
        doc.agent(agent_id)
    for activity_id in (aliquotize, derivate):
        doc.activity(activity_id)
    for entity_id in (specimen, aliquot, derivative):
        doc.entity(entity_id)

    # Elements declared inside the bundles; aliquot_01 belongs to both steps.
    aliquot_bundle.agent(patio, other_attributes={"snomed:age": 69})
    derive_bundle.agent(maxio, other_attributes={"snomed:age": 420})
    aliquot_bundle.activity(aliquotize)
    derive_bundle.activity(derivate)
    aliquot_bundle.entity(specimen)
    aliquot_bundle.entity(aliquot)
    derive_bundle.entity(aliquot)
    derive_bundle.entity(derivative)

    # (bundle, activity, agent, input entity, output entity) for each step.
    steps = (
        (aliquot_bundle, aliquotize, patio, specimen, aliquot),
        (derive_bundle, derivate, maxio, aliquot, derivative),
    )

    # Relations inside each bundle.
    for bundle, activity, agent, source, product in steps:
        bundle.association(activity=activity, agent=agent)
        bundle.wasGeneratedBy(product, activity)
        bundle.used(activity, source)
        bundle.wasDerivedFrom(product, source)

    # The same relations on the document itself, grouped by relation type.
    # The helpers modify ``doc`` in place, so their return value is not needed.
    for _bundle, activity, agent, _source, _product in steps:
        _create_association(doc, activity, agent)
    for _bundle, activity, _agent, _source, product in steps:
        _create_wasGeneratedBy(doc, product, activity)
    for _bundle, activity, _agent, source, _product in steps:
        _used(doc, activity, source)
    for _bundle, _activity, _agent, source, product in steps:
        _derivedFrom(doc, product, source)

    return doc

In [17]:
# To start from an empty database, run this Cypher statement in Neo4j first:
#   MATCH (n) DETACH DELETE n

# Build the example document and store it; save_document returns the id that is printed.
docu =  ex_os_prov_to_neo()
# NOTE(review): `id` shadows the Python builtin of the same name. The name is
# kept because the cell that fetches the document as JSON reads `id`.
id = prov_api.save_document(docu)
print(id)

ab13a43d-f272-466f-8320-d54f90e32238


In [9]:
# Read the stored document back by the id returned from save_document and
# pretty-print the JSON string.
# Alternative (disabled): parse the JSON string into a dict first.
#prov_json = json.loads(prov_api.get_document_as_json(document_id=id))
#print(prov_json)
pprint(prov_api.get_document_as_json(document_id=id))

('{"prefix": {"ns_biobank_sop": "http://biobank_URI/SOPs/", '
 '"ns_biobank_personal": "http://biobank_URI/personal/", '
 '"ns_biobank_specimens": "http://biobank_URI/specimens/", '
 '"ns_biobank_bundle": "http://biobank_URI/bundle/", "snomed": '
 '"http://purl.bioontology.org/ontology/SNOMEDCT"}, "wasAssociatedWith": '
 '{"_:id1": {"prov:agent": "ns_biobank_personal:Patio", "prov:activity": '
 '"ns_biobank_sop:aliquotize"}, "_:id2": {"prov:agent": '
 '"ns_biobank_personal:Maxio", "prov:activity": "ns_biobank_sop:derivate"}}, '
 '"used": {"_:id3": {"prov:entity": "ns_biobank_specimens:specimen_01", '
 '"prov:activity": "ns_biobank_sop:aliquotize"}, "_:id5": {"prov:entity": '
 '"ns_biobank_specimens:aliquot_01", "prov:activity": '
 '"ns_biobank_sop:derivate"}}, "wasGeneratedBy": {"_:id4": {"prov:entity": '
 '"ns_biobank_specimens:aliquot_01", "prov:activity": '
 '"ns_biobank_sop:aliquotize"}, "_:id6": {"prov:entity": '
 '"ns_biobank_specimens:derivative_01", "prov:activity": '
 '"ns_bio

In [10]:
# Experiment: look a document up by a bundle URI instead of an id returned by
# save_document. The recorded output is an empty document ('{}').
pprint(prov_api.get_document_as_json(document_id="http://biobank_URI/bundle/bundle0"))

'{}'


In [19]:
# Qualified name of "bundle2" in the bundle namespace registered on `docu`.
# NOTE(review): reads the private `_namespaces` mapping of the document.
kk = QualifiedName(docu._namespaces[NAMESPACE_BUNDLES], "bundle2")
#print(kk)

#prov_element=prov_api.get_element(kk)
#print(prov_element.get_provn())
# Fetch the bundle from the database and print it in PROV-N notation.
print(prov_api.get_bundle(kk).get_provn())

entity(ns_biobank_bundle:bundle2, [prov:type='prov:Bundle'])
bundle ns_biobank_bundle:bundle2
  prefix ns_biobank_personal <http://biobank_URI/personal/>
  prefix snomed <http://purl.bioontology.org/ontology/SNOMEDCT>
  prefix ns_biobank_sop <http://biobank_URI/SOPs/>
  prefix ns_biobank_specimens <http://biobank_URI/specimens/>
  
  agent(ns_biobank_personal:Maxio, [snomed:age=420])
  activity(ns_biobank_sop:derivate, -, -)
  entity(ns_biobank_specimens:aliquot_01)
  entity(ns_biobank_specimens:derivative_01)
  wasAssociatedWith(ns_biobank_sop:derivate, ns_biobank_personal:Maxio, -)
  used(ns_biobank_sop:derivate, ns_biobank_specimens:aliquot_01, -)
  wasGeneratedBy(ns_biobank_specimens:derivative_01, ns_biobank_sop:derivate, -)
  wasDerivedFrom(ns_biobank_specimens:derivative_01, ns_biobank_specimens:aliquot_01, -, -, -)
endBundle


### Update an entity

In [190]:
# Create a second document that declares aliquot_01 again, this time with an
# extra attribute, and save it. The bundle printed in the next cell shows
# aliquot_01 carrying that attribute (presumably merged by identifier -- confirm).
second_prov_document = ProvDocument()
second_prov_document.add_namespace("snomed",'http://purl.bioontology.org/ontology/SNOMEDCT')
second_prov_document.add_namespace(NAMESPACE_ENITIES, "http://biobank_URI/specimens/")

second_prov_document.entity(f"{NAMESPACE_ENITIES}:aliquot_01", other_attributes={"snomed:Specimen": "Slide"})

second_document_id = prov_api.save_document(second_prov_document)

In [191]:
# Qualified name of "bundle2" (reads the private `_namespaces` mapping of `docu`).
kk = QualifiedName(docu._namespaces[NAMESPACE_BUNDLES], "bundle2")
#print(kk)

# Fetched but not used further in this cell.
prov_element=prov_api.get_element(kk)
#print(prov_element.get_provn())
# Print the stored bundle again to inspect it after saving the second document.
print(prov_api.get_bundle(kk).get_provn())

bundle ns_biobank_bundle:bundle2
  prefix ns_biobank_personal <http://biobank_URI/personal/>
  prefix snomed <http://purl.bioontology.org/ontology/SNOMEDCT>
  prefix ns_biobank_sop <http://biobank_URI/SOPs/>
  prefix ns_biobank_specimens <http://biobank_URI/specimens/>
  
  agent(ns_biobank_personal:Maxio, [snomed:age=420])
  activity(ns_biobank_sop:derivate, -, -)
  entity(ns_biobank_specimens:aliquot_01, [snomed:Specimen="Slide"])
  entity(ns_biobank_specimens:derivative_01)
  wasAssociatedWith(ns_biobank_sop:derivate, ns_biobank_personal:Maxio, -)
  used(ns_biobank_sop:derivate, ns_biobank_specimens:aliquot_01, -)
  wasGeneratedBy(ns_biobank_specimens:derivative_01, ns_biobank_sop:derivate, -)
  wasDerivedFrom(ns_biobank_specimens:derivative_01, ns_biobank_specimens:aliquot_01, -, -, -)
endBundle


In [19]:
# Experiment: set an ad-hoc `uri` attribute on the document object.
# NOTE(review): the prefix is spelled 'ns_biobabank' here and in the namespace
# dump at the end of the notebook -- confirm this spelling is intended.
#docu.test="123"
docu.uri= 'ns_biobabank:biobank_URI'

In [33]:
# Rebuild the example document in memory (it is not saved to the database in this cell).
docu =  ex_os_prov_to_neo()


In [155]:
# PROV-N text of the in-memory document.
# NOTE(review): the recorded output uses an `ns_biobank` prefix and a `user2`
# agent, which the ex_os_prov_to_neo shown above does not create -- the output
# appears to come from an earlier version of that function.
docu.get_provn()

'document\n  prefix ns_biobank <http://biobank_URI>\n  \n  agent(ns_biobank:user2)\n  activity(ns_biobank:aliquotize, -, -)\n  activity(ns_biobank:derivate, -, -)\n  entity(ns_biobank:specimen)\n  entity(ns_biobank:aliquot)\n  entity(ns_biobank:derivative)\n  wasAssociatedWith(ns_biobank:aliquotize, ns_biobank:user2, -)\n  wasAssociatedWith(ns_biobank:derivate, ns_biobank:user2, -)\n  wasGeneratedBy(ns_biobank:aliquot, ns_biobank:aliquotize, -)\n  wasGeneratedBy(ns_biobank:derivative, ns_biobank:derivate, -)\n  used(ns_biobank:aliquotize, ns_biobank:specimen, -)\n  used(ns_biobank:derivate, ns_biobank:aliquot, -)\n  wasDerivedFrom(ns_biobank:aliquot, ns_biobank:specimen, -, -, -)\n  wasDerivedFrom(ns_biobank:derivative, ns_biobank:aliquot, -, -, -)\nendDocument'

In [141]:
#doc = ProvDocument()

# Build a qualified name from a namespace registered on `docu` (private
# `_namespaces` mapping) and print it.
# NOTE(review): the 'ns_biobank' prefix is hard-coded here; the
# ex_os_prov_to_neo shown above registers the NAMESPACE_* prefixes and
# "snomed" -- confirm 'ns_biobank' is still registered.
kk = QualifiedName(docu._namespaces['ns_biobank'], "aliquot")
#kk = QualifiedName(docu._namespaces['prov'], "agent")
print(kk)





ns_biobank:aliquot


In [146]:
# Look up the element identified by `kk` through the prov API.
prov_element=prov_api.get_element(kk)

In [147]:
# PROV-N text of the element fetched in the previous cell.
prov_element.get_provn()

'entity(ns_biobank:aliquot)'

In [150]:
# Ask for a bundle using `kk`; in the recorded run `kk` named an entity and
# the result was an empty bundle.
prov_api.get_bundle(kk).get_provn()

'bundle ns_biobank:aliquot\nendBundle'

### Test it step by step
#### get_element

In [126]:
from provdbconnector.db_adapters.baseadapter import METADATA_KEY_IDENTIFIER
# Include namespace uri into the identifier to support e.g. different default namespaces
# The two parts are joined without a separator, so a namespace URI that lacks
# a trailing '/' runs straight into the local part (see the recorded output).
global_identifier = kk.namespace.uri + kk.localpart

# Setup filter: a single metadata entry keyed by METADATA_KEY_IDENTIFIER
meta_filter = dict()
meta_filter.update({METADATA_KEY_IDENTIFIER: global_identifier})
print(meta_filter)
# Get the result (disabled; the adapter call is reproduced step by step in the cells below)
#results = ._adapter.get_records_by_filter(metadata_dict=meta_filter)

{'identifier': 'http://biobank_URIuser'}


#### self._adapter.get_records_by_filter (neo4j adapter)

###### Helper functions needed by the steps below

In [123]:
def encode_string_value_to_primitive(value):
    """
    Convert a value into a primitive (str, int, float, bool or list).

    - ``str`` (including subclasses), ``int``, ``float``, ``bool`` and ``list``
      values are returned unchanged
    - ``dict`` values are serialised to a JSON string
    - every other value is converted with ``str()``

    :param value: the value to encode
    :return: the unchanged value, or its JSON / ``str()`` representation
    """
    if isinstance(value, str):
        return value
    # Exact type checks on purpose: subclasses of these types fall through to str(value).
    if type(value) in (int, float, bool, list):
        return value
    if type(value) is dict:
        # json.dumps yields the same text as json.dump into a StringIO buffer,
        # without depending on a StringIO import.
        return json.dumps(value)
    return str(value)


#### Back to self._adapter.get_records_by_filter (neo4j adapter)

In [127]:
# Step-by-step reproduction of the adapter's filter preparation, using the
# `meta_filter` built in the get_element section above.
attributes_dict = dict()
metadata_dict = meta_filter

# --- (encoded_params, cypher_str) = self._get_cypher_filter_params(attributes_dict, metadata_dict)
properties_dict = attributes_dict
# Metadata keys are stored with a "meta:" prefix
metadata_dict_prefixed = {"meta:{}".format(k): v for k, v in metadata_dict.items()}
# Merge the 2 dicts into one. Named `filter_params` (not `filter`) so the
# builtin `filter` is not shadowed for the rest of the kernel session.
filter_params = {**properties_dict, **metadata_dict_prefixed}

# --- encoded_params = encode_dict_values_to_primitive(filter_params)
encoded_params = {
    str(key): encode_string_value_to_primitive(value)
    for key, value in filter_params.items()
}

# --- cypher_str = self._get_attributes_identifiers_cypher_string(filter_params.keys())
# One "`key`: {`key`}" entry per filter key, joined by commas
key_list = filter_params.keys()
cypher_str = ",".join("`{}`: {{`{}`}}".format(key, key) for key in key_list)

print(encoded_params)
print(cypher_str)

{'meta:identifier': 'http://biobank_URIuser'}
`meta:identifier`: {`meta:identifier`}


In [129]:
# Open a database session through the adapter's private `_create_session`.
# NOTE(review): no matching close of this session is visible in the following cells.
session = prov_api._adapter._create_session()

In [132]:
# Imported in this cell so it also works on a top-to-bottom run: the only
# other import of `cypher_commands` sits in a later cell.
import provdbconnector.db_adapters.neo4j.cypher_commands as cypher_commands

# Run the adapter's "get records by property dict" query with the filter
# string and parameters prepared above, and print every returned record.
result_set = session.run(
    cypher_commands.NEO4J_GET_RECORDS_BY_PROPERTY_DICT.format(filter_dict=cypher_str),
    encoded_params,
)
for result in result_set:
    record = result["re"]
    print(record)

### END Test

In [118]:
import provdbconnector.db_adapters.neo4j.cypher_commands as cypher_commands
# Show the full Cypher text produced by filling the query template with the filter string.
cypher_commands.NEO4J_GET_RECORDS_BY_PROPERTY_DICT.format(filter_dict=cypher_str)

'\n                            CYPHER 3.5 \n                            MATCH (d {`meta:identifier`: {`meta:identifier`}} )-[r]-(x {`meta:identifier`: {`meta:identifier`}})\n                            RETURN DISTINCT r as re\n                            //Get all nodes that are alone without connections to other nodes\n                            UNION\n                            MATCH (a {`meta:identifier`: {`meta:identifier`}})\n                            RETURN DISTINCT a as re\n                        '

In [107]:
# Display the metadata filter built in the get_element section.
meta_filter

{'identifier': 'http://biobank_URIuser'}

In [68]:
import os
from prov.dot import prov_to_dot

# Rendering needs the Graphviz "dot" executable on PATH; the recorded run of
# this cell failed with: FileNotFoundError: "dot" not found in path.
# If Graphviz lives in a non-standard location, extend PATH first, e.g.:
#os.environ["PATH"] += os.pathsep + 'D:/Program Files (x86)/Graphviz2.38/bin/'

# Convert the document to a dot graph (direction 'RL') and write it as PNG.
dot = prov_to_dot(docu, direction='RL')
dot.write_png('article-prov.png')
#Image('article-prov.png')
#Image(dot)


FileNotFoundError: [Errno 2] "dot" not found in path.

In [86]:
# The result of this call is discarded: only the last expression of a cell is displayed.
docu.get_registered_namespaces()
# URI of the 'ns_biobabank' namespace, read from the private `_namespaces` mapping.
docu._namespaces['ns_biobabank'].uri

'http://biobank_URI'

In [100]:
# Dump the document's private prefix -> Namespace mapping.
docu._namespaces

{'prov': <Namespace: prov {http://www.w3.org/ns/prov#}>,
 'xsd': <Namespace: xsd {http://www.w3.org/2001/XMLSchema#}>,
 'xsi': <Namespace: xsi {http://www.w3.org/2001/XMLSchema-instance}>,
 '': <Namespace:  {http://biobank_URI/default}>,
 'ns_biobabank': <Namespace: ns_biobabank {http://biobank_URI}>}