## Notebook to populate and customize the graph database

##### Check if database is already populated  
Running this notebook against a database that is already populated will duplicate its content.

In [83]:
import os

from py2neo import Graph,Node,Relationship,Subgraph

# Connection settings come from the environment so that credentials never live
# in the notebook or in its version-control history.
#   NEO4J_URI      - optional; defaults to the instance this notebook has been using
#   NEO4J_PASSWORD - required
# SECURITY: the password that used to be hard-coded in this cell has been
# exposed and should be rotated.
NEO4J_URI = os.environ.get("NEO4J_URI", "neo4j+s://60584a5d.databases.neo4j.io")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD")
if not NEO4J_PASSWORD:
    raise RuntimeError("Set the NEO4J_PASSWORD environment variable before running this notebook")

graph = Graph(NEO4J_URI, password=NEO4J_PASSWORD, name="neo4j")

In [84]:
# Cypher statements that measure how much content the database already holds.
node_count_query = "MATCH (n) RETURN count(n) AS nodeCount"
edge_count_query = "MATCH ()-[r]->() RETURN count(r) AS edgeCount"

# Run both statements; .data() returns the records as a list of dicts.
node_count_result = graph.run(node_count_query).data()
edge_count_result = graph.run(edge_count_query).data()

# The count is read from the first record under its alias.
nodes_count = node_count_result[0]['nodeCount']
edges_count = edge_count_result[0]['edgeCount']

print(f"Number of nodes: {nodes_count}", f"Number of edges: {edges_count}", sep="\n")

Number of nodes: 0
Number of edges: 0


In [85]:
# Empty the database. Relationships are removed before nodes (edges must be
# deleted before the nodes they connect).
delete_edges_query = "MATCH ()-[r]->() DELETE r"
delete_nodes_query = "MATCH (n) DELETE n"

delete_edges_result = graph.run(delete_edges_query)
print("All relationships (edges) deleted")

delete_nodes_result = graph.run(delete_nodes_query)
print("All nodes deleted")

# Re-count nodes and relationships to confirm that nothing is left behind.
node_count_query = "MATCH (n) RETURN count(n) AS nodeCount"
edge_count_query = "MATCH ()-[r]->() RETURN count(r) AS edgeCount"

node_count_result = graph.run(node_count_query).data()
remaining_nodes = node_count_result[0]['nodeCount']
edge_count_result = graph.run(edge_count_query).data()
remaining_edges = edge_count_result[0]['edgeCount']

print(f"Remaining nodes: {remaining_nodes}", f"Remaining edges: {remaining_edges}", sep="\n")

All relationships (edges) deleted
All nodes deleted
Remaining nodes: 0
Remaining edges: 0


## Creating nodes and relationships

##### Loading XML file and parsing into python dictionary object

In [86]:
import json
import os

import xmltodict

## XML file path
## The AOP-Wiki XML export is machine specific, so it can be overridden with the
## AOP_WIKI_XML environment variable; the previous hard-coded location is kept
## as the default so existing setups keep working.
file_name = os.environ.get(
    "AOP_WIKI_XML",
    "E:\\aop-network-visualizer\\aop-visualizer-clean\\data\\aop-wiki-xml-2025-07-01\\aop-wiki-xml-2025-07-01",
)

### converting xml file to dictionary
with open(file_name,encoding="utf-8") as xml_file:
    data_dict = xmltodict.parse(xml_file.read())

## Data to be used for parsing: everything below the root <data> element
data = data_dict["data"]

##### Creating references for KE, stressor, KER and AOP

In [87]:
def _collect_references(section):
    """Return the reference table for one ``vendor-specific`` section.

    Each entry of ``data["vendor-specific"][section]`` becomes
    ``{"ref-num": <@id>, "ID": <@aop-wiki-id>}``, in document order.
    """
    entries = data["vendor-specific"][section]
    # xmltodict returns a bare dict (not a one-item list) when a section holds
    # a single element; normalise so the comprehension below always sees a list.
    if isinstance(entries, dict):
        entries = [entries]
    return [{"ref-num": entry["@id"], "ID": entry["@aop-wiki-id"]} for entry in entries]


### Reference of AOP internal id -> AOP-Wiki number
aops = _collect_references("aop-reference")

### Reference of key events
kes = _collect_references("key-event-reference")

### Reference of stressors
stressors = _collect_references("stressor-reference")

### Reference of key-event relationships
kers = _collect_references("key-event-relationship-reference")

##### Utilities to clean the HTML tags from text

In [88]:
## Function which will help in cleaning HTML tags
import re
## Matches HTML/XML tags and character entities (named, decimal, or lowercase hex)
CLEANR = re.compile('<.*?>|&([a-z0-9]+|#[0-9]{1,6}|#x[0-9a-f]{1,6});') ## The regex can be updated with more filters

## Function to clean the HTML tags
def cleanhtml(raw_html):
    """Strip HTML tags and character entities from ``raw_html``.

    Parameters
    ----------
    raw_html : str or None
        Text that may contain markup.

    Returns
    -------
    str or None
        The text with every ``CLEANR`` match removed. Falsy input (``None`` or
        an empty string) is returned unchanged; the original ``else`` branch
        evaluated ``raw_html`` without returning it.
    """
    if not raw_html:
        return raw_html

    cleantext = CLEANR.sub('', raw_html)
    # NOTE(review): this removes the two-character sequence backslash + "n",
    # not real newline characters - confirm that is the intent.
    return cleantext.replace("\\n","")

## Creating nodes of graph

In [89]:
## Using py2Neo Node and Relationship Constructor
from py2neo import Node, Relationship

## NOTE: py2neo is deprecated

#### 1. AOP nodes

In [90]:
## Function defined to enrich the AOP with further iformation
## Also need to change the time format, so it will be easily accessible

def enricher(aop,enriched_info):
    """Copy the descriptive fields of one AOP description onto ``aop``.

    ``enriched_info`` is read with ``.get`` for every field, so missing fields
    are stored as ``None``. Free-text fields are passed through ``cleanhtml``.
    The same ``aop`` mapping is mutated in place and returned.
    """
    # (property written on aop, key read from enriched_info, run cleanhtml?)
    # listed in the order the properties are written.
    field_plan = (
        ("name", "title", False),
        ("short_name", "short-name", False),
        ("authors", "authors", True),
        ("oecd_project", "oecd-project", False),
        ("abstract", "abstract", True),
        ("potential_applications", "potential-applications", True),
        ("references", "references", True),
        ("essentiality-support", "essentiality-support", True),
        ("source", "source", False),
        ("background", "background", True),
        ("creation_timestamp", "creation-timestamp", False),
        ("last_modification_timestamp", "last-modification-timestamp", False),
    )

    for target_property, source_key, strip_markup in field_plan:
        raw_value = enriched_info.get(source_key)
        aop[target_property] = cleanhtml(raw_value) if strip_markup else raw_value

    return aop

In [91]:
## Building nodes for AOP with all the properties

## Dictionary mapping each AOP reference id to its py2neo "AOP" node
enriched_aop_list = {}

## Index the AOP descriptions by reference id once, instead of rescanning
## data["aop"] for every entry in `aops`. setdefault keeps the first description
## seen for an id, which is the one the previous nested loop ended up using.
aop_descriptions = {}
for aop_desrip in data["aop"]:
    aop_descriptions.setdefault(aop_desrip["@id"], aop_desrip)

## looping over all the aops
for single_aop in aops:
    id_ = single_aop["ref-num"]

    aop_desrip = aop_descriptions.get(id_)

    ## Skip references without a description, and repeated references
    ## (the first node built for a reference id is the one that is kept)
    if aop_desrip is None or id_ in enriched_aop_list:
        continue

    aop_info_dict = {
        "id": single_aop["ID"],
        "url": f"https://aopwiki.org/aops/{single_aop['ID']}",
    }

    # Enrich the AOP data with the descriptive fields
    enriched_aop = enricher(aop_info_dict, aop_desrip)

    # Create AOP node
    aop_node = Node("AOP", **enriched_aop)
    enriched_aop_list[id_] = aop_node

## list of aop nodes, in insertion order
aop_list = list(enriched_aop_list.values())

**Curating raw text of AOP**
1. It can be used for identification of genes, proteins, and chemicals mentioned in AOPs
2. The collected text can also be converted into embedding vectors for semantic search with context

**Content merged together** 

1. Name of AOP: it contains the most basic info, which users look for
2. Abstract of AOP: summarized info for the AOP, used to find relevant AOPs
3. Potential applications of the AOP
4. Background
5. Short name

In [93]:
### Build one combined raw-text document per AOP, keyed by the integer AOP id
aop_raw_text = {}

## aop_list
for aop in aop_list:
    aop_id = int(aop.get("id")) ## fetching aop id
    ## The node property is "potential_applications" (plural, as written by
    ## enricher); the singular spelling used before always produced "None".
    aop_text = f""" AOP with id: {aop.get("id")} has name: {aop.get("name")}\n with short_name: {aop.get("short_name")}\n The abstract of aop {aop.get("name")}: {aop.get("abstract")}\n background:{aop.get("background")}\n potential_application: {aop.get("potential_applications")}"""
    
    aop_raw_text.setdefault(aop_id,aop_text)

In [94]:
### function to perform NER on AOP info

## using bern2 for named entity recognition
NER_URL = r"http://bern2.korea.ac.kr/plain"

error_key = [] ## Keys (AOP ids) for which the NER request failed
import requests
def query_plain(key,text, url=NER_URL, timeout=60):
    """Send ``text`` to the NER service and return the decoded JSON response.

    Parameters
    ----------
    key : hashable
        Identifier of the document; appended to ``error_key`` on failure.
    text : str
        Plain text to annotate.
    url : str
        Endpoint that receives the POST request.
    timeout : float
        Seconds to wait for the server before giving up, so that one stuck
        request cannot hang the whole annotation run.

    Returns
    -------
    The parsed JSON body, or ``None`` when the request failed or the body was
    not valid JSON.
    """
    try:
        res = requests.post(url, json={'text': text}, timeout=timeout)
        return res.json()
    except (requests.RequestException, ValueError):
        # Network problems (including timeouts) and undecodable bodies are
        # recorded; anything else (e.g. KeyboardInterrupt) now propagates
        # instead of being swallowed by a bare except.
        error_key.append(key)
        return None

In [11]:
## sending request to server
#from tqdm import tqdm
#annotated_aop = {}
#for key,value in tqdm(aop_raw_text.items()):
    #annotated_aop[key] = query_plain(key,value)

In [12]:
## store this annotation as pkl file
## import pickle
## with open("aop_ner.pkl","wb") as file:
##  pickle.dump(annotated_aop,file)

In [95]:
## Loading annoatation from pkl file
import pickle
## Restore the NER annotations cached in "aop_ner.pkl".
## SECURITY: unpickling can execute arbitrary code - only load a pickle file
## that you produced yourself.
with open("aop_ner.pkl", mode="rb") as annotation_file:
    annotated_aop = pickle.load(annotation_file)

In [98]:
##NOTE : Need to check, why we have this error
## AOPs having error with Annotations
for key,value in annotated_aop.items():
    ## A usable annotation exposes .get(); entries without it (for example the
    ## None that query_plain returns on failure) are reported. This replaces a
    ## bare try/except that was used as a type check.
    if not callable(getattr(value, "get", None)):
        print("Error occured while NER with AOP",key)

Error occured while NER with AOP 43
Error occured while NER with AOP 282
Error occured while NER with AOP 314
Error occured while NER with AOP 443
Error occured while NER with AOP 517
Error occured while NER with AOP 522
Error occured while NER with AOP 563


In [99]:
## Reshape the raw NER output into {aop id: [entity dict, ...]}
whole_aop = {}
for key, value in annotated_aop.items():
    ## AOPs without a usable annotation result are left out entirely
    if not value:
        continue

    whole_aop[key] = [
        {
            "name": mention.get("mention"),
            ## "drug" mentions are filed under "chemical"; other types are kept as-is
            "type": "chemical" if mention.get("obj") == "drug" else mention.get("obj"),
            "identifier": mention.get("id"),
        }
        for mention in value.get("annotations", [])
    ]

In [101]:
### creating node and then their relationship with biological entites

## Entity nodes and their relationships with AOP nodes
entity_ref = {}    ## identifier -> first entity node registered under that identifier
entity_nodes = []  ## NOTE(review): not filled by this cell (unchanged behaviour) - confirm whether it is still needed
entity_rels = []

## Entity types for which a node is registered, and the relationship type used
## to attach them to an AOP ("drug" gets a node but, as before, no relationship)
ALLOWED_ENTITY_TYPES = ("gene", "chemical", "disease", "drug")
ENTITY_RELATION = {"gene": "HAS_GENE", "chemical": "HAS_CHEMICAL", "disease": "HAS_DISEASE"}

## Mentions starting with these prefixes are AOP vocabulary (MIE / KE / AOP),
## not biological entities, and are dropped
EXCLUDED_NAME_PREFIXES = ("mie", "ke", "aop")

## Index the AOP nodes by their AOP-Wiki id once, instead of scanning every
## AOP node for every annotated AOP
aop_nodes_by_id = {}
for aop_value in enriched_aop_list.values():
    aop_nodes_by_id.setdefault(aop_value.get("id"), []).append(aop_value)

for key, annotation_list in whole_aop.items():

    ## whole_aop is keyed by integer ids while the node property is a string
    for aop_node in aop_nodes_by_id.get(str(key), []):

        for annotated in annotation_list:
            if not annotated:
                continue

            ## Mentions without a type or a name cannot be classified or
            ## filtered; skip them instead of failing on None.lower()
            raw_type = annotated.get("type")
            raw_name = annotated.get("name")
            if not raw_type or not raw_name:
                continue

            entity_type = raw_type.lower()
            if entity_type not in ALLOWED_ENTITY_TYPES:
                continue

            if raw_name.lower().startswith(EXCLUDED_NAME_PREFIXES):
                continue

            ## Build the node
            entity_node = Node(entity_type.upper(), **annotated)

            ## Identifiers may arrive as a list; the first element is the key
            entity_identifier = entity_node.get("identifier")
            if isinstance(entity_identifier, list):
                identifier_to_use = entity_identifier[0] if entity_identifier else None
            else:
                identifier_to_use = entity_identifier

            ## NOTE(review): nodes are shared purely by identifier, so mentions
            ## with the same (or a missing) identifier but different names or
            ## types all resolve to the first node registered - confirm this
            ## is intended.
            entity_ref.setdefault(identifier_to_use, entity_node)

            ## Build the relation with the (possibly shared) entity node
            relation_name = ENTITY_RELATION.get(entity_type)
            if relation_name:
                entity_aop_rel = Relationship(aop_node, relation_name, entity_ref.get(identifier_to_use))
                entity_aop_rel["relation_type"] = relation_name
                entity_rels.append(entity_aop_rel)
                    

In [103]:
## Export the registered biological entities to a CSV file
import pandas as pd

## One record per registered entity node: its name, type and identifier
type_list = [
    {
        "name": value.get("name"),
        "type": value.get("type"),
        "identifier": value.get("identifier"),
    }
    for value in entity_ref.values()
]

## Written with the default DataFrame index as the first column
pd.DataFrame(type_list).to_csv("Biological Entites.csv")

In [104]:
## Sanity check: no registered entity name may start with "mie", "ke" or "aop"
wrong_ner_key = [
    key
    for key, value in entity_ref.items()
    if value.get("name").lower().startswith(("mie", "ke", "aop"))
]

assert len(wrong_ner_key) == 0, "inconsistent entites captured in NER"

**NOTE**: OpenAI embeddings can also be generated and stored in the nodes for semantic search; this is up to the user. If this functionality is needed by the community, please submit a feature request.

In [19]:
## From csv file import embedding
#embedding_data = pd.read_csv("data/aop_embedding.csv",
                             #converters={'aop_embedding': pd.eval}).drop("Unnamed: 0",
                                                                         #axis=1)

## converting data into records
#embedding_data_ = embedding_data.to_dict(orient="records")

## creating numerical embedding
#numerical_embedding = {}
#for i in embedding_data_:
    
    ## 
    #key = i.get("AOP")
    #embd = list(i.get("aop_embedding"))
    
    ## create numerical embedding
    #numerical_embedding.setdefault(key,embd)


In [20]:
### In AOP node, we need to update the embeddings for searching
#for node in aop_list:
    
    #node_id = int(node.get("id")) ## fetching id of the node
    
    ## with this id, retriving the embedding of the text
    #embd = numerical_embedding.get(node_id)
    
    ## updating the node with embedding info
    #node["embedding"] = embd

##### AOPs and their relation with applicability
1. Taxonomy
2. sex
3. Life-stages

In [105]:
## Taxonomy node list

## Taxonomy reference id -> TAXONOMY node
taxonomy_list = {}

for tax in data["taxonomy"]:
    tax_node = Node(
        "TAXONOMY",
        name=tax.get("name"),
        source=tax.get("source"),
        source_id=tax.get("source-id"),
    )
    ## The first node seen for a reference id is the one that is kept
    taxonomy_list.setdefault(tax.get("@id"), tax_node)

## Distinct taxonomy names (order not defined, built from a set)
taxonomy_raw_list = list({node.get("name") for node in taxonomy_list.values()})

## Distinct taxonomy source ids (order not defined, built from a set)
taxonomy_id_list = list({node.get("source_id") for node in taxonomy_list.values()})

In [106]:
## Mapping: taxonomy source id -> list of the distinct names that use it
id_Tax = {}
for tax in taxonomy_list.values():
    source_id = tax.get("source_id")
    name = tax.get("name")

    ## The previous version looked up the literal key "id" instead of the
    ## source id, so additional names were never recorded and the uniqueness
    ## check in the following cell could not fail. It also shadowed the builtin
    ## `id`. Names are de-duplicated so that the same (id, name) pair appearing
    ## under several references is not reported as a conflict.
    names_for_id = id_Tax.setdefault(source_id, [])
    if name not in names_for_id:
        names_for_id.append(name)

In [107]:
## check, weather some id associated with more than 1 name

## Source ids that are mapped to more than one taxonomy name
id_with_two_name = [tax_id for tax_id, names in id_Tax.items() if len(names) > 1]

assert len(id_with_two_name) == 0, "Id assigned to two different taxonomy"

**Taxonomies having a single id will be linked with AOPs**

In [109]:
## Building taxonomy and AOP relation
from py2neo import Relationship

## IS_APPLICABLE relationships from AOP nodes to TAXONOMY nodes
aop_taxonomy_relation = []

for aop in data["aop"]:

    ## reference id of the AOP
    ref = aop.get("@id")

    applicability = aop.get("applicability")
    if not applicability:
        continue

    taxo = applicability.get("taxonomy")
    if not taxo:
        continue

    ## The taxonomy entry is either one dict or a list of dicts; treat both
    ## the same way. Any other shape is ignored.
    if isinstance(taxo, list):
        taxonomy_entries = taxo
    elif isinstance(taxo, dict):
        taxonomy_entries = [taxo]
    else:
        continue

    for tax in taxonomy_entries:
        taxonomy_node = taxonomy_list.get(tax.get("@taxonomy-id"))
        aop_node = enriched_aop_list.get(ref)

        relation = Relationship(aop_node, "IS_APPLICABLE", taxonomy_node)
        relation["evidence_strength"] = tax.get("evidence")
        relation["relation_type"] = "IS_APPLICABLE"

        aop_taxonomy_relation.append(relation)


In [110]:
## Collect the distinct sex labels used by the AOPs and create one SEX node each

## Applicability sections of every AOP that has one
a = [aop.get("applicability") for aop in data["aop"] if aop.get("applicability")]

## Their non-empty "sex" entries (each one a dict or a list of dicts)
sex_info = [section.get("sex") for section in a if section.get("sex")]

sex_name = []
for entry in sex_info:
    if isinstance(entry, dict):
        sex_name.append(entry.get("sex"))
    elif isinstance(entry, list):
        sex_name.extend(item.get("sex") for item in entry)

## Distinct labels only
sex_name = list(set(sex_name))

## One node with label SEX per distinct label
sex_nodes = {
    sex_n: Node("SEX", name=sex_n, relation_type="IS_APPLICABLE")
    for sex_n in sex_name
}

In [112]:
## This is part applicability of sex in AOP
## Building relation between sex and AOP

## IS_APPLICABLE relationships from AOP nodes to SEX nodes
all_sex_rel = []

for aop in data["aop"]:

    ref_id = aop.get("@id")

    applicability = aop.get("applicability")
    if not applicability:
        continue

    sex_info = applicability.get("sex")
    if not sex_info:
        continue

    aop_node = enriched_aop_list.get(ref_id)

    ## The sex entry is either one dict or a list of dicts; other shapes are ignored
    if isinstance(sex_info, list):
        sex_entries = sex_info
    elif isinstance(sex_info, dict):
        sex_entries = [sex_info]
    else:
        continue

    for sexi in sex_entries:
        sex_node = sex_nodes.get(sexi.get("sex"))

        sex_aop_rel = Relationship(aop_node, "IS_APPLICABLE", sex_node)
        sex_aop_rel["evidence_strength"] = sexi.get("evidence")
        sex_aop_rel["relation_type"] = "IS_APPLICABLE"

        all_sex_rel.append(sex_aop_rel)


#### AOP and life stage relations  

Life stages in AOP-Wiki are not normalized, so we collected the mentioned life stages and tried to link them with ontologies.  
Life stages mentioned in KERs are also included.

In [113]:
### Load the life-stage descriptions; each key is used as a life-stage name and
### each value supplies the properties of the corresponding node further down
with open("life_stages.json", mode="r", encoding="utf-8") as life_stage_file:
    life_stage_info = json.load(life_stage_file)

In [114]:
### creading Node of life stage
## NOTE: life stage for KE and KER also change, need to update it
## All life stage node has been created
## Two life stages Adults and fetal are havinf duplicate name

## Life-stage name -> LIFE_STAGE node
life_stage_node = {}
life_stages_list = list(set(life_stage_info.keys())) ## all life-stage names
life_stage_normalized = life_stages_list

## Duplicate spellings that are folded into one name (handled manually)
_life_stage_alias = {"Adults": "Adult", "Fetal": "Foetal"}

for raw_stage in life_stage_normalized:

    stage_key = _life_stage_alias.get(raw_stage, raw_stage)

    ## Node properties come from the entry stored under the folded name
    node_value = life_stage_info.get(stage_key)
    life_node = Node("LIFE_STAGE", **node_value)

    ## The first node registered under a name is the one that is kept
    life_stage_node.setdefault(stage_key, life_node)

In [116]:
## Attach this node with AOP's and KE

## Building relation between life stages, key event and AOPs 

## IS_APPLICABLE relationships from AOP nodes to LIFE_STAGE nodes
life_stage_rel = []

for i in data["aop"]:

    applicablility = i.get("applicability")
    if not applicablility:
        continue

    life_stage = applicablility.get("life-stage")
    if not life_stage:
        continue

    aop_node = enriched_aop_list.get(i.get("@id"))

    ## The life-stage entry is either one dict or a list of dicts; other shapes are ignored
    if isinstance(life_stage, list):
        stage_entries = life_stage
    elif isinstance(life_stage, dict):
        stage_entries = [life_stage]
    else:
        continue

    for j in stage_entries:
        ## Fold the two duplicate spellings into the names used for the nodes
        raw_stage = j.get("life-stage")
        stage_name = {"Adults": "Adult", "Fetal": "Foetal"}.get(raw_stage, raw_stage)

        evidence_name = j.get("evidence")
        stage_node = life_stage_node.get(stage_name)

        aop_stage_rel = Relationship(aop_node, "IS_APPLICABLE", stage_node)
        aop_stage_rel["evidence"] = evidence_name
        aop_stage_rel["relation_type"] = "IS_APPLICABLE"

        life_stage_rel.append(aop_stage_rel)

#### Creating AOP assessment relationship

In [117]:
## AOP assessment relationships: one ASSESMENT node per AOP that has an
## "overall-assessment" section, linked with HAS_ASSESMENT
aop_assesment_relationship = []
for dat in data["aop"]:

    ## capturing the aop reference id
    aop_ref = dat.get("@id") 

    ## capturing assessment field
    assesment = dat.get("overall-assessment") 

    if assesment: ## only AOPs that carry an assessment

        aop_node = enriched_aop_list.get(aop_ref) ## getting aop node

        ## Node data for the assessment, with HTML tags removed.
        ## The XML keys read everywhere else in this notebook are hyphenated
        ## (e.g. "key-event-essentiality-summary"); the two fields below were
        ## read with underscores. The hyphenated key is now tried first and the
        ## old spelling is kept as a fallback.
        ## NOTE(review): confirm the element names against the XML export.
        node_data = {
            "applicability": cleanhtml(assesment.get("applicability")),
            "description": cleanhtml(assesment.get("description")),
            "weight_of_evidence_summary": cleanhtml(
                assesment.get("weight-of-evidence-summary", assesment.get("weight_of_evidence_summary"))
            ),
            "quantitative_considerations": cleanhtml(
                assesment.get("quantitative-considerations", assesment.get("quantitative_considerations"))
            ),
            "key_event_essentiality_summary": cleanhtml(assesment.get("key-event-essentiality-summary"))
        }

        ## Creating assessment node
        assesment_node = Node("ASSESMENT",**node_data)

        ## Creating relation with the node
        aop_asses_relation = Relationship(aop_node,"HAS_ASSESMENT",assesment_node)
        aop_asses_relation["relation_type"] = "HAS_ASSESMENT"

        aop_assesment_relationship.append(aop_asses_relation)


##### Building key event node

In [118]:
### Key-event reference id -> KEY_EVENT node
processed_keyEvent = {}

## Index the key-event descriptions by reference id once, instead of rescanning
## data["key-event"] for every entry in `kes`. setdefault keeps the first
## description seen for an id, matching what the nested loop used to keep.
key_event_descriptions = {}
for event_info in data["key-event"]:
    key_event_descriptions.setdefault(event_info["@id"], event_info)

for key_event in kes:
    # Reference number and AOP-Wiki id of the key event
    ref_num = key_event["ref-num"]
    event_id = key_event["ID"]

    event_info = key_event_descriptions.get(ref_num)

    # Skip references without a description, and repeated references
    # (the first node built for a reference is the one that is kept)
    if event_info is None or ref_num in processed_keyEvent:
        continue

    # NOTE(review): "references" is stored without cleanhtml here, unlike the
    # AOP nodes - confirm whether that difference is intended.
    key_event_info = {
        "id": event_id,
        "name": event_info.get("title"),
        "url": f"https://aopwiki.org/events/{event_id}",
        "short_name": event_info.get("short-name"),
        "description": cleanhtml(event_info.get("description")),
        "measurement_methodology": cleanhtml(event_info.get("measurement-methodology")),
        "evidence_supporting_taxonomic_applicability": cleanhtml(event_info.get("evidence-supporting-taxonomic-applicability")),
        "references": event_info.get("references"),
        "source": event_info.get("source"),
        "creation_timestamp": event_info.get("creation-timestamp"),
        "last_modification_timestamp": event_info.get("last-modification-timestamp"),
    }

    # Create key-event node
    key_event_node = Node("KEY_EVENT", **key_event_info)
    processed_keyEvent[ref_num] = key_event_node

**Key events also hold very rich text in their description and abstract; NER can be used to identify the biological concepts, and entity linking can then be performed.**

In [104]:
## updated_desc = """Upon the Wnt signaling activation, beta-catenin is stabilized and activated via inhibition of the phosphorylation by GSK3beta (Huang et al., 2019).Once the beta-catenin is stabilized, it translocates into the nucleus and enhances the expression of target genes of Wnt/beta-catenin signaling pathway (Huang et al., 2019).Beta-catenin activation is related to cancer a positive regulator of Wnt signaling, forms the complex with FZD and leads to trigger the Wnt signaling together with Wnt coreceptor low-density lipoprotein (LDL) receptor-related protein 6 (LRP6) (Clevers  Nusse, 2012; Jiang, et al., 2015). DVL, however, has a controversial role to promote Wnt receptor degradation (Jiang et al., 2015). Meanwhile, DVL-dependent regulation of FZD level is involved in mTORC1 signaling suppression via Wnt/beta-catenin signaling (Zeng et al., 2018). The recruitment of Axin to the DVL-FZD complex induces the beta-catenin stabilization and activation. The stabilized beta-catenin translocates into the nucleus, which forms the complex with TCF to induce the up-regulated expression of proliferation-related genes. """

In [105]:
## ke_list = [event for ref,event in processed_keyEvent.items()]

## updated the ke
## for ke in ke_list:
    ## if ke.get("id") == "1755":
        ## ke.update({"description":updated_desc})
            

In [106]:
### Create a loop to generate the text for embedding

# key_event_raw_text = {}

## aop_list
# for ke in ke_list:
    #ke_id = int(ke.get("id")) ## fetching aop id
    #ke_text = f""" Key event with id: {ke.get("id")} has name: {ke.get("name")}\n with short_name: {ke.get("short_name")}\n The decription of key event {ke.get("name")}: {cleanhtml(ke.get("description"))}\n measurement_methodology:{cleanhtml(ke.get("measurement_methodology"))}\n evidence_supporting_taxonomic_applicability: {cleanhtml(aop.get("evidence_supporting_taxonomic_applicability"))}"""
    
    #key_event_raw_text.setdefault(ke_id,ke_text)

##### Building relations between key events and their applicability:

1. Taxonomy

In [119]:
### IS_APPLICABLE relationships from KEY_EVENT nodes to TAXONOMY nodes
ke_taxonomy_rel = []

for key_eve in data["key-event"]:

    ### Node of this key event
    key_event_id = key_eve.get("@id")
    key_event_node_ = processed_keyEvent.get(key_event_id)

    applicability_ke = key_eve.get("applicability")
    if not applicability_ke:
        continue

    taxonomy_ke = applicability_ke.get("taxonomy")

    ### The taxonomy entry is either one dict or a list of dicts; anything
    ### else (including a missing entry) produces no relationship
    if isinstance(taxonomy_ke, dict):
        taxonomy_entries = [taxonomy_ke]
    elif isinstance(taxonomy_ke, list):
        taxonomy_entries = taxonomy_ke
    else:
        continue

    for j in taxonomy_entries:
        tax_node = taxonomy_list.get(j.get("@taxonomy-id"))

        taxonomy_ke_rel = Relationship(key_event_node_, "IS_APPLICABLE", tax_node)
        taxonomy_ke_rel["evidence"] = j.get("evidence")
        taxonomy_ke_rel["relation_type"] = "IS_APPLICABLE"

        ke_taxonomy_rel.append(taxonomy_ke_rel)

2. SEX

In [120]:
### IS_APPLICABLE relationships from KEY_EVENT nodes to SEX nodes
ke_sex_rel = []

for key_eve in data["key-event"]:

    ### Node of this key event
    key_event_id = key_eve.get("@id")
    key_event_node_ = processed_keyEvent.get(key_event_id)

    applicability_ke = key_eve.get("applicability")
    if not applicability_ke:
        continue

    sex_ke = applicability_ke.get("sex")

    ### The sex entry is either one dict or a list of dicts; anything else
    ### (including a missing entry) produces no relationship
    if isinstance(sex_ke, dict):
        sex_entries = [sex_ke]
    elif isinstance(sex_ke, list):
        sex_entries = sex_ke
    else:
        continue

    for j in sex_entries:
        sex_node = sex_nodes.get(j.get("sex"))

        sex_ke_rel = Relationship(key_event_node_, "IS_APPLICABLE", sex_node)
        sex_ke_rel["evidence"] = j.get("evidence")
        sex_ke_rel["relation_type"] = "IS_APPLICABLE"

        ke_sex_rel.append(sex_ke_rel)


3. Life stages

In [121]:
## IS_APPLICABLE relationships from KEY_EVENT nodes to LIFE_STAGE nodes
life_stage_rel_ke = []

## Duplicate spellings that are folded into the names used for the nodes
_ke_life_stage_alias = {"Adults": "Adult", "Fetal": "Foetal"}

for i in data["key-event"]:

    applicablility = i.get("applicability")
    if not applicablility:
        continue

    life_stage = applicablility.get("life-stage")
    if not life_stage:
        continue

    ke_nodee_ = processed_keyEvent.get(i.get("@id"))

    ## The life-stage entry is either one dict or a list of dicts; other shapes are ignored
    if isinstance(life_stage, list):
        stage_entries = life_stage
    elif isinstance(life_stage, dict):
        stage_entries = [life_stage]
    else:
        continue

    for j in stage_entries:

        raw_stage = j.get("life-stage")
        stage_name = _ke_life_stage_alias.get(raw_stage, raw_stage)

        ## EVIDENCE
        evidence_name = j.get("evidence")

        stage_node = life_stage_node.get(stage_name) ## node of life_stage

        ## A relationship needs both end nodes. Previously a failed
        ## Relationship(...) call was caught by a bare except and execution
        ## carried on, overwriting the evidence of the relationship left over
        ## from the previous iteration and appending it a second time (or
        ## raising NameError if it happened first). Report the gap and skip.
        if ke_nodee_ is None or stage_node is None:
            print(stage_name)
            print(ke_nodee_.get("short_name") if ke_nodee_ is not None else None, stage_node)
            continue

        ke_stage_rel = Relationship(ke_nodee_, "IS_APPLICABLE", stage_node)
        ke_stage_rel["evidence"] = evidence_name
        ke_stage_rel["relation_type"] = "IS_APPLICABLE"

        life_stage_rel_ke.append(ke_stage_rel)

3 to < 6 years
decreased, IQ None
6 to < 11 years
decreased, IQ None


##### Building relation between **key events** and **Biological organization level**

In [122]:
### Building Biological organization level nodes

## `levels`: organization level of every key event, in file order (entries may be None)
## `bio_level_node`: level name -> ORGANIZATION_LEVEL node, one node per distinct level
levels = []
bio_level_node = {}

## Extracting level
for eve in data["key-event"]:
    level = eve.get("biological-organization-level")
    levels.append(level)

## Building nodes
for lev in levels:
    ## Skip key events without a level (otherwise a nameless node is created) and
    ## levels that already have a node (otherwise a throwaway node is built each time)
    if not lev or lev in bio_level_node:
        continue

    ## creating biological organization node
    biological_level_node = Node("ORGANIZATION_LEVEL",name=lev)
    bio_level_node[lev] = biological_level_node

In [123]:
## Link every key event to the biological organization level it occurs at.
## Result: `level_ke_relations`, a list of OCCURS_AT relationships (key event -> level).

level_ke_relations = []

for key_event in data["key-event"]:
    org_level = key_event.get("biological-organization-level")

    ## key events without a level get no OCCURS_AT edge
    if not org_level:
        continue

    ## both end points come from the lookups built in earlier cells
    source_node = processed_keyEvent.get(key_event.get("@id"))
    target_node = bio_level_node.get(org_level)

    occurs_at = Relationship(source_node, "OCCURS_AT", target_node)
    occurs_at["relation_type"] = "OCCURS_AT"
    level_ke_relations.append(occurs_at)

##### Building relation between **organ** and **key events**

In [125]:
### One ORGAN node per distinct organ term referenced by the key events.
### Result: `organ_nodes`, mapping the term's "source-id" to its node.
organ_nodes = {}
for key_event in data["key-event"]:
    organ_term = key_event.get("organ-term")

    ## key events without an organ term contribute nothing
    if not organ_term:
        continue

    organ_key = organ_term.get("source-id")

    ## the first term seen for a source-id wins; every field of the term becomes a node property
    if not organ_nodes.get(organ_key):
        organ_nodes.setdefault(organ_key, Node("ORGAN", **organ_term))


In [126]:
## Link key events to the organ they occur in.
## Result: `organ_event_rel`, a list of OCCURS_IN_ORGAN relationships (key event -> organ).

organ_event_rel = []
for key_event in data["key-event"]:
    organ_term = key_event.get("organ-term")
    if not organ_term:
        continue

    ## organ node registered under the term's source-id; no node means no edge
    organ_node = organ_nodes.get(organ_term.get("source-id"))
    if not organ_node:
        continue

    ## key event node on the other end of the edge
    event_node = processed_keyEvent.get(key_event.get("@id"))

    organ_event = Relationship(event_node, "OCCURS_IN_ORGAN", organ_node)
    organ_event["relation_type"] = "OCCURS_IN_ORGAN"
    organ_event_rel.append(organ_event)

##### Building relation between **cells** and **key events**

In [127]:
### One CELL node per distinct cell term referenced by the key events.
### Result: `cell_nodes`, mapping the term's "source-id" to its node.

cell_nodes = {}
for key_event in data["key-event"]:
    cell_term = key_event.get("cell-term")

    ## key events without a cell term contribute nothing
    if not cell_term:
        continue

    cell_key = cell_term.get("source-id")

    ## the first term seen for a source-id wins; every field of the term becomes a node property
    if not cell_nodes.get(cell_key):
        cell_nodes.setdefault(cell_key, Node("CELL", **cell_term))

In [128]:
## Link key events to the cell type they occur in.
## Result: `cell_event_rel`, a list of OCCURS_IN_CELL relationships (key event -> cell).

cell_event_rel = []

for key_event in data["key-event"]:
    cell_term = key_event.get("cell-term")
    if not cell_term:
        continue

    ## cell node registered under the term's source-id; no node means no edge
    cell_node = cell_nodes.get(cell_term.get("source-id"))
    if not cell_node:
        continue

    ## key event node on the other end of the edge
    event_node = processed_keyEvent.get(key_event.get("@id"))

    cell_event = Relationship(event_node, "OCCURS_IN_CELL", cell_node)
    cell_event["relation_type"] = "OCCURS_IN_CELL"
    cell_event_rel.append(cell_event)

#### Building stressor nodes

In [129]:
def streesorEnricher(stressor,enriched_info,):
    """Copy the descriptive fields of a stressor record onto ``stressor``.

    Parameters:
        stressor: dict that is updated in place.
        enriched_info: mapping providing "name", "description",
            "creation-timestamp" and "last-modification-timestamp".

    Returns:
        The same ``stressor`` dict, now carrying "name", "description",
        "creation_timestamp" and "last_modification_timestamp".

    Raises:
        KeyError: if ``enriched_info`` lacks one of the four source keys.
    """
    ## (property written on the stressor, key read from the record)
    field_map = (
        ("name", "name"),
        ("description", "description"),
        ("creation_timestamp", "creation-timestamp"),
        ("last_modification_timestamp", "last-modification-timestamp"),
    )

    for target_key, source_key in field_map:
        stressor[target_key] = enriched_info[source_key]

    return stressor

In [130]:
## Build one STRESSOR node per entry of `stressors`.
## Result: `enriched_stressors_list`, mapping the stressor reference number to its node.

enriched_stressors_list =  {}

for single_stressor in stressors:

    ## reference number (matches "@id" in the XML) and public stressor ID
    ref_num = single_stressor["ref-num"]
    stressor_id = single_stressor["ID"]

    ## XML stressor records whose "@id" equals this reference number (evaluated lazily)
    matching_records = (rec for rec in data["stressor"] if rec["@id"] == ref_num)

    for stressor_info in matching_records:

        ## identifier and public URL first, then the descriptive fields from the record
        stressor_info_dict = {
            "id": stressor_id,
            "url": f"https://aopwiki.org/stressors/{stressor_id}",
        }
        updated_stressors = streesorEnricher(stressor_info_dict,stressor_info)

        ## the first node registered for a reference number is kept
        stressor_node = Node("STRESSOR",**updated_stressors)
        enriched_stressors_list.setdefault(ref_num,stressor_node)


##### Building **chemical** nodes associated with **stressors**

In [131]:
### Building node of chemical
### chemical ID will be matched with the stressor
### Result: `enriched_chemical_list` (chemical "@id" -> CHEMICAL node) and
###         `chem_node` (the same nodes as a plain list)

enriched_chemical_list = {}
for chem in data["chemical"]:

    chem_id = chem.get("@id")

    ## an empty <synonyms/> element is parsed to None, which a `.get(key, {})` default
    ## does not cover; fall back to an empty dict so the chained lookup cannot fail
    synonyms_block = chem.get("synonyms") or {}

    ## NOTE(review): "indigo-inchi_key" mixes '-' and '_' unlike the other property names;
    ## left unchanged because existing queries may rely on it - confirm before renaming
    chem_info = {
                    "casrn": chem.get("@casrn"),
                    "jchem_inchi_key": chem.get("jchem-inchi-key"),
                    "indigo-inchi_key": chem.get("indigo-inchi-key"),
                    "name": chem.get("preferred-name"),
                    "synonyms": synonyms_block.get("synonym"),
                    "dsstox_id": chem.get("dsstox-id")
                }

    ## chemical node created
    chemical_node = Node("CHEMICAL",**chem_info)

    ## the first node registered for a chemical ID is kept
    enriched_chemical_list.setdefault(chem_id,chemical_node)

### Nodes of chemical
chem_node = list(enriched_chemical_list.values())

##### **chemical** node and relationship between **stressors**

In [132]:
## Link each stressor to the chemicals listed as its initiators.
## Result: `stressor_chem_relation`, a list of HAS_CHEMICAL relationships (stressor -> chemical).
stressor_chem_relation = []

for stressors_data in data["stressor"]:

    ## every XML stressor is expected to have a node already (KeyError otherwise)
    stress_id = stressors_data.get("@id")
    stressors_node = enriched_stressors_list[stress_id]

    initiators = stressors_data.get("chemicals",{}).get("chemical-initiator",None)
    if not initiators:
        continue

    ## a single initiator is parsed as a dict, several as a list
    if isinstance(initiators,dict):
        initiators = [initiators]
    elif not isinstance(initiators,list):
        continue

    for initiator in initiators:
        chemical_id = initiator.get("@chemical-id")

        ## initiators without a chemical reference are ignored
        if not chemical_id:
            continue

        chemical_node = enriched_chemical_list[chemical_id]

        relation_obj = Relationship(stressors_node,"HAS_CHEMICAL",chemical_node)
        relation_obj["relation_type"] = "HAS_CHEMICAL"
        stressor_chem_relation.append(relation_obj)

In [133]:
## Preview only the first few relationships; displaying the whole list floods the output
stressor_chem_relation[:5]

[HAS_CHEMICAL(Node('STRESSOR', creation_timestamp='2016-11-29T18:42:23', id='50', last_modification_timestamp='2016-11-29T18:42:23', name='Rotenone', url='https://aopwiki.org/stressors/50'), Node('CHEMICAL', dsstox_id='DTXSID6021248', jchem_inchi_key='JUVIOZPCNVVQFO-HBGVWJBISA-N', name='Rotenone', synonyms=['(1)Benzopyrano(3,4-b)furo(2,3-h)(1)benzopyran-6(6a H)-one, 1,2,12,12a-tetrahydro-8,9-dimethoxy-2-(1-m ethylethenyl)-, (2R-(2.alpha.,6a.alpha.,12a.alpha. ))-', '[1]Benzopyrano[3,4-b]furo[2,3-h][1]benzopyran-6(6aH)-one, 1,2,12,12a-tetrahydro-8,9-dimethoxy-2-(1-methylethenyl)-, (2R,6aS,12aS)-', '(-)-cis-Rotenone', '(-)-Rotenone', '(2R,6aS,12aS)-1,2,6,6a,12,12a-hexahidro-2-isopropenil-8,9-dimetoxicromeno[3,4-b]furo[2,3-h]cromen-6-ona', '(2R,6aS,12aS)-1,2,6,6a,12,12a-Hexahydro-2-isopropenyl-8,9-dimethoxychromeno[3,4-b]furo[2,3-h]chromen-6-on', '(2R,6AS,12aS)-1,2,6,6a,12,12a-hexahydro-2-isopropenyl-8,9-dimethoxychromeno[3,4-b]furo[2,3-h]chromen-6-one', '(2R,6aS,12aS)-1,2,6,6a,12,12a-hexa

##### **Stressors** and their relation with **AOP**

In [134]:
### Building list of AOP stressors
### Result: `aop_stressor_relation`, a list of HAS_STRESSOR relationships (AOP -> stressor).

aop_stressor_relation = []

for aop_desrip in data["aop"]:

    ## fetch the id
    aop_id = aop_desrip.get("@id")

    ## fetch the aop node
    aop_node = enriched_aop_list.get(aop_id)

    ## stressors declared on this AOP; `or {}` also covers an empty element parsed to None
    aop_stressors = (aop_desrip.get("aop-stressors") or {}).get("aop-stressor", None)

    if not aop_stressors:
        continue

    ## A single stressor is parsed as a dict, several as a list. Normalising here fixes the
    ## former dict branch, which read the loop variable `stressor` left over from a previous
    ## AOP (or undefined) instead of the current entry.
    if isinstance(aop_stressors, dict):
        aop_stressors = [aop_stressors]
    elif not isinstance(aop_stressors, list):
        continue

    for stressor in aop_stressors:
        stressor_id = stressor.get("@stressor-id")

        ## fetching the stressors
        stress_n = enriched_stressors_list.get(stressor_id)

        ## fetching evidence
        evidence = stressor.get("evidence")

        # Create relationship between AOP and stressor (evidence is stored as a one-element list)
        aop_stressor_relation_obj = Relationship(aop_node, "HAS_STRESSOR", stress_n)
        aop_stressor_relation_obj["evidence"] = [evidence]
        aop_stressor_relation_obj["relation_type"] = "HAS_STRESSOR"

        aop_stressor_relation.append(aop_stressor_relation_obj)

##### AOP and their relation with key events

1. Key events can be categorized as molecular-initiating events, key events and adverse outcomes
2. Between the key events, we can have multiple relationships

In [135]:
### Building relations between AOPs and their molecular initiating events (MIE)
### Result: `aop_mie`; for every MIE it holds a HAS_MOLECULAR_INITIATING_EVENT relationship
###         followed by a HAS_KEY_EVENT relationship (an MIE is also a key event of the AOP).

aop_mie = []

for aop_detail in data["aop"]:

    ## fetch the id
    aop_id = aop_detail.get("@id")

    ## fetch the aop node
    aop_node = enriched_aop_list.get(aop_id)

    ## fetching the MIE entries
    mie_ = aop_detail.get("molecular-initiating-event",)

    if not mie_:
        continue

    ## a single MIE is parsed as a dict, several as a list
    if isinstance(mie_,dict):
        mie_entries = [mie_]
    elif isinstance(mie_,list):
        mie_entries = mie_
    else:
        continue

    ## Loop through the events associated with the aop
    for eve in mie_entries:

        event_id = eve.get("@key-event-id")

        ## fetch event node
        eve_node = processed_keyEvent.get(event_id)

        ## supporting evidence
        support_eve = eve.get("evidence-supporting-chemical-initiation")

        ## Build relationship
        aop_mie_relation = Relationship(aop_node, "HAS_MOLECULAR_INITIATING_EVENT", eve_node)
        aop_mie_relation["evidence_supporting_chemical_initiation"] = support_eve
        ## every other relationship in this notebook carries its type as a property;
        ## it was missing here only
        aop_mie_relation["relation_type"] = "HAS_MOLECULAR_INITIATING_EVENT"

        aop_ke_relation = Relationship(aop_node,"HAS_KEY_EVENT",eve_node)
        aop_ke_relation["relation_type"] = "HAS_KEY_EVENT"

        ## appending the relation
        aop_mie.append(aop_mie_relation)
        aop_mie.append(aop_ke_relation)

##### AOP and their relation with adverse outcome

In [136]:
### Relations between AOPs and their adverse outcomes (AO).
### Result: `aop_ao`; every outcome contributes a HAS_ADVERSE_OUTCOME relationship and a
###         HAS_KEY_EVENT relationship (an adverse outcome is also a key event of the AOP).

aop_ao = []

for aop_detail in data["aop"]:

    aop_node = enriched_aop_list.get(aop_detail.get("@id"))

    ao_ = aop_detail.get("adverse-outcome",)
    if not ao_:
        continue

    ## a single outcome is parsed as a dict, several as a list
    single_outcome = isinstance(ao_,dict)
    if single_outcome:
        outcomes = [ao_]
    elif isinstance(ao_,list):
        outcomes = ao_
    else:
        outcomes = []

    for ao in outcomes:

        ## key event node standing for the adverse outcome
        ao_node = processed_keyEvent.get(ao.get("@key-event-id"))

        aop_ao_relation = Relationship(aop_node, "HAS_ADVERSE_OUTCOME", ao_node)
        aop_ao_relation["examples"] = ao.get("examples")
        aop_ao_relation["relation_type"] = "HAS_ADVERSE_OUTCOME"

        aop_ke_relation = Relationship(aop_node,"HAS_KEY_EVENT",ao_node)
        aop_ke_relation["relation_type"] = "HAS_KEY_EVENT"

        ## a lone outcome is stored key-event edge first, a listed outcome adverse-outcome edge first
        if single_outcome:
            aop_ao.extend([aop_ke_relation, aop_ao_relation])
        else:
            aop_ao.extend([aop_ao_relation, aop_ke_relation])


##### AOP and their relation with key event

In [137]:
### Relations between AOPs and their intermediate key events.
### Result: `aop_ke`, a list of HAS_KEY_EVENT relationships (AOP -> key event).

aop_ke = []

for aop_detail in data["aop"]:

    aop_node = enriched_aop_list.get(aop_detail.get("@id"))

    ke_ = aop_detail.get("key-events",{}).get("key-event")
    if not ke_:
        continue

    ## a single key event is parsed as a dict, several as a list
    if isinstance(ke_,dict):
        key_event_refs = [ke_]
    elif isinstance(ke_,list):
        key_event_refs = ke_
    else:
        key_event_refs = []

    for ke in key_event_refs:

        ## node of the referenced key event
        ke_node = processed_keyEvent.get(ke.get("@key-event-id"))

        aop_KE_relation = Relationship(aop_node, "HAS_KEY_EVENT", ke_node)
        aop_KE_relation["relation_type"] = "HAS_KEY_EVENT"
        aop_ke.append(aop_KE_relation)

#### Key event relationship

In [139]:
## Every field name of every key event relationship record, in file order (with repeats)
ker_info = [
    field_name
    for ker_record in data["key-event-relationship"]
    for field_name in ker_record
]

In [138]:
### Title, source, references, creation-timestamp and last-modification-timestamp will be used as properties of the KERs
## Open question: can the description also be used as a property?

In [140]:
## Creating Nodes of Key event relationship
## Result: `ker_nodes`, mapping the KER "@id" to its KEY_EVENT_RELATIONSHIP node.

ker_nodes = {}
for ker in data["key-event-relationship"]: ## Looping over KER

    ## Fetching internal_identifier as well
    internal_id = ker.get("@id")

    ## Fetching KER ID for both event
    ker_upstream = ker.get("title").get("upstream-id")
    ker_downstream = ker.get("title").get("downstream-id")

    ## Creating title using both the keyevents
    ## NOTE(review): the triple-quoted f-string embeds a newline and the indentation between
    ## the two names; kept as is because stored names may already be relied upon - confirm
    ker_name = f"""{processed_keyEvent.get(ker_upstream).get("name")},
                 {processed_keyEvent.get(ker_downstream).get("name")}"""

    ## The record keys read in this cell are lower-case; the former "Description" lookup never
    ## matched, so the property was always empty. The old spelling is kept as a fallback.
    description_html = ker.get("description", ker.get("Description"))

    ## an empty <quantitative-understanding/> element is parsed to None, which a
    ## `.get(key, {})` default does not cover
    quantitative = ker.get("quantitative-understanding") or {}

    ## Feeding Node with information
    ker_data = {
        "name":ker_name,
        "source":ker.get("source"),
        "refrences":cleanhtml(ker.get("references")),
        "creation_timestamp":ker.get("creation-timestamp"),
        "last_modification_timestamp":ker.get("last-modification-timestamp"),
        "description":cleanhtml(description_html),
        "quantitative_understanding": cleanhtml(quantitative.get("description")),
        "evidence_supporting_taxonomic_applicability":cleanhtml(ker.get("evidence-supporting-taxonomic-applicability"))}

    ## Creating Node of KER
    ker_node = Node("KEY_EVENT_RELATIONSHIP",**ker_data)

    ## add the node in mapping (the first node registered for an id is kept)
    ker_nodes.setdefault(internal_id,ker_node)


In [141]:
### Relation of KER node and AOPs
### Result: `ker_aop`, a list of HAS_KER relationships (AOP -> KER) carrying the adjacency,
###         quantitative understanding, evidence and 1-based order within the AOP.

ker_aop = []
for aop_detail in data["aop"]:

    ## Fetched aop node
    aop_id = aop_detail.get("@id")
    aop_node = enriched_aop_list.get(aop_id)

    ## fetched ker; `or {}` also covers an empty element parsed to None
    relations = (aop_detail.get("key-event-relationships") or {}).get("relationship")

    ## An AOP with exactly one KER is parsed as a dict, not a list. It used to be dropped
    ## silently; wrap it so it gets its HAS_KER relationship as well.
    if isinstance(relations,dict):
        relations = [relations]
    elif not isinstance(relations,list):
        continue

    for idx, rel in enumerate(relations):
        ker_id = rel.get("@id")
        ker_node = ker_nodes.get(ker_id) ## fetched ker nodes

        ### Relationship property
        rel_info = {
            "adjacency":rel.get("adjacency"),
            "quantitative_understanding_value":rel.get("quantitative-understanding-value"),
            "evidence":rel.get("evidence"),
            "order":idx+1,
        }

        ### Generating relations
        ker_aop_rel = Relationship(aop_node,"HAS_KER",ker_node,**rel_info)
        ker_aop_rel["relation_type"] ="HAS_KER"

        ## appending relations
        ker_aop.append(ker_aop_rel)
        
    

In [142]:
### Each KER points at its upstream and its downstream key event.
### Result: `ker_ke_rel`; per KER a HAS_UPSTREAM_EVENT relationship followed by a
###         HAS_DOWNSTREAM_EVENT relationship.

ker_ke_rel = []
for ker in data["key-event-relationship"]:

    ## "title" is a dictionary holding the upstream and downstream key event IDs
    ker_title = ker.get("title")
    if not ker_title:
        continue

    ker_node = ker_nodes.get(ker.get("@id"))

    ## (relationship type, key holding the key event ID); upstream is built first
    directions = (
        ("HAS_UPSTREAM_EVENT", "upstream-id"),
        ("HAS_DOWNSTREAM_EVENT", "downstream-id"),
    )
    for rel_type, id_key in directions:
        target_node = processed_keyEvent.get(ker_title.get(id_key))

        edge = Relationship(ker_node, rel_type, target_node)
        edge["relation_type"] = rel_type
        ker_ke_rel.append(edge)
    

##### The applicability node has been divided into 3 node types
1. sex
2. life-stage
3. taxonomy

1. SEX

In [143]:
### Link KERs to the sexes they apply to.
### Result: `ker_sex_rel`, a list of IS_APPLICABLE relationships (KER -> sex) with the
###         evidence stored in the `evidence_strength` property.

ker_sex_rel = []
for ker_record in data["key-event-relationship"]:

    ker_node = ker_nodes.get(ker_record.get("@id"))

    applicable = ker_record.get("applicability")
    if not applicable:
        continue

    sex = applicable.get("sex")
    if not sex:
        continue

    ## a single entry is parsed as a dict, several as a list
    if isinstance(sex,dict):
        sex_entries = [sex]
    elif isinstance(sex,list):
        sex_entries = sex
    else:
        sex_entries = []

    for entry in sex_entries:
        sex_node = sex_nodes.get(entry.get("sex"))
        evidence = entry.get("evidence")

        sex_ker = Relationship(ker_node,"IS_APPLICABLE",sex_node,evidence_strength=evidence)
        sex_ker["relation_type"] = "IS_APPLICABLE"
        ker_sex_rel.append(sex_ker)

2. Taxonomy

In [144]:
## Link KERs to the taxa they apply to.
## Result: `tax_ker_rel`, a list of IS_APPLICABLE relationships (KER -> taxonomy) with the
##         evidence stored in the `evidence_strength` property.

tax_ker_rel = []
for ker_record in data["key-event-relationship"]:

    ker_node = ker_nodes.get(ker_record.get("@id"))

    applicable = ker_record.get("applicability")
    if not applicable:
        continue

    taxonomy = applicable.get("taxonomy")
    if not taxonomy:
        continue

    ## a single entry is parsed as a dict, several as a list
    if isinstance(taxonomy,dict):
        taxonomy_entries = [taxonomy]
    elif isinstance(taxonomy,list):
        taxonomy_entries = taxonomy
    else:
        taxonomy_entries = []

    for entry in taxonomy_entries:
        ## taxonomy nodes are looked up by the "@taxonomy-id" attribute
        taxonomy_node = taxonomy_list.get(entry.get("@taxonomy-id"))
        evidence = entry.get("evidence")

        taxonomy_ker = Relationship(ker_node,"IS_APPLICABLE",taxonomy_node,evidence_strength=evidence)
        taxonomy_ker["relation_type"] = "IS_APPLICABLE"
        tax_ker_rel.append(taxonomy_ker)
    

3. Life stages

Life stages mentioned in KERs might differ from those mentioned in KEs. The code below can be used to analyze that.

In [None]:
# ### Make a list of life-stage mentioned in ker as well
# #staged = []
# for i in data["key-event-relationship"]:
#     applicable = i.get("applicability") ## check node

#     if applicable:

#         life_stage = applicable.get("life-stage")

#         if life_stage:

#             if isinstance(life_stage,list):

#                 for j in life_stage:
#                     staged.append(j)
#             if isinstance(life_stage,dict):
#                 staged.append(life_stage)

In [None]:
# ## normalizing stages
# ker_life_stage_list = []
# for stage_ in staged:
#     ker_life_stage_list.append(stage_.get("life-stage"))

# ### List of unique life stages
# stage_set = list(set(ker_life_stage_list))

# ## filter out which are not in aop life_Stage nodes
# filtered_list = filter(lambda x:life_stage_node.get(x) ,stage_set)

In [153]:
# ## Here basically two different life stages were extra mentioned
# set(stage_set).difference(set(filtered_list))

In [146]:
### Link KERs to the life stages they apply to.
### Result: `life_stage_ker_rel`, a list of IS_APPLICABLE relationships (KER -> life stage).
life_stage_ker_rel = []
for ker_record in data["key-event-relationship"]:

    ker_node = ker_nodes.get(ker_record.get("@id"))

    applicable = ker_record.get("applicability")
    if not applicable:
        continue

    life_stage = applicable.get("life-stage")
    if not life_stage:
        continue

    ## a single entry is parsed as a dict, several as a list
    if isinstance(life_stage,dict):
        stage_entries = [life_stage]
    elif isinstance(life_stage,list):
        stage_entries = life_stage
    else:
        stage_entries = []

    for entry in stage_entries:

        ## two source spellings are mapped onto the names used for the life stage nodes
        raw_stage = entry.get("life-stage")
        stage_name = {"Adults": "Adult", "Fetal": "Foetal"}.get(raw_stage, raw_stage)

        evidence = entry.get("evidence")
        stage_node = life_stage_node.get(stage_name)

        ker_stage_rel = Relationship(ker_node,"IS_APPLICABLE",stage_node)
        ker_stage_rel["evidence"] = evidence
        ker_stage_rel["relation_type"] = "IS_APPLICABLE"

        life_stage_ker_rel.append(ker_stage_rel)

#### WOE and KER relationship

In [147]:
### Weight of evidence (WOE) attached to KERs
## For every KER that has a weight-of-evidence section, a WOE node holding its cleaned text
## fields is built and linked to the KER node.
## Result: `woe_ker_rel`, a list of HAS_WEIGHT_OF_EVIDENCE relationships (KER -> WOE).

woe_ker_rel = []
for ker_record in data["key-event-relationship"]:

    woe = ker_record.get("weight-of-evidence")
    if not woe:
        continue

    ker_node = ker_nodes.get(ker_record.get("@id"))

    ## (node property, key read from the weight-of-evidence section)
    woe_fields = (
        ("value", "value"),
        ("biological_plausibility", "biological-plausibility"),
        ("emperical_support_linkage", "emperical-support-linkage"),
        ("uncertainties_or_inconsistencies", "uncertainties-or-inconsistencies"),
    )
    woe_node_info = {prop: cleanhtml(woe.get(xml_key)) for prop, xml_key in woe_fields}

    woe_node = Node("WOE",**woe_node_info)

    ker_woe = Relationship(ker_node,"HAS_WEIGHT_OF_EVIDENCE",woe_node)
    ker_woe["relation_type"] = "HAS_WEIGHT_OF_EVIDENCE"
    woe_ker_rel.append(ker_woe)


#### Nodes for biological event

In [149]:
def BioObject_maker(data,bio_object_id,):
    """Build a BIO_OBJECT node for the biological object with the given id.

    Parameters:
        data: mapping whose "biological-object" entry is an iterable of records.
        bio_object_id: value compared against each record's "@id".

    Returns:
        A new BIO_OBJECT node carrying source_id, source and name of the first
        matching record, or None when no record matches.
    """
    ## first record whose "@id" equals the requested id (None when there is none)
    match = next(
        (record for record in data["biological-object"] if record.get("@id") == bio_object_id),
        None,
    )
    if match is None:
        return None

    object_data = {
        "source_id": match.get("source-id"),
        "source": match.get("source"),
        "name": match.get("name"),
    }
    return Node("BIO_OBJECT", **object_data)

In [150]:
def BioAction_maker(data,bio_action_id,):
    """Build a BIO_ACTION node for the biological action with the given id.

    Parameters:
        data: mapping whose "biological-action" entry is an iterable of records.
        bio_action_id: value compared against each record's "@id".

    Returns:
        A new BIO_ACTION node carrying source_id, source and name of the first
        matching record, or None when no record matches.
    """
    for record in data["biological-action"]:

        ## keep scanning until the requested id shows up
        if record.get("@id") != bio_action_id:
            continue

        return Node(
            "BIO_ACTION",
            source_id=record.get("source-id"),
            source=record.get("source"),
            name=record.get("name"),
        )

    return None

In [151]:
def BioProcess_maker(data,bio_process_id):
    """Build the BIO_PROCESS node for one biological-process reference.

    Only the node is created here; linking it to a key event is left to
    the caller.

    Parameters
    ----------
    data : dict
        Parsed document (presumably the xmltodict output loaded earlier in
        this notebook). ``data["biological-process"]`` holds the process
        entries: a list, or a single dict when the XML has exactly one entry.
    bio_process_id : str
        Value of the ``@id`` attribute to look up.

    Returns
    -------
    Node or None
        A new ``BIO_PROCESS`` node with ``source_id``, ``source`` and ``name``
        properties, or ``None`` when no entry carries the requested id.
    """
    entries = data["biological-process"]
    if isinstance(entries, dict):
        # a lone child element is parsed into a dict, not a one-item list
        entries = [entries]

    for entry in entries:
        if entry.get("@id") != bio_process_id:
            continue

        ## first entry with a matching id wins
        return Node("BIO_PROCESS",
                    source_id=entry.get("source-id"),
                    source=entry.get("source"),
                    name=entry.get("name"))

    # no entry matched: make the "not found" result explicit
    return None

In [152]:
## Key events that do not mention any biological event
event_less = [key_event
              for key_event in data["key-event"]
              if key_event.get("biological-events") is None]

## Report how many key events carry no biological-event information
print(f"out of {len(data['key-event'])} key events, {len(event_less)} are event less")
        

out of 1497 key events, 631 are event less


In [153]:
### Creating nodes and key-event relations for bioprocess, bioaction and bioobject

### Processed nodes, keyed by their XML reference id so each one exists only once
process_nodes = {}
action_nodes = {}
object_nodes = {}

### Processed relations as (key-event id, bio node) pairs
event_process_relation = []
event_object_relation = []
event_action_relation = []

## References whose id could not be found in the corresponding lookup list
unresolved_bio_refs = []

## One row per kind of reference carried by a biological event:
## (XML attribute, node factory, node cache, relation list)
bio_event_parts = [
    ("@process-id", BioProcess_maker, process_nodes, event_process_relation),
    ("@action-id", BioAction_maker, action_nodes, event_action_relation),
    ("@object-id", BioObject_maker, object_nodes, event_object_relation),
]

# Extract biological process, action, and object if available from the network
for event_info in data["key-event"]:
    key_event_ref = event_info.get("@id")

    ## "biological-events" may be absent or present with a None value;
    ## "biological-event" is a dict for one event and a list for several
    events = (event_info.get("biological-events") or {}).get("biological-event") or []
    if isinstance(events, dict):
        events = [events]

    for event in events:
        for attribute, maker, node_cache, relations in bio_event_parts:
            bio_id = event.get(attribute)
            if not bio_id:
                continue

            ## Reuse the node already built for this id. Building a fresh Node
            ## per reference would put duplicates of the same process/action/
            ## object into the graph, because unbound nodes are distinct objects.
            node_ = node_cache.get(bio_id)
            if node_ is None:
                node_ = maker(data, bio_id)
                if node_ is None:
                    ## dangling reference: nothing to link to, so skip it
                    unresolved_bio_refs.append((key_event_ref, attribute, bio_id))
                    continue
                node_cache[bio_id] = node_

            relations.append((key_event_ref, node_))

if unresolved_bio_refs:
    print(f"{len(unresolved_bio_refs)} biological-event references could not be resolved and were skipped")

            

##### Building relations between key events and bioprocess, bioobject and bioaction

In [154]:
## Link each key event to the biological process it refers to
ke_pro = []
for event_ref, process_node in event_process_relation:
    ## look up the key-event node by its reference id
    source_event = processed_keyEvent.get(event_ref)
    ke_pro.append(
        Relationship(source_event,
                     "HAS_BIOPROCESS",
                     process_node,
                     relation_type="HAS_BIOPROCESS")
    )

In [155]:
## Link each key event to the biological object it refers to
ke_oboject = []
for event_ref, object_node in event_object_relation:
    ## look up the key-event node by its reference id
    source_event = processed_keyEvent.get(event_ref)
    ke_oboject.append(
        Relationship(source_event,
                     "HAS_BIOOBJECT",
                     object_node,
                     relation_type="HAS_BIOOBJECT")
    )

In [156]:
## Link each key event to the biological action it refers to
ke_action = []
for event_ref, action_node in event_action_relation:
    ## look up the key-event node by its reference id
    source_event = processed_keyEvent.get(event_ref)
    ke_action.append(
        Relationship(source_event,
                     "HAS_BIOACTION",
                     action_node,
                     relation_type="HAS_BIOACTION")
    )

#### Collecting all nodes and relationships for the network

In [157]:
### Flatten every "reference id -> node" lookup table into a plain list of nodes
aop_list = list(enriched_aop_list.values())
stressor_list = list(enriched_stressors_list.values())
chemical_list = list(enriched_chemical_list.values())
ke_list = list(processed_keyEvent.values())
process_list = list(process_nodes.values())
object_list = list(object_nodes.values())
action_list = list(action_nodes.values())
taxonomy_ls = list(taxonomy_list.values())
bio_level = list(bio_level_node.values())
organ_level = list(organ_nodes.values())
ker_list = list(ker_nodes.values())
sex_list = list(sex_nodes.values())
cell_list = list(cell_nodes.values())
entites_list = list(entity_ref.values())
life_satge_nodes = list(life_stage_node.values())

In [158]:
### Concatenate the per-type node lists, in this order, into one list
node_list = []
for node_group in (
    aop_list,
    stressor_list,
    chemical_list,
    ke_list,
    process_list,
    object_list,
    action_list,
    taxonomy_ls,
    bio_level,
    ker_list,
    sex_list,
    organ_level,
    cell_list,
    entites_list,
    life_satge_nodes,
):
    node_list.extend(node_group)

In [159]:
### Concatenate every relationship collection, in this order, into one list
relation_list = []
for relation_group in (
    aop_stressor_relation,
    stressor_chem_relation,
    aop_ke,
    aop_mie,
    aop_ao,
    ke_pro,
    ke_oboject,
    ke_action,
    aop_taxonomy_relation,
    all_sex_rel,
    aop_assesment_relationship,
    level_ke_relations,
    organ_event_rel,
    woe_ker_rel,
    tax_ker_rel,
    ker_ke_rel,
    ker_aop,
    ker_sex_rel,
    cell_event_rel,
    entity_rels,
    life_stage_rel,
    life_stage_ker_rel,
    life_stage_rel_ke,
    ke_taxonomy_rel,
    ke_sex_rel,
):
    relation_list.extend(relation_group)

#### Feeding nodes and relationships into the graph database

In [160]:
from py2neo import Subgraph

## Bundle all nodes and relationships into one subgraph so they can be
## written to the database in a single create call
sub_graph = Subgraph(nodes=node_list,
                     relationships=relation_list)

In [191]:
### Connect to a local Neo4j instance (alternative target for feeding the graph)
### NOTE: when the notebook is run top to bottom, the next cell reassigns `aop_graph`.
import os

from py2neo import Graph,Node,Relationship,Subgraph

## Connection settings can be overridden through the environment; the defaults
## keep the original local development values.
aop_graph = Graph(os.environ.get("NEO4J_LOCAL_URI", "http://neo4j:7474"),
                  password=os.environ.get("NEO4J_LOCAL_PASSWORD", "1234"),
                  name="neo4j",)

In [161]:
### Connect to the hosted Neo4j instance.
### The password is read from the NEO4J_PASSWORD environment variable, or
### prompted for, so the secret is never stored in the notebook. The password
### that was previously committed here should be rotated.
import os
from getpass import getpass

from py2neo import Graph,Node,Relationship,Subgraph

NEO4J_URI = os.environ.get("NEO4J_URI", "neo4j+s://60584a5d.databases.neo4j.io")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD") or getpass("Neo4j password: ")

aop_graph = Graph(NEO4J_URI,password=NEO4J_PASSWORD,name="neo4j",)

In [162]:
## Open a transaction on the connected graph; the cells below create the
## subgraph inside it and then commit it.
neo4j_connect = aop_graph.begin()

In [193]:
## NOTE: Uncomment the line below to delete everything in the database before re-populating it

#aop_graph.delete_all()

In [163]:
## Stage all collected nodes and relationships for creation inside the open
## transaction. Re-running this against a populated database duplicates its content.
neo4j_connect.create(sub_graph)

In [164]:
## Commit through the graph object: Transaction.commit() is deprecated in
## py2neo 2021.1+ in favour of Graph.commit(tx).
aop_graph.commit(neo4j_connect)

  neo4j_connect.commit()
