# Extract Molecular Initiating Events from AOP-Wiki for nanomaterials

- Step 1 - Setup
- Step 2 - Extracting all Molecular Initiating Events
- Step 3 - Searching the data for Molecular Initiating Events based on literature search outcome
- Step 4 - Combine, clean and export output


## Step 1 - Setup
### Imports

In [20]:
import sys

!{sys.executable} -m pip install --upgrade pip 
!{sys.executable} -m pip install watermark

try:
    import pandas as pd
except ImportError:
    !{sys.executable} -m pip install pandas
    import pandas as pd

try:
    from SPARQLWrapper import SPARQLWrapper, JSON
except ImportError:
    !{sys.executable} -m pip install sparqlwrapper
    from SPARQLWrapper import SPARQLWrapper, JSON
# Raise the display limits so that up to 99999 rows and columns are rendered
# when a DataFrame is shown.
#pd.set_option('display.max_colwidth', None)
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, 99999)



### Functions

In [9]:
def search(df,term):
    """Find the IDs of all rows whose Title or Description mentions ``term``.

    The match is a case-insensitive substring test. A short summary is printed
    for the term: in which column(s) it was found, or that it was not found.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have an 'ID' column. The 'Title' and 'Description' columns are
        searched when present; a column that does not exist is skipped instead
        of raising ``KeyError``.
    term : str
        Text to look for.

    Returns
    -------
    set
        The 'ID' values of every matching row (empty when nothing matched).
    """
    # Lower-case the term once instead of once per row.
    needle = term.lower()

    def _ids_matching(column):
        # Row IDs, in row order, whose text in `column` contains the term.
        if column not in df.columns:
            return []
        return [
            row_id
            for row_id, text in zip(df['ID'], df[column])
            # Missing values (None/NaN) are not strings and can never match.
            if isinstance(text, str) and needle in text.lower()
        ]

    titl = _ids_matching('Title')
    des = _ids_matching('Description')
    x = set(titl) | set(des)

    if not x:
        print(term + " not found")
    if titl:
        # str() lets non-string (e.g. integer) IDs be joined as well.
        print(term + " found in Title of " + ','.join(map(str, titl)))
    if des:
        print(term + " found in Description of " + ','.join(map(str, des)))
    return x

## Step 2 - Extracting all Molecular Initiating Events

In [23]:
# Client for the SPARQL service running on this machine (localhost, port 8890).
sparqlendpoint = SPARQLWrapper(endpoint="http://localhost:8890/sparql/")

In [30]:
# Select every event that an AOP lists as its molecular initiating event (MIE),
# returning the event label from position 4 onwards as ID, its title, and its IRI.
sparqlquery = '''
    SELECT DISTINCT (fn:substring(?MIE_ID,4) as ?ID) (str(?MIE_Title) as ?Title) ?MIE
    WHERE {
    ?AOP a aopo:AdverseOutcomePathway; aopo:has_molecular_initiating_event ?MIE.
    ?MIE rdfs:label ?MIE_ID; dc:title ?MIE_Title
    }
    '''

sparqlendpoint.setQuery(sparqlquery)
sparqlendpoint.setReturnFormat(JSON)
results = sparqlendpoint.query().convert()

# Build the frame in one step from the JSON bindings. Appending rows one at a
# time with df.loc[len(df.index)] gets slower with every row already present.
rows = [
    [binding["ID"]["value"], binding["Title"]["value"], binding["MIE"]["value"]]
    for binding in results["results"]["bindings"]
]
df = pd.DataFrame(rows, columns=["ID","Title","URL"])

# ID holds text, so this ordering is lexicographic ("103" sorts after "1028").
df = df.sort_values(by=['ID'])

display(df)

Unnamed: 0,ID,Title,URL
0,1002,"Inhibition, Deiodinase 2",https://identifiers.org/aop.events/1002
1,1009,"Inhibition, Deiodinase 1",https://identifiers.org/aop.events/1009
44,1018,"Activation, Glutamate-gated chloride channels",https://identifiers.org/aop.events/1018
45,1028,"Activation of specific nuclear receptors, PPAR...",https://identifiers.org/aop.events/1028
46,103,"Increase, Ecdysone receptor agonism",https://identifiers.org/aop.events/103
47,1038,"Activation, beta-2 adrenergic receptor",https://identifiers.org/aop.events/1038
48,1064,"prepubertal increase, Estrogen receptor (ER) a...",https://identifiers.org/aop.events/1064
49,1103,"Inhibition, Cyclooxygenase 1 activity",https://identifiers.org/aop.events/1103
50,111,"Agonism, Estrogen receptor",https://identifiers.org/aop.events/111
51,1114,"Unknown, MIE",https://identifiers.org/aop.events/1114


In [31]:
# Number of rows in the MIE table.
df.shape[0]

205

## Step 3 - Searching the data for Molecular Initiating Events based on literature search outcome

In [18]:
# Phrases to look up in the MIE table, one per line.
searchterms = [
    'reactive oxygen species',
    'ROS formation',
    'ROS generation',
    'Cellular sensing of substance or substance induced damage',
    'Induction of IL1β and TNFα',
    'Cellular sensing of the substance-induced damage resulting in the release of danger signals',
    'Lysosomal injury',
    'No effect',
    'ROS production',
    'DNA damage',
    'interruption of calcium sensing receptors',
    'production of free radicals',
    'induction of IL6',
    'interference with signalling molecules',
    'activation of signalling molecules',
    'inhibition of cellular receptors (dopaminergic, adrenergic, cannabinoid)',
    # NOTE(review): 'DNA damage' is already listed above; the duplicate is kept as-is.
    'DNA damage',
    'apoptotic stimuli',
    'decreased inflammatory response',
    'histone modifications',
    'Pulmonary inflammation',
    'genotoxicity',
    'cell cycle regulation/progression/control',
    'apoptosis',
    'proliferation',
    'pulmonary and cardiac effects',
    'carcinogenicity',
    'ROS (increases in the levels of malondialdehyde (MDA))',
    'mitochondrial dysfunction',
    'the toxicity in the rats dosed with copper microparticles was negligible',
    'ROS production through endoplasmic reticulum stress',
    'activation of Nrf2 singling pathway and increased catalase activity',
    'promoted activities of SOD and Gpx',
    # NOTE(review): this looks like two phrases fused by a missing separator
    # ("...inflammasomes" / "activation of...") - confirm against the source list.
    'activating the NLRP3 inflammasomesactivation of post-transcriptional mechanism',
    'condensed cellular size with reduced numbers of protrusions',
    'increased the number and length of protrusions',
    'induction of oxidative stress',
    'disrupt macrophage function',
    'Direct stress of epithelium',
    'Alveolar Macrophage activation',
    'reactions with endogenous molecules',
    'surface silanol disorganization',
    'inducing membranolysis',
    'Alter genome methylation',
    'Interaction with cell membrane',
    'DNA',
    'cell death',
    'mitochondrial',
    'ROS ',
]

In [28]:
#for item in searchterms:
#    output = search(df,item)
#    if not output == set():
#        print(str(item) + ' gives ' + str(output))

In [7]:
## Some terms are missing because our SPARQL query only returns events that are indicated as the MIE of an AOP. We could instead filter for events marked as "molecular" to get more results.
