# Dataframes

In [1]:
import pandas as pd
import numpy as np
import re
!pip3 install rdflib sparqlwrapper pydotplus graphviz



In [2]:
# Load the three CSV inputs used by the rest of the notebook:
#   df          - model table (one row is converted per loop iteration below)
#   df_nms      - nanomaterial lookup (read later via 'nm_unique', 'CID', 'IUPACName')
#   df_coatings - coating lookup (read later via 'nm_unique', 'CID', 'IUPACName')
df, df_nms, df_coatings = (
    pd.read_csv(csv_path)
    for csv_path in (
        "./model_algorithm_added.csv",
        "./nanomaterials_final_df.csv",
        "./compounds_all_new.csv",
    )
)

In [3]:
# Missing CIDs become 0 so the column can be stored as plain integers;
# downstream code treats "0" as "no PubChem compound".
cid_filled = df_nms['CID'].fillna(0)
df_nms['CID'] = cid_filled.astype(int)

# Manual correction of the algorithm columns for the row labelled 46.
for column, corrected in (
    ("algo", "Stepwise-MLR PLS"),
    ("algoName", "Stepwise_Multiple_Linear_Regression Partial_Least_Squares"),
):
    df.at[46, column] = corrected

## Namespaces

In [50]:
from rdflib import Graph, URIRef, Literal, BNode, Namespace
from rdflib.namespace import DC, RDFS, FOAF, DCTERMS, VOID, RDF, XSD, OWL, SKOS
# CSVW, DC, DCAT, DCTERMS, DOAP, FOAF, ODRL2, ORG, OWL, \
# PROF, PROV, RDF, RDFS, SDO, SH, SKOS, SOSA, SSN, TIME, \
# VOID, XMLNS, XSD

# Project-specific namespaces.
sbd = Namespace("https://www.sbd4nano.eu/rdf/#")
ECO = Namespace("https://evidenceontology.org/#")
kb = Namespace("https://h2020-sbd4nano.github.io/sbd-data-landscape/")
enm = Namespace("http://purl.enanomapper.org/onto/")
# NOTE(review): this base has no trailing separator, so ncit.X expands to ".../NCITX".
# `ncit` is only bound as a prefix in this notebook - confirm the intended base.
ncit = Namespace("http://purl.obolibrary.org/obo/NCIT")
sio = Namespace("http://semanticscience.org/resource/")

# Ontologies published under the shared OBO PURL base. They all expand to the
# same URI prefix; the separate names only document which ontology a term
# belongs to.
OBO = Namespace("http://purl.obolibrary.org/obo/")
# Lower-case alias: the dictionary cells further down reference `obo.<TERM>`,
# which previously raised NameError on a fresh kernel because it was never defined.
obo = Namespace("http://purl.obolibrary.org/obo/")
CHEBI = Namespace("http://purl.obolibrary.org/obo/")
STATO = Namespace("http://purl.obolibrary.org/obo/")
PATO = Namespace("http://purl.obolibrary.org/obo/") #Phenotype And Trait Ontology
OBI = Namespace("http://purl.obolibrary.org/obo/") #Ontology for biomedical investigations
CL = Namespace("http://purl.obolibrary.org/obo/") #Cell Ontology
# Bound as a prefix in the RDF-formation cell but previously undefined (NameError).
CLO = Namespace("http://purl.obolibrary.org/obo/") #Cell Line Ontology
OAE = Namespace("http://purl.obolibrary.org/obo/") #Ontology Adverse Effects

# Remaining external vocabularies.
MESH = Namespace("http://purl.bioontology.org/ontology/MESH/") #Medical Subject Headings
PUBCHEM = Namespace("https://pubchem.ncbi.nlm.nih.gov/compound/")
npo = Namespace("http://purl.bioontology.org/ontology/npo#")
BAO = Namespace("http://www.bioassayontology.org/bao#")
RCD = Namespace("http://purl.bioontology.org/ontology/RCD/") #Read Codes, Clinical TermsVersion 3 (CTV3)
AFO = Namespace("http://purl.allotrope.org/ontologies/result#")
OPB = Namespace("http://bhi.washington.edu/OPB#") #Ontology for Physics for Biology
NCI = Namespace("http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#") #thesaurus
FIX = Namespace("http://purl.org/obo/owl/FIX#")
STY = Namespace("http://purl.bioontology.org/ontology/STY/")

# Functions

## Size Node

In [51]:
# Methods data size extraction

def add_size_information(size_node, size_value, g):
    """Describe one size entry of the current nanoparticle in graph ``g``.

    The size node is always linked to the nanoparticle, typed as
    PATO_0000117 and given the unit NPO_1246. If ``size_value`` carries
    content, the three extractors then annotate the node with the kind of
    size, the medium and the numeric value(s).

    Parameters
    ----------
    size_node : node that represents this size entry.
    size_value : raw cell text; non-string values are converted with ``str``.
    g : rdflib graph that receives the triples.

    Notes
    -----
    Reads the module-level name ``nanoparticle``, which the conversion loop
    assigns for the row being processed.
    """
    g.add((nanoparticle, npo.has_quality, size_node)) #size
    g.add((size_node, RDF.type, PATO.PATO_0000117))
    g.add((size_node, npo.has_unit_of_measure, npo.NPO_1246))

    s = str(size_value).lower()
    # Missing (NaN) or blank cells carry nothing to extract.
    if s == "nan" or not s.strip():
        return

    # The extractors return None, so the former if/elif chain ran each of them
    # exactly once only by accident; call them explicitly instead.
    extract_size_kind(size_node, s, g)
    extract_medium(size_node, s, g)
    extract_data_type(size_node, s, g)

def extract_size_kind(size, data, g):
    """Attach the kind of size measurement described by ``data`` to ``size``.

    Each whitespace-separated word of ``data`` is stripped of non-word
    characters and underscores and looked up in ``size_dict``; the first hit
    wins. Without a hit, text containing ``") = "`` is recorded as a
    diameter, anything else as a generic particle size.

    Parameters
    ----------
    size : node of the size entry being annotated.
    data : size text (the caller passes it lower-cased).
    g : rdflib graph that receives the triples.
    """
    tokens = [re.sub(r'[\W_]+', '', word) for word in str(data).split()]
    known = [token for token in tokens if token in size_dict]

    if known:
        k = known[0]
    # The original tested `size` (the node URI) here, which can never contain
    # this marker; the text to inspect is `data`.
    elif ") = " in str(data):
        k = 'diameter'
    else:
        k = 'particle size'

    term, label = size_dict[k]
    g.add((size, SKOS.narrower, term))
    g.add((term, RDFS.label, Literal(label, lang='en')))

def extract_medium(size, data, g):
    """Record the medium named in ``data`` (if any) on the ``size`` node.

    Words are split on whitespace and stripped of non-word characters and
    underscores; the first one that is a key of ``medium_dict`` is used.
    Nothing is added when no word matches.
    """
    for raw_word in str(data).split():
        token = re.sub(r'[\W_]+', '', raw_word)
        if token not in medium_dict:
            continue
        medium_term, medium_label = medium_dict[token]
        g.add((size, npo.contained_in, medium_term))
        g.add((medium_term, RDFS.label, Literal(medium_label, lang='en')))
        break


def _digits_only(text):
    """Return ``text`` with everything except digits and dots removed."""
    return re.sub(r'[^0-9.]+', '', text)


def _attach_measurement(size, kind, g):
    """Link a new value node to ``size`` and type it via ``data_dict[kind]``.

    Returns the value node so the caller can add the numeric literals.
    """
    bn = BNode()
    bn1 = BNode()
    g.add((size, sio.SIO_000300, bn))
    g.add((bn, RDF.type, bn1))
    g.add((bn1, RDF.type, data_dict[kind][0]))
    g.add((bn1, RDFS.label, data_dict[kind][1]))
    return bn


def extract_data_type(size, data, g):
    """Parse the numeric part of a size description and add it to ``g``.

    Recognised forms, checked in this order:

    * ``up to X``        -> range with an upper limit
    * ``greater than X`` -> range with a lower limit
    * ``X ± Y``          -> range from X-Y to X+Y (skipped if the text contains 'tem')
    * ``X - Y``          -> range with both limits
    * ``X, Y, ...``      -> one value literal per comma-separated item
    * ``average ...``    -> average value
    * anything else      -> single value

    Numbers are obtained by deleting every character that is not a digit or
    a dot, so surrounding units or labels that contain digits end up in the
    number.

    Parameters
    ----------
    size : node of the size entry being annotated.
    data : size text (the caller passes it lower-cased).
    g : rdflib graph that receives the triples.

    Raises
    ------
    ValueError
        In the 'up to' and plus/minus forms when the remaining digits do not
        form a valid float.
    """
    if 'up to ' in data:
        upper = float(_digits_only(data))        #todo fix this
        bn = _attach_measurement(size, 'range', g)
        g.add((bn, sio.SIO_000300, Literal("<"+str(upper))))
        g.add((bn, sbd.upper_limit, Literal(str(upper))))
    # 'grater than' was the only spelling recognised before; it is kept so
    # inputs that relied on the typo still match.
    elif 'greater than' in data or 'grater than' in data:
        lower = _digits_only(data.split('-')[0])
        bn = _attach_measurement(size, 'range', g)
        g.add((bn, sio.SIO_000300, Literal(">"+str(lower))))
        g.add((bn, sbd.lower_limit, Literal(str(lower))))
    # Matching on the bare sign also covers the mis-decoded form 'â±' the
    # original looked for: the stray 'â' stays in the left part and is removed
    # by the digit filter.
    elif '±' in data:
        if 'tem' in data:
            return
        parts = data.split('±')
        centre = float(_digits_only(parts[0]))
        spread = float(_digits_only(parts[1]))
        lower = centre - spread
        upper = centre + spread
        bn = _attach_measurement(size, 'range', g)
        g.add((bn, sio.SIO_000300, Literal(str(lower) + '-' + str(upper))))
        g.add((bn, sbd.lower_limit, Literal(str(lower))))
        g.add((bn, sbd.upper_limit, Literal(str(upper))))
    elif '-' in data:
        parts = data.split('-')
        lower = _digits_only(parts[0])
        upper = _digits_only(parts[1])
        bn = _attach_measurement(size, 'range', g)
        g.add((bn, sio.SIO_000300, Literal(str(lower) + '-' + str(upper))))
        g.add((bn, sbd.lower_limit, Literal(str(lower))))
        g.add((bn, sbd.upper_limit, Literal(str(upper))))
    elif ',' in data:
        # All listed values hang off the same value node, as before.
        bn = _attach_measurement(size, 'value', g)
        for d in data.split(','):
            g.add((bn, sio.SIO_000300, Literal(_digits_only(d))))
    elif 'average' in data:
        bn = _attach_measurement(size, 'average', g)
        g.add((bn, sio.SIO_000300, Literal(_digits_only(data))))
    else:
        bn = _attach_measurement(size, 'value', g)
        g.add((bn, sio.SIO_000300, Literal(_digits_only(data))))  # check if this works for everything


## Dictionaries

### Nanoparticles dictionaries

In [52]:
# Dictionaries nanoparticles
# Lower-cased "General type NMs" value -> [NPO class, English label].
general_type_dict = {
    key: [term, Literal(label, lang="en")]
    for key, term, label in (
        ('metal oxide', npo.NPO_1541, "Metal Oxide"),
        ('metal', npo.NPO_1384, "Metal"),
        # biodegradable -> no carbon-based onto, neither organic
        ('carbon-based', npo.NPO_836, "Carbon-based"),
        ('lipid-based', npo.NPO_1552, "Lipid-based"),
        ('polymeric', npo.NPO_1375, "Polymeric"),
        ('dendrimer', npo.NPO_735, "Dendrimer"),
    )
}



# Nanomaterial name (as written in the "List of NMs" column) -> [ontology term, label].
# ChEBI terms use the CHEBI namespace defined in the Namespaces cell; the
# previously used `obo` name is not defined anywhere in this notebook.
nms_dict = {"Ag": [npo.NPO_1892, Literal("Ag nanoparticle")],
            "Ag2O": [MESH.C040225, Literal("Disilver Oxide Ag2O")], ###### MESH
            "Au": [npo.NPO_401, Literal("Au nanoparticle")],
            "Al": [npo.NPO_103, Literal("Al nanoparticle")],
            "Fullerene C60": [npo.NPO_730, Literal("Fullerene C60")],
            "Fullerene C60 and C70": [npo.NPO_686, Literal("Fullerene C60 and C70")],
            "Fullerenes C60": [npo.NPO_730, Literal("Fullerene C60")],
            "(Fullerenes C60 and C70)": [npo.NPO_686, Literal("Fullerene C60 and C70")],
            "(Fe2O3)n(Fe3O4)m": [npo.NPO_729, Literal("(Fe2O3)n(Fe3O4)m")],
            "Fe2O3": [npo.NPO_1550, Literal("Fe2O3")],
            "Fe3O4": [npo.NPO_1548, Literal("Fe3O4")],
            "FeO": [npo.NPO_1150, Literal("FeO")],
            "(Single/Multiwalled nanotubes)": [npo.NPO_606, Literal("Single/Multiwalled nanotubes")],
            "CNT: Carbon nanotubes": [npo.NPO_606, Literal("CNT: Carbon nanotubes")],
            "MWCNT40nm-COOH": [npo.NPO_354, Literal("MWCNT40nm-COOH")],
            "MWCNTs (Multi-walled carbon nanotubes)": [npo.NPO_354, Literal("MWCNTs (Multi-walled carbon nanotubes)")],
            "R-TiO2": [npo.NPO_1486, Literal("R-TiO2")],
            "SWNTs: Single-walled nanotubes": [npo.NPO_943, Literal("SWNTs: Single-walled nanotubes")],
            "SiO2": [npo.NPO_1373, Literal("SiO2")],
            "TiO2": [npo.NPO_1486, Literal("TiO2")],
            "ZnO": [npo.NPO_1542, Literal("ZnO")],
            "Al2O3": [CHEBI.CHEBI_30187, Literal("Al2O3")],
            "Co NPs": [enm.ENM_9000248, Literal("Cobalt nanoparticle")],
            "Co": [CHEBI.CHEBI_33888, Literal("Cobalt molecular entity")],
            "CoO": [enm.ENM_0000118, Literal("Cobalt (II) oxide nanoparticle")],
            # "Co3O4" was listed twice with identical values; one entry is enough.
            "Co3O4": [enm.ENM_9000254, Literal("Co3O4 nanoparticle")],
            "Co ions": [CHEBI.CHEBI_23336, Literal("Cobalt cations")],
            "CoFe2O4": [MESH.C569492, Literal("cobalt ferrite (CoFe2O4)")], ######### MESH
            "3Al2O3 2SiO2": [MESH.C049037, Literal("Mullite 3Al2O3 2SiO2")],
            "AlN": [CHEBI.CHEBI_50884, Literal("Aluminium nitride")],
            "As2O5": [MESH.C042120, Literal("Arsenic pentoxide")],
            # NOTE(review): the next two MeSH ids have seven digits, unlike the
            # six-digit ids above - verify them against MeSH.
            "BaO": [MESH.C0174148, Literal("Barium oxide")],
            "BeO": [MESH.C0053349, Literal("Beryllium oxide")]}


### Size

In [53]:
# Keyword found in a size description -> [ontology term, label].
# PATO terms use the PATO namespace defined in the Namespaces cell; the
# previously used `obo` name is not defined anywhere in this notebook.
# 'particle size' contains a space, so it is never matched word-by-word; it is
# the explicit fallback used by extract_size_kind.
size_dict = {'particle size': [npo.NPO_1694, "Particle size"],
             'diameter': [PATO.PATO_0001334, "Diameter"],
             'radius': [PATO.PATO_0002390, "Radius"],
             'aggregation': [npo.NPO_1967, "Aggregation"], # class not related to size
             'length': [PATO.PATO_0000122, "Length"]}

# Medium keyword found in a size description -> [ontology term, label].
# The ChEBI term uses the CHEBI namespace defined in the Namespaces cell; the
# previously used `obo` name is not defined anywhere in this notebook.
medium_dict = {'water': [CHEBI.CHEBI_15377, Literal("Water")],
               'pbs': [npo.NPO_1846, Literal("Phosphate Buffer Saline")]}
# TODO: 'ccm' (label "CCM") still needs an ontology term before it can be added.

# Kind of numeric size statement -> [ontology term, label literal].
data_dict = {
    kind: [term, Literal(label)]
    for kind, term, label in (
        ('value', npo.NPO_1807, "Size value"),
        ('range', STATO.STATO_0000035, "Range"),
        ('average', npo.NPO_1800, "Average"),  # NPO_1822 mean particle size
        ('median', npo.NPO_1801, "Median"),
    )
}

### Shape

In [54]:
# Shape description -> [NPO term, label].
# Keys must be lower case: the conversion loop looks shapes up after
# `.strip().lower()`, so the former "fiber (MWCNT)" and "spherical (C60)"
# keys could never match.
# Shapes without an ontology term yet ("coarse porous surface with irregular
# pores", "film", "irregular", "strip", "slice-shaped") are intentionally
# absent; the loop stores those as plain literals.
shape_dict = {
            "elliptical": [npo.NPO_850 , Literal("Elliptical")],
            "fiber": [npo.NPO_461 , Literal("Fiber")], #rod-shaped
            "hexagonal crystal system": [npo.NPO_858 , Literal("Hexagonal")], #polyhedral
            "needle": [npo.NPO_852, Literal("Needle")],
            "pyramidal": [npo.NPO_858 , Literal("Pyramidal")], #polyhedral
            "spherical": [npo.NPO_286 , Literal("Spherical")],
            "spherical, grains, rods, or needles": [npo.NPO_286 , Literal("Spherical")],########### NEED TO FIX THIS
            "sphericity and circularity": [npo.NPO_286 , Literal("Spherical")],
            "fiber (mwcnt)": [npo.NPO_461 , Literal("Fiber")], #rod-shaped
            "polyhedral": [npo.NPO_858 , Literal("Polyhedral")],
            "pseudo-spherical": [npo.NPO_286 , Literal("Pseudo-spherical")], #spherical
            "rod": [npo.NPO_461 , Literal("Rod")],
            "spherical (c60)": [npo.NPO_286 , Literal("Spherical")]} #spherical

In [55]:
# Model family -> [MeSH term, English label]; both families share one MeSH term.
model_dict = {
    family: [MESH.D021281, Literal(family, lang="en")]
    for family in ('QSAR', 'QSPR')
}

### Endpoint Dictionaries

In [56]:
# Endpoint level of organisation -> [STY term, English label].
endpointlog_dict = {
    level: [term, Literal(level, lang="en")]
    for level, term in (
        ('Cell', STY.T025),
        ('Organism', STY.T001),
    )
}

# Study design -> [OBI term, label].
# OBI terms use the OBI namespace defined in the Namespaces cell; the
# previously used `obo` name is not defined anywhere in this notebook.
design_dict = {'In vitro': [OBI.OBI_0001285, Literal("In vitro")],
               'In vivo': [OBI.OBI_0100026, Literal("In vivo")]}

# Cell line / organism text from 'endpointlogdetails' -> [ontology term, label].
# Keys are matched verbatim against the data, so their spelling is left as is;
# only the emitted labels are corrected.
# The CLO and CL terms use the OBO and CL namespaces defined in the Namespaces
# cell; the previously used `obo` name is not defined anywhere in this notebook.
specific_endpoint_dict = { "Bacteria Escherichia Coli (E. Coli)": [MESH.D004926, "Bacteria Escherichia Coli (E. Coli)"],
                           "Escherichia coli (bacteria)": [MESH.D004926, "Bacteria Escherichia Coli (E. Coli)"],
                            "Immortalized rat L2 lung epithelial cells": [RCD.X7A8D, "Lung L2 (rat)"],
                            "A549 (H)": [MESH.D000072283, "An adenocarcinoma cell line (human lung epithelial cells)"],
                            "A549 human lung epithelial carcinoma cells": [MESH.D000072283, "An adenocarcinoma cell line (human lung epithelial cells)"],
                            "A549 human lung epithelial cells": [MESH.D000072283, "An adenocarcinoma cell line (human lung epithelial cells)"],
                            "Pancratic humane cancer cells (PaCa2)": [OBO.CLO_0050101, "Pancreatic human cancer cells (PaCa2)"],
                            "Human keratinocyte cell line (HaCaT)": [MESH.D000084282, "Human keratinocyte cell line (HaCaT)"],
                            "Macrophage": [CL.CL_0000235, "Macrophage"]}
                                                                  
# Value of the 'quality' column -> [ontology term, label].
# OBI, OAE and ChEBI terms use the namespaces defined in the Namespaces cell;
# the previously used `obo` name is not defined anywhere in this notebook.
endpoint_dict = {"Cellular uptake": [npo.NPO_296, "Cellular uptake of extracellular material"],
    "Cytotoxicity": [BAO.BAO_0002993, "Cytotoxicity assay"],
    "Mutagenicity": [enm.ENM_0000042, "Mutagenicity"],
    "Cell differentiation response": [OBI.OBI_0001429, "Cell differentiation"],
    "Exocytosis": [MESH.D005089, "Exocytosis"],
    "Solubility in organic solvents": [enm.ENM_9000082, "Solubility in organic solvents"], # TODO(review): confirm "organic" is the right scope
    "Solubility in water": [npo.NPO_500, "Solubility in water"],
    "Solubility in chlorobenzene": [enm.ENM_9000082, "Solubility in organic solvents"],
    "Poisson's ratio": [OPB.OPB_01022, "Poisson ratio"],
    "LC structure": [FIX.FIX_0000608, "LC - Liquid Chromatography structure" ], # TODO(review): mapping looks doubtful
    "Dispersion in organic solvents": [npo.NPO_1969, "Dispersion"], # TODO(review): term is not solvent-specific
    "Adsorption / removal (%)": [NCI.C157206, "Adsorption"],
    # NOTE(review): the key below is kept byte-for-byte; it presumably mirrors a
    # mis-decoded zeta character in the CSV - confirm against the data.
    "Zeta potential (Î¶)": [npo.NPO_1302, "Zeta potential"],
    "Rodent lung inflammation": [OAE.OAE_000056, "Lung inflammation AE"], # TODO(review): term is not rodent-specific
    "Toxic Effect": [BAO.BAO_0002189, "Toxicity Assay"],
    "ROS": [CHEBI.CHEBI_70982, "ROS - reactive oxygen species"]}

# Main Code

## RDF formation

In [62]:
from __future__ import print_function

# Fresh graph for this conversion run.
g = Graph()

# Prefix -> namespace bindings used when the graph is serialised.
# Changes from the previous version of this cell:
#   * "dc" and "void" were each bound twice; once is enough.
#   * "CLO" referenced a name that is never defined (NameError on a fresh
#     kernel). CLO terms in this notebook live under the shared OBO PURL
#     base, so the prefix is bound to OBO.
prefix_bindings = [
    ("dc", DC),
    ("rdfs", RDFS),
    ("foaf", FOAF),
    ("dcterms", DCTERMS),
    ("void", VOID),
    ("rdf", RDF),
    ("xsd", XSD),
    ("eco", ECO),
    ("sbd", sbd),
    ("kb", kb),
    ("enm", enm),
    ("ncit", ncit),
    ("owl", OWL),
    ("sio", sio),
    ("CHEBI", CHEBI),
    ("SKOS", SKOS),
    ("PATO", PATO),
    ("STATO", STATO),
    ("MESH", MESH),
    ("OBI", OBI),
    ("CL", CL),
    ("PUBCHEM", PUBCHEM),
    ("npo", npo),
    ("BAO", BAO),
    ("RCD", RCD),
    ("AFO", AFO),
    ("OPB", OPB),
    ("NCI", NCI),
    ("OAE", OAE),
    ("FIX", FIX),
    ("CLO", OBO),
    ("STY", STY),
    ("OBO", OBO),
]
for prefix, namespace in prefix_bindings:
    g.bind(prefix, namespace)


void_source = URIRef("https://h2020-sbd4nano.github.io/sbd-data-landscape")
# DOIs whose reference node has already been described, so each publication
# is written only once even when several models cite it.
doi_list = []
# The row-46 algorithm correction is applied once in the data-preparation cell
# near the top of the notebook and is no longer repeated here.


# Dataset-level description.
g.add((void_source, RDF.type, VOID.DatasetDescription))
g.add((void_source, FOAF.page, URIRef("https://www.sbd4nano.eu/")))
g.add((void_source, DC.source, URIRef("https://doi.org/10.1016/j.comtox.2018.12.002")))
g.add((void_source, DC.title, Literal("Computational models for the assessment of manufactured nanomaterials", lang="en")))

# Convert model rows into RDF, one model per row.
# NOTE(review): the slice restricts the run to the single row at position 45
# (Ref "25f" in the inspection output further down). Replace it with `df.index`
# to convert every model - confirm which is intended.
for row in df.index[45:46]:
    selected_row = df.loc[row]
    ref = str(selected_row['Ref'])

    ################# Main Node #################

    model = URIRef(kb + "Model_" + ref)
    reference = URIRef(df.loc[row, 'DOI'])
    # `nanoparticle` must stay a module-level name: add_size_information reads it.
    nanoparticle = URIRef(model + "/Nanoparticle")
    endpoint = URIRef(model + "/Endpoint")
    algo_bn = BNode()
    qsar_bn = BNode()

    g.add((model, RDF.type, sbd.model))
    g.add((model, SKOS.narrower, qsar_bn))
    g.add((qsar_bn, RDF.type, MESH.D021281))
    g.add((qsar_bn, RDFS.label, Literal(str(selected_row["QSAR/QSPR"]), lang='en')))

    g.add((model, RDF.type, sbd.resource))
    g.add((model, DCTERMS.source, reference))
    g.add((model, DCTERMS.subject, nanoparticle))

    g.add((model, MESH.mapped_from, algo_bn))
    g.add((algo_bn, RDF.type, NCI.C16275))
    g.add((NCI.C16275, RDFS.label, Literal("Algorithm", lang='en')))

    g.add((model, BAO.BAO_0090004, endpoint)) #### has part, endpoint

    if isinstance(selected_row['Remarks'], str):
        # split/join collapses runs of whitespace and line breaks.
        remarks = " ".join(selected_row['Remarks'].split())
        g.add((model, RDFS.comment, Literal(remarks, lang='en')))

    ################ Resource Node ###################

    if selected_row['DOI'] not in doi_list:
        doi_list.append(selected_row['DOI'])
        g.add((reference, RDF.type, sio.SIO_000176))
        g.add((sio.SIO_000176, RDFS.label, Literal("Reference")))
        g.add((reference, DCTERMS.issued, Literal(int(selected_row['Year']))))

        if isinstance(selected_row['Title'], str):
            g.add((reference, DCTERMS.title, Literal(str(selected_row['Title']))))

        # Contact person (a missing cell is NaN, which has no .split)
        if isinstance(selected_row['Contact person'], str):
            for cp in selected_row['Contact person'].split('\n '):
                cp = cp.strip()
                # Only entries containing a space are kept.
                if " " in cp:
                    contact_person_bnode = BNode()
                    g.add((reference, DCTERMS.mediator, contact_person_bnode))
                    g.add((contact_person_bnode, RDF.type, FOAF.Person))
                    g.add((contact_person_bnode, FOAF.name, Literal(str(cp))))

        # Authors (same NaN guard as above)
        if isinstance(selected_row['Authors'], str):
            for author in selected_row['Authors'].split('; '):
                authors_bnode = BNode()
                g.add((reference, FOAF.maker, authors_bnode))
                g.add((authors_bnode, RDF.type, FOAF.Person))
                g.add((authors_bnode, FOAF.name, Literal(author)))

    ################ Endpoint Node ###################

    g.add((endpoint, RDF.type, BAO.BAO_0000179))
    g.add((BAO.BAO_0000179, RDFS.label, Literal("Endpoint", lang='en')))

    # Endpoint comment
    if isinstance(selected_row['endpointdescription'], str):
        comment = " ".join(selected_row['endpointdescription'].split())
        g.add((endpoint, RDFS.comment, Literal(comment, lang='en')))

    # Endpoint level of organisation
    if isinstance(selected_row['endpointlog'], str):
        for ep in selected_row['endpointlog'].split("s and "):
            ep = ep.strip()
            # Splitting on "s and " removes the plural "s" from every item
            # except the last one; fall back to the singular form for that item.
            if ep not in endpointlog_dict and ep.endswith("s") and ep[:-1] in endpointlog_dict:
                ep = ep[:-1]
            if ep in endpointlog_dict:
                g.add((endpoint, RDF.type, endpointlog_dict[ep][0]))
                g.add((endpointlog_dict[ep][0], RDFS.label, endpointlog_dict[ep][1]))

    # Endpoint level of organisation details
    if isinstance(selected_row['endpointlogdetails'], str):
        for ep in selected_row['endpointlogdetails'].split("\n"):
            ep = ep.strip()
            if ep in specific_endpoint_dict:
                g.add((endpoint, SKOS.narrower, specific_endpoint_dict[ep][0]))
                g.add((specific_endpoint_dict[ep][0], RDFS.label, Literal(specific_endpoint_dict[ep][1], lang='en')))
            elif ep != "":
                g.add((endpoint, SKOS.narrower, Literal(ep)))

    # Endpoint design
    if isinstance(selected_row['design'], str):
        for dg in selected_row['design'].split(" and "):
            if dg in design_dict:
                g.add((endpoint, BAO.BAO_0095009, design_dict[dg][0])) #has assay design method
                g.add((design_dict[dg][0], RDFS.label, design_dict[dg][1]))

    # Endpoint quality
    if isinstance(selected_row['quality'], str):
        qual = selected_row['quality']
        quality_bnode = BNode()
        g.add((endpoint, BAO.BAO_0002662, quality_bnode))

        if qual in endpoint_dict:
            g.add((quality_bnode, RDF.type, endpoint_dict[qual][0]))
            g.add((quality_bnode, RDFS.label, Literal(endpoint_dict[qual][1], lang='en')))
        else:
            g.add((quality_bnode, RDF.type, Literal(qual)))
        # The free-text comment is attached in both cases.
        if isinstance(selected_row['comment'], str):
            g.add((quality_bnode, RDFS.comment, Literal(selected_row['comment'], lang='en')))

    ################ Nanoparticle Node ###################

    g.add((nanoparticle, RDF.type, npo.NPO_707))
    g.add((npo.NPO_707, RDFS.label, Literal("Nanoparticle", lang='en')))

    # General type of NM
    if isinstance(selected_row['General type NMs'], str):
        for gen in str(selected_row['General type NMs']).split("\n "):
            gen = gen.strip().lower()
            if gen in general_type_dict:
                g.add((nanoparticle, RDF.type, general_type_dict[gen][0]))
                g.add((general_type_dict[gen][0], RDFS.label, general_type_dict[gen][1]))

    # Shape type of NM
    if isinstance(selected_row['Shape NM'], str):
        for shape in str(selected_row['Shape NM']).split("\n "):
            shape = shape.strip().lower()
            shape_bnode = BNode()
            g.add((nanoparticle, npo.has_quality, shape_bnode))
            g.add((shape_bnode, RDF.type, npo.NPO_274)) #shape

            if shape in shape_dict:
                g.add((shape_bnode, SKOS.narrower, shape_dict[shape][0]))
                g.add((shape_dict[shape][0], RDFS.label, shape_dict[shape][1]))
            else:
                g.add((shape_bnode, SKOS.narrower, Literal(shape, lang="en")))

    # Coating of NM
    if isinstance(selected_row['Coating NM'], str):
        for coating in str(selected_row['Coating NM']).split("\n "):
            coating = coating.strip()
            if coating == "":
                continue

            coating_bnode = BNode()
            g.add((nanoparticle, npo.has_part, coating_bnode))
            g.add((coating_bnode, RDF.type, npo.NPO_1962)) #surface coating
            g.add((npo.NPO_1962, RDFS.label, Literal("Surface coating",lang='en')))

            if coating in df_coatings['nm_unique'].values:
                coating_row = df_coatings[df_coatings['nm_unique']==coating]
                cid = str(coating_row['CID'].iloc[0])
                # CID "0" means no PubChem compound is known.
                if cid != "0":
                    g.add((coating_bnode, SKOS.narrower, URIRef(PUBCHEM + cid)))
                    g.add((URIRef(PUBCHEM + cid),RDFS.label, Literal(coating_row['IUPACName'].iloc[0])))
            else:
                # Report coatings missing from the lookup table.
                print(selected_row['Ref'],coating)
                g.add((coating_bnode, SKOS.narrower, Literal(coating, lang='en')))

    # Size of NM
    size = URIRef(nanoparticle + "/Size")

    if isinstance(selected_row['Size (nm)'], str):
        size_list = str(selected_row['Size (nm)']).split("\n ")
        if len(size_list)>1:
            # One numbered size node per listed entry; the index counts blank
            # entries too, so numbering follows the position in the cell.
            for i, raw_size in enumerate(size_list):
                size_text = raw_size.strip()
                if size_text != "":
                    add_size_information(URIRef(size + "_" + str(i)), size_text, g)
        else:
            add_size_information(size, size_list[0], g)

    # Kind of NM
    # This section used to be indented under the Size check above, so rows
    # without a textual size silently lost their nanomaterial list.
    if isinstance(selected_row['List of NMs'], str):
        for np_kind in str(selected_row['List of NMs']).split("\n "):
            np_kind = np_kind.strip()
            if np_kind in nms_dict:
                g.add((nanoparticle, SKOS.narrower, nms_dict[np_kind][0]))
                g.add((nms_dict[np_kind][0], RDFS.label, nms_dict[np_kind][1]))
            elif np_kind in df_nms['nm_unique'].values:
                nms_row = df_nms[df_nms['nm_unique']==np_kind]
                cid = str(nms_row['CID'].iloc[0])
                # Missing CIDs were filled with 0 when the table was prepared;
                # do not emit a PubChem URI for compound 0 (the coating section
                # applies the same check). Keep the name as a literal instead.
                if cid != "0":
                    g.add((nanoparticle, SKOS.narrower, URIRef(PUBCHEM + cid)))
                    g.add((URIRef(PUBCHEM + cid), RDFS.label, Literal(nms_row['IUPACName'].iloc[0])))
                else:
                    g.add((nanoparticle, SKOS.narrower, Literal(np_kind)))
            elif np_kind != "":
                g.add((nanoparticle, SKOS.narrower, Literal(np_kind)))

    ################ QSAR Node ###################

    if isinstance(selected_row['algoName'], str):
        # Names are space-separated; underscores inside a name stand for spaces.
        for algoName in selected_row['algoName'].split(" "):
            algoName = algoName.replace("_"," ").strip()
            g.add((algo_bn, SKOS.narrower, Literal(algoName, lang='en')))

    if isinstance(selected_row['software'], str):
        software = selected_row['software'].strip() + " software"
        g.add((algo_bn, BAO.BAO_0090006, Literal(software, lang='en')))


# Write the graph to disk in N3 format.
Turtle = g.serialize(format='n3', destination='RDFnano.txt')

In [41]:
# Show the model row whose Ref is "25f" (the row converted by the loop above).
df.loc[df['Ref'].eq("25f")]


Unnamed: 0.1,Unnamed: 0,Ref,Model name,Contact person,Contact person email address,Authors,Year,Title,Journal,DOI,...,endpointlogdetails,endpoint,endpointdescription,design,quality,comment,algo,algoName,algocomment,software
45,45,25f,Ensemble Learning for predicting biological ac...,Kunwar P. Singh,kpsingh_52@yahoo.com\n \n kunwarpsingh@gmail...,K. P. Singh; S. Gupta,2014,Nano-QSAR modeling for predicting biological a...,RSC Advances,http://doi.org/10.1039/c4ra01274g,...,THP-1 (human monocytic cell line) differenciat...,In vitro - Cytotoxicity - measured as cellular...,Cellular viability was determined on the basis...,In vitro,Cytotoxicity,measured as cellular viability by determining ...,EL DTF DTB,Ensable_learning Decision_Tree_Forest Decision...,Both models were applied to obtain a regressio...,


In [None]:
# Unexecuted repeat of the inspection above: selects the rows whose Ref is "25f".
df.loc[df['Ref']=="25f"]
