In [1]:
# Extract and clean the data required for annotation
# It is important that the user cleans the data appropiately.
# Here I have dropped the 'Process', 'Function' columns as well as 
# rows 3 and 6(later). Rowns 3 and 6 refer to exchangers which I haven't
# figured out how to annotate.

import pandas as pd

df = pd.read_csv("C:Detailed 12L Ion channel list - Annot_test.csv", skip_blank_lines=True)
df = df.drop(['Process'], axis=1)
df = df.drop(['Function'], axis=1)

source = df['Source']
source_is_part = df['Source_ispart']
sink = df['Sink']
sink_is_part = df['Sink_ispart']
med = df['Mediator']

source = source.dropna()
source = source.drop(index=3)

source_is_part = source_is_part.dropna()
source_is_part = source_is_part.drop(index=3)

sink = sink.dropna()
sink = sink.drop(index=3)

sink_is_part = sink_is_part.dropna()
sink_is_part = sink_is_part.drop(index = 3)

med = med.dropna()
med = med.drop(index = 3)

model_name = df['Main model elements']

model_name = model_name.dropna()
model_name = model_name.drop(index = 3)
model_name = model_name.replace(" ","")

In [None]:
## Fetch the cellml models from PMR
## I'm fairly certain there is a better way to do this

# import wget
# for model in model_name:
#     url = 'https://models.physiomeproject.org/workspace/692/rawfile/b98a10f69ee76a70243efdb648c598db5a866a2c/Components/'+model+'.cellml'
#     url1 = url.replace(" ", "")
#     wget.download(url1)
#     print("model ",model, "downloaded")

In [4]:
# Need to convert the df's to arrays to avoid missing index arising from
# dropping rows and columns

import numpy as np
med = np.array(med)
source = np.array(source)
source_is_part = np.array(source_is_part)
sink = np.array(sink)
sink_is_part = np.array(sink_is_part)


In [5]:
# remove xml version string. libomexmeta does not recognise this

l=[]
for model in model_name:
    model_n = model+'.cellml'
    model_n = model_n.replace(" ", "")
    model_new = model+'_new.cellml'
    model_new = model_new.replace(" ", "")
    
    l.append(model_new)
   
    try:
        with open(model_n,'r') as fr:
            lines = fr.readlines()

            with open(model_new,'w') as fw:
                for line in lines:
                    if line.strip('\n') != "<?xml version='1.0' encoding='UTF-8'?>":
                        fw.write(line)
        print('Deleted')
    except:
        print("String not found")
        
        

Deleted
Deleted
Deleted
Deleted
Deleted
Deleted
Deleted


In [6]:
df1 = pd.DataFrame(l,columns=['l'])

m_new = df1['l']
m_new = m_new.drop(index= 6)
print(m_new)

0      ICaL_new.cellml
1      ICaT_new.cellml
2       IKv_new.cellml
3     INS_K_new.cellml
4    INS_Na_new.cellml
5       INa_new.cellml
Name: l, dtype: object


In [7]:
# Add xmlns and cmeta:id needed for libomexmeta to
# read each cellml file

x=[]
for cellml in m_new:
    with open(cellml) as ft:
        contents = ft.readlines()
        res = ' '.join([str(item) for item in contents])
        x.append(res.replace('xmlns:cellml="http://www.cellml.org/cellml/1.1#"','xmlns:cmeta="http://www.cellml.org/metadata/1.0#" cmeta:id="cellml"'))
    
# print(x[1])

In [8]:
sauce = []
synk = []
for id in source_is_part:
    if id == "GO:0005615":
        sauce.append("extracellular")
        synk.append("cytosol")
    elif id == "GO:0005829":
        sauce.append("cytosol")
        synk.append("extracellular")

In [9]:
print(synk)

['cytosol', 'cytosol', 'extracellular', 'extracellular', 'extracellular', 'cytosol']


In [10]:
sauce_df = pd.DataFrame(sauce, columns=['sauce'])
sauce_new = sauce_df['sauce']

synk_df = pd.DataFrame(synk, columns=['synk'])
synk_new = synk_df['synk']

In [11]:
print(synk_new)

0          cytosol
1          cytosol
2    extracellular
3    extracellular
4    extracellular
5          cytosol
Name: synk, dtype: object


In [12]:
model_name_new = model_name.drop(index = 7)
model_name_new = np.array(model_name_new)
print(model_name_new)

['ICaL ' 'ICaT ' 'IKv' 'INS_K ' 'INS_Na ' 'INa']


In [13]:
from pyomexmeta import RDF, eUriType

for i in range(len(m_new)):
    rdf_graph1 = RDF()
    rdf_graph1.set_archive_uri("12L_annotations.omex")
    rdf_graph1.set_model_uri(m_new[i])
    annot_editor = rdf_graph1.to_editor(x[i], generate_new_metaids=False, sbml_semantic_extraction=False)

    # fma:14072 = smooth muscle cell
    # fma:7207 = Jejunum
    # fma:66836 = portion of cytosol

    with annot_editor.new_physical_entity() as cytosol:
        cytosol \
            .about("cytosol", eUriType.LOCAL_URI) \
            .identity("GO:0005829") \
            .is_part_of("FMA:14072") \
            .is_part_of("FMA:7207") 

    # fma:14072 = smooth muscle cell
    # fma:7207 = Jejunum
    # GO:0005615: extracellular space

    with annot_editor.new_physical_entity() as extracellular:
        extracellular \
            .about("extracellular", eUriType.LOCAL_URI) \
            .identity("GO:0005615") \
            .is_part_of("FMA:14072") \
            .is_part_of("FMA:7207") 

    # GO:1990454 = L-type voltage-gated calcium channel complex
    # GO:0005245 = voltage-gated calcium channel protein
    with annot_editor.new_physical_entity() as mediator:
        mediator\
            .about("mediator", eUriType.LOCAL_URI) \
            .identity(med[i]) \
            .is_part_of("cytosol",eUriType.LOCAL_URI)

    # CHEBI:39123 = Calcium cation
 
    
    with annot_editor.new_physical_entity() as source:
        source \
            .about("source", eUriType.LOCAL_URI) \
            .identity("CHEBI:39123") \
            .is_part_of(sauce_new[i], eUriType.LOCAL_URI)

# CHEBI:39123 = Calcium cation
    with annot_editor.new_physical_entity() as sink:
        sink \
            .about("sink", eUriType.LOCAL_URI) \
            .identity("CHEBI:39123") \
            .is_part_of(synk_new[i], eUriType.LOCAL_URI)

    # opb:OPB_00592 = chemical molar flow rate

    with annot_editor.new_physical_process() as reaction_rate:
        reaction_rate \
            .about("process", eUriType.LOCAL_URI) \
            .add_source("source", eUriType.LOCAL_URI, multiplier=1) \
            .add_sink("sink", eUriType.LOCAL_URI, multiplier=1) \
            .add_mediator("mediator", eUriType.LOCAL_URI) \
            .has_property(property_about=model_name_new[i], about_uri_type=eUriType.MODEL_URI, is_version_of="opb:OPB_00592")

#     print(rdf_graph1)

    print(rdf_graph1, file=open(model_name_new[i]+'.rdf', 'w'))