In [62]:
################################################################################################################################
################################################################################################################################
############                                -*-  RAPESEED METABOLIC NETWORK -*-                                    #############
################################################################################################################################
################################################################################################################################



################################################################################################################################
############                       -*- Apply Community standards for AraCore Model-*-                              #############
################################################################################################################################



################################################################################################################################
############                                  -*- Parameters of the pipeline -*-                                   #############
################################################################################################################################


#datadir


In [63]:
################################################################################################################################
############                                         -*- Pipeline -*-                                              #############
################################################################################################################################



####################################################################
################## I/ Parsing the file
####################################################################


##################
########### Goal : Extract the information we need to annotate the model : metabolites, reactions and genes
##################


import cobra
import pandas as pd


def parsing(fileName):
    """Read an SBML model and print an overview of its content.

    Three sections are written to standard output, in this order: every
    reaction with its equation, every metabolite with its formula, and every
    gene with the IDs of the reactions it is associated with.

    Parameters
    ----------
    fileName : path of the SBML file, passed as-is to cobra.io.read_sbml_model.

    Returns
    -------
    None : the function only prints.
    """
    model = cobra.io.read_sbml_model(fileName)

    print("Reactions")
    print("---------")
    for reaction in model.reactions:
        print("%s : %s" % (reaction.id, reaction.reaction))

    print("Metabolites")
    print("-----------")
    for metabolite in model.metabolites:
        # IDs are right-aligned on 9 characters
        print("%9s : %s" % (metabolite.id, metabolite.formula))

    print("Genes")
    print("-----")
    for gene in model.genes:
        reaction_ids = ", ".join([rxn.id for rxn in gene.reactions])
        print("%s is associated with reactions: %s" % (gene.id, "{" + reaction_ids + "}"))

# Path of the SBML model. It is defined here, before its first use, so that this cell
# also runs on a fresh kernel (Restart & Run All).
fileName = "2018-23-05-mb-genC3.sbml"

parsing(fileName)



###################################################################
################## II/ Annotate the initial model
###################################################################


###################
############ Goal : Introducing new annotations to the initial model => use of MIRIAM standards for metabolites
###################


############
### 1st step : Create a mapping table for metabolites
############


model = cobra.io.read_sbml_model(fileName)

# One row per metabolite of the model: its ID and its name
df = pd.DataFrame(
    {
        "IDs" : [x.id for x in model.metabolites],
        "Name" : [x.name for x in model.metabolites]
    })
df

Reactions
---------
PSII_h : 2.0 H2O_h + 4.0 H_h + 2.0 PQ_h + 4.0 hnu_h --> 4.0 H_l + O2_h + 2.0 PQH2_h
Cytb6f1_h : PCox_h + PQH2_h --> 2.0 H_l + PCrd_h + PQstar_h
Cytb6f2_h : 2.0 H_h + PCox_h + PQstar_h --> 2.0 H_l + PCrd_h + PQ_h
PGR5PGRL11_h : Fdrd_h + PGR5_PGRL1ox_h --> Fdox_h + PGR5_PGRL1rd_h
PGR5PGRL12_h : 4.0 H_h + 2.0 PGR5_PGRL1rd_h + PQ_h --> 2.0 PGR5_PGRL1ox_h + PQH2_h
NDH1_h : Fdrd_h + 2.0 H_h + NDHox_h --> Fdox_h + 2.0 H_l + NDHrd_h
NDH2_h : 4.0 H_h + 2.0 NDHrd_h + PQ_h --> 2.0 NDHox_h + PQH2_h
PSI_h : Fdox_h + PCrd_h + hnu_h --> Fdrd_h + PCox_h
Fd_DASH_NADPR_h : 2.0 Fdrd_h + H_h + NADP_h --> 2.0 Fdox_h + NADPH_h
ATPase_h : ADP_h + 4.0 H_l + Pi_h <=> ATP_h + H2O_h + 3.0 H_h
RBC_h : CO2_h + H2O_h + RuBP_h --> 2.0 H_h + 2.0 PGA_h
PGAK_h : ATP_h + PGA_h <=> ADP_h + DPGA_h
GAPDH1_h : DPGA_h + H_h + NADPH_h --> GAP_h + NADP_h + Pi_h
TPI_h : GAP_h <=> DHAP_h
FBPA_h : DHAP_h + GAP_h <=> FBP_h
FBPase_h : FBP_h + H2O_h --> F6P_h + Pi_h
FTK_h : F6P_h + GAP_h <=> E4P_h + X5P_h
SBPA_h 

AT5G03690 is associated with reactions: {FBPA_c}
AT1G43670 is associated with reactions: {FBPase_c}
AT1G07110 is associated with reactions: {F26BPPh_c, F6PK2_c}
AT5G42740 is associated with reactions: {PGI_c}
AT5G52560 is associated with reactions: {UGPase_c}
AT2G35020 is associated with reactions: {UGPase_c}
AT5G17310 is associated with reactions: {UGPase_c}
AT3G56040 is associated with reactions: {UGPase_c}
AT3G03250 is associated with reactions: {UGPase_c}
AT5G11110 is associated with reactions: {S6PS_c}
AT4G10120 is associated with reactions: {S6PS_c}
AT5G20280 is associated with reactions: {S6PS_c}
AT3G52340 is associated with reactions: {S6PPh_c}
AT2G35840 is associated with reactions: {S6PPh_c}
AT4G02280 is associated with reactions: {SucS_c}
AT5G49190 is associated with reactions: {SucS_c}
AT5G20830 is associated with reactions: {SucS_c}
AT1G73370 is associated with reactions: {SucS_c}
AT3G43190 is associated with reactions: {SucS_c}
AT5G37180 is associated with reactions: {Suc

Unnamed: 0,IDs,Name
0,hnu_h,Photon
1,PQ_h,Oxidized plastoquinone
2,H2O_h,"H2O, water"
3,H_h,"H+, proton"
4,PQH2_h,Reduced plastoquinone
...,...,...
408,PQstar_h,Plastoquinone radical
409,PGR5_PGRL1ox_h,oxidised proton gradient regulation 5 (PGR5)/P...
410,PGR5_PGRL1rd_h,reduced proton gradient regulation 5 (PGR5)/PG...
411,NDHox_h,oxidised NADH dehydrogenase-like (NDH) complex


In [64]:
# Load the model and build the metabolite mapping table: one row per metabolite,
# with its model ID and its name.
fileName = "2018-23-05-mb-genC3.sbml"
model = cobra.io.read_sbml_model(fileName)

metabolite_ids = [metabolite.id for metabolite in model.metabolites]
metabolite_names = [metabolite.name for metabolite in model.metabolites]

df_mapping_table = pd.DataFrame({"IDs" : metabolite_ids, "Name" : metabolite_names})
df_mapping_table # mapping table

Unnamed: 0,IDs,Name
0,hnu_h,Photon
1,PQ_h,Oxidized plastoquinone
2,H2O_h,"H2O, water"
3,H_h,"H+, proton"
4,PQH2_h,Reduced plastoquinone
...,...,...
408,PQstar_h,Plastoquinone radical
409,PGR5_PGRL1ox_h,oxidised proton gradient regulation 5 (PGR5)/P...
410,PGR5_PGRL1rd_h,reduced proton gradient regulation 5 (PGR5)/PG...
411,NDHox_h,oxidised NADH dehydrogenase-like (NDH) complex


In [65]:
# Use the column "IDs" as the index of the data frame, so that the default integer index (0 : 412) no longer
# appears as a separate column. set_index() returns a new frame: df_mapping_table itself is not modified.
# Idea for later: if we have the universal name of the metabolites, we can automatically generate the URL linking
# to the metabolite in the BIGG database.
df_set_index = df_mapping_table.set_index('IDs')
df_set_index

Unnamed: 0_level_0,Name
IDs,Unnamed: 1_level_1
hnu_h,Photon
PQ_h,Oxidized plastoquinone
H2O_h,"H2O, water"
H_h,"H+, proton"
PQH2_h,Reduced plastoquinone
...,...
PQstar_h,Plastoquinone radical
PGR5_PGRL1ox_h,oxidised proton gradient regulation 5 (PGR5)/P...
PGR5_PGRL1rd_h,reduced proton gradient regulation 5 (PGR5)/PG...
NDHox_h,oxidised NADH dehydrogenase-like (NDH) complex


In [66]:
df_set_index.to_csv("mappingMetabolites.csv") # save the ID-indexed table as CSV (it is read back with index_col = 0 further down)

In [67]:
# df_mapping_table still has its default integer index: set_index() returned a new frame (df_set_index)
df_mapping_table

Unnamed: 0,IDs,Name
0,hnu_h,Photon
1,PQ_h,Oxidized plastoquinone
2,H2O_h,"H2O, water"
3,H_h,"H+, proton"
4,PQH2_h,Reduced plastoquinone
...,...,...
408,PQstar_h,Plastoquinone radical
409,PGR5_PGRL1ox_h,oxidised proton gradient regulation 5 (PGR5)/P...
410,PGR5_PGRL1rd_h,reduced proton gradient regulation 5 (PGR5)/PG...
411,NDHox_h,oxidised NADH dehydrogenase-like (NDH) complex


In [68]:
# Re-open the saved table (the first CSV column is used as the index) and add an empty
# placeholder column for the BIGG Model identifiers.
df_mapping_metabolites = (
    pd.read_csv("mappingMetabolites.csv", index_col = 0)
    .assign(**{"BIGG Model IDs" : ""})
)
df_mapping_metabolites.head()

Unnamed: 0_level_0,Name,BIGG Model IDs
IDs,Unnamed: 1_level_1,Unnamed: 2_level_1
hnu_h,Photon,
PQ_h,Oxidized plastoquinone,
H2O_h,"H2O, water",
H_h,"H+, proton",
PQH2_h,Reduced plastoquinone,


In [69]:
# Add an empty placeholder column for the ModelSEED identifiers
df_mapping_metabolites = df_mapping_metabolites.assign(**{"ModelSEED IDs" : ""})
df_mapping_metabolites.head()

Unnamed: 0_level_0,Name,BIGG Model IDs,ModelSEED IDs
IDs,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
hnu_h,Photon,,
PQ_h,Oxidized plastoquinone,,
H2O_h,"H2O, water",,
H_h,"H+, proton",,
PQH2_h,Reduced plastoquinone,,


In [70]:
df_mapping_metabolites.shape # (number of rows, number of columns) of the metabolite mapping table

(413, 3)

In [71]:
# Obtaining IDs with BIGG Model database

# Load the tab-separated BIGG Models metabolites file
bigg_metabolites_path = 'bigg_models_metabolites.txt'
bigg_models_metabolites = pd.read_csv(bigg_metabolites_path, sep = "\t")
bigg_models_metabolites.head()

Unnamed: 0,bigg_id,universal_bigg_id,name,model_list,database_links,old_bigg_ids
0,12dgr120_c,12dgr120,"1,2-Diacyl-sn-glycerol (didodecanoyl, n-C12:0)",iEC1364_W; iEC1349_Crooks; iEC1356_Bl21DE3; iM...,MetaNetX (MNX) Chemical: http://identifiers.or...,12dgr120; 12dgr120[c]; 12dgr120_c; _12dgr120_c
1,12dgr140_c,12dgr140,"1,2-Diacyl-sn-glycerol (ditetradecanoyl, n-C14:0)",iECNA114_1301; iECSE_1348; iECO111_1330; iECOK...,MetaNetX (MNX) Chemical: http://identifiers.or...,12dgr140; 12dgr140[c]; 12dgr140_c; _12dgr140_c
2,12dgr180_c,12dgr180,"1,2-Diacyl-sn-glycerol (dioctadecanoyl, n-C18:0)",iECB_1328; iECDH10B_1368; iEcE24377_1341; iECD...,MetaNetX (MNX) Chemical: http://identifiers.or...,12dgr180; 12dgr180[c]; 12dgr180_c; _12dgr180_c
3,14glucan_c,14glucan,"1,4-alpha-D-glucan",iSFxv_1172; iUTI89_1310; iSSON_1240; iSbBS512_...,BioCyc: http://identifiers.org/biocyc/META:1-4...,14glucan; 14glucan_c
4,15dap_c,15dap,"1,5-Diaminopentane",iECUMN_1333; iLF82_1304; iETEC_1333; iECSF_132...,KEGG Compound: http://identifiers.org/kegg.com...,15dap; 15dap[c]; 15dap_c


In [72]:
bigg_models_metabolites.shape # (number of rows, number of columns) of the BIGG metabolites table

(15724, 6)

In [73]:
# As we want to obtain universal BIGG IDs according to MIRIAM standards, we adapt our notation to the one used in
# the table bigg_models_metabolites, then merge the two tables.

# Keep only the two columns needed for the merge. They are selected by name rather than by position, so the
# selection does not silently pick other columns if the column order of the BIGG file changes.
universal_bigg_id = bigg_models_metabolites[["bigg_id", "universal_bigg_id"]]
universal_bigg_id

Unnamed: 0,bigg_id,universal_bigg_id
0,12dgr120_c,12dgr120
1,12dgr140_c,12dgr140
2,12dgr180_c,12dgr180
3,14glucan_c,14glucan
4,15dap_c,15dap
...,...,...
15719,udcdpglcnac_a1_3_galnac_a1_4_glc_b1_6_fuc3nac_...,udcdpglcnac_a1_3_galnac_a1_4_glc_b1_6_fuc3nac_...
15720,udcdpgalnac_b1_3_glc_b1_2_glcnac_b1_4_gal_b1_4...,udcdpgalnac_b1_3_glc_b1_2_glcnac_b1_4_gal_b1_4...
15721,LPS51_VL_e,LPS51_VL
15722,LPS54_p,LPS54


In [74]:
# Lower-case copy of the model IDs, used as the join key against the BIGG identifiers.
# The vectorised .str accessor is used instead of apply(str.lower): it gives the same result on strings and
# propagates missing values instead of raising on them.
df_mapping_table["ID_lower"] = df_mapping_table["IDs"].str.lower()
df_mapping_table

Unnamed: 0,IDs,Name,ID_lower
0,hnu_h,Photon,hnu_h
1,PQ_h,Oxidized plastoquinone,pq_h
2,H2O_h,"H2O, water",h2o_h
3,H_h,"H+, proton",h_h
4,PQH2_h,Reduced plastoquinone,pqh2_h
...,...,...,...
408,PQstar_h,Plastoquinone radical,pqstar_h
409,PGR5_PGRL1ox_h,oxidised proton gradient regulation 5 (PGR5)/P...,pgr5_pgrl1ox_h
410,PGR5_PGRL1rd_h,reduced proton gradient regulation 5 (PGR5)/PG...,pgr5_pgrl1rd_h
411,NDHox_h,oxidised NADH dehydrogenase-like (NDH) complex,ndhox_h


In [75]:
# Left join: every metabolite of the model is kept; bigg_id / universal_bigg_id stay empty (NaN)
# when the lower-cased model ID has no counterpart in the BIGG table.
df_merge = pd.merge(
    df_mapping_table,
    universal_bigg_id,
    how = 'left',
    left_on = "ID_lower",
    right_on = 'bigg_id',
)
df_merge

Unnamed: 0,IDs,Name,ID_lower,bigg_id,universal_bigg_id
0,hnu_h,Photon,hnu_h,,
1,PQ_h,Oxidized plastoquinone,pq_h,pq_h,pq
2,H2O_h,"H2O, water",h2o_h,h2o_h,h2o
3,H_h,"H+, proton",h_h,h_h,h
4,PQH2_h,Reduced plastoquinone,pqh2_h,pqh2_h,pqh2
...,...,...,...,...,...
408,PQstar_h,Plastoquinone radical,pqstar_h,,
409,PGR5_PGRL1ox_h,oxidised proton gradient regulation 5 (PGR5)/P...,pgr5_pgrl1ox_h,,
410,PGR5_PGRL1rd_h,reduced proton gradient regulation 5 (PGR5)/PG...,pgr5_pgrl1rd_h,,
411,NDHox_h,oxidised NADH dehydrogenase-like (NDH) complex,ndhox_h,,


In [76]:
df_merge.info() # summary of the merged data frame; the non-null count of bigg_id is the number of metabolites matched in BIGG

<class 'pandas.core.frame.DataFrame'>
Int64Index: 413 entries, 0 to 412
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   IDs                413 non-null    object
 1   Name               413 non-null    object
 2   ID_lower           413 non-null    object
 3   bigg_id            152 non-null    object
 4   universal_bigg_id  152 non-null    object
dtypes: object(5)
memory usage: 19.4+ KB


In [77]:
# Obtaining the IDs according to ModelSEED database

# low_memory = False lets pandas infer the dtype of each column from the whole file instead of chunk by chunk.
# NOTE(review): the warning printed by the original cell looks like the mixed-types DtypeWarning raised by the
# default chunked inference on this large table; confirm that it is gone after this change.
compoundsModelSEED = pd.read_csv("compoundsModelSEED.txt", sep = "\t", low_memory = False)
compoundsModelSEED.head()

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Unnamed: 0,id,abbreviation,name,formula,mass,source,inchikey,charge,is_core,is_obsolete,...,is_cofactor,deltag,deltagerr,pka,pkb,abstract_compound,comprised_of,aliases,smiles,notes
0,cpd00001,h2o,H2O,H2O,18.0,Primary Database,XLYOFNOQVPJJNP-UHFFFAOYSA-N,0,1,0,...,0,-37.54,0.18,1:1:15.70,1:1:-1.80,,,Name: H20; H2O; H3O+; HO-; Hydroxide ion; OH; ...,O,GC|EQ|EQU
1,cpd00002,atp,ATP,C10H13N5O13P3,504.0,Primary Database,ZKHQWZAMYRWXGA-KQYNXXCUSA-K,-3,1,0,...,0,-548.85,0.36,1:14:12.60;1:22:3.29;1:26:0.90;1:29:7.42;1:30:...,1:6:-7.46;1:9:-1.06;1:14:-3.85;1:15:4.93,,,Name: ATP; Adenosine 5'-triphosphate; adenosin...,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O...,GC|EQ|EQU
2,cpd00003,nad,NAD,C21H26N7O14P2,662.0,Primary Database,BAWFJGJZGIEFAR-NNYOXOHSSA-M,-1,1,0,...,0,-286.41,1.59,1:6:11.94;1:17:1.85;1:18:2.28;1:25:11.38;1:35:...,1:6:-4.22;1:35:-3.85;1:37:-1.05;1:41:4.93;1:43...,,,Name: DPN; DPN+; DPN-ox; Diphosphopyridine nuc...,NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)([O-])OP(...,GC|EQ|EQU
3,cpd00004,nadh,NADH,C21H27N7O14P2,663.0,Primary Database,BOPGDPNILDQYTO-NNYOXOHSSA-L,-2,1,0,...,0,-271.15,1.59,1:14:12.28;1:18:14.00;1:22:-7.46;1:26:-1.05;1:...,1:6:2.28;1:9:1.85;1:14:-3.85;1:15:4.93;1:18:-3...,,,Name: DPNH; NAD-reduced; NADH; NADH+H+; NADH2;...,NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)([O-])OP(=O)(...,GC|EQ|EQU
4,cpd00005,nadph,NADPH,C21H26N7O17P3,742.0,Primary Database,ACFIXJIJDZMPPO-NNYOXOHSSA-J,-4,1,0,...,0,-483.1,1.62,1:18:0.90;1:19:5.78;1:26:0.66;1:30:3.26;1:40:1...,1:11:-7.46;1:12:-1.06;1:22:4.87;1:40:-3.78,,,Name: NADP(H); NADP-red; NADP-reduced; NADPH; ...,NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)([O-])OP(=O)(...,GC|EQ|EQU


In [78]:
compoundsModelSEED.shape # (number of rows, number of columns) of the ModelSEED compounds table

(33992, 21)

In [79]:
compoundsModelSEED.info() # summary of the data frame: column names, non-null counts and dtypes

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 33992 entries, 0 to 33991
Data columns (total 21 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   id                 33992 non-null  object 
 1   abbreviation       33990 non-null  object 
 2   name               33990 non-null  object 
 3   formula            30522 non-null  object 
 4   mass               33992 non-null  object 
 5   source             33962 non-null  object 
 6   inchikey           24253 non-null  object 
 7   charge             33992 non-null  int64  
 8   is_core            33992 non-null  int64  
 9   is_obsolete        33992 non-null  int64  
 10  linked_compound    67 non-null     object 
 11  is_cofactor        33992 non-null  int64  
 12  deltag             33992 non-null  float64
 13  deltagerr          33992 non-null  float64
 14  pka                24064 non-null  object 
 15  pkb                25548 non-null  object 
 16  abstract_compound  0 n

In [80]:
def remove_container(ID_lower):
    """Remove the container suffix from a metabolite ID.

    The suffix is an underscore followed by one character at the very end of
    the ID (for example "h2o_h" -> "h2o"). An ID that does not end with such a
    suffix is returned unchanged instead of losing its last two characters.

    Parameters
    ----------
    ID_lower : str, the metabolite ID.

    Returns
    -------
    str : the ID without its trailing "_<character>" suffix.
    """
    # no for loop here: Series.apply() calls this function once for each value of the column
    has_container_suffix = len(ID_lower) >= 2 and ID_lower[-2] == "_"
    if not has_container_suffix:
        return ID_lower
    return ID_lower[:-2] # drop the underscore and the container character
# New column: the lower-cased ID of each metabolite without its container suffix
ids_lower = df_merge["ID_lower"]
df_merge["IDs_lower_no_c"] = ids_lower.map(remove_container)
df_merge

Unnamed: 0,IDs,Name,ID_lower,bigg_id,universal_bigg_id,IDs_lower_no_c
0,hnu_h,Photon,hnu_h,,,hnu
1,PQ_h,Oxidized plastoquinone,pq_h,pq_h,pq,pq
2,H2O_h,"H2O, water",h2o_h,h2o_h,h2o,h2o
3,H_h,"H+, proton",h_h,h_h,h,h
4,PQH2_h,Reduced plastoquinone,pqh2_h,pqh2_h,pqh2,pqh2
...,...,...,...,...,...,...
408,PQstar_h,Plastoquinone radical,pqstar_h,,,pqstar
409,PGR5_PGRL1ox_h,oxidised proton gradient regulation 5 (PGR5)/P...,pgr5_pgrl1ox_h,,,pgr5_pgrl1ox
410,PGR5_PGRL1rd_h,reduced proton gradient regulation 5 (PGR5)/PG...,pgr5_pgrl1rd_h,,,pgr5_pgrl1rd
411,NDHox_h,oxidised NADH dehydrogenase-like (NDH) complex,ndhox_h,,,ndhox


In [81]:
# Left join on the ModelSEED abbreviation: every metabolite of the model is kept; id / abbreviation stay
# empty (NaN) when the container-free ID matches no ModelSEED abbreviation.
modelseed_abbreviations = compoundsModelSEED[["id", "abbreviation"]]
df_merge_ModelSEED = df_merge.merge(
    modelseed_abbreviations,
    how = 'left',
    left_on = 'IDs_lower_no_c',
    right_on = 'abbreviation',
)
df_merge_ModelSEED

Unnamed: 0,IDs,Name,ID_lower,bigg_id,universal_bigg_id,IDs_lower_no_c,id,abbreviation
0,hnu_h,Photon,hnu_h,,,hnu,,
1,PQ_h,Oxidized plastoquinone,pq_h,pq_h,pq,pq,,
2,H2O_h,"H2O, water",h2o_h,h2o_h,h2o,h2o,cpd00001,h2o
3,H_h,"H+, proton",h_h,h_h,h,h,cpd00067,h
4,PQH2_h,Reduced plastoquinone,pqh2_h,pqh2_h,pqh2,pqh2,,
...,...,...,...,...,...,...,...,...
408,PQstar_h,Plastoquinone radical,pqstar_h,,,pqstar,,
409,PGR5_PGRL1ox_h,oxidised proton gradient regulation 5 (PGR5)/P...,pgr5_pgrl1ox_h,,,pgr5_pgrl1ox,,
410,PGR5_PGRL1rd_h,reduced proton gradient regulation 5 (PGR5)/PG...,pgr5_pgrl1rd_h,,,pgr5_pgrl1rd,,
411,NDHox_h,oxidised NADH dehydrogenase-like (NDH) complex,ndhox_h,,,ndhox,,


In [82]:
df_merge_ModelSEED.info() # summary of the data frame; the non-null counts of bigg_id and id are the numbers of matches in BIGG and in ModelSEED

<class 'pandas.core.frame.DataFrame'>
Int64Index: 413 entries, 0 to 412
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   IDs                413 non-null    object
 1   Name               413 non-null    object
 2   ID_lower           413 non-null    object
 3   bigg_id            152 non-null    object
 4   universal_bigg_id  152 non-null    object
 5   IDs_lower_no_c     413 non-null    object
 6   id                 156 non-null    object
 7   abbreviation       156 non-null    object
dtypes: object(8)
memory usage: 29.0+ KB


In [83]:
# Keep only the metabolites that have an ID both in BIGG Model and in ModelSEED
matched_in_bigg = df_merge_ModelSEED["universal_bigg_id"].notna()
matched_in_modelseed = df_merge_ModelSEED["abbreviation"].notna()
df_merge_noNAN = df_merge_ModelSEED[matched_in_bigg & matched_in_modelseed]
df_merge_noNAN

Unnamed: 0,IDs,Name,ID_lower,bigg_id,universal_bigg_id,IDs_lower_no_c,id,abbreviation
2,H2O_h,"H2O, water",h2o_h,h2o_h,h2o,h2o,cpd00001,h2o
3,H_h,"H+, proton",h_h,h_h,h,h,cpd00067,h
5,O2_h,"O2, oxygen",o2_h,o2_h,o2,o2,cpd00007,o2
6,H_l,"H+, proton",h_l,h_l,h,h,cpd00067,h
9,Fdox_h,Oxidized ferredoxin,fdox_h,fdox_h,fdox,fdox,cpd15876,fdox
...,...,...,...,...,...,...,...,...
361,dCTP_c,2-Deoxycytidine 5-triphosphate,dctp_c,dctp_c,dctp,dctp,cpd00356,dctp
362,dGTP_c,2-Deoxyguanosine 5-triphosphate,dgtp_c,dgtp_c,dgtp,dgtp,cpd00241,dgtp
363,dTTP_c,2-Deoxythymidine 5-triphosphate,dttp_c,dttp_c,dttp,dttp,cpd00357,dttp
364,SO4_c,Sulfate,so4_c,so4_c,so4,so4,cpd00048,so4


In [84]:
df_merge_noNAN.info() # summary of the rows that have an ID in both databases

<class 'pandas.core.frame.DataFrame'>
Int64Index: 149 entries, 2 to 374
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   IDs                149 non-null    object
 1   Name               149 non-null    object
 2   ID_lower           149 non-null    object
 3   bigg_id            149 non-null    object
 4   universal_bigg_id  149 non-null    object
 5   IDs_lower_no_c     149 non-null    object
 6   id                 149 non-null    object
 7   abbreviation       149 non-null    object
dtypes: object(8)
memory usage: 10.5+ KB


In [85]:
# Keep only the metabolites that have an ID in exactly one of the two databases
# (either BIGG Model or ModelSEED, but not both) => element-wise XOR of the two boolean masks
in_bigg_only_mask = df_merge_ModelSEED["universal_bigg_id"].notna()
in_modelseed_only_mask = df_merge_ModelSEED["abbreviation"].notna()
df_merge_ID = df_merge_ModelSEED[in_bigg_only_mask ^ in_modelseed_only_mask]
df_merge_ID

Unnamed: 0,IDs,Name,ID_lower,bigg_id,universal_bigg_id,IDs_lower_no_c,id,abbreviation
1,PQ_h,Oxidized plastoquinone,pq_h,pq_h,pq,pq,,
4,PQH2_h,Reduced plastoquinone,pqh2_h,pqh2_h,pqh2,pqh2,,
41,Mas_c,Maltose,mas_c,mas_c,mas,mas,,
112,Q_m,Ubiquinone,q_m,,,q,cpd11669,q
114,QH2_m,Ubiquinol,qh2_m,,,qh2,cpd11665,qh2
124,GLX_p,Glyoxylate,glx_p,,,glx,cpd00040,glx
140,NADH_p,Nicotinamide adenine dinucleotide - reduced,nadh_p,,,nadh,cpd00004,nadh
143,NAD_p,Nicotinamide adenine dinucleotide,nad_p,,,nad,cpd00003,nad
147,GLP_h,Glucono-delta-lactone-6-phosphate,glp_h,,,glp,cpd12002,glp
166,Cit_h,Citrate,cit_h,,,cit,cpd00137,cit


In [86]:
# Prefix the matched BIGG IDs with "M_". Values that already carry the prefix are left alone, so re-running
# this cell does not produce "M_M_...". The un-prefixed values are all lower case (they were matched against the
# lower-cased model IDs), so a leading "M_" can only come from a previous run. Unmatched rows stay NaN.
bigg_ids = df_merge_ModelSEED["bigg_id"]
already_prefixed = bigg_ids.str.startswith("M_", na = False)
df_merge_ModelSEED["bigg_id"] = bigg_ids.where(already_prefixed, "M_" + bigg_ids)

# Drop the helper columns. universal_bigg_id is NOT identical to bigg_id: it is the same identifier without the
# container suffix (e.g. bigg_id "pq_h" vs universal_bigg_id "pq"); only the container-specific bigg_id is kept.
# errors = "ignore" keeps the cell re-runnable once the columns are gone.
df_merge_ModelSEED = df_merge_ModelSEED.drop(
    columns = ["universal_bigg_id", "ID_lower", "IDs_lower_no_c"],
    errors = "ignore",
)
df_merge_ModelSEED

Unnamed: 0,IDs,Name,bigg_id,id,abbreviation
0,hnu_h,Photon,,,
1,PQ_h,Oxidized plastoquinone,M_pq_h,,
2,H2O_h,"H2O, water",M_h2o_h,cpd00001,h2o
3,H_h,"H+, proton",M_h_h,cpd00067,h
4,PQH2_h,Reduced plastoquinone,M_pqh2_h,,
...,...,...,...,...,...
408,PQstar_h,Plastoquinone radical,,,
409,PGR5_PGRL1ox_h,oxidised proton gradient regulation 5 (PGR5)/P...,,,
410,PGR5_PGRL1rd_h,reduced proton gradient regulation 5 (PGR5)/PG...,,,
411,NDHox_h,oxidised NADH dehydrogenase-like (NDH) complex,,,


In [91]:
# Final mapping table: explicit column names, indexed by the model metabolite IDs.
# NOTE(review): the column renamed to "universal_BIGG_id" holds the "M_"-prefixed, container-specific bigg_id
# (e.g. "M_h2o_h"), not the universal BIGG id; confirm that the name is intended.
final_column_names = {
    "bigg_id" : "universal_BIGG_id",
    "id" : "ModelSEED_id",
    "abbreviation" : "abbreviation_ModelSEED",
}
df_final = (
    df_merge_ModelSEED
    .rename(columns = final_column_names)
    .set_index('IDs')
)
df_final

Unnamed: 0_level_0,Name,universal_BIGG_id,ModelSEED_id,abbreviation_ModelSEED
IDs,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
hnu_h,Photon,,,
PQ_h,Oxidized plastoquinone,M_pq_h,,
H2O_h,"H2O, water",M_h2o_h,cpd00001,h2o
H_h,"H+, proton",M_h_h,cpd00067,h
PQH2_h,Reduced plastoquinone,M_pqh2_h,,
...,...,...,...,...
PQstar_h,Plastoquinone radical,,,
PGR5_PGRL1ox_h,oxidised proton gradient regulation 5 (PGR5)/P...,,,
PGR5_PGRL1rd_h,reduced proton gradient regulation 5 (PGR5)/PG...,,,
NDHox_h,oxidised NADH dehydrogenase-like (NDH) complex,,,


In [92]:
df_final.to_csv("df_id_bigg_modelseed.csv") # save the final mapping table (indexed by the model metabolite IDs) as CSV

In [88]:
############
### 2nd step : Create a mapping table for reactions
############

In [89]:
############
### 3rd step : Create a mapping table for genes
############

In [90]:
############
### 4th step : Use ModelPolisher to pull annotations from BIGG
############