# Install missing modules & load modules

In [None]:
!pip install cobra #install cobra only required for google colab 



In [None]:
import pandas as pd
import numpy as np
import cobra
import requests

# Load AraCore Model

In [None]:
# Download the AraCore SBML file from GitHub.
fileName = 'https://raw.githubusercontent.com/ma-blaetke/CBM_C3_C4_Metabolism/master/data/2018-23-05-mb-genC3.sbml'
# A timeout keeps the cell from hanging forever on a stalled connection.
r = requests.get(fileName, timeout=60)
# Fail here on 4xx/5xx instead of handing an error page to the SBML parser in the next cell.
r.raise_for_status()

In [None]:
# Build the cobra model from the downloaded SBML document
# (the response body is passed to the reader as a string, not as a file path).
sbml_document = r.text
model = cobra.io.read_sbml_model(sbml_document)

# Correct Compartment Naming in AraCore Model according to BiGG naming conventions

In [None]:
# Display the compartment symbols and names currently defined in the loaded model.
model.compartments

{'c': 'Cytosol',
 'h': 'Chloroplast',
 'i': 'IntermembraneSpace',
 'l': 'Lumen',
 'm': 'Mitochondrion',
 'p': 'Peroxisome'}

In [None]:
# BiGG compartment symbols and their descriptions, kept as a lookup table
# (symbol -> description).
bigg_compartments = {
    'c': 'cytosol',
    'e': 'extracellular space',
    'p': 'periplasm',
    'm': 'mitochondria',
    'x': 'peroxisome/glyoxysome',
    'r': 'endoplasmic reticulum',
    'v': 'vacuole',
    'n': 'nucleus',
    'g': 'golgi apparatus',
    'u': 'thylakoid',
    'l': 'lysosome',
    'h': 'chloroplast',
    'f': 'flagellum',
    's': 'eyespot',
    'im': 'intermembrane space of mitochondria',
    'cx': 'carboxyzome',
    'um': 'thylakoid membrane',
    'cm': 'cytosolic membrane',
    'i': 'inner mitochondrial compartment',
    'mm': 'mitochondrial intermembrane',
    'w': 'wildtype staph aureus',
    'y': 'cytochrome complex',
}

In [None]:
# Mapping from the AraCore compartment symbols (index) to the symbols used for
# the BiGG-style ids (values). Reference list: http://bigg.ucsd.edu/compartments
#
# Two target symbols are additions made in this notebook:
#   - 'ul' (thylakoid lumen) is NEW and not in the BiGG compartment list
#   - 'e'  (extracellular space) is NEW in the sense that it is not yet in the model
df_compartment_mapping = pd.Series(dict(
    c='c',    # cytosol
    h='h',    # chloroplast
    m='m',    # mitochondria
    p='x',    # peroxisome/glyoxysome
    i='im',   # intermembrane space of mitochondria
    l='ul',   # thylakoid lumen
    e='e',    # extracellular space
))

# Create Reaction Table for AraCore Model

In [None]:
# Build the reaction mapping table: one row per model reaction holding its id,
# name and annotation dictionary. The table is indexed by "aracore_ids" so that
# individual reaction labels can be renamed easily afterwards.
df_reactions_aracore = pd.DataFrame(
    [(rxn.id, rxn.name, rxn.annotation) for rxn in model.reactions],
    columns=["aracore_ids", "aracore_name", "aracore_annotations"],
).set_index("aracore_ids")

df_reactions_aracore.head(25)

Unnamed: 0_level_0,aracore_name,aracore_annotations
aracore_ids,Unnamed: 1_level_1,Unnamed: 2_level_1
PSII_h,photosystem II,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c..."
Cytb6f1_h,cytochrom b6f complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c..."
Cytb6f2_h,cytochrom b6f complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c..."
PGR5PGRL11_h,proton gradient regulation 5 (PGR5)/PGR5-like ...,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':..."
PGR5PGRL12_h,proton gradient regulation 5 (PGR5)/PGR5-like ...,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':..."
NDH1_h,NADH dehydrogenase-like (NDH) complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':..."
NDH2_h,NADH dehydrogenase-like (NDH) complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':..."
PSI_h,photosystem I,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c..."
Fd_DASH_NADPR_h,ferredoxin-NADP reductase,"{'ec-code': '1.18.1.2', 'go': 'GO:0019684', 'k..."
ATPase_h,ATPase,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c..."


In [None]:
#BiGG naming convention https://github.com/SBRG/bigg_models/wiki/BiGG-Models-ID-Specification-and-Guidelines
# - Reaction symbols all need to be uppercase
# - Only contain upper- and lowercase letters, numbers, and underscores


def to_bigg_style_reaction_id(r_id):
    """Upper-case a reaction symbol and translate its compartment suffix.

    The id is split at its last underscore. The part before it is upper-cased,
    the part after it is looked up in ``df_compartment_mapping`` and appended
    unchanged (i.e. the compartment symbol keeps its case).

    Args:
        r_id: reaction id of the form ``<symbol>_<compartment>``.

    Returns:
        The id in the form ``<SYMBOL>_<mapped compartment>``.

    Raises:
        KeyError: if the suffix is not an index label of ``df_compartment_mapping``.
    """
    r_parts = r_id.rsplit('_', 1)
    r_symbol, r_comp = r_parts[0], r_parts[-1]
    if r_comp not in df_compartment_mapping.index:
        raise KeyError(
            f"Reaction id '{r_id}' ends in '{r_comp}', which is not a known compartment symbol"
        )
    return f"{r_symbol.upper()}_{df_compartment_mapping[r_comp]}"


#Collect all id corrections first and apply them in ONE rename below. A single
#pass avoids re-scanning the index for every reaction and cannot cascade
#(a freshly assigned id is never renamed a second time).
reaction_id_renames = {
    #Move the compartment symbol to the end of the id
    'StS_h1': 'StS1_h',
    'StS_h2': 'StS2_h',
    'StS_h3': 'StS3_h',
    'CeS_c1': 'CeS1_c',
    'CeS_c2': 'CeS2_c',
    'CeS_c3': 'CeS3_c',
    #Reaction id without compartment symbol
    'AGluK': 'AGluK_h',
}

for r_obj in model.reactions:
    if r_obj.id.startswith(('Im_', 'Ex_')):
        #Import and export reactions: add "_e" for extracellular space
        reaction_id_renames[r_obj.id] = f"{r_obj.id}_e"
    elif r_obj.id.startswith('Tr_'):
        #Transport reactions: add the compartment symbol of the first metabolite,
        #replacing a trailing 'mc'/'hc' suffix if there is one
        met_first_comp = next(iter(r_obj.metabolites)).compartment
        r_id_parts = r_obj.id.rsplit('_', 1)
        if r_id_parts[-1] in ['mc', 'hc']:
            reaction_id_renames[r_obj.id] = f"{r_id_parts[0]}_{met_first_comp}"
        else:
            reaction_id_renames[r_obj.id] = f"{r_obj.id}_{met_first_comp}"

#Rename and move the ids back into a regular column ('aracore_ids'), so that the
#new columns below can be derived from it. The guard makes the cell safe to
#re-run: once the index has been reset there is nothing left to rename/reset.
if df_reactions_aracore.index.name == 'aracore_ids':
    df_reactions_aracore = df_reactions_aracore.rename(index=reaction_id_renames).reset_index()

#Make reaction symbols upper case and update the compartment symbols in the reaction ids
df_reactions_aracore['aracore_updated_ids'] = df_reactions_aracore['aracore_ids'].apply(to_bigg_style_reaction_id)

#Create universal reaction ids by removing the compartment symbols
df_reactions_aracore['aracore_updated_universal_ids'] = df_reactions_aracore['aracore_updated_ids'].apply(lambda r_id: r_id.rsplit('_', 1)[0])

df_reactions_aracore.head(25)

Unnamed: 0,aracore_ids,aracore_name,aracore_annotations,aracore_updated_ids,aracore_updated_universal_ids
0,PSII_h,photosystem II,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",PSII_h,PSII
1,Cytb6f1_h,cytochrom b6f complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",CYTB6F1_h,CYTB6F1
2,Cytb6f2_h,cytochrom b6f complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",CYTB6F2_h,CYTB6F2
3,PGR5PGRL11_h,proton gradient regulation 5 (PGR5)/PGR5-like ...,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':...",PGR5PGRL11_h,PGR5PGRL11
4,PGR5PGRL12_h,proton gradient regulation 5 (PGR5)/PGR5-like ...,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':...",PGR5PGRL12_h,PGR5PGRL12
5,NDH1_h,NADH dehydrogenase-like (NDH) complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':...",NDH1_h,NDH1
6,NDH2_h,NADH dehydrogenase-like (NDH) complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':...",NDH2_h,NDH2
7,PSI_h,photosystem I,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",PSI_h,PSI
8,Fd_DASH_NADPR_h,ferredoxin-NADP reductase,"{'ec-code': '1.18.1.2', 'go': 'GO:0019684', 'k...",FD_DASH_NADPR_h,FD_DASH_NADPR
9,ATPase_h,ATPase,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",ATPASE_h,ATPASE


In [None]:
# Show all reactions whose original id contains the 'DASH' placeholder.
dash_mask = df_reactions_aracore['aracore_ids'].str.contains('DASH')
df_reactions_aracore[dash_mask]

Unnamed: 0,aracore_ids,aracore_name,aracore_annotations,aracore_updated_ids,aracore_updated_universal_ids
8,Fd_DASH_NADPR_h,ferredoxin-NADP reductase,"{'ec-code': '1.18.1.2', 'go': 'GO:0019684', 'k...",FD_DASH_NADPR_H,FD_DASH_NADPR
227,Asp_DASH_SeADH_h,Asp-SeA dehydrogenase,"{'doi': '10.1006/prep.2001.1538', 'ec-code': '...",ASP_DASH_SEADH_H,ASP_DASH_SEADH
239,5M_DASH_THFOR_c,5M-THF oxidoreductase,"{'doi': '10.1074/jbc.274.51.36089', 'ec-code':...",5M_DASH_THFOR_C,5M_DASH_THFOR


In [None]:
# Pull the KEGG reaction id out of the 'aracore_annotations' column. The annotations
# are dictionaries; the id sits under the key "kegg.reaction". Reactions without
# that key get None.
df_reactions_aracore['kegg_id'] = df_reactions_aracore['aracore_annotations'].apply(
    lambda annotations: annotations.get('kegg.reaction')
)

df_reactions_aracore.head(25)

Unnamed: 0,aracore_ids,aracore_name,aracore_annotations,aracore_updated_ids,aracore_updated_universal_ids,kegg_id
0,PSII_h,photosystem II,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",PSII_H,PSII,R09503
1,Cytb6f1_h,cytochrom b6f complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",CYTB6F1_H,CYTB6F1,R03817
2,Cytb6f2_h,cytochrom b6f complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",CYTB6F2_H,CYTB6F2,R03817
3,PGR5PGRL11_h,proton gradient regulation 5 (PGR5)/PGR5-like ...,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':...",PGR5PGRL11_H,PGR5PGRL11,R03817
4,PGR5PGRL12_h,proton gradient regulation 5 (PGR5)/PGR5-like ...,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':...",PGR5PGRL12_H,PGR5PGRL12,R03817
5,NDH1_h,NADH dehydrogenase-like (NDH) complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':...",NDH1_H,NDH1,R03817
6,NDH2_h,NADH dehydrogenase-like (NDH) complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':...",NDH2_H,NDH2,R03817
7,PSI_h,photosystem I,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",PSI_H,PSI,R09542
8,Fd_DASH_NADPR_h,ferredoxin-NADP reductase,"{'ec-code': '1.18.1.2', 'go': 'GO:0019684', 'k...",FD_DASH_NADPR_H,FD_DASH_NADPR,R01195
9,ATPase_h,ATPase,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",ATPASE_H,ATPASE,R00086


In [None]:
# Pull the EC code out of the 'aracore_annotations' column. The annotations are
# dictionaries; the code sits under the key "ec-code". Reactions without that
# key get None.
df_reactions_aracore['ec-code'] = df_reactions_aracore['aracore_annotations'].apply(
    lambda annotations: annotations.get('ec-code')
)

df_reactions_aracore.head(25)

Unnamed: 0,aracore_ids,aracore_name,aracore_annotations,aracore_updated_ids,aracore_updated_universal_ids,kegg_id,ec-code
0,PSII_h,photosystem II,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",PSII_H,PSII,R09503,1.10.3.9
1,Cytb6f1_h,cytochrom b6f complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",CYTB6F1_H,CYTB6F1,R03817,1.10.9.1
2,Cytb6f2_h,cytochrom b6f complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",CYTB6F2_H,CYTB6F2,R03817,1.10.9.1
3,PGR5PGRL11_h,proton gradient regulation 5 (PGR5)/PGR5-like ...,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':...",PGR5PGRL11_H,PGR5PGRL11,R03817,
4,PGR5PGRL12_h,proton gradient regulation 5 (PGR5)/PGR5-like ...,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':...",PGR5PGRL12_H,PGR5PGRL12,R03817,
5,NDH1_h,NADH dehydrogenase-like (NDH) complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':...",NDH1_H,NDH1,R03817,
6,NDH2_h,NADH dehydrogenase-like (NDH) complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':...",NDH2_H,NDH2,R03817,
7,PSI_h,photosystem I,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",PSI_H,PSI,R09542,1.97.1.12
8,Fd_DASH_NADPR_h,ferredoxin-NADP reductase,"{'ec-code': '1.18.1.2', 'go': 'GO:0019684', 'k...",FD_DASH_NADPR_H,FD_DASH_NADPR,R01195,1.18.1.2
9,ATPase_h,ATPase,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",ATPASE_H,ATPASE,R00086,3.6.3.14


# BiGG Reactions
---



In [None]:
# Load the BiGG reaction table (tab-separated text file)
bigg_reactions_url = 'http://bigg.ucsd.edu/static/namespace/bigg_models_reactions.txt'
df_reactions_bigg = pd.read_table(bigg_reactions_url)
# Demo only: show an arbitrary slice of rows picked because it contains
# reactions that carry a KEGG identifier.
df_reactions_bigg.loc[170:200]

Unnamed: 0,bigg_id,name,reaction_string,model_list,database_links,old_bigg_ids
170,12DGR181tipp,"1,2 diacylglycerol transport via flipping (per...",12dgr181_p <-> 12dgr181_c,iYS1720; iEC1344_C; iEC1364_W; iEC1349_Crooks;...,MetaNetX (MNX) Equation: http://identifiers.or...,12DGR181tipp
171,12PPDStex,"(S)-Propane-1,2-diol transport via diffusion (...",12ppd__S_e <-> 12ppd__S_p,iEC1364_W; iEC1372_W3110; iEC1356_Bl21DE3; iEC...,MetaNetX (MNX) Equation: http://identifiers.or...,12PPDStex
172,14GLUCANabcpp,"1,4-alpha-D-glucan transport via ABC system (p...",atp_c + h2o_c + 14glucan_p <-> 14glucan_c + ad...,iYS1720; iEC1356_Bl21DE3; iEC1349_Crooks; iEC1...,MetaNetX (MNX) Equation: http://identifiers.or...,14GLUCANabcpp
173,23CCMPtex,23cCMP transport via diffusion (extracellular ...,23ccmp_e <-> 23ccmp_p,iEC1356_Bl21DE3; iEC1364_W; iEC1349_Crooks; iE...,MetaNetX (MNX) Equation: http://identifiers.or...,23CCMPtex
174,23CGMPtex,23cGMP transport via diffusion (extracellular ...,23cgmp_e <-> 23cgmp_p,iPC815; iECBD_1354; iE2348C_1286; iEcDH1_1363;...,MetaNetX (MNX) Equation: http://identifiers.or...,23CGMPtex
175,23PDE2pp,"2',3'-cyclic-nucleotide phosphodiesterase (UMP...",23cump_p + h2o_p <-> 3ump_p + h_p,iEC1364_W; iEC1368_DH5a; iEC1372_W3110; iEC135...,RHEA: http://identifiers.org/rhea/27878; RHEA:...,23PDE2pp
176,23PDE9pp,"2',3'-cyclic-nucleotide phosphodiesterase (GMP...",23cgmp_p + h2o_p <-> 3gmp_p + h_p,iEcHS_1320; iEcE24377_1341; iECS88_1305; iECOK...,RHEA: http://identifiers.org/rhea/27858; RHEA:...,23PDE9pp
177,2AGPA120tipp,2-Acyl-sn-glycero-3-phosphatidate (n-C12:0) tr...,2ddecg3p_p <-> 2ddecg3p_c,iEC1344_C; iEC1372_W3110; iEC1364_W; iEC1349_C...,MetaNetX (MNX) Equation: http://identifiers.or...,2AGPA120tipp
178,2AGPA140tipp,2-Acyl-sn-glycero-3-phosphatidate (n-C14:0) tr...,2tdecg3p_p <-> 2tdecg3p_c,iBWG_1329; iEC042_1314; iEC55989_1330; iAF1260...,MetaNetX (MNX) Equation: http://identifiers.or...,2AGPA140tipp
179,2AGPA180tipp,2-Acyl-sn-glycero-3-phosphatidate (n-C18:0) tr...,2odecg3p_p <-> 2odecg3p_c,iEC1356_Bl21DE3; iEC1349_Crooks; iEC1364_W; iE...,MetaNetX (MNX) Equation: http://identifiers.or...,2AGPA180tipp


In [None]:
def parse_bigg_database_links(str_links):
    """Convert a BiGG 'database_links' string into a {database: id} dictionary.

    The string is split at ';' into entries. For each entry the text after the
    first ':' is treated as a '/'-separated URL: its second-to-last segment is
    used as the database identifier (key) and its last segment as the
    database-specific id (value). If a database occurs more than once, the
    last entry wins.

    Args:
        str_links: the raw link string, an already parsed dictionary, or a
            missing value (anything that is neither str nor dict).

    Returns:
        The parsed dictionary. A dictionary is returned unchanged, so applying
        the function twice (e.g. when the cell is re-run) does not wipe the
        column. Any other non-string input yields an empty dictionary.
    """
    if isinstance(str_links, dict):
        return str_links
    if not isinstance(str_links, str):
        return {}

    db_links = {}
    for db_link in str_links.split(';'):
        url_segments = db_link.split(':', 1)[-1].split('/')
        if len(url_segments) < 2:
            # No "<database>/<id>" pair in this entry (e.g. empty text after a trailing ';')
            continue
        db_links[url_segments[-2]] = url_segments[-1]
    return db_links


#Convert string of database links into dictionaries of database identifier/symbol (key) and database-specific reaction/annotation id (value)
df_reactions_bigg['database_links'] = df_reactions_bigg['database_links'].apply(parse_bigg_database_links)

In [None]:
#All database keys in database_links
# (the keys are collected in one flat pass; summing a Series of lists would
#  build a new, ever longer list for every row)
np.unique([db_key for db_links in df_reactions_bigg['database_links'] for db_key in db_links.keys()])

array(['biocyc', 'ec-code', 'kegg.reaction', 'metanetx.reaction',
       'reactome', 'rhea', 'seed.reaction'], dtype='<U17')

In [None]:
# Copy the KEGG id from the 'database_links' dictionaries (key "kegg.reaction")
# into its own column; rows without that key get None.
df_reactions_bigg['kegg.reaction'] = df_reactions_bigg['database_links'].apply(
    lambda db_links: db_links.get('kegg.reaction')
)
df_reactions_bigg.loc[170:200]

Unnamed: 0,bigg_id,name,reaction_string,model_list,database_links,old_bigg_ids,kegg.reaction
170,12DGR181tipp,"1,2 diacylglycerol transport via flipping (per...",12dgr181_p <-> 12dgr181_c,iYS1720; iEC1344_C; iEC1364_W; iEC1349_Crooks;...,{'metanetx.reaction': 'MNXR94681'},12DGR181tipp,
171,12PPDStex,"(S)-Propane-1,2-diol transport via diffusion (...",12ppd__S_e <-> 12ppd__S_p,iEC1364_W; iEC1372_W3110; iEC1356_Bl21DE3; iEC...,{'metanetx.reaction': 'MNXR94684'},12PPDStex,
172,14GLUCANabcpp,"1,4-alpha-D-glucan transport via ABC system (p...",atp_c + h2o_c + 14glucan_p <-> 14glucan_c + ad...,iYS1720; iEC1356_Bl21DE3; iEC1349_Crooks; iEC1...,{'metanetx.reaction': 'MNXR94693'},14GLUCANabcpp,
173,23CCMPtex,23cCMP transport via diffusion (extracellular ...,23ccmp_e <-> 23ccmp_p,iEC1356_Bl21DE3; iEC1364_W; iEC1349_Crooks; iE...,"{'metanetx.reaction': 'MNXR94720', 'seed.react...",23CCMPtex,
174,23CGMPtex,23cGMP transport via diffusion (extracellular ...,23cgmp_e <-> 23cgmp_p,iPC815; iECBD_1354; iE2348C_1286; iEcDH1_1363;...,"{'metanetx.reaction': 'MNXR94721', 'seed.react...",23CGMPtex,
175,23PDE2pp,"2',3'-cyclic-nucleotide phosphodiesterase (UMP...",23cump_p + h2o_p <-> 3ump_p + h_p,iEC1364_W; iEC1368_DH5a; iEC1372_W3110; iEC135...,"{'rhea': '27881', 'ec-code': '3.1.4.16', 'bioc...",23PDE2pp,R03538
176,23PDE9pp,"2',3'-cyclic-nucleotide phosphodiesterase (GMP...",23cgmp_p + h2o_p <-> 3gmp_p + h_p,iEcHS_1320; iEcE24377_1341; iECS88_1305; iECOK...,"{'rhea': '27861', 'ec-code': '3.1.4.16', 'bioc...",23PDE9pp,R05135
177,2AGPA120tipp,2-Acyl-sn-glycero-3-phosphatidate (n-C12:0) tr...,2ddecg3p_p <-> 2ddecg3p_c,iEC1344_C; iEC1372_W3110; iEC1364_W; iEC1349_C...,"{'metanetx.reaction': 'MNXR94743', 'seed.react...",2AGPA120tipp,
178,2AGPA140tipp,2-Acyl-sn-glycero-3-phosphatidate (n-C14:0) tr...,2tdecg3p_p <-> 2tdecg3p_c,iBWG_1329; iEC042_1314; iEC55989_1330; iAF1260...,"{'metanetx.reaction': 'MNXR94744', 'seed.react...",2AGPA140tipp,
179,2AGPA180tipp,2-Acyl-sn-glycero-3-phosphatidate (n-C18:0) tr...,2odecg3p_p <-> 2odecg3p_c,iEC1356_Bl21DE3; iEC1349_Crooks; iEC1364_W; iE...,"{'metanetx.reaction': 'MNXR94748', 'seed.react...",2AGPA180tipp,


In [None]:
# Copy the EC code from the 'database_links' dictionaries (key "ec-code")
# into its own column; rows without that key get None.
df_reactions_bigg['ec-code'] = df_reactions_bigg['database_links'].apply(
    lambda db_links: db_links.get('ec-code')
)
df_reactions_bigg.loc[170:200]

Unnamed: 0,bigg_id,name,reaction_string,model_list,database_links,old_bigg_ids,kegg.reaction,ec-code
170,12DGR181tipp,"1,2 diacylglycerol transport via flipping (per...",12dgr181_p <-> 12dgr181_c,iYS1720; iEC1344_C; iEC1364_W; iEC1349_Crooks;...,{'metanetx.reaction': 'MNXR94681'},12DGR181tipp,,
171,12PPDStex,"(S)-Propane-1,2-diol transport via diffusion (...",12ppd__S_e <-> 12ppd__S_p,iEC1364_W; iEC1372_W3110; iEC1356_Bl21DE3; iEC...,{'metanetx.reaction': 'MNXR94684'},12PPDStex,,
172,14GLUCANabcpp,"1,4-alpha-D-glucan transport via ABC system (p...",atp_c + h2o_c + 14glucan_p <-> 14glucan_c + ad...,iYS1720; iEC1356_Bl21DE3; iEC1349_Crooks; iEC1...,{'metanetx.reaction': 'MNXR94693'},14GLUCANabcpp,,
173,23CCMPtex,23cCMP transport via diffusion (extracellular ...,23ccmp_e <-> 23ccmp_p,iEC1356_Bl21DE3; iEC1364_W; iEC1349_Crooks; iE...,"{'metanetx.reaction': 'MNXR94720', 'seed.react...",23CCMPtex,,
174,23CGMPtex,23cGMP transport via diffusion (extracellular ...,23cgmp_e <-> 23cgmp_p,iPC815; iECBD_1354; iE2348C_1286; iEcDH1_1363;...,"{'metanetx.reaction': 'MNXR94721', 'seed.react...",23CGMPtex,,
175,23PDE2pp,"2',3'-cyclic-nucleotide phosphodiesterase (UMP...",23cump_p + h2o_p <-> 3ump_p + h_p,iEC1364_W; iEC1368_DH5a; iEC1372_W3110; iEC135...,"{'rhea': '27881', 'ec-code': '3.1.4.16', 'bioc...",23PDE2pp,R03538,3.1.4.16
176,23PDE9pp,"2',3'-cyclic-nucleotide phosphodiesterase (GMP...",23cgmp_p + h2o_p <-> 3gmp_p + h_p,iEcHS_1320; iEcE24377_1341; iECS88_1305; iECOK...,"{'rhea': '27861', 'ec-code': '3.1.4.16', 'bioc...",23PDE9pp,R05135,3.1.4.16
177,2AGPA120tipp,2-Acyl-sn-glycero-3-phosphatidate (n-C12:0) tr...,2ddecg3p_p <-> 2ddecg3p_c,iEC1344_C; iEC1372_W3110; iEC1364_W; iEC1349_C...,"{'metanetx.reaction': 'MNXR94743', 'seed.react...",2AGPA120tipp,,
178,2AGPA140tipp,2-Acyl-sn-glycero-3-phosphatidate (n-C14:0) tr...,2tdecg3p_p <-> 2tdecg3p_c,iBWG_1329; iEC042_1314; iEC55989_1330; iAF1260...,"{'metanetx.reaction': 'MNXR94744', 'seed.react...",2AGPA140tipp,,
179,2AGPA180tipp,2-Acyl-sn-glycero-3-phosphatidate (n-C18:0) tr...,2odecg3p_p <-> 2odecg3p_c,iEC1356_Bl21DE3; iEC1349_Crooks; iEC1364_W; iE...,"{'metanetx.reaction': 'MNXR94748', 'seed.react...",2AGPA180tipp,,


In [None]:
# Copy the ModelSEED id from the 'database_links' dictionaries (key "seed.reaction")
# into its own column; rows without that key get None.
df_reactions_bigg['seed.reaction'] = df_reactions_bigg['database_links'].apply(
    lambda db_links: db_links.get('seed.reaction')
)
df_reactions_bigg.loc[170:200]

Unnamed: 0,bigg_id,name,reaction_string,model_list,database_links,old_bigg_ids,kegg.reaction,ec-code,seed.reaction
170,12DGR181tipp,"1,2 diacylglycerol transport via flipping (per...",12dgr181_p <-> 12dgr181_c,iYS1720; iEC1344_C; iEC1364_W; iEC1349_Crooks;...,{'metanetx.reaction': 'MNXR94681'},12DGR181tipp,,,
171,12PPDStex,"(S)-Propane-1,2-diol transport via diffusion (...",12ppd__S_e <-> 12ppd__S_p,iEC1364_W; iEC1372_W3110; iEC1356_Bl21DE3; iEC...,{'metanetx.reaction': 'MNXR94684'},12PPDStex,,,
172,14GLUCANabcpp,"1,4-alpha-D-glucan transport via ABC system (p...",atp_c + h2o_c + 14glucan_p <-> 14glucan_c + ad...,iYS1720; iEC1356_Bl21DE3; iEC1349_Crooks; iEC1...,{'metanetx.reaction': 'MNXR94693'},14GLUCANabcpp,,,
173,23CCMPtex,23cCMP transport via diffusion (extracellular ...,23ccmp_e <-> 23ccmp_p,iEC1356_Bl21DE3; iEC1364_W; iEC1349_Crooks; iE...,"{'metanetx.reaction': 'MNXR94720', 'seed.react...",23CCMPtex,,,rxn07926
174,23CGMPtex,23cGMP transport via diffusion (extracellular ...,23cgmp_e <-> 23cgmp_p,iPC815; iECBD_1354; iE2348C_1286; iEcDH1_1363;...,"{'metanetx.reaction': 'MNXR94721', 'seed.react...",23CGMPtex,,,rxn07927
175,23PDE2pp,"2',3'-cyclic-nucleotide phosphodiesterase (UMP...",23cump_p + h2o_p <-> 3ump_p + h_p,iEC1364_W; iEC1368_DH5a; iEC1372_W3110; iEC135...,"{'rhea': '27881', 'ec-code': '3.1.4.16', 'bioc...",23PDE2pp,R03538,3.1.4.16,rxn02522
176,23PDE9pp,"2',3'-cyclic-nucleotide phosphodiesterase (GMP...",23cgmp_p + h2o_p <-> 3gmp_p + h_p,iEcHS_1320; iEcE24377_1341; iECS88_1305; iECOK...,"{'rhea': '27861', 'ec-code': '3.1.4.16', 'bioc...",23PDE9pp,R05135,3.1.4.16,rxn03483
177,2AGPA120tipp,2-Acyl-sn-glycero-3-phosphatidate (n-C12:0) tr...,2ddecg3p_p <-> 2ddecg3p_c,iEC1344_C; iEC1372_W3110; iEC1364_W; iEC1349_C...,"{'metanetx.reaction': 'MNXR94743', 'seed.react...",2AGPA120tipp,,,rxn07932
178,2AGPA140tipp,2-Acyl-sn-glycero-3-phosphatidate (n-C14:0) tr...,2tdecg3p_p <-> 2tdecg3p_c,iBWG_1329; iEC042_1314; iEC55989_1330; iAF1260...,"{'metanetx.reaction': 'MNXR94744', 'seed.react...",2AGPA140tipp,,,rxn07933
179,2AGPA180tipp,2-Acyl-sn-glycero-3-phosphatidate (n-C18:0) tr...,2odecg3p_p <-> 2odecg3p_c,iEC1356_Bl21DE3; iEC1349_Crooks; iEC1364_W; iE...,"{'metanetx.reaction': 'MNXR94748', 'seed.react...",2AGPA180tipp,,,rxn07937


# ModelSeed Reactions

In [None]:
# Load the ModelSEED reaction table (tab-separated file from the ModelSEEDDatabase repository)
seed_reactions_url = 'https://raw.githubusercontent.com/ModelSEED/ModelSEEDDatabase/master/Biochemistry/reactions.tsv'
df_reactions_seed = pd.read_table(seed_reactions_url)

df_reactions_seed.head(25)

Unnamed: 0,id,abbreviation,name,code,stoichiometry,is_transport,equation,definition,reversibility,direction,abstract_reaction,pathways,aliases,ec_numbers,deltag,deltagerr,compound_ids,status,is_obsolete,linked_reaction,notes,source
0,rxn00001,R00004,diphosphate phosphohydrolase,(1) cpd00001[0] + (1) cpd00012[0] <=> (2) cpd0...,"-1:cpd00001:0:0:""H2O"";-1:cpd00012:0:0:""PPi"";2:...",0,(1) cpd00001[0] + (1) cpd00012[0] <=> (2) cpd0...,(1) H2O[0] + (1) PPi[0] <=> (2) Phosphate[0] +...,>,=,,MetaCyc: Degradation (Degradation/Utilization/...,AraCyc: INORGPYROPHOSPHAT-RXN|BiGG: IPP1; PPA;...,3.6.1.1,-3.46,0.05,cpd00001;cpd00009;cpd00012;cpd00067,OK,0,rxn27946;rxn27947;rxn27948;rxn32487;rxn38157;r...,GCC|HB|EQC|EQU,Primary Database
1,rxn00002,R00005,urea-1-carboxylate amidohydrolase,(1) cpd00001[0] + (1) cpd00742[0] <=> (2) cpd0...,"-1:cpd00001:0:0:""H2O"";-3:cpd00067:0:0:""H+"";-1:...",0,(1) cpd00001[0] + (3) cpd00067[0] + (1) cpd007...,(1) H2O[0] + (3) H+[0] + (1) Allophanate[0] =>...,>,>,,MetaCyc: ALLANTOINDEG-PWY (superpathway of all...,AraCyc: ALLOPHANATE-HYDROLASE-RXN|BiGG: ALPHNH...,3.5.1.54,-20.14,1.86,cpd00001;cpd00011;cpd00013;cpd00067;cpd00742,OK,0,rxn30346;rxn35525,GCC|EQC|EQU,Primary Database
2,rxn00003,R00006,pyruvate:pyruvate acetaldehydetransferase (dec...,(1) cpd00011[0] + (1) cpd00668[0] <=> (2) cpd0...,"-1:cpd00011:0:0:""CO2"";-1:cpd00668:0:0:""ALCTT"";...",0,(1) cpd00011[0] + (1) cpd00668[0] <= (2) cpd00...,(1) CO2[0] + (1) ALCTT[0] <= (2) Pyruvate[0] +...,<,<,,KEGG: rn00770 (Pantothenate and CoA biosynthesis),AlgaGEM: R_R00006_c|AraGEM: R_R00006_c|BiGG: I...,2.2.1.6,8.27,0.9,cpd00011;cpd00020;cpd00067;cpd00668,OK,0,rxn30144;rxn33164,GCC|EQC|EQU,Primary Database
3,rxn00004,R00008,4-hydroxy-4-methyl-2-oxoglutarate pyruvate-lya...,(1) cpd02570[0] <=> (2) cpd00020[0],"-1:cpd02570:0:0:""Parapyruvate"";2:cpd00020:0:0:...",0,(1) cpd02570[0] <=> (2) cpd00020[0],(1) Parapyruvate[0] <=> (2) Pyruvate[0],=,=,,KEGG: rn00362 (Benzoate degradation); rn00660 ...,KEGG: R00008|Name: 4-Hydroxy-4-methyl-2-oxoglu...,4.1.3.17,4.49,0.57,cpd00020;cpd02570,OK,0,,GCC|EQC|EQU,Primary Database
4,rxn00006,R00009,hydrogen-peroxide:hydrogen-peroxide oxidoreduc...,(2) cpd00025[0] <=> (2) cpd00001[0] + (1) cpd0...,"-2:cpd00025:0:0:""H2O2"";2:cpd00001:0:0:""H2O"";1:...",0,(2) cpd00025[0] => (2) cpd00001[0] + (1) cpd00...,(2) H2O2[0] => (2) H2O[0] + (1) O2[0],>,>,,MetaCyc: C1-COMPOUNDS (C1 Compound Utilization...,AraCyc: CATAL-RXN|BiGG: CAT; CATp; CTA1; CTT1|...,1.11.1.21|1.11.1.6,-46.06,1.64,cpd00001;cpd00007;cpd00025,OK,0,rxn19264;rxn22404;rxn27744;rxn31381,GCC|EQC|EQU,Primary Database
5,rxn00007,R00010,"alpha,alpha-trehalose glucohydrolase",(1) cpd00001[0] + (1) cpd00794[0] <=> (2) cpd0...,"-1:cpd00001:0:0:""H2O"";-1:cpd00794:0:0:""TRHL"";2...",0,(1) cpd00001[0] + (1) cpd00794[0] => (2) cpd00...,(1) H2O[0] + (1) TRHL[0] => (2) D-Glucose[0],=,>,,KEGG: rn00500 (Starch and sucrose metabolism),AlgaGEM: R_R00010_c|AraGEM: R_R00010_c|BiGG: A...,3.2.1.28,-2.79,0.26,cpd00001;cpd00027;cpd00794,OK,0,rxn29977;rxn30219;rxn30442,GCC|EQC|EQU,Primary Database
6,rxn00008,R00011,Mn(II):hydrogen-peroxide oxidoreductase,(2) cpd00001[0] <=> (1) cpd00025[0],"-2:cpd00001:0:0:""H2O"";1:cpd00025:0:0:""H2O2"";2:...",0,(2) cpd00001[0] <=> (1) cpd00025[0] + (2) cpd0...,(2) H2O[0] <=> (1) H2O2[0] + (2) H+[0],<,=,,MetaCyc: Degradation (Degradation/Utilization/...,KEGG: R00011|MetaCyc: MANGANESE-PEROXIDASE-RXN...,1.11.1.13,62.53,0.8,cpd00001;cpd00025;cpd00067,CI:2,0,,GCC|EQC|EQU,Primary Database
7,rxn00009,R00012,GTP:GTP guanylyltransferase,(2) cpd00038[0] <=> (1) cpd00012[0] + (1) cpd0...,"-2:cpd00038:0:0:""GTP"";1:cpd00012:0:0:""PPi"";1:c...",0,(2) cpd00038[0] => (1) cpd00012[0] + (1) cpd00...,(2) GTP[0] => (1) PPi[0] + (1) H+[0] + (1) Gpp...,=,>,,,KEGG: R00012|MetaCyc: 2.7.7.45-RXN|TS_Athalian...,2.7.7.45,2.7,1.52,cpd00012;cpd00038;cpd00067;cpd00925,OK,0,rxn33150;rxn33152,GCC|HB|EQC|EQU,Primary Database
8,rxn00010,R00013,glyoxylate carboxy-lyase (dimerizing; tartrona...,(2) cpd00040[0] <=> (1) cpd00011[0] + (1) cpd0...,"-2:cpd00040:0:0:""Glyoxalate"";-1:cpd00067:0:0:""...",0,(2) cpd00040[0] + (1) cpd00067[0] => (1) cpd00...,(2) Glyoxalate[0] + (1) H+[0] => (1) CO2[0] + ...,=,>,,MetaCyc: Alcohol-Degradation (Alcohol Degradat...,BiGG: GLXCBL; GLXCL|EcoCyc: GLYOCARBOLIG-RXN|K...,4.1.1.47,-4.57,1.08,cpd00011;cpd00040;cpd00067;cpd00843,OK,0,,GCC|EQC|EQU,Primary Database
9,rxn00011,R00014,pyruvate:thiamin diphosphate acetaldehydetrans...,(1) cpd00011[0] + (1) cpd03049[0] <=> (1) cpd0...,"-1:cpd00011:0:0:""CO2"";-1:cpd03049:0:0:""2-Hydro...",0,(1) cpd00011[0] + (1) cpd03049[0] => (1) cpd00...,(1) CO2[0] + (1) 2-Hydroxyethyl-ThPP[0] => (1)...,<,>,,KEGG: rn00010 (Glycolysis / Gluconeogenesis); ...,AraCyc: RXN-12583|CornCyc: RXN-12583|EcoCyc: R...,1.2.4.1|2.2.1.6|4.1.1.1,7.23,0.84,cpd00011;cpd00020;cpd00056;cpd00067;cpd03049,OK,0,rxn31360;rxn31361,GCC|EQC|EQU,Primary Database


In [None]:
def parse_seed_aliases(aliases_str):
    """Convert a ModelSEED 'aliases' string into a {source: [ids]} dictionary.

    Parsing steps:
      1) split by "|" to separate the different key - value pairs
      2) split each pair at its first ":" to separate key and value
      3) split the value by ";" into its items and strip leading and trailing
         white space from each item

    If a key occurs more than once, the last pair wins.

    Args:
        aliases_str: the raw alias string, an already parsed dictionary, or a
            missing value (anything that is neither str nor dict).

    Returns:
        The parsed dictionary. A dictionary is returned unchanged, so applying
        the function twice (e.g. when the cell is re-run) does not wipe the
        column. Any other non-string input yields an empty dictionary.
    """
    if isinstance(aliases_str, dict):
        return aliases_str
    if not isinstance(aliases_str, str):
        return {}

    aliases_dict = {}
    for alias in aliases_str.split('|'):
        alias_parts = alias.split(':', 1)
        alias_key, alias_values = alias_parts[0], alias_parts[-1]
        aliases_dict[alias_key] = [alias_value.strip() for alias_value in alias_values.split(';')]
    return aliases_dict


#Convert string of alias pairs into dictionaries of keys and lists of values
df_reactions_seed['aliases'] = df_reactions_seed['aliases'].apply(parse_seed_aliases)

df_reactions_seed.head(25)

Unnamed: 0,id,abbreviation,name,code,stoichiometry,is_transport,equation,definition,reversibility,direction,abstract_reaction,pathways,aliases,ec_numbers,deltag,deltagerr,compound_ids,status,is_obsolete,linked_reaction,notes,source
0,rxn00001,R00004,diphosphate phosphohydrolase,(1) cpd00001[0] + (1) cpd00012[0] <=> (2) cpd0...,"-1:cpd00001:0:0:""H2O"";-1:cpd00012:0:0:""PPi"";2:...",0,(1) cpd00001[0] + (1) cpd00012[0] <=> (2) cpd0...,(1) H2O[0] + (1) PPi[0] <=> (2) Phosphate[0] +...,>,=,,MetaCyc: Degradation (Degradation/Utilization/...,"{'AraCyc': ['INORGPYROPHOSPHAT-RXN'], 'BiGG': ...",3.6.1.1,-3.46,0.05,cpd00001;cpd00009;cpd00012;cpd00067,OK,0,rxn27946;rxn27947;rxn27948;rxn32487;rxn38157;r...,GCC|HB|EQC|EQU,Primary Database
1,rxn00002,R00005,urea-1-carboxylate amidohydrolase,(1) cpd00001[0] + (1) cpd00742[0] <=> (2) cpd0...,"-1:cpd00001:0:0:""H2O"";-3:cpd00067:0:0:""H+"";-1:...",0,(1) cpd00001[0] + (3) cpd00067[0] + (1) cpd007...,(1) H2O[0] + (3) H+[0] + (1) Allophanate[0] =>...,>,>,,MetaCyc: ALLANTOINDEG-PWY (superpathway of all...,"{'AraCyc': ['ALLOPHANATE-HYDROLASE-RXN'], 'BiG...",3.5.1.54,-20.14,1.86,cpd00001;cpd00011;cpd00013;cpd00067;cpd00742,OK,0,rxn30346;rxn35525,GCC|EQC|EQU,Primary Database
2,rxn00003,R00006,pyruvate:pyruvate acetaldehydetransferase (dec...,(1) cpd00011[0] + (1) cpd00668[0] <=> (2) cpd0...,"-1:cpd00011:0:0:""CO2"";-1:cpd00668:0:0:""ALCTT"";...",0,(1) cpd00011[0] + (1) cpd00668[0] <= (2) cpd00...,(1) CO2[0] + (1) ALCTT[0] <= (2) Pyruvate[0] +...,<,<,,KEGG: rn00770 (Pantothenate and CoA biosynthesis),"{'AlgaGEM': ['R_R00006_c'], 'AraGEM': ['R_R000...",2.2.1.6,8.27,0.9,cpd00011;cpd00020;cpd00067;cpd00668,OK,0,rxn30144;rxn33164,GCC|EQC|EQU,Primary Database
3,rxn00004,R00008,4-hydroxy-4-methyl-2-oxoglutarate pyruvate-lya...,(1) cpd02570[0] <=> (2) cpd00020[0],"-1:cpd02570:0:0:""Parapyruvate"";2:cpd00020:0:0:...",0,(1) cpd02570[0] <=> (2) cpd00020[0],(1) Parapyruvate[0] <=> (2) Pyruvate[0],=,=,,KEGG: rn00362 (Benzoate degradation); rn00660 ...,"{'KEGG': ['R00008'], 'Name': ['4-Hydroxy-4-met...",4.1.3.17,4.49,0.57,cpd00020;cpd02570,OK,0,,GCC|EQC|EQU,Primary Database
4,rxn00006,R00009,hydrogen-peroxide:hydrogen-peroxide oxidoreduc...,(2) cpd00025[0] <=> (2) cpd00001[0] + (1) cpd0...,"-2:cpd00025:0:0:""H2O2"";2:cpd00001:0:0:""H2O"";1:...",0,(2) cpd00025[0] => (2) cpd00001[0] + (1) cpd00...,(2) H2O2[0] => (2) H2O[0] + (1) O2[0],>,>,,MetaCyc: C1-COMPOUNDS (C1 Compound Utilization...,"{'AraCyc': ['CATAL-RXN'], 'BiGG': ['CAT', 'CAT...",1.11.1.21|1.11.1.6,-46.06,1.64,cpd00001;cpd00007;cpd00025,OK,0,rxn19264;rxn22404;rxn27744;rxn31381,GCC|EQC|EQU,Primary Database
5,rxn00007,R00010,"alpha,alpha-trehalose glucohydrolase",(1) cpd00001[0] + (1) cpd00794[0] <=> (2) cpd0...,"-1:cpd00001:0:0:""H2O"";-1:cpd00794:0:0:""TRHL"";2...",0,(1) cpd00001[0] + (1) cpd00794[0] => (2) cpd00...,(1) H2O[0] + (1) TRHL[0] => (2) D-Glucose[0],=,>,,KEGG: rn00500 (Starch and sucrose metabolism),"{'AlgaGEM': ['R_R00010_c'], 'AraGEM': ['R_R000...",3.2.1.28,-2.79,0.26,cpd00001;cpd00027;cpd00794,OK,0,rxn29977;rxn30219;rxn30442,GCC|EQC|EQU,Primary Database
6,rxn00008,R00011,Mn(II):hydrogen-peroxide oxidoreductase,(2) cpd00001[0] <=> (1) cpd00025[0],"-2:cpd00001:0:0:""H2O"";1:cpd00025:0:0:""H2O2"";2:...",0,(2) cpd00001[0] <=> (1) cpd00025[0] + (2) cpd0...,(2) H2O[0] <=> (1) H2O2[0] + (2) H+[0],<,=,,MetaCyc: Degradation (Degradation/Utilization/...,"{'KEGG': ['R00011'], 'MetaCyc': ['MANGANESE-PE...",1.11.1.13,62.53,0.8,cpd00001;cpd00025;cpd00067,CI:2,0,,GCC|EQC|EQU,Primary Database
7,rxn00009,R00012,GTP:GTP guanylyltransferase,(2) cpd00038[0] <=> (1) cpd00012[0] + (1) cpd0...,"-2:cpd00038:0:0:""GTP"";1:cpd00012:0:0:""PPi"";1:c...",0,(2) cpd00038[0] => (1) cpd00012[0] + (1) cpd00...,(2) GTP[0] => (1) PPi[0] + (1) H+[0] + (1) Gpp...,=,>,,,"{'KEGG': ['R00012'], 'MetaCyc': ['2.7.7.45-RXN...",2.7.7.45,2.7,1.52,cpd00012;cpd00038;cpd00067;cpd00925,OK,0,rxn33150;rxn33152,GCC|HB|EQC|EQU,Primary Database
8,rxn00010,R00013,glyoxylate carboxy-lyase (dimerizing; tartrona...,(2) cpd00040[0] <=> (1) cpd00011[0] + (1) cpd0...,"-2:cpd00040:0:0:""Glyoxalate"";-1:cpd00067:0:0:""...",0,(2) cpd00040[0] + (1) cpd00067[0] => (1) cpd00...,(2) Glyoxalate[0] + (1) H+[0] => (1) CO2[0] + ...,=,>,,MetaCyc: Alcohol-Degradation (Alcohol Degradat...,"{'BiGG': ['GLXCBL', 'GLXCL'], 'EcoCyc': ['GLYO...",4.1.1.47,-4.57,1.08,cpd00011;cpd00040;cpd00067;cpd00843,OK,0,,GCC|EQC|EQU,Primary Database
9,rxn00011,R00014,pyruvate:thiamin diphosphate acetaldehydetrans...,(1) cpd00011[0] + (1) cpd03049[0] <=> (1) cpd0...,"-1:cpd00011:0:0:""CO2"";-1:cpd03049:0:0:""2-Hydro...",0,(1) cpd00011[0] + (1) cpd03049[0] => (1) cpd00...,(1) CO2[0] + (1) 2-Hydroxyethyl-ThPP[0] => (1)...,<,>,,KEGG: rn00010 (Glycolysis / Gluconeogenesis); ...,"{'AraCyc': ['RXN-12583'], 'CornCyc': ['RXN-125...",1.2.4.1|2.2.1.6|4.1.1.1,7.23,0.84,cpd00011;cpd00020;cpd00056;cpd00067;cpd03049,OK,0,rxn31360;rxn31361,GCC|EQC|EQU,Primary Database


In [None]:
#All alias keys that occur in the ModelSEED reaction table; only KEGG, BiGG and AraCyc appear useful for mapping the AraCore reactions
#array(['AlgaGEM', 'AraCyc', 'AraGEM', 'BiGG', 'BrachyCyc', 'ChlamyCyc',
#       'CornCyc', 'DF_Athaliana', 'EcoCyc', 'JM_Creinhardtii',
#       'JP_Creinhardtii_MSB', 'JP_Creinhardtii_NMeth', 'KEGG', 'MaizeCyc',
#       'Maize_C4GEM', 'MetaCyc', 'Name', 'PlantCyc', 'PoplarCyc',
#       'RiceCyc', 'SorghumCyc', 'SoyCyc', 'TS_Athaliana', 'iAF1260',
#       'iAF692', 'iAG612', 'iAO358', 'iAbaylyiv4', 'iGT196', 'iIN800',
#       'iIT341', 'iJN746', 'iJR904', 'iMA945', 'iMEO21', 'iMM904',
#       'iMO1053-PAO1', 'iMO1056', 'iND750', 'iNJ661', 'iPS189', 'iRR1083',
#       'iRS1563', 'iRS1597', 'iSB619', 'iSO783', 'iYO844'], dtype='<U21')

In [None]:
# Pull the BiGG aliases out of each reaction's alias dictionary into their own column
# (None when the reaction has no 'BiGG' entry)
df_reactions_seed['BiGG'] = df_reactions_seed['aliases'].apply(
    lambda alias_map: alias_map.get('BiGG')
)
df_reactions_seed.head(25)

Unnamed: 0,id,abbreviation,name,code,stoichiometry,is_transport,equation,definition,reversibility,direction,abstract_reaction,pathways,aliases,ec_numbers,deltag,deltagerr,compound_ids,status,is_obsolete,linked_reaction,notes,source,BiGG
0,rxn00001,R00004,diphosphate phosphohydrolase,(1) cpd00001[0] + (1) cpd00012[0] <=> (2) cpd0...,"-1:cpd00001:0:0:""H2O"";-1:cpd00012:0:0:""PPi"";2:...",0,(1) cpd00001[0] + (1) cpd00012[0] <=> (2) cpd0...,(1) H2O[0] + (1) PPi[0] <=> (2) Phosphate[0] +...,>,=,,MetaCyc: Degradation (Degradation/Utilization/...,"{'AraCyc': ['INORGPYROPHOSPHAT-RXN'], 'BiGG': ...",3.6.1.1,-3.46,0.05,cpd00001;cpd00009;cpd00012;cpd00067,OK,0,rxn27946;rxn27947;rxn27948;rxn32487;rxn38157;r...,GCC|HB|EQC|EQU,Primary Database,"[IPP1, PPA, PPA_1, PPAm]"
1,rxn00002,R00005,urea-1-carboxylate amidohydrolase,(1) cpd00001[0] + (1) cpd00742[0] <=> (2) cpd0...,"-1:cpd00001:0:0:""H2O"";-3:cpd00067:0:0:""H+"";-1:...",0,(1) cpd00001[0] + (3) cpd00067[0] + (1) cpd007...,(1) H2O[0] + (3) H+[0] + (1) Allophanate[0] =>...,>,>,,MetaCyc: ALLANTOINDEG-PWY (superpathway of all...,"{'AraCyc': ['ALLOPHANATE-HYDROLASE-RXN'], 'BiG...",3.5.1.54,-20.14,1.86,cpd00001;cpd00011;cpd00013;cpd00067;cpd00742,OK,0,rxn30346;rxn35525,GCC|EQC|EQU,Primary Database,"[ALPHNH, DUR1_2]"
2,rxn00003,R00006,pyruvate:pyruvate acetaldehydetransferase (dec...,(1) cpd00011[0] + (1) cpd00668[0] <=> (2) cpd0...,"-1:cpd00011:0:0:""CO2"";-1:cpd00668:0:0:""ALCTT"";...",0,(1) cpd00011[0] + (1) cpd00668[0] <= (2) cpd00...,(1) CO2[0] + (1) ALCTT[0] <= (2) Pyruvate[0] +...,<,<,,KEGG: rn00770 (Pantothenate and CoA biosynthesis),"{'AlgaGEM': ['R_R00006_c'], 'AraGEM': ['R_R000...",2.2.1.6,8.27,0.9,cpd00011;cpd00020;cpd00067;cpd00668,OK,0,rxn30144;rxn33164,GCC|EQC|EQU,Primary Database,[ILV2_2]
3,rxn00004,R00008,4-hydroxy-4-methyl-2-oxoglutarate pyruvate-lya...,(1) cpd02570[0] <=> (2) cpd00020[0],"-1:cpd02570:0:0:""Parapyruvate"";2:cpd00020:0:0:...",0,(1) cpd02570[0] <=> (2) cpd00020[0],(1) Parapyruvate[0] <=> (2) Pyruvate[0],=,=,,KEGG: rn00362 (Benzoate degradation); rn00660 ...,"{'KEGG': ['R00008'], 'Name': ['4-Hydroxy-4-met...",4.1.3.17,4.49,0.57,cpd00020;cpd02570,OK,0,,GCC|EQC|EQU,Primary Database,
4,rxn00006,R00009,hydrogen-peroxide:hydrogen-peroxide oxidoreduc...,(2) cpd00025[0] <=> (2) cpd00001[0] + (1) cpd0...,"-2:cpd00025:0:0:""H2O2"";2:cpd00001:0:0:""H2O"";1:...",0,(2) cpd00025[0] => (2) cpd00001[0] + (1) cpd00...,(2) H2O2[0] => (2) H2O[0] + (1) O2[0],>,>,,MetaCyc: C1-COMPOUNDS (C1 Compound Utilization...,"{'AraCyc': ['CATAL-RXN'], 'BiGG': ['CAT', 'CAT...",1.11.1.21|1.11.1.6,-46.06,1.64,cpd00001;cpd00007;cpd00025,OK,0,rxn19264;rxn22404;rxn27744;rxn31381,GCC|EQC|EQU,Primary Database,"[CAT, CATp, CTA1, CTT1]"
5,rxn00007,R00010,"alpha,alpha-trehalose glucohydrolase",(1) cpd00001[0] + (1) cpd00794[0] <=> (2) cpd0...,"-1:cpd00001:0:0:""H2O"";-1:cpd00794:0:0:""TRHL"";2...",0,(1) cpd00001[0] + (1) cpd00794[0] => (2) cpd00...,(1) H2O[0] + (1) TRHL[0] => (2) D-Glucose[0],=,>,,KEGG: rn00500 (Starch and sucrose metabolism),"{'AlgaGEM': ['R_R00010_c'], 'AraGEM': ['R_R000...",3.2.1.28,-2.79,0.26,cpd00001;cpd00027;cpd00794,OK,0,rxn29977;rxn30219;rxn30442,GCC|EQC|EQU,Primary Database,"[ATH1, NTH1, NTH2, TREH, TREHe, TREHpp, TREHv]"
6,rxn00008,R00011,Mn(II):hydrogen-peroxide oxidoreductase,(2) cpd00001[0] <=> (1) cpd00025[0],"-2:cpd00001:0:0:""H2O"";1:cpd00025:0:0:""H2O2"";2:...",0,(2) cpd00001[0] <=> (1) cpd00025[0] + (2) cpd0...,(2) H2O[0] <=> (1) H2O2[0] + (2) H+[0],<,=,,MetaCyc: Degradation (Degradation/Utilization/...,"{'KEGG': ['R00011'], 'MetaCyc': ['MANGANESE-PE...",1.11.1.13,62.53,0.8,cpd00001;cpd00025;cpd00067,CI:2,0,,GCC|EQC|EQU,Primary Database,
7,rxn00009,R00012,GTP:GTP guanylyltransferase,(2) cpd00038[0] <=> (1) cpd00012[0] + (1) cpd0...,"-2:cpd00038:0:0:""GTP"";1:cpd00012:0:0:""PPi"";1:c...",0,(2) cpd00038[0] => (1) cpd00012[0] + (1) cpd00...,(2) GTP[0] => (1) PPi[0] + (1) H+[0] + (1) Gpp...,=,>,,,"{'KEGG': ['R00012'], 'MetaCyc': ['2.7.7.45-RXN...",2.7.7.45,2.7,1.52,cpd00012;cpd00038;cpd00067;cpd00925,OK,0,rxn33150;rxn33152,GCC|HB|EQC|EQU,Primary Database,
8,rxn00010,R00013,glyoxylate carboxy-lyase (dimerizing; tartrona...,(2) cpd00040[0] <=> (1) cpd00011[0] + (1) cpd0...,"-2:cpd00040:0:0:""Glyoxalate"";-1:cpd00067:0:0:""...",0,(2) cpd00040[0] + (1) cpd00067[0] => (1) cpd00...,(2) Glyoxalate[0] + (1) H+[0] => (1) CO2[0] + ...,=,>,,MetaCyc: Alcohol-Degradation (Alcohol Degradat...,"{'BiGG': ['GLXCBL', 'GLXCL'], 'EcoCyc': ['GLYO...",4.1.1.47,-4.57,1.08,cpd00011;cpd00040;cpd00067;cpd00843,OK,0,,GCC|EQC|EQU,Primary Database,"[GLXCBL, GLXCL]"
9,rxn00011,R00014,pyruvate:thiamin diphosphate acetaldehydetrans...,(1) cpd00011[0] + (1) cpd03049[0] <=> (1) cpd0...,"-1:cpd00011:0:0:""CO2"";-1:cpd03049:0:0:""2-Hydro...",0,(1) cpd00011[0] + (1) cpd03049[0] => (1) cpd00...,(1) CO2[0] + (1) 2-Hydroxyethyl-ThPP[0] => (1)...,<,>,,KEGG: rn00010 (Glycolysis / Gluconeogenesis); ...,"{'AraCyc': ['RXN-12583'], 'CornCyc': ['RXN-125...",1.2.4.1|2.2.1.6|4.1.1.1,7.23,0.84,cpd00011;cpd00020;cpd00056;cpd00067;cpd03049,OK,0,rxn31360;rxn31361,GCC|EQC|EQU,Primary Database,


In [None]:
# Pull the KEGG aliases out of each reaction's alias dictionary into their own column
# (None when the reaction has no 'KEGG' entry)
df_reactions_seed['KEGG'] = df_reactions_seed['aliases'].apply(
    lambda alias_map: alias_map.get('KEGG')
)
df_reactions_seed.head(25)

Unnamed: 0,id,abbreviation,name,code,stoichiometry,is_transport,equation,definition,reversibility,direction,abstract_reaction,pathways,aliases,ec_numbers,deltag,deltagerr,compound_ids,status,is_obsolete,linked_reaction,notes,source,BiGG,KEGG
0,rxn00001,R00004,diphosphate phosphohydrolase,(1) cpd00001[0] + (1) cpd00012[0] <=> (2) cpd0...,"-1:cpd00001:0:0:""H2O"";-1:cpd00012:0:0:""PPi"";2:...",0,(1) cpd00001[0] + (1) cpd00012[0] <=> (2) cpd0...,(1) H2O[0] + (1) PPi[0] <=> (2) Phosphate[0] +...,>,=,,MetaCyc: Degradation (Degradation/Utilization/...,"{'AraCyc': ['INORGPYROPHOSPHAT-RXN'], 'BiGG': ...",3.6.1.1,-3.46,0.05,cpd00001;cpd00009;cpd00012;cpd00067,OK,0,rxn27946;rxn27947;rxn27948;rxn32487;rxn38157;r...,GCC|HB|EQC|EQU,Primary Database,"[IPP1, PPA, PPA_1, PPAm]",[R00004]
1,rxn00002,R00005,urea-1-carboxylate amidohydrolase,(1) cpd00001[0] + (1) cpd00742[0] <=> (2) cpd0...,"-1:cpd00001:0:0:""H2O"";-3:cpd00067:0:0:""H+"";-1:...",0,(1) cpd00001[0] + (3) cpd00067[0] + (1) cpd007...,(1) H2O[0] + (3) H+[0] + (1) Allophanate[0] =>...,>,>,,MetaCyc: ALLANTOINDEG-PWY (superpathway of all...,"{'AraCyc': ['ALLOPHANATE-HYDROLASE-RXN'], 'BiG...",3.5.1.54,-20.14,1.86,cpd00001;cpd00011;cpd00013;cpd00067;cpd00742,OK,0,rxn30346;rxn35525,GCC|EQC|EQU,Primary Database,"[ALPHNH, DUR1_2]",[R00005]
2,rxn00003,R00006,pyruvate:pyruvate acetaldehydetransferase (dec...,(1) cpd00011[0] + (1) cpd00668[0] <=> (2) cpd0...,"-1:cpd00011:0:0:""CO2"";-1:cpd00668:0:0:""ALCTT"";...",0,(1) cpd00011[0] + (1) cpd00668[0] <= (2) cpd00...,(1) CO2[0] + (1) ALCTT[0] <= (2) Pyruvate[0] +...,<,<,,KEGG: rn00770 (Pantothenate and CoA biosynthesis),"{'AlgaGEM': ['R_R00006_c'], 'AraGEM': ['R_R000...",2.2.1.6,8.27,0.9,cpd00011;cpd00020;cpd00067;cpd00668,OK,0,rxn30144;rxn33164,GCC|EQC|EQU,Primary Database,[ILV2_2],[R00006]
3,rxn00004,R00008,4-hydroxy-4-methyl-2-oxoglutarate pyruvate-lya...,(1) cpd02570[0] <=> (2) cpd00020[0],"-1:cpd02570:0:0:""Parapyruvate"";2:cpd00020:0:0:...",0,(1) cpd02570[0] <=> (2) cpd00020[0],(1) Parapyruvate[0] <=> (2) Pyruvate[0],=,=,,KEGG: rn00362 (Benzoate degradation); rn00660 ...,"{'KEGG': ['R00008'], 'Name': ['4-Hydroxy-4-met...",4.1.3.17,4.49,0.57,cpd00020;cpd02570,OK,0,,GCC|EQC|EQU,Primary Database,,[R00008]
4,rxn00006,R00009,hydrogen-peroxide:hydrogen-peroxide oxidoreduc...,(2) cpd00025[0] <=> (2) cpd00001[0] + (1) cpd0...,"-2:cpd00025:0:0:""H2O2"";2:cpd00001:0:0:""H2O"";1:...",0,(2) cpd00025[0] => (2) cpd00001[0] + (1) cpd00...,(2) H2O2[0] => (2) H2O[0] + (1) O2[0],>,>,,MetaCyc: C1-COMPOUNDS (C1 Compound Utilization...,"{'AraCyc': ['CATAL-RXN'], 'BiGG': ['CAT', 'CAT...",1.11.1.21|1.11.1.6,-46.06,1.64,cpd00001;cpd00007;cpd00025,OK,0,rxn19264;rxn22404;rxn27744;rxn31381,GCC|EQC|EQU,Primary Database,"[CAT, CATp, CTA1, CTT1]",[R00009]
5,rxn00007,R00010,"alpha,alpha-trehalose glucohydrolase",(1) cpd00001[0] + (1) cpd00794[0] <=> (2) cpd0...,"-1:cpd00001:0:0:""H2O"";-1:cpd00794:0:0:""TRHL"";2...",0,(1) cpd00001[0] + (1) cpd00794[0] => (2) cpd00...,(1) H2O[0] + (1) TRHL[0] => (2) D-Glucose[0],=,>,,KEGG: rn00500 (Starch and sucrose metabolism),"{'AlgaGEM': ['R_R00010_c'], 'AraGEM': ['R_R000...",3.2.1.28,-2.79,0.26,cpd00001;cpd00027;cpd00794,OK,0,rxn29977;rxn30219;rxn30442,GCC|EQC|EQU,Primary Database,"[ATH1, NTH1, NTH2, TREH, TREHe, TREHpp, TREHv]","[R00010, R06103]"
6,rxn00008,R00011,Mn(II):hydrogen-peroxide oxidoreductase,(2) cpd00001[0] <=> (1) cpd00025[0],"-2:cpd00001:0:0:""H2O"";1:cpd00025:0:0:""H2O2"";2:...",0,(2) cpd00001[0] <=> (1) cpd00025[0] + (2) cpd0...,(2) H2O[0] <=> (1) H2O2[0] + (2) H+[0],<,=,,MetaCyc: Degradation (Degradation/Utilization/...,"{'KEGG': ['R00011'], 'MetaCyc': ['MANGANESE-PE...",1.11.1.13,62.53,0.8,cpd00001;cpd00025;cpd00067,CI:2,0,,GCC|EQC|EQU,Primary Database,,[R00011]
7,rxn00009,R00012,GTP:GTP guanylyltransferase,(2) cpd00038[0] <=> (1) cpd00012[0] + (1) cpd0...,"-2:cpd00038:0:0:""GTP"";1:cpd00012:0:0:""PPi"";1:c...",0,(2) cpd00038[0] => (1) cpd00012[0] + (1) cpd00...,(2) GTP[0] => (1) PPi[0] + (1) H+[0] + (1) Gpp...,=,>,,,"{'KEGG': ['R00012'], 'MetaCyc': ['2.7.7.45-RXN...",2.7.7.45,2.7,1.52,cpd00012;cpd00038;cpd00067;cpd00925,OK,0,rxn33150;rxn33152,GCC|HB|EQC|EQU,Primary Database,,[R00012]
8,rxn00010,R00013,glyoxylate carboxy-lyase (dimerizing; tartrona...,(2) cpd00040[0] <=> (1) cpd00011[0] + (1) cpd0...,"-2:cpd00040:0:0:""Glyoxalate"";-1:cpd00067:0:0:""...",0,(2) cpd00040[0] + (1) cpd00067[0] => (1) cpd00...,(2) Glyoxalate[0] + (1) H+[0] => (1) CO2[0] + ...,=,>,,MetaCyc: Alcohol-Degradation (Alcohol Degradat...,"{'BiGG': ['GLXCBL', 'GLXCL'], 'EcoCyc': ['GLYO...",4.1.1.47,-4.57,1.08,cpd00011;cpd00040;cpd00067;cpd00843,OK,0,,GCC|EQC|EQU,Primary Database,"[GLXCBL, GLXCL]",[R00013]
9,rxn00011,R00014,pyruvate:thiamin diphosphate acetaldehydetrans...,(1) cpd00011[0] + (1) cpd03049[0] <=> (1) cpd0...,"-1:cpd00011:0:0:""CO2"";-1:cpd03049:0:0:""2-Hydro...",0,(1) cpd00011[0] + (1) cpd03049[0] => (1) cpd00...,(1) CO2[0] + (1) 2-Hydroxyethyl-ThPP[0] => (1)...,<,>,,KEGG: rn00010 (Glycolysis / Gluconeogenesis); ...,"{'AraCyc': ['RXN-12583'], 'CornCyc': ['RXN-125...",1.2.4.1|2.2.1.6|4.1.1.1,7.23,0.84,cpd00011;cpd00020;cpd00056;cpd00067;cpd03049,OK,0,rxn31360;rxn31361,GCC|EQC|EQU,Primary Database,,[R00014]


In [None]:
# Pull the AraCyc aliases out of each reaction's alias dictionary into their own column
# (None when the reaction has no 'AraCyc' entry)
df_reactions_seed['AraCyc'] = df_reactions_seed['aliases'].apply(
    lambda alias_map: alias_map.get('AraCyc')
)
df_reactions_seed.head(25)

Unnamed: 0,id,abbreviation,name,code,stoichiometry,is_transport,equation,definition,reversibility,direction,abstract_reaction,pathways,aliases,ec_numbers,deltag,deltagerr,compound_ids,status,is_obsolete,linked_reaction,notes,source,BiGG,KEGG,AraCyc
0,rxn00001,R00004,diphosphate phosphohydrolase,(1) cpd00001[0] + (1) cpd00012[0] <=> (2) cpd0...,"-1:cpd00001:0:0:""H2O"";-1:cpd00012:0:0:""PPi"";2:...",0,(1) cpd00001[0] + (1) cpd00012[0] <=> (2) cpd0...,(1) H2O[0] + (1) PPi[0] <=> (2) Phosphate[0] +...,>,=,,MetaCyc: Degradation (Degradation/Utilization/...,"{'AraCyc': ['INORGPYROPHOSPHAT-RXN'], 'BiGG': ...",3.6.1.1,-3.46,0.05,cpd00001;cpd00009;cpd00012;cpd00067,OK,0,rxn27946;rxn27947;rxn27948;rxn32487;rxn38157;r...,GCC|HB|EQC|EQU,Primary Database,"[IPP1, PPA, PPA_1, PPAm]",[R00004],[INORGPYROPHOSPHAT-RXN]
1,rxn00002,R00005,urea-1-carboxylate amidohydrolase,(1) cpd00001[0] + (1) cpd00742[0] <=> (2) cpd0...,"-1:cpd00001:0:0:""H2O"";-3:cpd00067:0:0:""H+"";-1:...",0,(1) cpd00001[0] + (3) cpd00067[0] + (1) cpd007...,(1) H2O[0] + (3) H+[0] + (1) Allophanate[0] =>...,>,>,,MetaCyc: ALLANTOINDEG-PWY (superpathway of all...,"{'AraCyc': ['ALLOPHANATE-HYDROLASE-RXN'], 'BiG...",3.5.1.54,-20.14,1.86,cpd00001;cpd00011;cpd00013;cpd00067;cpd00742,OK,0,rxn30346;rxn35525,GCC|EQC|EQU,Primary Database,"[ALPHNH, DUR1_2]",[R00005],[ALLOPHANATE-HYDROLASE-RXN]
2,rxn00003,R00006,pyruvate:pyruvate acetaldehydetransferase (dec...,(1) cpd00011[0] + (1) cpd00668[0] <=> (2) cpd0...,"-1:cpd00011:0:0:""CO2"";-1:cpd00668:0:0:""ALCTT"";...",0,(1) cpd00011[0] + (1) cpd00668[0] <= (2) cpd00...,(1) CO2[0] + (1) ALCTT[0] <= (2) Pyruvate[0] +...,<,<,,KEGG: rn00770 (Pantothenate and CoA biosynthesis),"{'AlgaGEM': ['R_R00006_c'], 'AraGEM': ['R_R000...",2.2.1.6,8.27,0.9,cpd00011;cpd00020;cpd00067;cpd00668,OK,0,rxn30144;rxn33164,GCC|EQC|EQU,Primary Database,[ILV2_2],[R00006],
3,rxn00004,R00008,4-hydroxy-4-methyl-2-oxoglutarate pyruvate-lya...,(1) cpd02570[0] <=> (2) cpd00020[0],"-1:cpd02570:0:0:""Parapyruvate"";2:cpd00020:0:0:...",0,(1) cpd02570[0] <=> (2) cpd00020[0],(1) Parapyruvate[0] <=> (2) Pyruvate[0],=,=,,KEGG: rn00362 (Benzoate degradation); rn00660 ...,"{'KEGG': ['R00008'], 'Name': ['4-Hydroxy-4-met...",4.1.3.17,4.49,0.57,cpd00020;cpd02570,OK,0,,GCC|EQC|EQU,Primary Database,,[R00008],
4,rxn00006,R00009,hydrogen-peroxide:hydrogen-peroxide oxidoreduc...,(2) cpd00025[0] <=> (2) cpd00001[0] + (1) cpd0...,"-2:cpd00025:0:0:""H2O2"";2:cpd00001:0:0:""H2O"";1:...",0,(2) cpd00025[0] => (2) cpd00001[0] + (1) cpd00...,(2) H2O2[0] => (2) H2O[0] + (1) O2[0],>,>,,MetaCyc: C1-COMPOUNDS (C1 Compound Utilization...,"{'AraCyc': ['CATAL-RXN'], 'BiGG': ['CAT', 'CAT...",1.11.1.21|1.11.1.6,-46.06,1.64,cpd00001;cpd00007;cpd00025,OK,0,rxn19264;rxn22404;rxn27744;rxn31381,GCC|EQC|EQU,Primary Database,"[CAT, CATp, CTA1, CTT1]",[R00009],[CATAL-RXN]
5,rxn00007,R00010,"alpha,alpha-trehalose glucohydrolase",(1) cpd00001[0] + (1) cpd00794[0] <=> (2) cpd0...,"-1:cpd00001:0:0:""H2O"";-1:cpd00794:0:0:""TRHL"";2...",0,(1) cpd00001[0] + (1) cpd00794[0] => (2) cpd00...,(1) H2O[0] + (1) TRHL[0] => (2) D-Glucose[0],=,>,,KEGG: rn00500 (Starch and sucrose metabolism),"{'AlgaGEM': ['R_R00010_c'], 'AraGEM': ['R_R000...",3.2.1.28,-2.79,0.26,cpd00001;cpd00027;cpd00794,OK,0,rxn29977;rxn30219;rxn30442,GCC|EQC|EQU,Primary Database,"[ATH1, NTH1, NTH2, TREH, TREHe, TREHpp, TREHv]","[R00010, R06103]",
6,rxn00008,R00011,Mn(II):hydrogen-peroxide oxidoreductase,(2) cpd00001[0] <=> (1) cpd00025[0],"-2:cpd00001:0:0:""H2O"";1:cpd00025:0:0:""H2O2"";2:...",0,(2) cpd00001[0] <=> (1) cpd00025[0] + (2) cpd0...,(2) H2O[0] <=> (1) H2O2[0] + (2) H+[0],<,=,,MetaCyc: Degradation (Degradation/Utilization/...,"{'KEGG': ['R00011'], 'MetaCyc': ['MANGANESE-PE...",1.11.1.13,62.53,0.8,cpd00001;cpd00025;cpd00067,CI:2,0,,GCC|EQC|EQU,Primary Database,,[R00011],
7,rxn00009,R00012,GTP:GTP guanylyltransferase,(2) cpd00038[0] <=> (1) cpd00012[0] + (1) cpd0...,"-2:cpd00038:0:0:""GTP"";1:cpd00012:0:0:""PPi"";1:c...",0,(2) cpd00038[0] => (1) cpd00012[0] + (1) cpd00...,(2) GTP[0] => (1) PPi[0] + (1) H+[0] + (1) Gpp...,=,>,,,"{'KEGG': ['R00012'], 'MetaCyc': ['2.7.7.45-RXN...",2.7.7.45,2.7,1.52,cpd00012;cpd00038;cpd00067;cpd00925,OK,0,rxn33150;rxn33152,GCC|HB|EQC|EQU,Primary Database,,[R00012],
8,rxn00010,R00013,glyoxylate carboxy-lyase (dimerizing; tartrona...,(2) cpd00040[0] <=> (1) cpd00011[0] + (1) cpd0...,"-2:cpd00040:0:0:""Glyoxalate"";-1:cpd00067:0:0:""...",0,(2) cpd00040[0] + (1) cpd00067[0] => (1) cpd00...,(2) Glyoxalate[0] + (1) H+[0] => (1) CO2[0] + ...,=,>,,MetaCyc: Alcohol-Degradation (Alcohol Degradat...,"{'BiGG': ['GLXCBL', 'GLXCL'], 'EcoCyc': ['GLYO...",4.1.1.47,-4.57,1.08,cpd00011;cpd00040;cpd00067;cpd00843,OK,0,,GCC|EQC|EQU,Primary Database,"[GLXCBL, GLXCL]",[R00013],
9,rxn00011,R00014,pyruvate:thiamin diphosphate acetaldehydetrans...,(1) cpd00011[0] + (1) cpd03049[0] <=> (1) cpd0...,"-1:cpd00011:0:0:""CO2"";-1:cpd03049:0:0:""2-Hydro...",0,(1) cpd00011[0] + (1) cpd03049[0] => (1) cpd00...,(1) CO2[0] + (1) 2-Hydroxyethyl-ThPP[0] => (1)...,<,>,,KEGG: rn00010 (Glycolysis / Gluconeogenesis); ...,"{'AraCyc': ['RXN-12583'], 'CornCyc': ['RXN-125...",1.2.4.1|2.2.1.6|4.1.1.1,7.23,0.84,cpd00011;cpd00020;cpd00056;cpd00067;cpd03049,OK,0,rxn31360;rxn31361,GCC|EQC|EQU,Primary Database,,[R00014],[RXN-12583]


In [None]:
def _split_ec_numbers(ec_value):
    """Turn a '|'-separated EC number string into a list of EC numbers.

    Returns None for missing or empty values. A value that is already a list
    is returned unchanged, so re-running this cell does not corrupt the column.
    """
    if isinstance(ec_value, list):
        return ec_value
    if isinstance(ec_value, str) and ec_value:
        return ec_value.split('|')
    # Missing values (NaN/None) must not be stringified: astype(str) would turn
    # them into the truthy string 'nan' and produce a bogus ['nan'] entry.
    return None

# 'ec_numbers' should also be converted into a list
df_reactions_seed['ec_numbers'] = df_reactions_seed['ec_numbers'].apply(_split_ec_numbers)
df_reactions_seed.head(25)

Unnamed: 0,id,abbreviation,name,code,stoichiometry,is_transport,equation,definition,reversibility,direction,abstract_reaction,pathways,aliases,ec_numbers,deltag,deltagerr,compound_ids,status,is_obsolete,linked_reaction,notes,source,BiGG,KEGG,AraCyc
0,rxn00001,R00004,diphosphate phosphohydrolase,(1) cpd00001[0] + (1) cpd00012[0] <=> (2) cpd0...,"-1:cpd00001:0:0:""H2O"";-1:cpd00012:0:0:""PPi"";2:...",0,(1) cpd00001[0] + (1) cpd00012[0] <=> (2) cpd0...,(1) H2O[0] + (1) PPi[0] <=> (2) Phosphate[0] +...,>,=,,MetaCyc: Degradation (Degradation/Utilization/...,"{'AraCyc': ['INORGPYROPHOSPHAT-RXN'], 'BiGG': ...",[3.6.1.1],-3.46,0.05,cpd00001;cpd00009;cpd00012;cpd00067,OK,0,rxn27946;rxn27947;rxn27948;rxn32487;rxn38157;r...,GCC|HB|EQC|EQU,Primary Database,"[IPP1, PPA, PPA_1, PPAm]",[R00004],[INORGPYROPHOSPHAT-RXN]
1,rxn00002,R00005,urea-1-carboxylate amidohydrolase,(1) cpd00001[0] + (1) cpd00742[0] <=> (2) cpd0...,"-1:cpd00001:0:0:""H2O"";-3:cpd00067:0:0:""H+"";-1:...",0,(1) cpd00001[0] + (3) cpd00067[0] + (1) cpd007...,(1) H2O[0] + (3) H+[0] + (1) Allophanate[0] =>...,>,>,,MetaCyc: ALLANTOINDEG-PWY (superpathway of all...,"{'AraCyc': ['ALLOPHANATE-HYDROLASE-RXN'], 'BiG...",[3.5.1.54],-20.14,1.86,cpd00001;cpd00011;cpd00013;cpd00067;cpd00742,OK,0,rxn30346;rxn35525,GCC|EQC|EQU,Primary Database,"[ALPHNH, DUR1_2]",[R00005],[ALLOPHANATE-HYDROLASE-RXN]
2,rxn00003,R00006,pyruvate:pyruvate acetaldehydetransferase (dec...,(1) cpd00011[0] + (1) cpd00668[0] <=> (2) cpd0...,"-1:cpd00011:0:0:""CO2"";-1:cpd00668:0:0:""ALCTT"";...",0,(1) cpd00011[0] + (1) cpd00668[0] <= (2) cpd00...,(1) CO2[0] + (1) ALCTT[0] <= (2) Pyruvate[0] +...,<,<,,KEGG: rn00770 (Pantothenate and CoA biosynthesis),"{'AlgaGEM': ['R_R00006_c'], 'AraGEM': ['R_R000...",[2.2.1.6],8.27,0.9,cpd00011;cpd00020;cpd00067;cpd00668,OK,0,rxn30144;rxn33164,GCC|EQC|EQU,Primary Database,[ILV2_2],[R00006],
3,rxn00004,R00008,4-hydroxy-4-methyl-2-oxoglutarate pyruvate-lya...,(1) cpd02570[0] <=> (2) cpd00020[0],"-1:cpd02570:0:0:""Parapyruvate"";2:cpd00020:0:0:...",0,(1) cpd02570[0] <=> (2) cpd00020[0],(1) Parapyruvate[0] <=> (2) Pyruvate[0],=,=,,KEGG: rn00362 (Benzoate degradation); rn00660 ...,"{'KEGG': ['R00008'], 'Name': ['4-Hydroxy-4-met...",[4.1.3.17],4.49,0.57,cpd00020;cpd02570,OK,0,,GCC|EQC|EQU,Primary Database,,[R00008],
4,rxn00006,R00009,hydrogen-peroxide:hydrogen-peroxide oxidoreduc...,(2) cpd00025[0] <=> (2) cpd00001[0] + (1) cpd0...,"-2:cpd00025:0:0:""H2O2"";2:cpd00001:0:0:""H2O"";1:...",0,(2) cpd00025[0] => (2) cpd00001[0] + (1) cpd00...,(2) H2O2[0] => (2) H2O[0] + (1) O2[0],>,>,,MetaCyc: C1-COMPOUNDS (C1 Compound Utilization...,"{'AraCyc': ['CATAL-RXN'], 'BiGG': ['CAT', 'CAT...","[1.11.1.21, 1.11.1.6]",-46.06,1.64,cpd00001;cpd00007;cpd00025,OK,0,rxn19264;rxn22404;rxn27744;rxn31381,GCC|EQC|EQU,Primary Database,"[CAT, CATp, CTA1, CTT1]",[R00009],[CATAL-RXN]
5,rxn00007,R00010,"alpha,alpha-trehalose glucohydrolase",(1) cpd00001[0] + (1) cpd00794[0] <=> (2) cpd0...,"-1:cpd00001:0:0:""H2O"";-1:cpd00794:0:0:""TRHL"";2...",0,(1) cpd00001[0] + (1) cpd00794[0] => (2) cpd00...,(1) H2O[0] + (1) TRHL[0] => (2) D-Glucose[0],=,>,,KEGG: rn00500 (Starch and sucrose metabolism),"{'AlgaGEM': ['R_R00010_c'], 'AraGEM': ['R_R000...",[3.2.1.28],-2.79,0.26,cpd00001;cpd00027;cpd00794,OK,0,rxn29977;rxn30219;rxn30442,GCC|EQC|EQU,Primary Database,"[ATH1, NTH1, NTH2, TREH, TREHe, TREHpp, TREHv]","[R00010, R06103]",
6,rxn00008,R00011,Mn(II):hydrogen-peroxide oxidoreductase,(2) cpd00001[0] <=> (1) cpd00025[0],"-2:cpd00001:0:0:""H2O"";1:cpd00025:0:0:""H2O2"";2:...",0,(2) cpd00001[0] <=> (1) cpd00025[0] + (2) cpd0...,(2) H2O[0] <=> (1) H2O2[0] + (2) H+[0],<,=,,MetaCyc: Degradation (Degradation/Utilization/...,"{'KEGG': ['R00011'], 'MetaCyc': ['MANGANESE-PE...",[1.11.1.13],62.53,0.8,cpd00001;cpd00025;cpd00067,CI:2,0,,GCC|EQC|EQU,Primary Database,,[R00011],
7,rxn00009,R00012,GTP:GTP guanylyltransferase,(2) cpd00038[0] <=> (1) cpd00012[0] + (1) cpd0...,"-2:cpd00038:0:0:""GTP"";1:cpd00012:0:0:""PPi"";1:c...",0,(2) cpd00038[0] => (1) cpd00012[0] + (1) cpd00...,(2) GTP[0] => (1) PPi[0] + (1) H+[0] + (1) Gpp...,=,>,,,"{'KEGG': ['R00012'], 'MetaCyc': ['2.7.7.45-RXN...",[2.7.7.45],2.7,1.52,cpd00012;cpd00038;cpd00067;cpd00925,OK,0,rxn33150;rxn33152,GCC|HB|EQC|EQU,Primary Database,,[R00012],
8,rxn00010,R00013,glyoxylate carboxy-lyase (dimerizing; tartrona...,(2) cpd00040[0] <=> (1) cpd00011[0] + (1) cpd0...,"-2:cpd00040:0:0:""Glyoxalate"";-1:cpd00067:0:0:""...",0,(2) cpd00040[0] + (1) cpd00067[0] => (1) cpd00...,(2) Glyoxalate[0] + (1) H+[0] => (1) CO2[0] + ...,=,>,,MetaCyc: Alcohol-Degradation (Alcohol Degradat...,"{'BiGG': ['GLXCBL', 'GLXCL'], 'EcoCyc': ['GLYO...",[4.1.1.47],-4.57,1.08,cpd00011;cpd00040;cpd00067;cpd00843,OK,0,,GCC|EQC|EQU,Primary Database,"[GLXCBL, GLXCL]",[R00013],
9,rxn00011,R00014,pyruvate:thiamin diphosphate acetaldehydetrans...,(1) cpd00011[0] + (1) cpd03049[0] <=> (1) cpd0...,"-1:cpd00011:0:0:""CO2"";-1:cpd03049:0:0:""2-Hydro...",0,(1) cpd00011[0] + (1) cpd03049[0] => (1) cpd00...,(1) CO2[0] + (1) 2-Hydroxyethyl-ThPP[0] => (1)...,<,>,,KEGG: rn00010 (Glycolysis / Gluconeogenesis); ...,"{'AraCyc': ['RXN-12583'], 'CornCyc': ['RXN-125...","[1.2.4.1, 2.2.1.6, 4.1.1.1]",7.23,0.84,cpd00011;cpd00020;cpd00056;cpd00067;cpd03049,OK,0,rxn31360;rxn31361,GCC|EQC|EQU,Primary Database,,[R00014],[RXN-12583]


# Mapping AraCore and BiGG




## Using KEGG Ids

In [None]:
def _bigg_ids_for_kegg(kegg_id):
    """Return the BiGG ids whose 'kegg.reaction' entry equals ``kegg_id``.

    Non-string input (e.g. a missing KEGG id) yields an empty list.
    """
    if not isinstance(kegg_id, str):
        return []
    is_match = df_reactions_bigg['kegg.reaction'] == kegg_id
    return df_reactions_bigg.loc[is_match, 'bigg_id'].tolist()

# Scan the BiGG table once per AraCore reaction and derive both columns from that result.
# Working on the single 'kegg_id' column avoids positional x[0]/x[1] access on a
# label-indexed row, which pandas has deprecated.
kegg_matches = df_reactions_aracore['kegg_id'].apply(_bigg_ids_for_kegg)

#Flag whether the kegg_id of the aracore reaction matches any kegg id in the BiGG table
df_reactions_aracore['is_bigg_kegg_id'] = kegg_matches.apply(len) > 0

#BiGG ids with a matching kegg id; [None] marks reactions without a match
df_reactions_aracore['kegg_bigg_ids'] = kegg_matches.apply(lambda bigg_ids: bigg_ids if bigg_ids else [None])
df_reactions_aracore.head(25)

Unnamed: 0,aracore_ids,aracore_name,aracore_annotations,aracore_updated_ids,aracore_updated_universal_ids,kegg_id,ec-code,is_bigg_kegg_id,kegg_bigg_ids
0,PSII_h,photosystem II,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",PSII_H,PSII,R09503,1.10.3.9,False,[None]
1,Cytb6f1_h,cytochrom b6f complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",CYTB6F1_H,CYTB6F1,R03817,1.10.9.1,False,[None]
2,Cytb6f2_h,cytochrom b6f complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",CYTB6F2_H,CYTB6F2,R03817,1.10.9.1,False,[None]
3,PGR5PGRL11_h,proton gradient regulation 5 (PGR5)/PGR5-like ...,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':...",PGR5PGRL11_H,PGR5PGRL11,R03817,,False,[None]
4,PGR5PGRL12_h,proton gradient regulation 5 (PGR5)/PGR5-like ...,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':...",PGR5PGRL12_H,PGR5PGRL12,R03817,,False,[None]
5,NDH1_h,NADH dehydrogenase-like (NDH) complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':...",NDH1_H,NDH1,R03817,,False,[None]
6,NDH2_h,NADH dehydrogenase-like (NDH) complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':...",NDH2_H,NDH2,R03817,,False,[None]
7,PSI_h,photosystem I,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",PSI_H,PSI,R09542,1.97.1.12,False,[None]
8,Fd_DASH_NADPR_h,ferredoxin-NADP reductase,"{'ec-code': '1.18.1.2', 'go': 'GO:0019684', 'k...",FD_DASH_NADPR_H,FD_DASH_NADPR,R01195,1.18.1.2,True,[FNOR]
9,ATPase_h,ATPase,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",ATPASE_H,ATPASE,R00086,3.6.3.14,True,"[ATPM, NTP1, ATPPHm, ATPS]"


In [None]:
# Tally how many AraCore reactions matched at least one BiGG reaction via their KEGG id (True = matched)
df_reactions_aracore['is_bigg_kegg_id'].value_counts() #=> 249 reactions mapped

False    323
True     249
Name: is_bigg_kegg_id, dtype: int64

## Using Ec-code

In [None]:
def _bigg_ids_for_ec(ec_code):
    """Return the BiGG ids whose 'ec-code' entry equals ``ec_code``.

    Non-string input (e.g. a missing EC code) yields an empty list.
    """
    if not isinstance(ec_code, str):
        return []
    is_match = df_reactions_bigg['ec-code'] == ec_code
    return df_reactions_bigg.loc[is_match, 'bigg_id'].tolist()

# Scan the BiGG table once per AraCore reaction and derive both columns from that result.
# Working on the single 'ec-code' column avoids positional x[0]/x[1] access on a
# label-indexed row, which pandas has deprecated.
ec_matches = df_reactions_aracore['ec-code'].apply(_bigg_ids_for_ec)

#Flag whether the ec-code of the aracore reaction matches any ec-code in the BiGG table
df_reactions_aracore['is_bigg_ec_code'] = ec_matches.apply(len) > 0

#BiGG ids with a matching ec-code; [None] marks reactions without a match
df_reactions_aracore['ec_bigg_ids'] = ec_matches.apply(lambda bigg_ids: bigg_ids if bigg_ids else [None])
df_reactions_aracore.head(50)

Unnamed: 0,aracore_ids,aracore_name,aracore_annotations,aracore_updated_ids,aracore_updated_universal_ids,kegg_id,ec-code,is_bigg_kegg_id,kegg_bigg_ids,is_bigg_ec_code,ec_bigg_ids
0,PSII_h,photosystem II,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",PSII_H,PSII,R09503,1.10.3.9,False,[None],False,[None]
1,Cytb6f1_h,cytochrom b6f complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",CYTB6F1_H,CYTB6F1,R03817,1.10.9.1,False,[None],False,[None]
2,Cytb6f2_h,cytochrom b6f complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",CYTB6F2_H,CYTB6F2,R03817,1.10.9.1,False,[None],False,[None]
3,PGR5PGRL11_h,proton gradient regulation 5 (PGR5)/PGR5-like ...,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':...",PGR5PGRL11_H,PGR5PGRL11,R03817,,False,[None],False,[None]
4,PGR5PGRL12_h,proton gradient regulation 5 (PGR5)/PGR5-like ...,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':...",PGR5PGRL12_H,PGR5PGRL12,R03817,,False,[None],False,[None]
5,NDH1_h,NADH dehydrogenase-like (NDH) complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':...",NDH1_H,NDH1,R03817,,False,[None],False,[None]
6,NDH2_h,NADH dehydrogenase-like (NDH) complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':...",NDH2_H,NDH2,R03817,,False,[None],False,[None]
7,PSI_h,photosystem I,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",PSI_H,PSI,R09542,1.97.1.12,False,[None],False,[None]
8,Fd_DASH_NADPR_h,ferredoxin-NADP reductase,"{'ec-code': '1.18.1.2', 'go': 'GO:0019684', 'k...",FD_DASH_NADPR_H,FD_DASH_NADPR,R01195,1.18.1.2,True,[FNOR],True,"[FRDO, FNOR]"
9,ATPase_h,ATPase,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",ATPASE_H,ATPASE,R00086,3.6.3.14,True,"[ATPM, NTP1, ATPPHm, ATPS]",True,"[ATPS4rpp, ATPS3m, ATPasel, ATPS3g, ATPS3v, AT..."


In [None]:
# Tally how many AraCore reactions matched at least one BiGG reaction via their EC code (True = matched)
df_reactions_aracore['is_bigg_ec_code'].value_counts() #=> 263 reactions mapped

False    309
True     263
Name: is_bigg_ec_code, dtype: int64

## Aggregate all BiGG ids from mapping of KEGG ids and Ec-codes

In [None]:
#Fuse the BiGG ids mapped by ec code and by kegg id into one list per reaction.
#The [None] placeholders of unmapped reactions are filtered out, and np.unique
#removes duplicates (and sorts) so an id found by both routes is stored only once.
#Columns are read by label rather than by position.
df_reactions_aracore['kegg_ec_bigg_id'] = df_reactions_aracore[['ec_bigg_ids', 'kegg_bigg_ids']].apply(
    lambda row: np.unique(list(filter(None, row['ec_bigg_ids'] + row['kegg_bigg_ids']))).tolist(),
    axis=1,
)

#Clean dataframe: the intermediate mapping columns are no longer needed
df_reactions_aracore = df_reactions_aracore.drop(
    columns=['is_bigg_ec_code', 'is_bigg_kegg_id', 'ec_bigg_ids', 'kegg_bigg_ids']
)

df_reactions_aracore.head(50)

Unnamed: 0,aracore_ids,aracore_name,aracore_annotations,aracore_updated_ids,aracore_updated_universal_ids,kegg_id,ec-code,kegg_ec_bigg_id
0,PSII_h,photosystem II,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",PSII_H,PSII,R09503,1.10.3.9,[]
1,Cytb6f1_h,cytochrom b6f complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",CYTB6F1_H,CYTB6F1,R03817,1.10.9.1,[]
2,Cytb6f2_h,cytochrom b6f complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",CYTB6F2_H,CYTB6F2,R03817,1.10.9.1,[]
3,PGR5PGRL11_h,proton gradient regulation 5 (PGR5)/PGR5-like ...,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':...",PGR5PGRL11_H,PGR5PGRL11,R03817,,[]
4,PGR5PGRL12_h,proton gradient regulation 5 (PGR5)/PGR5-like ...,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':...",PGR5PGRL12_H,PGR5PGRL12,R03817,,[]
5,NDH1_h,NADH dehydrogenase-like (NDH) complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':...",NDH1_H,NDH1,R03817,,[]
6,NDH2_h,NADH dehydrogenase-like (NDH) complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':...",NDH2_H,NDH2,R03817,,[]
7,PSI_h,photosystem I,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",PSI_H,PSI,R09542,1.97.1.12,[]
8,Fd_DASH_NADPR_h,ferredoxin-NADP reductase,"{'ec-code': '1.18.1.2', 'go': 'GO:0019684', 'k...",FD_DASH_NADPR_H,FD_DASH_NADPR,R01195,1.18.1.2,"[FRDO, FNOR, FNOR]"
9,ATPase_h,ATPase,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",ATPASE_H,ATPASE,R00086,3.6.3.14,"[ATPS4rpp, ATPS3m, ATPasel, ATPS3g, ATPS3v, AT..."


In [None]:
# A reaction counts as mapped when its list holds at least one BiGG id (length > 0);
# True = mapped via KEGG id and/or EC code, False = no BiGG match
(df_reactions_aracore['kegg_ec_bigg_id'].apply(len) > 0).value_counts()

False    288
True     284
Name: kegg_ec_bigg_id, dtype: int64

## Add SEED Reaction ids that are known from the BiGG Table

> Collect the ModelSEED ids (`seed.reaction`) that the BiGG table already lists for each mapped BiGG id.




In [None]:
# For every mapped BiGG id, look up the ModelSEED ids recorded in the BiGG table
# ('seed.reaction'), then flatten, drop empty entries and de-duplicate per reaction.
df_reactions_aracore['bigg_seed_id'] = (
    df_reactions_aracore['kegg_ec_bigg_id']
    # Step 1: one 'seed.reaction' selection per BiGG id; [[None]] when nothing was mapped
    .apply(
        lambda mapped_bigg_ids: [
            df_reactions_bigg[df_reactions_bigg['bigg_id'] == mapped_id]['seed.reaction']
            for mapped_id in mapped_bigg_ids
        ]
        if mapped_bigg_ids
        else [[None]]
    )
    # Step 2: unpack the per-BiGG-id selections into one flat list
    .apply(lambda nested: [seed_id for group in nested for seed_id in group])
    # Step 3: remove falsy entries and keep each ModelSEED id once
    .apply(lambda flat: list(np.unique(list(filter(None, flat)))))
)

In [None]:
# Inspect the first rows; 'bigg_seed_id' now holds the ModelSEED ids collected from the BiGG table
df_reactions_aracore.head(50)

Unnamed: 0,aracore_ids,aracore_name,aracore_annotations,aracore_updated_ids,aracore_updated_universal_ids,kegg_id,ec-code,kegg_ec_bigg_id,bigg_seed_id
0,PSII_h,photosystem II,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",PSII_H,PSII,R09503,1.10.3.9,[],[]
1,Cytb6f1_h,cytochrom b6f complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",CYTB6F1_H,CYTB6F1,R03817,1.10.9.1,[],[]
2,Cytb6f2_h,cytochrom b6f complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",CYTB6F2_H,CYTB6F2,R03817,1.10.9.1,[],[]
3,PGR5PGRL11_h,proton gradient regulation 5 (PGR5)/PGR5-like ...,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':...",PGR5PGRL11_H,PGR5PGRL11,R03817,,[],[]
4,PGR5PGRL12_h,proton gradient regulation 5 (PGR5)/PGR5-like ...,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':...",PGR5PGRL12_H,PGR5PGRL12,R03817,,[],[]
5,NDH1_h,NADH dehydrogenase-like (NDH) complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':...",NDH1_H,NDH1,R03817,,[],[]
6,NDH2_h,NADH dehydrogenase-like (NDH) complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':...",NDH2_H,NDH2,R03817,,[],[]
7,PSI_h,photosystem I,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",PSI_H,PSI,R09542,1.97.1.12,[],[]
8,Fd_DASH_NADPR_h,ferredoxin-NADP reductase,"{'ec-code': '1.18.1.2', 'go': 'GO:0019684', 'k...",FD_DASH_NADPR_H,FD_DASH_NADPR,R01195,1.18.1.2,"[FRDO, FNOR, FNOR]","[rxn05937, rxn14159]"
9,ATPase_h,ATPase,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",ATPASE_H,ATPASE,R00086,3.6.3.14,"[ATPS4rpp, ATPS3m, ATPasel, ATPS3g, ATPS3v, AT...","[rxn10042, rxn11300, rxn13766]"


# Mapping AraCore and ModelSeed


## Using KEGG ids

In [None]:
def check_for_kegg_modelseed(kegg_id, seed_table=None):
  """Return the ModelSEED reaction ids whose 'KEGG' entry contains `kegg_id`.

  Parameters
  ----------
  kegg_id : str
      KEGG reaction id to look for (e.g. 'R00086').
  seed_table : pandas.DataFrame, optional
      Table with an 'id' column and a 'KEGG' column. Defaults to the
      notebook-level `df_reactions_seed`, which keeps existing one-argument
      calls working while allowing the lookup to run on any table.

  Returns
  -------
  list
      Values of the 'id' column for all matching rows (empty list if none).

  Notes
  -----
  Membership is tested with `in`, so a list entry is matched exactly while a
  string entry would be matched as a substring.
  NOTE(review): confirm which of the two the 'KEGG' column holds.
  Empty/None 'KEGG' entries never match.
  """
  if seed_table is None:
    seed_table = df_reactions_seed
  #astype(bool) keeps the mask usable for boolean indexing even for an empty table
  has_kegg_id = seed_table['KEGG'].apply(
      lambda kegg_refs: kegg_id in kegg_refs if kegg_refs else False
  ).astype(bool)
  return seed_table.loc[has_kegg_id, 'id'].tolist()

#Look up additional ModelSEED ids for every AraCore reaction through its KEGG id;
#rows whose kegg_id is not a string (e.g. missing) get the placeholder [None]
df_reactions_aracore['kegg_seed_ids'] = df_reactions_aracore['kegg_id'].apply(
    lambda kegg_id: [None] if not isinstance(kegg_id, str) else check_for_kegg_modelseed(kegg_id)
)

df_reactions_aracore.head(n=25)

Unnamed: 0,aracore_ids,aracore_name,aracore_annotations,aracore_updated_ids,aracore_updated_universal_ids,kegg_id,ec-code,kegg_ec_bigg_id,bigg_seed_id,kegg_seed_ids
0,PSII_h,photosystem II,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",PSII_H,PSII,R09503,1.10.3.9,[],[],"[rxn16345, rxn34264]"
1,Cytb6f1_h,cytochrom b6f complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",CYTB6F1_H,CYTB6F1,R03817,1.10.9.1,[],[],"[rxn11995, rxn39426]"
2,Cytb6f2_h,cytochrom b6f complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",CYTB6F2_H,CYTB6F2,R03817,1.10.9.1,[],[],"[rxn11995, rxn39426]"
3,PGR5PGRL11_h,proton gradient regulation 5 (PGR5)/PGR5-like ...,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':...",PGR5PGRL11_H,PGR5PGRL11,R03817,,[],[],"[rxn11995, rxn39426]"
4,PGR5PGRL12_h,proton gradient regulation 5 (PGR5)/PGR5-like ...,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':...",PGR5PGRL12_H,PGR5PGRL12,R03817,,[],[],"[rxn11995, rxn39426]"
5,NDH1_h,NADH dehydrogenase-like (NDH) complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':...",NDH1_H,NDH1,R03817,,[],[],"[rxn11995, rxn39426]"
6,NDH2_h,NADH dehydrogenase-like (NDH) complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':...",NDH2_H,NDH2,R03817,,[],[],"[rxn11995, rxn39426]"
7,PSI_h,photosystem I,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",PSI_H,PSI,R09542,1.97.1.12,[],[],[rxn16384]
8,Fd_DASH_NADPR_h,ferredoxin-NADP reductase,"{'ec-code': '1.18.1.2', 'go': 'GO:0019684', 'k...",FD_DASH_NADPR_H,FD_DASH_NADPR,R01195,1.18.1.2,"[FRDO, FNOR, FNOR]","[rxn05937, rxn14159]",[rxn14159]
9,ATPase_h,ATPase,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",ATPASE_H,ATPASE,R00086,3.6.3.14,"[ATPS4rpp, ATPS3m, ATPasel, ATPS3g, ATPS3v, AT...","[rxn10042, rxn11300, rxn13766]","[rxn00062, rxn11300, rxn27586, rxn27587, rxn27..."


In [None]:
#Count reactions that received at least one ModelSEED id through their KEGG id.
#Every row of 'kegg_seed_ids' holds a list (rows without a KEGG id hold [None]),
#so an isinstance(..., list) test is True for all rows; test for at least one
#non-empty entry instead.
#The original note expected 319 reactions here -- TODO confirm on re-run.
df_reactions_aracore['kegg_seed_ids'].apply(lambda seed_ids: any(seed_ids)).value_counts()

True    572
Name: kegg_seed_ids, dtype: int64

## Add BiGG reaction ids that are known from the ModelSEED table


In [None]:
def _bigg_ids_for_seed_ids(kegg_seed_ids):
    """Collect the 'BiGG' entries the ModelSEED table holds for the given ModelSEED ids.

    Returns [None] when the input is not a list, is empty, or matches no row of
    the ModelSEED table. Otherwise returns the sorted unique non-empty 'BiGG'
    values of the matching rows (an empty list if all of them are empty).
    """
    if not isinstance(kegg_seed_ids, list):
        return [None]

    #One list of 'BiGG' values per ModelSEED id (empty if the id is unknown)
    bigg_per_seed_id = [
        df_reactions_seed[df_reactions_seed['id'] == seed_id]['BiGG'].tolist()
        for seed_id in kegg_seed_ids
    ]

    #Unpack the per-id lists into one flat list
    bigg_entries = [entry for entries in bigg_per_seed_id for entry in entries]
    if not bigg_entries:
        return [None]

    #Drop empty entries and keep each remaining value once
    return list(np.unique([entry for entry in bigg_entries if entry]))


#Add all BiGG ids that have already been mapped to ModelSEED ids in the ModelSEED Table
df_reactions_aracore['seed_bigg_id'] = df_reactions_aracore['kegg_seed_ids'].apply(_bigg_ids_for_seed_ids)

In [None]:
#Preview the first 50 rows, including the new 'seed_bigg_id' column
df_reactions_aracore.head(n=50)

Unnamed: 0,aracore_ids,aracore_name,aracore_annotations,aracore_updated_ids,aracore_updated_universal_ids,kegg_id,ec-code,kegg_ec_bigg_id,bigg_seed_id,kegg_seed_ids,seed_bigg_id
0,PSII_h,photosystem II,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",PSII_H,PSII,R09503,1.10.3.9,[],[],"[rxn16345, rxn34264]",[]
1,Cytb6f1_h,cytochrom b6f complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",CYTB6F1_H,CYTB6F1,R03817,1.10.9.1,[],[],"[rxn11995, rxn39426]",[]
2,Cytb6f2_h,cytochrom b6f complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",CYTB6F2_H,CYTB6F2,R03817,1.10.9.1,[],[],"[rxn11995, rxn39426]",[]
3,PGR5PGRL11_h,proton gradient regulation 5 (PGR5)/PGR5-like ...,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':...",PGR5PGRL11_H,PGR5PGRL11,R03817,,[],[],"[rxn11995, rxn39426]",[]
4,PGR5PGRL12_h,proton gradient regulation 5 (PGR5)/PGR5-like ...,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':...",PGR5PGRL12_H,PGR5PGRL12,R03817,,[],[],"[rxn11995, rxn39426]",[]
5,NDH1_h,NADH dehydrogenase-like (NDH) complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':...",NDH1_H,NDH1,R03817,,[],[],"[rxn11995, rxn39426]",[]
6,NDH2_h,NADH dehydrogenase-like (NDH) complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':...",NDH2_H,NDH2,R03817,,[],[],"[rxn11995, rxn39426]",[]
7,PSI_h,photosystem I,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",PSI_H,PSI,R09542,1.97.1.12,[],[],[rxn16384],[]
8,Fd_DASH_NADPR_h,ferredoxin-NADP reductase,"{'ec-code': '1.18.1.2', 'go': 'GO:0019684', 'k...",FD_DASH_NADPR_H,FD_DASH_NADPR,R01195,1.18.1.2,"[FRDO, FNOR, FNOR]","[rxn05937, rxn14159]",[rxn14159],[]
9,ATPase_h,ATPase,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",ATPASE_H,ATPASE,R00086,3.6.3.14,"[ATPS4rpp, ATPS3m, ATPasel, ATPS3g, ATPS3v, AT...","[rxn10042, rxn11300, rxn13766]","[rxn00062, rxn11300, rxn27586, rxn27587, rxn27...","[ATPM, ATPOBJ, FATP, NTP1, U214]"


# Aggregate Seed and BiGG Ids

In [None]:
def _fuse_unique_ids(row):
    """Merge the id lists stored in the cells of `row` into one sorted list of unique ids.

    Empty entries (None, '') are dropped. The cells are read by iterating over
    the row's values, which avoids positional `row[0]` lookups on a
    string-labelled Series (deprecated in pandas 2.x). `.tolist()` converts the
    NumPy result back to plain Python values so the lists are written to CSV as
    ['id1', 'id2'] rather than as NumPy scalar representations.
    """
    fused_ids = [mapped_id for id_list in row for mapped_id in id_list if mapped_id]
    return np.unique(fused_ids).tolist()


#Fuse list of mapped BiGG ids and create list of unique ids
df_reactions_aracore['bigg_id_aggr'] = df_reactions_aracore[['seed_bigg_id','kegg_ec_bigg_id']].apply(_fuse_unique_ids, axis=1)

#Fuse list of mapped Seeds ids and create list of unique ids
df_reactions_aracore['seed_id_aggr'] = df_reactions_aracore[['kegg_seed_ids','bigg_seed_id']].apply(_fuse_unique_ids, axis=1)

#Clean dataframe and drop the intermediate mapping columns that are now aggregated
#NOTE: because the inputs are dropped in place, this cell cannot be re-run
#without re-running the mapping cells above it first.
df_reactions_aracore.drop(['kegg_ec_bigg_id', 'bigg_seed_id', 'kegg_seed_ids', 'seed_bigg_id'], axis=1, inplace=True)

df_reactions_aracore.head(50)

Unnamed: 0,aracore_ids,aracore_name,aracore_annotations,aracore_updated_ids,aracore_updated_universal_ids,kegg_id,ec-code,bigg_id_aggr,seed_id_aggr
0,PSII_h,photosystem II,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",PSII_H,PSII,R09503,1.10.3.9,[],"[rxn16345, rxn34264]"
1,Cytb6f1_h,cytochrom b6f complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",CYTB6F1_H,CYTB6F1,R03817,1.10.9.1,[],"[rxn11995, rxn39426]"
2,Cytb6f2_h,cytochrom b6f complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",CYTB6F2_H,CYTB6F2,R03817,1.10.9.1,[],"[rxn11995, rxn39426]"
3,PGR5PGRL11_h,proton gradient regulation 5 (PGR5)/PGR5-like ...,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':...",PGR5PGRL11_H,PGR5PGRL11,R03817,,[],"[rxn11995, rxn39426]"
4,PGR5PGRL12_h,proton gradient regulation 5 (PGR5)/PGR5-like ...,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':...",PGR5PGRL12_H,PGR5PGRL12,R03817,,[],"[rxn11995, rxn39426]"
5,NDH1_h,NADH dehydrogenase-like (NDH) complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':...",NDH1_H,NDH1,R03817,,[],"[rxn11995, rxn39426]"
6,NDH2_h,NADH dehydrogenase-like (NDH) complex,"{'doi': '10.1016/j.tplants.2011.10.004', 'go':...",NDH2_H,NDH2,R03817,,[],"[rxn11995, rxn39426]"
7,PSI_h,photosystem I,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",PSI_H,PSI,R09542,1.97.1.12,[],[rxn16384]
8,Fd_DASH_NADPR_h,ferredoxin-NADP reductase,"{'ec-code': '1.18.1.2', 'go': 'GO:0019684', 'k...",FD_DASH_NADPR_H,FD_DASH_NADPR,R01195,1.18.1.2,"[FNOR, FRDO]","[rxn05937, rxn14159]"
9,ATPase_h,ATPase,"{'doi': '10.1016/j.tplants.2011.10.004', 'ec-c...",ATPASE_H,ATPASE,R00086,3.6.3.14,"[ATPM, ATPOBJ, ATPPHm, ATPS, ATPS3g, ATPS3m, A...","[rxn00062, rxn10042, rxn11300, rxn13766, rxn27..."


In [None]:
#298 reactions have at least one bigg id (True = non-empty list)
df_reactions_aracore['bigg_id_aggr'].apply(len).ne(0).value_counts()

True     298
False    274
Name: bigg_id_aggr, dtype: int64

In [None]:
#320 reactions have at least one seed id (True = non-empty list)
df_reactions_aracore['seed_id_aggr'].apply(len).ne(0).value_counts()

True     320
False    252
Name: seed_id_aggr, dtype: int64

In [None]:
#Write the final mapping table to CSV as the starting point for manual mapping.
#NOTE(review): the relative path presumably expects Google Drive to be mounted
#in the Colab working directory -- confirm before running elsewhere.
df_reactions_aracore.to_csv(path_or_buf='drive/MyDrive/2021-05-31-reactions-mapping-table.csv')