In [29]:
import cobra as cb
import os
import pandas as pd
import csv
import sys
# Extend the module search path before importing `settings`
# (presumably `settings.py` lives in this directory -- TODO confirm).
# NOTE(review): this is an absolute, user-specific path; the notebook will not
# import `settings` on another machine unless this line is adapted.
sys.path.append('/home/sergio/Dropbox/s/cthermgem-dev/')
import settings
# Switch the working directory to <PROJECT_ROOT>/iCBI so that files written later
# with relative names end up in that folder.
os.chdir(os.path.join(settings.PROJECT_ROOT,'iCBI'))

In [32]:
# Resolve the model file locations first, then load both models.
icbi_sbml_path = os.path.join(settings.PROJECT_ROOT, 'iCBI', 'iCBI665_v3.sbml')
isg_json_path = os.path.join(settings.PROJECT_ROOT, 'iSG676', 'iSG676_cb.json')

# v3 contains glucose equivalent uptake reactions
icbi = cb.io.read_sbml_model(icbi_sbml_path)
isg = cb.io.load_json_model(isg_json_path)

# Metabolites

In [33]:
# Tab-separated alias table (columns shown below: MS ID, Old MS ID, External ID, Source)
aliases_path = os.path.join(settings.PROJECT_ROOT, 'iCBI', 'id_map', 'Compounds_Aliases.tsv')
c = pd.read_csv(aliases_path, sep='\t')
c.head(2)

Unnamed: 0,MS ID,Old MS ID,External ID,Source
0,cpd00201,cpd00201,10fthf,BiGG1
1,cpd11255,,12d3k5m,BiGG1


In [34]:
# Distinct values of the "Source" column of the alias table
c['Source'].unique()

array(['BiGG1', 'KEGG', 'MetaCyc', 'PlantCyc', 'BiGG'], dtype=object)

There are two BiGG sources, BiGG1 and BiGG, which implies that BiGG is the more current of the two.

In [37]:
# Create mapping dictionaries (ModelSEED ID -> external BiGG ID), one per BiGG source.
# Rows are selected with a boolean mask and zipped column-wise instead of being
# walked with DataFrame.iterrows(), which builds a Series object for every row.
# Row order is preserved, so when an MS ID occurs more than once for a source the
# last row still wins, exactly as with the row-by-row assignment.
is_bigg1 = c['Source'] == 'BiGG1'
is_bigg = c['Source'] == 'BiGG'
ms2bigg1 = dict(zip(c.loc[is_bigg1, 'MS ID'], c.loc[is_bigg1, 'External ID']))
ms2bigg = dict(zip(c.loc[is_bigg, 'MS ID'], c.loc[is_bigg, 'External ID']))


In [43]:
# Map every iCBI metabolite (compartment suffix removed) to a BiGG ID.
# The 'BiGG' aliases are consulted first, then 'BiGG1'; IDs found in neither table
# map to themselves and are recorded in not_matched.
not_matched = []
ms2bigg_icbi = {}
for met in icbi.metabolites:
    # Drop the last three characters of the ID (e.g. "_c0" / "_e0")
    mid_nc = met.id[:-3]

    for alias_table in (ms2bigg, ms2bigg1):
        if mid_nc in alias_table:
            ms2bigg_icbi[mid_nc] = alias_table[mid_nc]
            break
    else:
        # No alias in either table: keep the original ID as a placeholder.
        # NOTE: one entry is appended per metabolite object, so a base ID that
        # occurs in several compartments is counted more than once.
        ms2bigg_icbi[mid_nc] = mid_nc
        not_matched.append(mid_nc)

print('Not matched: ', len(not_matched))

Not matched:  107


In [51]:
# Collect one display name per mapped metabolite, in the iteration order of
# ms2bigg_icbi. The cytosolic entry ("_c0") is preferred over the extracellular
# one ("_e0"); the last three characters of the name are dropped.
names = []
for mid in ms2bigg_icbi.keys():
    for suffix in ('_c0', '_e0'):
        if mid + suffix in icbi.metabolites:
            names.append(icbi.metabolites.get_by_id(mid + suffix).name[:-3])
            break
    else:
        # Not present in either compartment: report it, but still append a
        # placeholder so that `names` stays the same length as ms2bigg_icbi.
        # Otherwise building a DataFrame from both would fail on unequal lengths.
        print(mid)
        names.append(None)

In [64]:
# One row per compartment-free ModelSEED ID: its BiGG alias and its iCBI name
df = pd.DataFrame({
    'ms': list(ms2bigg_icbi),
    'bigg': [ms2bigg_icbi[ms_id] for ms_id in ms2bigg_icbi],
    'name_icbi': names,
})
df.head()

Unnamed: 0,bigg,ms,name_icbi
0,pi,cpd00009,Phosphate
1,h,cpd00067,H_plus_
2,oh1,cpd00001,H2O
3,ppi,cpd00012,PPi
4,co2,cpd00011,CO2


In [79]:
# Persist the raw metabolite map as CSV without the row index; the relative file
# name resolves against the current working directory.
df.to_csv('bigg2ms_met_raw.csv',index=False) # Manual curation will be performed to add relevant unmatched metabolites

In [81]:
# Spot-check a single metabolite record (looked up by attribute access on the
# model's metabolite list); its summary is rendered as the cell output.
icbi.metabolites.cpd11424_c0

0,1
Metabolite identifier,cpd11424_c0
Name,cpd11424_c0
Memory address,0x07f9f9f89be10
Formula,
Compartment,c0
In 2 reaction(s),"DAGS_c0, MAGS_c0"


# Reactions
Reactions in iCBI, as downloaded from KBase, do not follow the ModelSEED IDs but rather use modified BiGG-based IDs.

In [115]:
def nfix(name):
    """Remove the iCBI compartment tags from an identifier.

    Every occurrence of ``_c0`` is removed first, then every occurrence of
    ``_e0`` (anywhere in the string, not only at the end).

    Parameters
    ----------
    name : str
        Identifier as used in iCBI, e.g. ``"PPA_c0"``.

    Returns
    -------
    str
        The identifier without the compartment tags, e.g. ``"PPA"``.
    """
    for tag in ('_c0', '_e0'):
        name = name.replace(tag, '')
    return name

# iCBI reaction IDs with the compartment tags stripped by nfix
rxnid = [nfix(reaction.id) for reaction in icbi.reactions]

# Load curated map: column 'ms' (ModelSEED metabolite ID) -> column 'bigg' (BiGG ID).
# The file is opened with newline='' as the csv module requires, so that line
# endings inside quoted fields are handed to the reader unchanged.
ms2bigg_met = {}
curated_map_path = os.path.join(settings.PROJECT_ROOT, 'iCBI', 'id_map', 'bigg2ms_met.csv')
with open(curated_map_path, 'r', newline='') as f:
    reader = csv.DictReader(f)
    for row in reader:
        ms2bigg_met[row['ms']] = row['bigg']
        
def ex_remap(rid):
    """Convert an iCBI exchange-reaction ID into its BiGG-style form.

    The first three characters (the ``EX_`` prefix) and the last three
    characters (the compartment tag) are cut off to obtain the metabolite ID.
    If that ID is a key of the module-level ``ms2bigg_met`` map, the result is
    ``EX_<bigg id>_e``; e.g. ``EX_cpd00001_e0`` becomes ``EX_h2o_e`` when
    ``cpd00001`` maps to ``h2o``. Otherwise only the trailing three characters
    are removed, e.g. ``EX_fuc__L_e_e0`` becomes ``EX_fuc__L_e``.

    Parameters
    ----------
    rid : str
        Exchange reaction ID as used in iCBI.

    Returns
    -------
    str
        The remapped exchange reaction ID.
    """
    met_id = rid[3:-3]
    if met_id not in ms2bigg_met:
        # Unknown metabolite: keep the ID, minus its trailing compartment tag
        return rid[:-3]
    template = 'EX_{}_e'
    return template.format(ms2bigg_met[met_id])

In [116]:
# Pair every iCBI reaction with the iSG reaction carrying the equivalent BiGG-style ID.
# Exchange reactions ("EX_" prefix) are translated with ex_remap, all others with nfix.
# Reactions without a counterpart in iSG map to themselves and are listed in unmatched_ms.
# NOTE: if two iCBI reactions resolve to the same iSG ID, the later one overwrites
# the earlier entry in bigg2ms_rxn.
bigg2ms_rxn = {}
unmatched_ms = []
for rxn in icbi.reactions:
    biggid = ex_remap(rxn.id) if rxn.id.startswith('EX_') else nfix(rxn.id)
    try:
        r_isg = isg.reactions.get_by_id(biggid)
    except KeyError:
        unmatched_ms.append(rxn.id)
        bigg2ms_rxn[rxn.id] = rxn.id
    else:
        bigg2ms_rxn[r_isg.id] = rxn.id
print('Unmatched', len(unmatched_ms))


Unmatched 5


In [117]:
# iCBI reaction names, in the same order as the values of bigg2ms_rxn
names = [
    icbi.reactions.get_by_id(ms_rxn_id).name
    for ms_rxn_id in bigg2ms_rxn.values()
]

In [118]:
# One row per reaction: BiGG-style ID, iCBI (ModelSEED-export) ID and iCBI name
df = pd.DataFrame({
    'bigg': list(bigg2ms_rxn),
    'ms': [bigg2ms_rxn[bigg_id] for bigg_id in bigg2ms_rxn],
    'names': names,
})
# Manual curation will be performed afterwards to add relevant unmatched reactions
df.to_csv('bigg2ms_rxn_raw.csv', index=False)
df.head()

Unnamed: 0,bigg,ms,names
0,PPA,PPA_c0,CustomReaction_c0
1,ACLS,ACLS_c0,pyruvate_pyruvate_acetaldehydetransferase_deca...
2,PDHam1hi,PDHam1hi_c0,pyruvate_thiamin_diphosphate_acetaldehydetrans...
3,ECH,ECH_c0,CustomReaction_c0
4,AHEXASE3,AHEXASE3_c0,chitobiose_N_acetylglucosaminohydrolase_c0


In [119]:
# Display the iCBI reaction IDs for which no iSG reaction was found
unmatched_ms

['PFK_3_ppi_c0', 'FRDx_c0', 'bio1', 'EX_cpd02701_c0', 'EX_cpd11416_c0']

The only unmatched reactions correspond to new additions to the model.