# Introduction
In the previous notebooks and commits, we've fixed 60% of the mass imbalances present in the model. Here I will split the list of left over imbalances in two: one for Martyn to check by hand, the other I will do here. 


## Manual fixing of unbalanced metabolites


In [1]:
import cameo
import pandas as pd
import cobra.io
import escher

In [2]:
model = cobra.io.read_sbml_model('../model/g-thermo.xml')

In [3]:
unbalanced_id_new = ["DCTPT","THPPGLIG","SUCHY2","MHTPPT","DTTPT","AHETDYTTPHY","CELLBHY","TAL","SUCHY","OAACOLY","3HAD40_1","ATPACAT","BGLUCH","COOR","DXYL5PTST","SEPHCHCS","PYRPT","FBAf","SUCBZL","SCENLAT","DTPCUDCPPR","ECOAH9ir","GCCa","PYRACT","AMYLOSEGT","ACEDIA","HPI","SUCHMSSELCYSL","TALA","UPP1S","FICYTCCNOR","SELCYSTL","TKT2","HOXPRm","GCCb","THRPD","ADPGLCGT","ACt","LAC__Dt","FOCYTCCOR","FAPNTPAH","SUCCt","MBTCOAT","BCFASYN2","GTPHYDRO","VALDHr","TDP","MALACPAT","UGE","THRS","GLUTRS","TMDPPK","CO2t","UACMAMO","Ht","AKGDEHY","FRU1PT","GSPMDS","PRE3AOR","PGLHY","SSALx","ACCOAACT","AKGOR","NADHQOR","SSALy","ATPNAT","ACCOATT","SHSL1","STARCHHY","PRPAT","ARABINANHY","CDPDAGPT","PRPUT","PRPGT","PRPCT","23DK5MPPISO","STARCHHY2","AMYTRANS","STARCHGT","OPALMACPOR","HDEACPT","SHSL2","UDPGD","ARBTPT","TREPT","LAC__Lt","UAMAGS","BCFASYN","SBP","HMSD","GHMT","MNLPT","GALISO","GLCDPT","ASPSALY","AH6PISO","NH4t","ATPPT","GTHRDt","UAMAS","QH2t","UPP3MT","SALCNPT","TMPPP","DECACPOR","MYRACPOR","THMTPt","R05219","UPPDC1","MMACPAT","MHOPT","OMCDC","NADPt","CTPPCT","GAMPT","AMETLY","AADCOAT","ENCOAT","MANPT","UAGCVT","SHSL4h","SERD_L","DHNAOCTD","SELCYSLY","ATPTAT","LPROQOR","BTNLIG","ACDO","UAAAGGLAAT","STACHY2","COQOR","THMt","CELLOHYDRO","ACMUMPT","COBALT2t","MTRI","ETOHt","MDRPD","STACHY","PYDX5Pt","QH2OR","CLt","IVCOAR","ACTCO2L","QH2NOR","BCFASYN3","COH2OOR","MALTPT","ASN__Lt","GLUTRR","GLC__Dt","HEXACPOR","HEXACPAT","DATPT","BTN5AMPL","GTPT","MALQOR","FE2t","CTPT","OBO2OR","F1PPT","SELMELIG","LALDPOR","OH1t","PFKh","DGTPT","PHEAOR","UTPT","BFBP","OCTACPOR","PYRLLOR","O2t","SAM","SELCYSTGL"]
len(unbalanced_id_new)

174

In [4]:
#convert to list of reactions
unbalanced_rct_new = []
for rct in model.reactions:
    if rct.id in unbalanced_id_new:
        unbalanced_rct_new.append(rct)
    else:
        continue
len(unbalanced_rct_new)   

174

In [5]:
martyn_reactions = unbalanced_rct_new[0:86]
len(martyn_reactions)

86

In [6]:
viv_reactions = unbalanced_rct_new[87:173]
len(viv_reactions)

86

In [7]:
#make a csv file of both lists to use in analysis
martyn_id = []
martyn_name = []
for rct in martyn_reactions:
    martyn_id.append(rct.id)
    martyn_name.append(rct.name)


In [8]:
viv_id = []
viv_name = []
for rct in viv_reactions:
    viv_id.append(rct.id)
    viv_name.append(rct.name)

In [9]:
martyn_df = pd.DataFrame({'rct_id': martyn_id, 'rct_name':martyn_name})
viv_df = pd.DataFrame({'rct_id': viv_id, 'rct_name':viv_name})

In [11]:
#df to two csv files
martyn_df.to_csv('../databases/Martyn_unbalanced_rcts.csv')
viv_df.to_csv('../databases/Viv_unbalanced_rcts.csv')

From the list, I will just go through them all, inspect them and change what should be fixed. In some cases, it can be usefull to check how the reactions are stored in the E. coli iML1515 model. If some reactions cannot be fixed, I will keep them noted and discuss with Niko/Ben how to proceed with those cases further.

Every 5 reactions I will save & commit, to prevent too many changes from accumulating between commits.

In [7]:
model_e_coli = cameo.load_model("iML1515")
model_e_coli_MG1655 = cameo.load_model("iJO1366")

In [4]:
model = cobra.io.read_sbml_model('../model/g-thermo.xml')

In [202]:
model.reactions.AHETDYTTPHY.add_metabolites({model.metabolites.h_c: -3.0})

In [203]:
model.reactions.COOR.add_metabolites({model.metabolites.fcytb5612_c: 2.0, model.metabolites.h_c:1.0})

In [204]:
model.metabolites.fdp_B_c.formula = 'C6H10O12P2'

In [205]:
model.reactions.PFKh.add_metabolites({model.metabolites.h_c:3.0})

In [206]:
model.metabolites.f6p_B_c.formula = 'C6H11O9P'

In [207]:
model.reactions.SELCYSTGL.add_metabolites({model.metabolites.h_c:1.0})

In [208]:
model.reactions.SELCYSTL.add_metabolites({model.metabolites.h_c:1.0})

In [209]:
model.reactions.SUCHMSSELCYSL.add_metabolites({model.metabolites.h_c:1.0})

In [210]:
model.metabolites.uppg1_c.formula = 'C40H36N4O16'

In [217]:
model.reactions.FAPNTPAH.add_metabolites({model.metabolites.h_c:1.0})

In [219]:
#save & commit
cobra.io.write_sbml_model(model,'../model/g-thermo.xml')

In [3]:
#noticed water is named as oh1, which is not so useful so i cahnged it here.
model.metabolites.oh1_c.id = 'h2o_c'
model.metabolites.oh1_e.id = 'h2o_e'

AttributeError: DictList has no attribute or entry oh1_c

In [None]:
#save & commit
cobra.io.write_sbml_model(model,'../model/g-thermo.xml')

In [13]:
model.metabolites.arbt_c

0,1
Metabolite identifier,arbt_c
Name,Arbutin_C12H16O7
Memory address,0x0255e98c5c48
Formula,C12H16O7
Compartment,c
In 1 reaction(s),ARBTPT


In [6]:
model.reactions.ARBTPT.check_mass_balance()

{'charge': -1.0, 'O': 3.0, 'P': 1.0}

In [5]:
model.reactions.ARBTPT

0,1
Reaction identifier,ARBTPT
Name,R05132
Memory address,0x0255ea9376c8
Stoichiometry,arbt_c --> arbt6p_c  Arbutin_C12H16O7 --> Arbutin_6-phosphate_C12H16O10P
GPR,RTMO01762 or RTMO01416 or RTMO01045 or RTMO01418 or RTMO01651 or RTMO01493 or RTMO01633 or RTMO00...
Lower bound,0.0
Upper bound,20.0


In [168]:
for rct in rct_ahcys:
    if rct in rct_amet_c:
        print (rct)

HCYSMT: amet_c + hcys__L_c --> ahcys_c + h_c + met__L_c
DMQMT: 2omhmbl_c + amet_c --> ahcys_c + h_c + q8h2_c
UPP3MT: 2.0 amet_c + uppg3_c --> 2.0 ahcys_c + dscl_c + h_c
OMBZLM: 2ombzl_c + amet_c --> 2ommbl_c + ahcys_c + h_c
CFAS180G: 2.0 amet_c + pg181_c --> 2.0 ahcys_c + cpg180_c + 2.0 h_c
MALCOAMT: amet_c + malcoa_c --> ahcys_c + malcoame_c
CFAS160G: 2.0 amet_c + pg161_c --> 2.0 ahcys_c + cpg160_c + 2.0 h_c
CFAS160E: 2.0 amet_c + pe161_c --> 2.0 ahcys_c + cpe160_c + 2.0 h_c
AMMQLT8: 2dmmql8_c + amet_c --> ahcys_c + h_c + mql8_c
OHPHM: 2ohph_c + amet_c --> 2omph_c + ahcys_c + h_c
CFAS180E: 2.0 amet_c + pe181_c --> 2.0 ahcys_c + cpe180_c + 2.0 h_c
TEO2M: amet_c + teo2_c --> ahcys_c + mteo2_c
ACONMT: acon_T_c + amet_c --> aconm_c + ahcys_c


In [15]:
met_id = []
met_kegg = []
for met in model.metabolites:
    met_id.append(met.id)
    try:
        met_kegg.append(met.notes['KEGG'])
    except KeyError:
        met_kegg.append('--')

In [16]:
mets_df = pd.DataFrame({'ID': met_id, 'KEGG': met_kegg})
mets_df[0:5]

Unnamed: 0,ID,KEGG
0,pyridoxal_c,C00030
1,pydx5p_c,C00018
2,co2dam_c,C06504
3,adhlam_c,C16255
4,selmethtrna_c,C05336


In [21]:
mets_df.loc[mets_df['KEGG'] =='C00615']

Unnamed: 0,ID,KEGG


In [17]:
#for e. coli model
met_id_ecoli = []
met_kegg_ecoli = []
for met in model_e_coli.metabolites:
    met_id_ecoli.append(met.id)
    try:
        met_kegg_ecoli.append(met.annotation['kegg.reaction'])
    except KeyError:
        met_kegg_ecoli.append('--')

In [18]:
mets_ecoli_df = pd.DataFrame({'ID': met_id_ecoli, 'KEGG': met_kegg_ecoli})
mets_ecoli_df[0:5]

Unnamed: 0,ID,KEGG
0,octapb_c,--
1,cysi__L_e,--
2,dhap_c,--
3,prbatp_c,--
4,10fthf_c,--


In [22]:
mets_ecoli_df.loc[mets_ecoli_df['KEGG'] =='C00615']

Unnamed: 0,ID,KEGG
