In [1]:
import cobra
import pandas as pd

In [2]:
model = cobra.io.read_sbml_model("../results/reconstructions/gapfilled_pto.xml")

The purpose of this notebook is to use the gapfilled model for making growth predictions on single carbon sources. To do this, I will need to generate media conditions using the 192 carbon sources and the Biolog salts. Then, I need to run FBA and see if the model loaded is able to produce growth given the single carbon source. This will be a simple yes no to growth.

In [3]:
model.optimize()

Unnamed: 0,fluxes,reduced_costs
rxn00001_c,0.000000,-4.140411e-02
rxn00002_c,0.000000,8.126409e-18
rxn00011_c,0.000000,2.877414e-17
rxn00012_c,0.000000,-0.000000e+00
rxn00020_c,0.000000,-2.268485e-17
...,...,...
rxn25741_c,5.659114,1.489635e-17
rxn00688_c,-1.417322,-9.825617e-18
rxn08710_c,1.414779,1.002002e-16
rxn08231_c,-1.329231,-1.760676e-17


In [12]:
#define the base biolog media first
#first is the base composition, these are the salts that are necessary for growth, but alone do not contribute to growth
#/ minimally so
biolog_salts = pd.read_csv("../data/biolog_base_composition.csv")
print (biolog_salts)

         Name          ID
0         H2O  cpd00001_e
1          O2  cpd00007_e
2   Phosphate  cpd00009_e
3         CO2  cpd00011_e
4         NH3  cpd00013_e
5        Mn2+  cpd00030_e
6        Zn2+  cpd00034_e
7     Sulfate  cpd00048_e
8        Cu2+  cpd00058_e
9        Ca2+  cpd00063_e
10         H+  cpd00067_e
11        Cl-  cpd00099_e
12       Co2+  cpd00149_e
13         K+  cpd00205_e
14         Mg  cpd00254_e
15        Na+  cpd00971_e
16       Fe2+  cpd10515_e
17        fe3  cpd10516_e


In [13]:
biolog_medias = pd.read_csv("../data/biologplates.csv")
print (biolog_medias)

                                Name          ID
0                         m-Inositol  cpd00121_e
1           gamma-Amino Butyric Acid  cpd00281_e
2                        D-Galactose  cpd00108_e
3                        L-Glutamine  cpd00053_e
4                          L-Proline  cpd00129_e
5    D-Galactonic Acid-gamma-Lactone  cpd02143_e
6                         D-Mannitol  cpd00314_e
7                          D-Mannose  cpd00138_e
8                    alpha-D-Glucose  cpd00027_e
9                        L-Arabinose  cpd00224_e
10                           Sucrose  cpd00076_e
11               L-Pyroglutamic Acid  cpd01293_e
12                   L-Glutamic Acid  cpd00023_e
13                          D-Xylose  cpd00154_e
14                         L-Alanine  cpd00035_e
15                           Uridine  cpd00249_e
16                   D-Gluconic Acid  cpd00222_e
17                        Mucic Acid  cpd00652_e
18                      D-malic acid  cpd00386_e
19                  

For each metabolite in biolog_medias dataframe, I will create a list of the single carbon source, as well as all of the metabolites present in biolog_salts dataframe. With each of these lists, I will open only those metabolites' exchange reactions, allowing flux of those metabolites into the model.  

In [14]:
biolog_compositions = {}
biolog_salts_list = []
for i in biolog_salts.itertuples():
    biolog_salts_list.append(i.ID)
biolog_medias_list = []    
for i in biolog_medias.itertuples(): 
    biolog_medias_list.append(i.ID)

#add the metabolite of choice 
for i in biolog_medias_list:
    m = [i]
    media_and_salts = biolog_salts_list + m
    #print (media_and_salts)
    print (media_and_salts)

    #now I have a list including all of the salts and one carbon source, add to a dictionary of the one carbon ID and the list of salts and carbon
    
    if i in media_and_salts:
        biolog_compositions[i] = media_and_salts
    else:
        pass
    
#to ensure that the list of media sources does not add on to the end of the list, use list concatation to extend the list instead of appending the original list 


['cpd00001_e', 'cpd00007_e', 'cpd00009_e', 'cpd00011_e', 'cpd00013_e', 'cpd00030_e', 'cpd00034_e', 'cpd00048_e', 'cpd00058_e', 'cpd00063_e', 'cpd00067_e', 'cpd00099_e', 'cpd00149_e', 'cpd00205_e', 'cpd00254_e', 'cpd00971_e', 'cpd10515_e', 'cpd10516_e', 'cpd00121_e']
['cpd00001_e', 'cpd00007_e', 'cpd00009_e', 'cpd00011_e', 'cpd00013_e', 'cpd00030_e', 'cpd00034_e', 'cpd00048_e', 'cpd00058_e', 'cpd00063_e', 'cpd00067_e', 'cpd00099_e', 'cpd00149_e', 'cpd00205_e', 'cpd00254_e', 'cpd00971_e', 'cpd10515_e', 'cpd10516_e', 'cpd00281_e']
['cpd00001_e', 'cpd00007_e', 'cpd00009_e', 'cpd00011_e', 'cpd00013_e', 'cpd00030_e', 'cpd00034_e', 'cpd00048_e', 'cpd00058_e', 'cpd00063_e', 'cpd00067_e', 'cpd00099_e', 'cpd00149_e', 'cpd00205_e', 'cpd00254_e', 'cpd00971_e', 'cpd10515_e', 'cpd10516_e', 'cpd00108_e']
['cpd00001_e', 'cpd00007_e', 'cpd00009_e', 'cpd00011_e', 'cpd00013_e', 'cpd00030_e', 'cpd00034_e', 'cpd00048_e', 'cpd00058_e', 'cpd00063_e', 'cpd00067_e', 'cpd00099_e', 'cpd00149_e', 'cpd00205_e', 'c

#check to see if the metabolites are in the model
for met in model.metabolites:
    if met.id in biolog_medias_list:
        #print (met.id + " is in the model")
    else:
        #print (met.id + " is not in biolog and salts media")


In [21]:
model.metabolites.cpd00308_e

0,1
Metabolite identifier,cpd00308_e
Name,Malonate
Memory address,0x0122e99da0
Formula,C3H2O4
Compartment,e
In 2 reaction(s),"rxn12432_c, EXcpd00308_e"


In [25]:
#I now have a dictionary that has: a key identifying the carbon source and a value that contains the carbon source and all the biolog salts
#now I want to iterate over the dictionary values(which are lists) and use them as the model medium
#as per https://cobrapy.readthedocs.io/en/latest/media.html
media_names = []
for name in biolog_medias_list:
    name = str(name) + "_medium"
    media_names.append(name)

with model:
    for key, value in biolog_compositions.items():
        medium = {}
        if key in model.metabolites:
            for met in value:
                medium["EX_" + met] = 1000
            #print (medium)
            model.medium = medium
            print(key + " produces " + str(model.optimize()) + " units of growth.")
        elif met in value:
            for met in value:
                medium["EX" + met] = 1000
            #print (medium)
            model.medium = medium
            print(key + " produces " + str(model.optimize()) + " units of growth.")
        else:
            print (key, " is not in the model" )
            pass
    #model.medium

cpd00121_e produces <Solution 0.000 at 0x122fb54a8> units of growth.
cpd00281_e produces <Solution 0.000 at 0x122fb5400> units of growth.
cpd00108_e produces <Solution 0.000 at 0x122fb5438> units of growth.
cpd00053_e produces <Solution 0.000 at 0x122fb5be0> units of growth.
cpd00129_e produces <Solution 0.000 at 0x122fb5ef0> units of growth.
cpd02143_e  is not in the model
cpd00314_e produces <Solution 0.000 at 0x122fb5c18> units of growth.
cpd00138_e produces <Solution 0.000 at 0x122fb54a8> units of growth.
cpd00027_e produces <Solution 0.000 at 0x122fb5c88> units of growth.
cpd00224_e produces <Solution 0.000 at 0x122fb5438> units of growth.
cpd00076_e produces <Solution 0.000 at 0x122fb5400> units of growth.
cpd01293_e produces <Solution 0.000 at 0x122fb5ef0> units of growth.
cpd00023_e produces <Solution 0.000 at 0x122fb5c18> units of growth.
cpd00154_e produces <Solution 0.000 at 0x122fb5be0> units of growth.
cpd00035_e produces <Solution 0.000 at 0x122fb5f28> units of growth.
cp

KeyError: 'EX_cpd00308_e'

In [43]:
print (media_names)

['cpd00121_e', 'cpd00281_e', 'cpd00108_e', 'cpd00053_e', 'cpd00129_e', 'cpd02143_e', 'cpd00314_e', 'cpd00138_e', 'cpd00027_e', 'cpd00224_e', 'cpd00076_e', 'cpd01293_e', 'cpd00023_e', 'cpd00154_e', 'cpd00035_e', 'cpd00249_e', 'cpd00222_e', 'cpd00652_e', 'cpd00386_e', 'cpd00117_e', 'cpd00246_e', 'cpd00132_e', 'cpd00054_e', 'cpd00082_e', 'cpd00571_e', 'cpd00105_e', 'cpd00477_e', 'cpd00164_e', 'cpd00137_e', 'cpd01307_e', 'cpd00248_e', 'cpd00130_e', 'cpd00106_e', 'cpd11585_e', 'cpd00041_e', 'cpd00119_e', 'cpd00280_e', 'cpd00020_e', 'cpd00036_e', 'cpd00100_e', 'cpd02351_e', 'cpd00051_e', 'cpd19018_e', 'cpd24420_e', 'cpd00107_e', 'cpd00432_e', 'cpd11748_e', 'cpd24418_e', 'cpd11601_e', 'cps11602_e', 'cpd23859_e', 'cpd00024_e', 'cpd13391_e', 'cpd00588_e', 'cpd00182_e', 'cpd00029_e', 'cpd28989_e', 'cpd00308_e', 'cpd24417_e', 'cpd00339_e', 'cpd00797_e', 'cpd13392_e', 'cpd00080_e', 'cpd01107_e', 'cpd00066_e', 'cpd00211_e', 'cpd00136_e', 'cpd24429_e', 'cpd00820_e', 'cpd01200_e', 'cpd00162_e', 'cpd0