# Spore composition

In [1]:
from __future__ import print_function, division, absolute_import

import sys

import qminospy
from qminospy.me2 import ME_NLP

# python imports
from copy import copy
import re
from os.path import join, dirname, abspath
import sys
sys.path.append('/home/UCSD/cobra_utils')
from collections import defaultdict
import pickle

# third party imports
import pandas
import cobra
from tqdm import tqdm
import numpy as np
import scipy

# COBRAme
import cobrame
from cobrame.util import building, mu, me_model_interface
from cobrame.io.json import save_json_me_model, save_reduced_json_me_model

# ECOLIme
import ecolime
from ecolime import (transcription, translation, flat_files, generics, formulas, compartments)
from ecolime.util.helper_functions import *

%load_ext autoreload
%autoreload 2
print(cobra.__file__)
print(cobrame.__file__)
print(ecolime.__file__)
ecoli_files = dirname(abspath(ecolime.__file__))

  warn("Install lxml for faster SBML I/O")
  warn("cobra.io.sbml requires libsbml")


/home/jt/Downloads/cobrapy-0.5.11/cobra/__init__.py
/home/jt/UCSD/cobrame-master/cobrame/__init__.py
/home/jt/UCSD/bacillusme-master/ecolime/__init__.py


In [2]:
eco_directory = join(flat_files.ecoli_files_dir, 'iJO1366.json')
ijo_directory = join(flat_files.ecoli_files_dir, 'iYO844.json')
uni_directory = join(flat_files.ecoli_files_dir, 'universal_model.json')

eco = cobra.io.load_json_model(eco_directory)
bsub = cobra.io.load_json_model(ijo_directory)
uni = cobra.io.load_json_model(uni_directory)

bsub.optimize()

<Solution 0.12 at 0x7f53e4e52518>

In [3]:
with open(ecoli_files+'/me_models/solution.pickle', 'rb') as solution:
    me = pickle.load(solution)

### Composition dictionary
Spore and mother cell are currently impossible to properly separate to analyze chemically. Therefore, composition similarity has to be assumed to model the spore.

In [4]:
biomass_groups = ['protein','lipids','carbohydrates','DPA']
biomass_composition_df = pd.DataFrame(index=biomass_groups,columns=['mass_frac','breakdown'])

### Lipids

##### Lipid metabolites in the model:

In [6]:
keys = ['glycerol','Cardio','Dipico','glucosyl','ethanolamine']
for m in me.metabolites:
    if isinstance(m,cobrame.Metabolite) and any(k for k in keys if k in m.name):
        print(m.id,'\t',m.name)

12dag3p_BS_c 	 1 2 diacyl sn glycerol 3 phosphate C3436H6572O800P100
12dgr_BS_c 	 1 2 diacylglycerol C3436H6672O500
1ag3p_BS_c 	 1 Acyl sn glycerol 3 phsophate C1868H3636O700P100
3ig3p_c 	 C'-(3-Indolyl)-glycerol 3-phosphate
6pgg_c 	 6-Phospho-beta-D-glucosyl-(1,4)-D-glucose
DPA_c 	 Dipicolinate C7H3NO4
cdlp_BS_c 	 Cardiolipin  B subtilis  C7172H13744O1700P200
cdpdag_BS_c 	 CDPdiacylglycerol  B subtilis  C4336H7772N300O1500P200
cdpglyc_c 	 CDPglycerol C12H19N3O13P2
d12dg_BS_c 	 Diglucosyl 1 2 diacylglycerol C4636H8672O1500
eig3p_c 	 D-erythro-1-(Imidazol-4-yl)glycerol 3-phosphate
g3pe_c 	 Sn-Glycero-3-phosphoethanolamine
g3pg_c 	 Glycerophosphoglycerol
lysylpgly_BS_c 	 Lysylphophatidylglycerol C4336H8572N200O1100P100
m12dg_BS_c 	 Monoglucosyl 1 2 diacylglycerol C4036H7672O1000
pgly_BS_c 	 Phosphatidylglycerol  B subtils  C3736H7272O1000P100
psetha_BS_c 	 Phosphatidylethanolamine  B subtilis  C3636H7172N100O800P100
t12dg_BS_c 	 Triglucosyl 1 2 diacylglycerol C5236H9672O2000


##### Total lipid content of 4% from [ref](https://jb.asm.org/content/jb/99/1/298.full.pdf). Lipid breakdown from [ref](https://www.ncbi.nlm.nih.gov/pubmed/9422599).

In [7]:
# Total
biomass_composition_df.loc['lipids']=0.037
# Breakdown
temp_dict = {
    'm12dg_BS_c':0.08,
    'd12dg_BS_c':0.126,
    't12dg_BS_c':0.089,
    'cdlp_BS_c':0.008,
    'pgly_BS_c':0.162,
    'lysylpgly_BS_c':0.024,
    'psetha_BS_c':0.493,
}
temp_df = pd.DataFrame.from_dict({'mass_frac':temp_dict})

In [8]:
temp_df.div(temp_df.sum())

Unnamed: 0,mass_frac
cdlp_BS_c,0.008147
d12dg_BS_c,0.12831
lysylpgly_BS_c,0.02444
m12dg_BS_c,0.081466
pgly_BS_c,0.164969
psetha_BS_c,0.502037
t12dg_BS_c,0.090631


In [9]:
biomass_composition_df.loc['lipids']['breakdown'] = temp_df

### Protein

Protein content from [ref](https://books.google.com.mx/books?id=xU85I7R-kFsC&pg=PA296&lpg=PA296&dq=%22gross+composition+of+spores+and+vegetative+cells+of+bacillus+megaterium%22&source=bl&ots=zJcGxeZczb&sig=ACfU3U1Y8pGJK6-Bl9KLGJ4JQciul1sdIw&hl=en&sa=X&ved=2ahUKEwjDwZb0j9LoAhXDk60KHRPFCccQ6AEwAHoECAIQKQ#v=onepage&q=%22gross%20composition%20of%20spores%20and%20vegetative%20cells%20of%20bacillus%20megaterium%22&f=false). This ref is for B. megaterium, but there is no other option. Breakdown from [ref](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2106236/pdf/579.pdf).

In [10]:
# Total
biomass_composition_df.loc['protein']=0.68
# Breakdown
temp_dict = {
    'ala__L_c':0.099,
    'glu__L_c':0.01,
    'gly_c':0.235,
    'ser__L_c':0.101,
    'thr__L_c':0.069,
    'val__L_c':0.059,
    'leu__L_c':0.099/2,
    'ile__L_c':0.099/2,
    'phe__L_c':0.064,
    'tyr__L_c':0.102,
    'asp__L_c':0.098,
    'arg__L_c':0.061,
    'his__L_c':0.071,
    'lys__L_c':0.142,
}

In [11]:
for m in temp_dict.keys():
    met = me.metabolites.get_by_id(m)
    weight = met.formula_weight
    temp_dict[m] *= weight
temp_df = pd.DataFrame.from_dict({'mass_frac':temp_dict})

In [12]:
biomass_composition_df.loc['protein']['breakdown'] = temp_df.div(temp_df.sum())

### Dipicolinate (DPA)

DPA content from [ref](https://onlinelibrary.wiley.com/doi/abs/10.1002/food.19880321025).

In [13]:
biomass_composition_df.loc['DPA']['mass_frac'] = 0.178
biomass_composition_df.loc['DPA']['breakdown'] = pd.DataFrame.from_dict({'mass_frac':{'DPA_c':1.0}})

### Carbohydrates

Carbohydrate content from [ref](https://books.google.com.mx/books?id=xU85I7R-kFsC&pg=PA296&lpg=PA296&dq=%22gross+composition+of+spores+and+vegetative+cells+of+bacillus+megaterium%22&source=bl&ots=zJcGxeZczb&sig=ACfU3U1Y8pGJK6-Bl9KLGJ4JQciul1sdIw&hl=en&sa=X&ved=2ahUKEwjDwZb0j9LoAhXDk60KHRPFCccQ6AEwAHoECAIQKQ#v=onepage&q=%22gross%20composition%20of%20spores%20and%20vegetative%20cells%20of%20bacillus%20megaterium%22&f=false). Breakdown from ref

In [14]:
carb_frac = 0.048
biomass_composition_df.loc['carbohydrates']['mass_frac'] = carb_frac

In [15]:
# Fill with model carb composition
biomass_reaction = bsub.reactions.BIOMASS_BS_10
filtered_metabolite_list = []
carb_identifiers = ['glycogen','.+ose']
temp_dict = {}
for met in biomass_reaction.reactants:
        for carb_id in carb_identifiers:
            carb_regex = re.compile(carb_id)
            if re.match(carb_regex,met.id) or re.match(carb_regex,met.name):
                filtered_metabolite_list.append(met)
                weight = met.formula_weight/1000
                coefficient = biomass_reaction.metabolites[met]
                temp_dict[met.id] = -weight*coefficient
                break
temp_df = pd.DataFrame.from_dict({'mass_frac':temp_dict})

In [16]:
biomass_composition_df.loc['carbohydrates']['breakdown'] = temp_df.div(temp_df.sum())

In [17]:
biomass_composition_df['mass_frac'] = biomass_composition_df['mass_frac'].div(biomass_composition_df['mass_frac'].sum())

## Global

In [18]:
gf = []
gc = []
mets = []
names = []
classes = []
for group,bd in biomass_composition_df['breakdown'].items():
    mass_frac = biomass_composition_df.loc[group]['mass_frac']
    for m,rel_frac in bd['mass_frac'].items():
        global_frac = mass_frac*rel_frac # g/gDW
        met = bsub.metabolites.get_by_id(m)
        weight = met.formula_weight /1000 # g/mmol
        coeff = global_frac/weight
        mets.append(m)
        gf.append(global_frac)
        gc.append(coeff)
        names.append(met.name)
        classes.append(group)
        
global_composition_df = pd.DataFrame(index=mets)
global_composition_df['name'] = names
global_composition_df['class'] = classes
global_composition_df['mass_frac'] = gf
global_composition_df['coeff'] = gc

global_composition_df

Unnamed: 0,name,class,mass_frac,coeff
ala__L_c,L-Alanine,protein,0.042049,0.471971
arg__L_c,L-Arginine,protein,0.050953,0.290811
asp__L_c,L-Aspartate,protein,0.061715,0.467204
glu__L_c,L-Glutamate,protein,0.006966,0.047674
gly_c,Glycine,protein,0.0841,1.120336
his__L_c,L-Histidine,protein,0.052517,0.338484
ile__L_c,L-Isoleucine,protein,0.030955,0.235986
leu__L_c,L-Leucine,protein,0.030955,0.235986
lys__L_c,L-Lysine,protein,0.099647,0.676969
phe__L_c,L-Phenylalanine,protein,0.050401,0.305113


In [19]:
global_composition_df.to_csv('spore_composition.csv')

In [20]:
global_composition_df.sum()

name         L-AlanineL-ArginineL-AspartateL-GlutamateGlyci...
class        proteinproteinproteinproteinproteinproteinprot...
mass_frac                                             0.999294
coeff                                                  6.91565
dtype: object