# Supplemental Notebook B: Refining Yarrowia Biomass Reaction with Strain-Specific Data
This Jupyter notebook focuses on updating the biomass reaction in a Genome-Scale Model (GSM) of Yarrowia lipolytica with specific data derived from our strain. The objective is to refine the GSM's accuracy in predicting metabolic fluxes, especially when analyzing different carbon sources such as glucose and oleic acid. This enhanced model aims to provide a more accurate representation of our strain's metabolic capabilities and limitations.

### Objective
The primary goal is to refine the biomass reaction in the GSM by incorporating strain-specific data. This involves integrating 13C-Metabolic Flux Analysis (13C-MFA) data and other relevant biomass composition data into the GSM. This process is expected to improve the model's predictive accuracy for metabolic simulations and flux balance analysis.

### Important Functions
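`create_biomass_reaction`: Builds a new `cobra.Reaction` from a table of metabolite IDs and coefficients, negating each stored coefficient so that the reaction uses the model's sign convention (reactants negative).
`get_gsm_mmol_c`: Returns the total mmol of carbon that all metabolites of one type (e.g. amino acids or lipids) contribute to a given biomass reaction.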


In [1]:
import pandas as pd
import cobra

### Load genome scale model

In [2]:
# Read the genome-scale model from its JSON file
gsm_json_path = "../genome_scale_models/iYLI647_corr_2.json"
model = cobra.io.json.load_json_model(gsm_json_path)

# Bare expression so the notebook renders the model summary
model

0,1
Name,model
Memory address,105339e40
Number of metabolites,1121
Number of reactions,1348
Number of genes,648
Number of groups,0
Objective expression,1.0*biomass_C - 1.0*biomass_C_reverse_c1d5c
Compartments,"c, e, m, n, x, r, g, v"


### Load 13C-MFA biomass reaction data

In [3]:
# Read the 13C-MFA biomass composition table (one row per precursor)
mfa_composition_csv = "../data/biomass_composition/yarrowia_mfa_biomass.csv"
mfa_biomass_composition_df = pd.read_csv(mfa_composition_csv)

# Bare expression so the notebook renders the table
mfa_biomass_composition_df

Unnamed: 0,metabolite,full_name,gsm_metabolite_id,mfa_coefficient_glucose,mfa_coefficient_oil
0,ALA,Alanine,ala_L[c],0.38,0.19
1,ARG,Arginine,arg_L[c],0.13,0.08
2,ASN,Asparagine,asn_L[c],0.165,0.09
3,ASP,Aspartate,asp_L[c],0.317,0.242
4,CYS,Cysteine,cys_L[c],0.0036,0.0036
5,GLN,Glutamine,gln_L[c],0.26,0.145
6,GLU,Glutamate,glu_L[c],0.26,0.145
7,GLY,Glycine,gly[c],0.35,0.23
8,HIS,Histidine,his_L[c],0.06,0.03
9,ILE,Isoleucine,ile_L[c],0.15,0.09


### Load GSM biomass metabolite classification

In [4]:
# Read the table that assigns each GSM biomass metabolite a metabolite_type
gsm_biomass_csv = "../data/biomass_composition/yarrowia_gsm_biomass.csv"
gsm_biomass_reaction_df = pd.read_csv(gsm_biomass_csv)

# Bare expression so the notebook renders the table
gsm_biomass_reaction_df

Unnamed: 0,Full Name,gsm_metabolite_id,metabolite_type
0,"1,3-beta-D-Glucan",13BDglcn[c],Carbohydrate
1,ADP,adp[c],Energy Molecule
2,L-Alanine,ala_L[c],Amino Acid
3,AMP,amp[c],RNA
4,L-Arginine,arg_L[c],Amino Acid
5,L-Asparagine,asn_L[c],Amino Acid
6,L-Aspartate,asp_L[c],Amino Acid
7,ATP,atp[c],Energy Molecule
8,Biomass,biomass[c],Other
9,Chitin Monomer,chitin[c],Carbohydrate


### Load MFA mmol C data

In [5]:
# Read the MFA carbon totals (mmol C) per precursor type for glucose and oil
mfa_mmol_c_csv = "../data/biomass_composition/mfa_biomass_mmol_c.csv"
mfa_biomass_mmol_c_df = pd.read_csv(mfa_mmol_c_csv)

# Bare expression so the notebook renders the table
mfa_biomass_mmol_c_df

Unnamed: 0,type,glucose,oil
0,Amino Acid,15.778,9.328
1,Lipid,5.574,13.936
2,DNA,0.282,0.282
3,RNA,0.817,0.817
4,Carbohydrate,10.2,10.2
5,Total,32.651,34.563


### Determine the mmols of C from each precursor type in biomass reaction

In [6]:
def get_gsm_mmol_c(model, gsm_biomass_reaction_df, type, biomass_reaction_id):
    """Return the mmol of carbon one metabolite type contributes to a biomass reaction.

    For every metabolite of the reaction whose classification equals ``type``,
    the negated stoichiometric coefficient is multiplied by the metabolite's
    number of carbon atoms, and the products are summed.

    Args:
        model: Model object exposing ``reactions.get_by_id``.
        gsm_biomass_reaction_df: DataFrame with ``gsm_metabolite_id`` and
            ``metabolite_type`` columns classifying the biomass metabolites.
        type: Metabolite type to total (compared for equality with the
            ``metabolite_type`` column). The name shadows the builtin but is
            kept so that existing callers are unaffected.
        biomass_reaction_id: ID of the biomass reaction to inspect.

    Returns:
        The summed ``-coefficient * carbon_count`` over matching metabolites
        (0 when nothing matches).

    Raises:
        IndexError: If a metabolite of the reaction has no row in
            ``gsm_biomass_reaction_df``.
    """
    # Build the id -> type lookup once instead of filtering the DataFrame for
    # every metabolite. setdefault keeps the first row of a duplicated id,
    # which is what the former `.values[0]` lookup selected.
    type_by_id = {}
    for listed_id, listed_type in zip(
        gsm_biomass_reaction_df['gsm_metabolite_id'],
        gsm_biomass_reaction_df['metabolite_type'],
    ):
        type_by_id.setdefault(listed_id, listed_type)

    biomass_reaction = model.reactions.get_by_id(biomass_reaction_id)

    total_mmols = 0
    for metabolite, stoichiometry in biomass_reaction.metabolites.items():
        if metabolite.id not in type_by_id:
            # Same exception type as the old empty-array lookup, but with a
            # message that names the offending metabolite.
            raise IndexError(
                f"metabolite '{metabolite.id}' of reaction "
                f"'{biomass_reaction_id}' has no metabolite_type entry"
            )

        if type_by_id[metabolite.id] != type:
            continue

        # Reactants carry negative coefficients, so negate to get a positive
        # contribution; metabolites without carbon count as 0.
        total_mmols += -1 * stoichiometry * metabolite.elements.get('C', 0)

    return total_mmols

# Print the mmol of C that each precursor type contributes to the default
# biomass reaction, followed by the sum over all listed types.
total_mmols = 0
# The loop variable is deliberately not called `type`: at notebook scope that
# would rebind the builtin for every later cell.
for precursor_type in ['Amino Acid', 'Lipid', 'DNA', 'RNA', 'Carbohydrate']:
    mmol_C = get_gsm_mmol_c(model, gsm_biomass_reaction_df, precursor_type, 'biomass_C')
    total_mmols += mmol_C
    print(f'{precursor_type}: {mmol_C}')

print(f'Total: {total_mmols}')

Amino Acid: 16.054480973000004
Lipid: 5.211946986999999
DNA: 0.390416315
RNA: 2.111885953
Carbohydrate: 14.104663089999999
Total: 37.873393318


### Add default, glucose, and oil columns to GSM biomass reaction df

In [7]:
# Build three coefficient columns for the GSM biomass table:
#   default_coefficient - negated coefficient from the 'biomass_C' reaction
#   glucose_coefficient - default rescaled to the MFA carbon total on glucose
#   oil_coefficient     - default rescaled to the MFA carbon total on oil
default_coefficients = []
glucose_coefficients = []
oil_coefficients = []

c_limited_biomass_reaction = model.reactions.get_by_id("biomass_C")

# Metabolite types whose coefficients are rescaled; all other types keep a
# scale factor of 1.
scaled_types = ['Amino Acid', 'Lipid', 'DNA', 'RNA', 'Carbohydrate']

# (glucose, oil) scale factors per metabolite type. They depend only on the
# type, so each pair is computed on first use and then reused instead of
# being recomputed for every row.
scale_factors_by_type = {}

# loop over the metabolites listed in the biomass table
for _, row in gsm_biomass_reaction_df.iterrows():

    metabolite = model.metabolites.get_by_id(row.gsm_metabolite_id)

    # classification of this metabolite (named to avoid shadowing builtin `type`)
    metabolite_type = row.metabolite_type

    # get coefficient (reactants have negative coefficients)
    default_coefficient = -1 * c_limited_biomass_reaction.metabolites[metabolite]

    if metabolite_type in scaled_types:
        if metabolite_type not in scale_factors_by_type:
            # carbon currently assigned to this type in the default reaction
            gsm_mmols = get_gsm_mmol_c(model, gsm_biomass_reaction_df, metabolite_type, 'biomass_C')

            # carbon assigned to this type by the MFA data
            is_type_row = mfa_biomass_mmol_c_df['type'] == metabolite_type
            glucose_mfa_mmols = mfa_biomass_mmol_c_df.loc[is_type_row, 'glucose'].values[0]
            oil_mfa_mmols = mfa_biomass_mmol_c_df.loc[is_type_row, 'oil'].values[0]

            scale_factors_by_type[metabolite_type] = (
                glucose_mfa_mmols / gsm_mmols,
                oil_mfa_mmols / gsm_mmols,
            )

        glucose_scale_factor, oil_scale_factor = scale_factors_by_type[metabolite_type]
    else:
        glucose_scale_factor = 1
        oil_scale_factor = 1

    default_coefficients.append(default_coefficient)
    glucose_coefficients.append(default_coefficient * glucose_scale_factor)
    oil_coefficients.append(default_coefficient * oil_scale_factor)

gsm_biomass_reaction_df['default_coefficient'] = default_coefficients
gsm_biomass_reaction_df['glucose_coefficient'] = glucose_coefficients
gsm_biomass_reaction_df['oil_coefficient'] = oil_coefficients

# Save the biomass reaction df to csv
gsm_biomass_reaction_df.to_csv("../results/biomass_composition/gsm_biomass_composition.csv", index=False)

gsm_biomass_reaction_df

Unnamed: 0,Full Name,gsm_metabolite_id,metabolite_type,default_coefficient,glucose_coefficient,oil_coefficient
0,"1,3-beta-D-Glucan",13BDglcn[c],Carbohydrate,0.943397,0.682232,0.682232
1,ADP,adp[c],Energy Molecule,-23.09,-23.09,-23.09
2,L-Alanine,ala_L[c],Amino Acid,0.567939,0.558158,0.329985
3,AMP,amp[c],RNA,0.055401,0.021432,0.021432
4,L-Arginine,arg_L[c],Amino Acid,0.125563,0.123401,0.072955
5,L-Asparagine,asn_L[c],Amino Acid,0.186498,0.183286,0.10836
6,L-Aspartate,asp_L[c],Amino Acid,0.186531,0.183319,0.108379
7,ATP,atp[c],Energy Molecule,23.09,23.09,23.09
8,Biomass,biomass[c],Other,-1.0,-1.0,-1.0
9,Chitin Monomer,chitin[c],Carbohydrate,0.868358,0.627967,0.627967


### Create glucose and oil biomass equations and add to model

In [8]:
def create_biomass_reaction(df, coefficient_column, reaction_id, gsm_model=None):
    """Build a new biomass reaction from a table of metabolite coefficients.

    Args:
        df: DataFrame with a ``gsm_metabolite_id`` column and the column
            named by ``coefficient_column``.
        coefficient_column: Name of the column holding the coefficients.
            Each value is multiplied by -1 before being added to the reaction.
        reaction_id: ID given to the new ``cobra.Reaction``.
        gsm_model: Model whose ``metabolites`` are looked up by ID. Defaults
            to the notebook-level ``model``, which is what this function
            always used before the parameter existed.

    Returns:
        The new ``cobra.Reaction``. It is not added to any model here.
    """
    # Fall back to the module-level model so existing three-argument calls
    # behave exactly as before.
    source_model = model if gsm_model is None else gsm_model

    # Create a new Reaction object
    biomass_reaction = cobra.Reaction(reaction_id)

    # Add each listed metabolite with its sign-flipped coefficient
    for metabolite_id, coefficient in zip(df['gsm_metabolite_id'], df[coefficient_column]):
        metabolite = source_model.metabolites.get_by_id(metabolite_id)
        biomass_reaction.add_metabolites({metabolite: -1 * coefficient})

    return biomass_reaction

# Build one biomass reaction per carbon source from the matching coefficient column
glucose_biomass_reaction, oleic_acid_biomass_reaction = (
    create_biomass_reaction(gsm_biomass_reaction_df, column_name, new_reaction_id)
    for column_name, new_reaction_id in (
        ('glucose_coefficient', 'biomass_glucose'),
        ('oil_coefficient', 'biomass_oil'),
    )
)

# Register both new reactions with the model in a single call
model.add_reactions([glucose_biomass_reaction, oleic_acid_biomass_reaction])


### Check the mmol of C in the default, glucose, and oil biomass reactions

In [9]:
# Print, for each biomass reaction, the mmol of C contributed by every
# precursor type and the total over those types.
precursor_types = ['Amino Acid', 'Lipid', 'DNA', 'RNA', 'Carbohydrate']

# (printed heading, reaction ID) in the order the sections are printed
biomass_variants = [
    ('Default', 'biomass_C'),
    ('Glucose', 'biomass_glucose'),
    ('Oil', 'biomass_oil'),
]

for position, (heading, biomass_reaction_id) in enumerate(biomass_variants):
    # blank line between sections, none before the first or after the last
    if position:
        print()
    print(heading)

    total_mmols = 0
    # loop variable is not named `type`, which would rebind the builtin
    for precursor_type in precursor_types:
        mmol_C = get_gsm_mmol_c(model, gsm_biomass_reaction_df, precursor_type, biomass_reaction_id)
        total_mmols += mmol_C
        print(f'{precursor_type}: {mmol_C}')

    print(f'Total: {total_mmols}')

Default
Amino Acid: 16.054480973000004
Lipid: 5.211946986999999
DNA: 0.390416315
RNA: 2.111885953
Carbohydrate: 14.104663089999999
Total: 37.873393318

Glucose
Amino Acid: 15.777999999999995
Lipid: 5.574
DNA: 0.282
RNA: 0.817
Carbohydrate: 10.200000000000001
Total: 32.650999999999996

Oil
Amino Acid: 9.327999999999998
Lipid: 13.936000000000003
DNA: 0.282
RNA: 0.817
Carbohydrate: 10.200000000000001
Total: 34.563


### Check the elemental composition of the MFA biomass reaction

In [10]:
# Display the 13C-MFA biomass composition table again; the element tally below iterates over its rows
mfa_biomass_composition_df

Unnamed: 0,metabolite,full_name,gsm_metabolite_id,mfa_coefficient_glucose,mfa_coefficient_oil
0,ALA,Alanine,ala_L[c],0.38,0.19
1,ARG,Arginine,arg_L[c],0.13,0.08
2,ASN,Asparagine,asn_L[c],0.165,0.09
3,ASP,Aspartate,asp_L[c],0.317,0.242
4,CYS,Cysteine,cys_L[c],0.0036,0.0036
5,GLN,Glutamine,gln_L[c],0.26,0.145
6,GLU,Glutamate,glu_L[c],0.26,0.145
7,GLY,Glycine,gly[c],0.35,0.23
8,HIS,Histidine,his_L[c],0.06,0.03
9,ILE,Isoleucine,ile_L[c],0.15,0.09


In [11]:
# Look up the formula of 'ac[c]', the metabolite that the element tally below uses in place of 'accoa[c]'
model.metabolites.get_by_id('ac[c]').formula

'C2H3O2'

In [12]:
# Tally how much of each element (C, H, N, O, P, S) the MFA biomass
# coefficients account for, separately for the glucose and oil columns.
tracked_elements = ('C', 'H', 'N', 'O', 'P', 'S')

# printed heading -> coefficient column in mfa_biomass_composition_df
coefficient_columns = {
    'Glucose': 'mfa_coefficient_glucose',
    'Oil': 'mfa_coefficient_oil',
}

# Rows for these metabolites are looked up but left out of the totals
# (presumably because they are cofactors rather than biomass material - confirm).
excluded_metabolite_ids = ['atp[c]', 'nadh[c]', 'nadph[c]']

# keep track of total amount of each element, per carbon source
element_totals = {
    heading: {element: 0 for element in tracked_elements}
    for heading in coefficient_columns
}

for _, row in mfa_biomass_composition_df.iterrows():
    # get the metabolite
    metabolite_id = row['gsm_metabolite_id']

    # 'accoa[c]' is counted using the formula of 'ac[c]'
    # (presumably so the CoA carrier's atoms are not counted - confirm).
    if metabolite_id == 'accoa[c]':
        metabolite_id = 'ac[c]'

    metabolite = model.metabolites.get_by_id(metabolite_id)

    # get the number of atoms of each tracked element (0 when absent)
    atom_counts = {
        element: metabolite.elements.get(element, 0)
        for element in tracked_elements
    }

    if metabolite_id in excluded_metabolite_ids:
        continue

    print(metabolite_id, atom_counts['C'])

    # update total amount of each element for both carbon sources
    for heading, column_name in coefficient_columns.items():
        coefficient = row[column_name]
        for element in tracked_elements:
            element_totals[heading][element] += coefficient * atom_counts[element]

for position, heading in enumerate(coefficient_columns):
    # blank line between the two sections only
    if position:
        print()
    print(heading)
    for element in tracked_elements:
        print(f'{element}: {element_totals[heading][element]}')


ala_L[c] 3
arg_L[c] 6
asn_L[c] 4
asp_L[c] 4
cys_L[c] 3
gln_L[c] 5
glu_L[c] 5
gly[c] 2
his_L[c] 6
ile_L[c] 6
leu_L[c] 6
lys_L[c] 6
met_L[c] 5
phe_L[c] 9
pro_L[c] 5
ser_L[c] 3
thr_L[c] 4
trp_L[c] 11
tyr_L[c] 9
val_L[c] 5
ac[c] 2
dhap[c] 3
r5p[c] 5
fum[c] 4
g6p[c] 6
Glucose
C: 32.590999999999994
H: 61.16229999999999
N: 4.749699999999999
O: 31.1405
P: 1.947
S: 0.0478

Oil
C: 34.503
H: 60.47829999999999
N: 2.7897000000000003
O: 36.6105
P: 2.133
S: 0.0478


### Save the updated model

In [13]:
# Write the model, now including the two added biomass reactions, to a new JSON file
updated_gsm_json_path = "../genome_scale_models/iYLI647_corr_3.json"
cobra.io.save_json_model(model, updated_gsm_json_path)