# Supplemental Notebook B: Refining Yarrowia Biomass Reaction with Strain-Specific Data
This Jupyter notebook focuses on updating the biomass reaction in a Genome-Scale Model (GSM) of Yarrowia lipolytica with specific data derived from our strain. The objective is to refine the GSM's accuracy in predicting metabolic fluxes, especially when analyzing different carbon sources such as glucose and oleic acid. This enhanced model aims to provide a more accurate representation of our strain's metabolic capabilities and limitations.

### Objective
The primary goal is to refine the biomass reaction in the GSM by incorporating strain-specific data. This involves integrating 13C-Metabolic Flux Analysis (13C-MFA) data and other relevant biomass composition data into the GSM. This process is expected to improve the model's predictive accuracy for metabolic simulations and flux balance analysis.

### Important Functions
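- `create_biomass_reaction`: Builds a new cobra `Reaction` from a dataframe of GSM metabolite IDs and a selected coefficient column.
- `get_gsm_mmol_c`: Returns the mmol of carbon that one precursor type (e.g. amino acids or lipids) contributes to a given biomass reaction.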


In [1]:
import pandas as pd
import cobra

### Load genome scale model

In [2]:
# Read the iYLI647 genome-scale model from its JSON file
gsm_json_path = "../genome_scale_models/iYLI647_corr_2.json"
model = cobra.io.json.load_json_model(gsm_json_path)

# Display the model summary
model

0,1
Name,model
Memory address,10617e470
Number of metabolites,1121
Number of reactions,1348
Number of genes,648
Number of groups,0
Objective expression,1.0*biomass_C - 1.0*biomass_C_reverse_c1d5c
Compartments,"c, e, m, n, x, r, g, v"


### Load 13C-MFA biomass reaction data

In [3]:
# 13C-MFA biomass coefficients: one row per metabolite, with a glucose and an oil column
mfa_biomass_csv = "../data/biomass_composition/yarrowia_mfa_biomass_jan_2024.csv"
mfa_biomass_composition_df = pd.read_csv(mfa_biomass_csv)

mfa_biomass_composition_df

Unnamed: 0,metabolite,full_name,gsm_metabolite_id,mfa_coefficient_glucose,mfa_coefficient_oil
0,ALA,Alanine,ala_L[c],0.355,0.195
1,ARG,Arginine,arg_L[c],0.13,0.085
2,ASN,Asparagine,asn_L[c],0.1725,0.09
3,ASP,Aspartate,asp_L[c],0.3245,0.242
4,CYS,Cysteine,cys_L[c],0.0036,0.0036
5,GLN,Glutamine,gln_L[c],0.2425,0.145
6,GLU,Glutamate,glu_L[c],0.2425,0.145
7,GLY,Glycine,gly[c],0.345,0.23
8,HIS,Histidine,his_L[c],0.06,0.035
9,ILE,Isoleucine,ile_L[c],0.15,0.09


### Check the elemental composition of the MFA biomass reactions

In [4]:
# Elements whose totals are tracked for each carbon source
tracked_elements = ('C', 'H', 'N', 'O', 'P', 'S')
mfa_elements_glucose = dict.fromkeys(tracked_elements, 0)
mfa_elements_oil = dict.fromkeys(tracked_elements, 0)

# Cofactor rows that are left out of the elemental totals
excluded_cofactors = ('atp[c]', 'nadh[c]', 'nadph[c]')

for _, mfa_row in mfa_biomass_composition_df.iterrows():
    # Acetyl-CoA rows are counted using the model's acetate metabolite instead
    # NOTE(review): presumably so the CoA moiety is not counted as biomass — confirm
    gsm_id = mfa_row['gsm_metabolite_id']
    gsm_id = 'ac[c]' if gsm_id == 'accoa[c]' else gsm_id

    # The lookup happens before the cofactor filter, so every listed ID must exist in the model
    gsm_metabolite = model.metabolites.get_by_id(gsm_id)
    glucose_coeff = mfa_row['mfa_coefficient_glucose']
    oil_coeff = mfa_row['mfa_coefficient_oil']

    if gsm_id in excluded_cofactors:
        continue

    # Add coefficient * atom count for every tracked element
    for element in tracked_elements:
        atom_count = gsm_metabolite.elements.get(element, 0)
        mfa_elements_glucose[element] += glucose_coeff * atom_count
        mfa_elements_oil[element] += oil_coeff * atom_count

# Print the totals for glucose, then (after a blank line) for oil
for label, element_totals in (('Glucose', mfa_elements_glucose), ('\nOil', mfa_elements_oil)):
    print(label)
    for element, total in element_totals.items():
        print(f'{element}: {total}')


Glucose
C: 32.494
H: 60.95779999999999
N: 4.704200000000002
O: 31.049
P: 1.947
S: 0.0478

Oil
C: 31.529999999999998
H: 56.148799999999994
N: 2.8291999999999997
O: 33.437000000000005
P: 2.0709999999999997
S: 0.0478


### Load GSM biomass metabolite classification

In [5]:
# Read the table that assigns each GSM biomass metabolite to a metabolite type
gsm_biomass_csv = "../data/biomass_composition/yarrowia_gsm_biomass.csv"
gsm_biomass_reaction_df = pd.read_csv(gsm_biomass_csv)

gsm_biomass_reaction_df

Unnamed: 0,Full Name,gsm_metabolite_id,metabolite_type
0,"1,3-beta-D-Glucan",13BDglcn[c],Carbohydrate
1,ADP,adp[c],Energy Molecule
2,L-Alanine,ala_L[c],Amino Acid
3,AMP,amp[c],RNA
4,L-Arginine,arg_L[c],Amino Acid
5,L-Asparagine,asn_L[c],Amino Acid
6,L-Aspartate,asp_L[c],Amino Acid
7,ATP,atp[c],Energy Molecule
8,Biomass,biomass[c],Other
9,Chitin Monomer,chitin[c],Carbohydrate


### Load MFA mmol C data

In [6]:
# MFA-derived mmol C per precursor type, with one column per carbon source
mfa_mmol_c_csv = "../data/biomass_composition/mfa_biomass_mmol_c_jan_2024.csv"
mfa_biomass_mmol_c_df = pd.read_csv(mfa_mmol_c_csv)

mfa_biomass_mmol_c_df

Unnamed: 0,type,glucose,oil
0,Amino Acid,15.681,9.141
1,Lipid,5.574,11.15
2,DNA,0.282,0.282
3,RNA,0.817,0.817
4,Carbohydrate,10.2,10.2
5,Total,32.554,31.59


### Determine the mmols of C from each precursor type in biomass reaction

In [7]:
def get_gsm_mmol_c(model, gsm_biomass_reaction_df, type, biomass_reaction_id):
    """Sum the carbon that one metabolite type contributes to a biomass reaction.

    For every metabolite in the reaction whose classified type equals ``type``,
    adds ``(-1 * stoichiometric coefficient) * number of carbon atoms``.

    Parameters
    ----------
    model : cobra.Model
        Model that contains the biomass reaction.
    gsm_biomass_reaction_df : pandas.DataFrame
        Classification table with 'gsm_metabolite_id' and 'metabolite_type'
        columns. If an ID appears more than once, its first row is used.
    type : str
        Metabolite type to total, e.g. 'Amino Acid'. The name shadows the
        builtin inside this function only; it is kept so existing calls work.
    biomass_reaction_id : str
        ID of the biomass reaction to inspect, e.g. 'biomass_C'.

    Returns
    -------
    int or float
        Carbon total for the requested type (0 when no metabolite matches).

    Raises
    ------
    KeyError
        If a metabolite of the reaction has no row in the classification table.
    """
    biomass_reaction = model.reactions.get_by_id(biomass_reaction_id)

    # Build the ID -> type lookup once instead of filtering the dataframe for
    # every metabolite; setdefault keeps the first row when an ID is repeated.
    type_by_metabolite_id = {}
    id_type_pairs = zip(
        gsm_biomass_reaction_df['gsm_metabolite_id'],
        gsm_biomass_reaction_df['metabolite_type'],
    )
    for classified_id, classified_type in id_type_pairs:
        type_by_metabolite_id.setdefault(classified_id, classified_type)

    total_mmols = 0
    for metabolite, stoichiometry in biomass_reaction.metabolites.items():
        if metabolite.id not in type_by_metabolite_id:
            # Every reaction metabolite must be classified, whatever type is requested
            raise KeyError(
                f"Metabolite '{metabolite.id}' in reaction '{biomass_reaction_id}' "
                "has no row in gsm_biomass_reaction_df"
            )

        if type_by_metabolite_id[metabolite.id] != type:
            continue

        # Reactants have negative coefficients, so flip the sign before weighting by carbons
        coefficient = -1 * stoichiometry
        number_of_C = metabolite.elements.get('C', 0)
        total_mmols += coefficient * number_of_C

    return total_mmols

# Metabolite types whose carbon contribution is reported
precursor_types = ['Amino Acid', 'Lipid', 'DNA', 'RNA', 'Carbohydrate']

# The loop variable is not called `type`, so this loop does not rebind the builtin
total_mmols = 0
for precursor_type in precursor_types:
    mmol_C = get_gsm_mmol_c(model, gsm_biomass_reaction_df, precursor_type, 'biomass_C')
    total_mmols += mmol_C
    print(f'{precursor_type}: {mmol_C}')

print(f'Total: {total_mmols}')

Amino Acid: 16.054480973000004
Lipid: 5.211946986999999
DNA: 0.390416315
RNA: 2.111885953
Carbohydrate: 14.104663089999999
Total: 37.873393318


### Add default, glucose, and oil columns to GSM biomass reaction df

In [8]:
default_coefficients = []
glucose_coefficients = []
oil_coefficients = []

c_limited_biomass_reaction = model.reactions.get_by_id("biomass_C")

# Metabolite types that are rescaled to the MFA carbon totals; every other
# type keeps its default coefficient (scale factor 1)
rescaled_types = ('Amino Acid', 'Lipid', 'DNA', 'RNA', 'Carbohydrate')

# (glucose, oil) scale factors keyed by metabolite type. A factor depends only
# on the type, so it is computed the first time the type is seen and then reused
# rather than being recomputed for every row.
scale_factors_by_type = {}

# loop over the metabolites listed in the classification table
for _, row in gsm_biomass_reaction_df.iterrows():

    metabolite_id = row.gsm_metabolite_id
    metabolite = model.metabolites.get_by_id(metabolite_id)

    # named metabolite_type (not `type`) so this loop does not rebind the builtin
    metabolite_type = row.metabolite_type

    # get coefficient (reactants have negative coefficients)
    default_coefficient = -1 * c_limited_biomass_reaction.metabolites[metabolite]

    if metabolite_type in rescaled_types:
        if metabolite_type not in scale_factors_by_type:
            # carbon currently contributed by this type in the default reaction
            gsm_mmols = get_gsm_mmol_c(model, gsm_biomass_reaction_df, metabolite_type, 'biomass_C')

            # carbon measured for this type under each condition
            is_type_row = mfa_biomass_mmol_c_df['type'] == metabolite_type
            glucose_mfa_mmols = mfa_biomass_mmol_c_df.loc[is_type_row, 'glucose'].values[0]
            oil_mfa_mmols = mfa_biomass_mmol_c_df.loc[is_type_row, 'oil'].values[0]

            scale_factors_by_type[metabolite_type] = (
                glucose_mfa_mmols / gsm_mmols,
                oil_mfa_mmols / gsm_mmols,
            )

        glucose_scale_factor, oil_scale_factor = scale_factors_by_type[metabolite_type]
    else:
        glucose_scale_factor = 1
        oil_scale_factor = 1

    default_coefficients.append(default_coefficient)
    glucose_coefficients.append(default_coefficient * glucose_scale_factor)
    oil_coefficients.append(default_coefficient * oil_scale_factor)

gsm_biomass_reaction_df['default_coefficient'] = default_coefficients
gsm_biomass_reaction_df['glucose_coefficient'] = glucose_coefficients
gsm_biomass_reaction_df['oil_coefficient'] = oil_coefficients

# Save the biomass reaction df to csv
gsm_biomass_reaction_df.to_csv("../results/biomass_composition/gsm_biomass_composition.csv", index=False)

gsm_biomass_reaction_df

Unnamed: 0,Full Name,gsm_metabolite_id,metabolite_type,default_coefficient,glucose_coefficient,oil_coefficient
0,"1,3-beta-D-Glucan",13BDglcn[c],Carbohydrate,0.943397,0.682232,0.682232
1,ADP,adp[c],Energy Molecule,-23.09,-23.09,-23.09
2,L-Alanine,ala_L[c],Amino Acid,0.567939,0.554727,0.323369
3,AMP,amp[c],RNA,0.055401,0.021432,0.021432
4,L-Arginine,arg_L[c],Amino Acid,0.125563,0.122642,0.071492
5,L-Asparagine,asn_L[c],Amino Acid,0.186498,0.18216,0.106187
6,L-Aspartate,asp_L[c],Amino Acid,0.186531,0.182192,0.106206
7,ATP,atp[c],Energy Molecule,23.09,23.09,23.09
8,Biomass,biomass[c],Other,-1.0,-1.0,-1.0
9,Chitin Monomer,chitin[c],Carbohydrate,0.868358,0.627967,0.627967


### Create glucose and oil biomass equations and add to model

In [9]:
def create_biomass_reaction(df, coefficient_column, reaction_id, source_model=None):
    """Build a new biomass reaction from a table of metabolite coefficients.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a 'gsm_metabolite_id' column and the column named by
        ``coefficient_column``.
    coefficient_column : str
        Column holding the coefficients. Each value is multiplied by -1 before
        being used as the stoichiometric coefficient.
    reaction_id : str
        ID given to the new reaction.
    source_model : cobra.Model, optional
        Model whose metabolites are attached to the reaction. Defaults to the
        notebook-level ``model`` so existing three-argument calls keep working.

    Returns
    -------
    cobra.Reaction
        The new reaction. It is not added to any model here.
    """
    if source_model is None:
        # Fall back to the model loaded at the top of the notebook
        source_model = model

    # Create a new Reaction object
    biomass_reaction = cobra.Reaction(reaction_id)

    # For each row in the dataframe, add the metabolite and its coefficient to the reaction
    for _, row in df.iterrows():
        metabolite_id = row['gsm_metabolite_id']
        # Flip the sign of the table value to get the stoichiometric coefficient
        metabolite_coeff = -1 * row[coefficient_column]

        metabolite = source_model.metabolites.get_by_id(metabolite_id)

        # Add the metabolite to the reaction
        biomass_reaction.add_metabolites({metabolite: metabolite_coeff})

    return biomass_reaction

# Build one biomass reaction per carbon source from its coefficient column
glucose_biomass_reaction = create_biomass_reaction(
    gsm_biomass_reaction_df, 'glucose_coefficient', 'biomass_glucose'
)
oleic_acid_biomass_reaction = create_biomass_reaction(
    gsm_biomass_reaction_df, 'oil_coefficient', 'biomass_oil'
)

# Add both new reactions to the model in a single call
new_biomass_reactions = [glucose_biomass_reaction, oleic_acid_biomass_reaction]
model.add_reactions(new_biomass_reactions)


### Check the mmols in the glucose and oil biomass reaction

In [10]:
# Metabolite types whose carbon contribution is reported
precursor_types = ['Amino Acid', 'Lipid', 'DNA', 'RNA', 'Carbohydrate']

# (printed label, biomass reaction ID) for each reaction to summarise
biomass_reports = [
    ('Default', 'biomass_C'),
    ('Glucose', 'biomass_glucose'),
    ('Oil', 'biomass_oil'),
]

for report_index, (label, biomass_reaction_id) in enumerate(biomass_reports):
    # blank line between reports, but not before the first one
    if report_index > 0:
        print()
    print(label)

    total_mmols = 0
    for precursor_type in precursor_types:
        mmol_C = get_gsm_mmol_c(model, gsm_biomass_reaction_df, precursor_type, biomass_reaction_id)
        total_mmols += mmol_C
        print(f'{precursor_type}: {mmol_C}')

    print(f'Total: {total_mmols}')

Default
Amino Acid: 16.054480973000004
Lipid: 5.211946986999999
DNA: 0.390416315
RNA: 2.111885953
Carbohydrate: 14.104663089999999
Total: 37.873393318

Glucose
Amino Acid: 15.680999999999996
Lipid: 5.574
DNA: 0.282
RNA: 0.817
Carbohydrate: 10.200000000000001
Total: 32.553999999999995

Oil
Amino Acid: 9.140999999999998
Lipid: 11.150000000000002
DNA: 0.282
RNA: 0.817
Carbohydrate: 10.200000000000001
Total: 31.590000000000003


### Check the elemental composition of the MFA biomass reactions

In [11]:
# NOTE(review): this cell repeats the earlier MFA elemental check and prints the
# same totals. If the intent was to check the new biomass_glucose / biomass_oil
# reactions, it should iterate over those reactions instead — confirm.

# Elements whose totals are tracked for each carbon source
tracked_elements = ('C', 'H', 'N', 'O', 'P', 'S')
total_elements_glucose = dict.fromkeys(tracked_elements, 0)
total_elements_oil = dict.fromkeys(tracked_elements, 0)

# Cofactor rows that are left out of the elemental totals
excluded_cofactors = ('atp[c]', 'nadh[c]', 'nadph[c]')

for _, mfa_row in mfa_biomass_composition_df.iterrows():
    # Acetyl-CoA rows are counted using the model's acetate metabolite instead
    gsm_id = mfa_row['gsm_metabolite_id']
    gsm_id = 'ac[c]' if gsm_id == 'accoa[c]' else gsm_id

    # The lookup happens before the cofactor filter, so every listed ID must exist in the model
    gsm_metabolite = model.metabolites.get_by_id(gsm_id)
    glucose_coeff = mfa_row['mfa_coefficient_glucose']
    oil_coeff = mfa_row['mfa_coefficient_oil']

    if gsm_id in excluded_cofactors:
        continue

    # Add coefficient * atom count for every tracked element
    for element in tracked_elements:
        atom_count = gsm_metabolite.elements.get(element, 0)
        total_elements_glucose[element] += glucose_coeff * atom_count
        total_elements_oil[element] += oil_coeff * atom_count

# Print the totals for glucose, then (after a blank line) for oil
for label, element_totals in (('Glucose', total_elements_glucose), ('\nOil', total_elements_oil)):
    print(label)
    for element, total in element_totals.items():
        print(f'{element}: {total}')


Glucose
C: 32.494
H: 60.95779999999999
N: 4.704200000000002
O: 31.049
P: 1.947
S: 0.0478

Oil
C: 31.529999999999998
H: 56.148799999999994
N: 2.8291999999999997
O: 33.437000000000005
P: 2.0709999999999997
S: 0.0478


### Save the updated model

In [12]:
# Write the model, including the reactions added above, to a new JSON file
updated_model_path = "../genome_scale_models/iYLI647_corr_3.json"
cobra.io.save_json_model(model, updated_model_path)