# Custom Yarrowia Biomass Reaction
Update the genome-scale model's biomass reaction with biomass composition data from our strain.

In [1]:
import pandas as pd
import cobra

### Load genome scale model

In [2]:
# Read the iYLI647_corr_2 genome-scale model from its JSON file
model = cobra.io.json.load_json_model("../genome_scale_models/iYLI647_corr_2.json")
# Bare expression so the notebook renders the model summary
model

0,1
Name,model
Memory address,104a5b6a0
Number of metabolites,1121
Number of reactions,1348
Number of genes,648
Number of groups,0
Objective expression,1.0*biomass_C - 1.0*biomass_C_reverse_c1d5c
Compartments,"c, e, m, n, x, r, g, v"


### Load 13C-MFA biomass reaction data

In [3]:
# Read the MFA biomass composition table; later cells use its gsm_metabolite_id,
# mfa_coefficient_glucose, and mfa_coefficient_oil columns
mfa_biomass_composition_df = pd.read_csv("../data/biomass_composition/yarrowia_mfa_biomass.csv")
# Bare expression so the notebook renders the table
mfa_biomass_composition_df

Unnamed: 0,metabolite,full_name,gsm_metabolite_id,mfa_coefficient_glucose,mfa_coefficient_oil
0,ALA,Alanine,ala_L[c],0.38,0.19
1,ARG,Arginine,arg_L[c],0.13,0.08
2,ASN,Asparagine,asn_L[c],0.196,0.196
3,ASP,Aspartate,asp_L[c],0.33,0.18
4,CYS,Cysteine,cys_L[c],0.036,0.036
5,GLN,Glutamine,gln_L[c],0.239,0.239
6,GLU,Glutamate,glu_L[c],0.52,0.29
7,GLY,Glycine,gly[c],0.29,0.17
8,HIS,Histidine,his_L[c],0.06,0.03
9,ILE,Isoleucine,ile_L[c],0.15,0.09


### Calculate protein fraction

In [4]:
# Running mass totals (coefficient x molar mass) for the glucose and oil conditions
glucose_protein_mg = 0
oil_protein_mg = 0

# Entries of the MFA table that are left out of the protein totals
non_protein_ids = ['accoa[c]', 'dhap[c]', 'atp[c]', 'nadph[c]', 'nadh[c]']

for entry in mfa_biomass_composition_df.itertuples(index=False):
    # Molar mass comes from the formula of the matching model metabolite
    formula_mass = model.metabolites.get_by_id(entry.gsm_metabolite_id).formula_weight

    if entry.gsm_metabolite_id in non_protein_ids:
        continue

    # Accumulate one row at a time, in table order, for each condition
    glucose_protein_mg += entry.mfa_coefficient_glucose * formula_mass
    oil_protein_mg += entry.mfa_coefficient_oil * formula_mass

print("Glucose protein mg: ", glucose_protein_mg)
print("Oil protein mg: ", oil_protein_mg)

Glucose protein mg:  505.5094208199999
Oil protein mg:  331.96798721999994


### Load GSM biomass reaction data with metabolite classification

In [5]:
# Load the table of biomass reaction metabolites; each row carries a
# gsm_metabolite_id and a metabolite_type class that later cells group by
gsm_biomass_reaction_df = pd.read_csv("../data/biomass_composition/yarrowia_gsm_biomass.csv")
# Bare expression so the notebook renders the table
gsm_biomass_reaction_df

Unnamed: 0,Full Name,gsm_metabolite_id,metabolite_type
0,"1,3-beta-D-Glucan",13BDglcn[c],Carbohydrate
1,ADP,adp[c],Energy Molecule
2,L-Alanine,ala_L[c],Protein
3,AMP,amp[c],Energy Molecule
4,L-Arginine,arg_L[c],Protein
5,L-Asparagine,asn_L[c],Protein
6,L-Aspartate,asp_L[c],Protein
7,ATP,atp[c],Energy Molecule
8,Biomass,biomass[c],Other
9,Chitin Monomer,chitin[c],Carbohydrate


### Calculate the mass (mg) of each biomass precursor class in the default biomass reaction

In [6]:
# Default biomass reaction of the loaded model; its stoichiometry is summarized below
c_limited_biomass_reaction = model.reactions.get_by_id("biomass_C")

# Mass totals per metabolite class, accumulated as coefficient x molar mass
protein_mg = 0
lipid_mg = 0
carbohydrate_mg = 0

for _, row in gsm_biomass_reaction_df.iterrows():
    metabolite_id = row.gsm_metabolite_id
    metabolite = model.metabolites.get_by_id(metabolite_id)
    # Deliberately not named `type`: that would shadow the builtin for every later cell
    metabolite_type = row.metabolite_type

    # Flip the sign so that consumed metabolites (negative stoichiometry) become positive
    coefficient = -1 * c_limited_biomass_reaction.metabolites[metabolite]
    molar_mass = metabolite.formula_weight

    # ignore products of biomass reaction
    if coefficient < 0:
        continue
    elif metabolite_type == "Protein":
        protein_mg += coefficient * molar_mass
    elif metabolite_type == "Lipid":
        lipid_mg += coefficient * molar_mass
    elif metabolite_type == "Carbohydrate":
        carbohydrate_mg += coefficient * molar_mass
    # any other metabolite_type is not counted in these three totals

print(f'Protein: {protein_mg:.2f}')
print(f'Lipid: {lipid_mg:.2f}')
print(f'Carbohydrate: {carbohydrate_mg:.2f}')
print(f'Total: {protein_mg + lipid_mg + carbohydrate_mg:.2f}')

Protein: 445.27
Lipid: 88.61
Carbohydrate: 370.00
Total: 903.88


### Add columns for molar weight and the default, glucose, and oleic acid coefficients

In [7]:
# Lipid mass assigned to each growth condition (same mg scale as the totals above)
glucose_lipid_mg = 150
oleic_acid_lipid_mg = 250

# Carbohydrate takes whatever is left so protein + lipid + carbohydrate sums to 1000 mg
glucose_carbohydrate_mg = 1000 - glucose_lipid_mg - glucose_protein_mg
oleic_acid_carbohydrate_mg = 1000 - oleic_acid_lipid_mg - oil_protein_mg

# Factors that rescale the default biomass_C lipid / carbohydrate coefficients
# so each class reaches its target mass
glucose_lipid_scale_factor = glucose_lipid_mg / lipid_mg
oleic_acid_lipid_scale_factor = oleic_acid_lipid_mg / lipid_mg

glucose_carbohydrate_scale_factor = glucose_carbohydrate_mg / carbohydrate_mg
oleic_acid_carbohydrate_scale_factor = oleic_acid_carbohydrate_mg / carbohydrate_mg

print(glucose_lipid_scale_factor, oleic_acid_lipid_scale_factor)

# Build the MFA lookup once instead of boolean-filtering the frame for every row.
# drop_duplicates keeps the first row per id, matching the previous `.values[0]` choice.
mfa_coefficients_by_id = (
    mfa_biomass_composition_df
    .drop_duplicates(subset='gsm_metabolite_id')
    .set_index('gsm_metabolite_id')
)

molar_weights = []
default_coefficients = []
glucose_coefficients = []
oleic_acid_coefficients = []

for _, row in gsm_biomass_reaction_df.iterrows():
    metabolite_id = row.gsm_metabolite_id
    # Deliberately not named `type`: that would shadow the builtin for every later cell
    metabolite_type = row.metabolite_type
    metabolite = model.metabolites.get_by_id(metabolite_id)
    molar_weight = metabolite.formula_weight

    default_coefficient = c_limited_biomass_reaction.metabolites[metabolite]

    if metabolite_type == "Protein":
        # Protein coefficients come straight from the MFA table, negated because
        # the table stores positive values while reactants are negative in the reaction
        if metabolite_id not in mfa_coefficients_by_id.index:
            raise KeyError(
                f"Protein metabolite {metabolite_id!r} has no row in the MFA biomass composition table"
            )
        glucose_coefficient = -1 * mfa_coefficients_by_id.at[metabolite_id, 'mfa_coefficient_glucose']
        oleic_acid_coefficient = -1 * mfa_coefficients_by_id.at[metabolite_id, 'mfa_coefficient_oil']
    elif metabolite_type == "Lipid":
        glucose_coefficient = glucose_lipid_scale_factor * default_coefficient
        oleic_acid_coefficient = oleic_acid_lipid_scale_factor * default_coefficient
    elif metabolite_type == "Carbohydrate":
        glucose_coefficient = glucose_carbohydrate_scale_factor * default_coefficient
        oleic_acid_coefficient = oleic_acid_carbohydrate_scale_factor * default_coefficient
    else:
        # Every other class keeps its default biomass_C coefficient
        glucose_coefficient = default_coefficient
        oleic_acid_coefficient = default_coefficient

    glucose_coefficients.append(glucose_coefficient)
    oleic_acid_coefficients.append(oleic_acid_coefficient)
    molar_weights.append(molar_weight)
    default_coefficients.append(default_coefficient)

# add columns to dataframe
gsm_biomass_reaction_df['molar_weight'] = molar_weights
gsm_biomass_reaction_df['default_coefficient'] = default_coefficients
gsm_biomass_reaction_df['glucose_coefficient'] = glucose_coefficients
gsm_biomass_reaction_df['oleic_acid_coefficient'] = oleic_acid_coefficients

gsm_biomass_reaction_df

1.6927434645029995 2.8212391075049994


Unnamed: 0,Full Name,gsm_metabolite_id,metabolite_type,molar_weight,default_coefficient,glucose_coefficient,oleic_acid_coefficient
0,"1,3-beta-D-Glucan",13BDglcn[c],Carbohydrate,162.1406,-0.943397,-0.878365,-1.065877
1,ADP,adp[c],Energy Molecule,424.177302,23.09,23.09,23.09
2,L-Alanine,ala_L[c],Protein,89.09318,-0.567939,-0.38,-0.19
3,AMP,amp[c],Energy Molecule,345.205341,-0.055401,-0.055401,-0.055401
4,L-Arginine,arg_L[c],Protein,175.2089,-0.125563,-0.13,-0.08
5,L-Asparagine,asn_L[c],Protein,132.11792,-0.186498,-0.196,-0.196
6,L-Aspartate,asp_L[c],Protein,132.09474,-0.186531,-0.33,-0.18
7,ATP,atp[c],Energy Molecule,503.149263,-23.09,-23.09,-23.09
8,Biomass,biomass[c],Other,0.0,1.0,1.0,1.0
9,Chitin Monomer,chitin[c],Carbohydrate,203.19252,-0.868358,-0.808499,-0.981097


### Check biomass reactions

In [8]:
def check_total_mg(df):
    """Print protein, lipid, and carbohydrate mass totals for each coefficient column.

    For each of the 'default', 'glucose', and 'oleic_acid' conditions, the
    '<condition>_coefficient' column is multiplied by 'molar_weight' and summed
    per 'metabolite_type' class. Totals are printed with the sign flipped.

    Args:
        df: DataFrame with 'metabolite_type', 'molar_weight', and the three
            '<condition>_coefficient' columns.

    Returns:
        None. The report is written to stdout.
    """
    def class_total(label, column):
        # Coefficient x molar weight, summed over the rows of one metabolite class
        in_class = df['metabolite_type'] == label
        return df.loc[in_class, column].mul(df['molar_weight']).sum()

    for condition in ('default', 'glucose', 'oleic_acid'):
        column = f'{condition}_coefficient'
        protein_sum = class_total('Protein', column)
        carbohydrate_sum = class_total('Carbohydrate', column)
        lipid_sum = class_total('Lipid', column)

        # The trailing empty entry produces the blank line between conditions
        report_lines = [
            f'{condition} biomass composition:',
            f'Protein: {-1 * protein_sum:.2f}',
            f'Lipid: {-1 * lipid_sum:.2f}',
            f'Carbohydrate: {-1 * carbohydrate_sum:.2f}',
            f'Total: {-1 * protein_sum + -1 * lipid_sum + -1 * carbohydrate_sum:.2f}',
            '',
        ]
        print('\n'.join(report_lines))

# Print the protein / lipid / carbohydrate mass totals for the default,
# glucose, and oleic acid coefficient columns
check_total_mg(gsm_biomass_reaction_df)


default biomass composition:
Protein: 445.27
Lipid: 88.61
Carbohydrate: 370.00
Total: 903.88

glucose biomass composition:
Protein: 505.51
Lipid: 150.00
Carbohydrate: 344.49
Total: 1000.00

oleic_acid biomass composition:
Protein: 331.97
Lipid: 250.00
Carbohydrate: 418.03
Total: 1000.00



### Create glucose and oleic acid biomass reactions from dataframe

In [9]:
def create_biomass_reaction(df, coefficient_column, reaction_id):
    """Build a cobra Reaction whose stoichiometry is taken from one coefficient column.

    Args:
        df: table with a 'gsm_metabolite_id' column and the column named by
            coefficient_column.
        coefficient_column: name of the column holding the stoichiometric coefficients.
        reaction_id: identifier given to the new reaction.

    Returns:
        The new cobra.Reaction. It is not added to the model by this function.

    Note:
        Metabolites are looked up in the notebook-level `model` with get_by_id,
        so every id in df must already exist there.
    """
    new_reaction = cobra.Reaction(reaction_id)

    for record in df.to_dict(orient='records'):
        stoichiometry = record[coefficient_column]
        # Reuse the model's existing Metabolite object for this id
        model_metabolite = model.metabolites.get_by_id(record['gsm_metabolite_id'])
        new_reaction.add_metabolites({model_metabolite: stoichiometry})

    return new_reaction


# Build one reaction per growth condition from the matching coefficient column
glucose_biomass_reaction = create_biomass_reaction(
    gsm_biomass_reaction_df, 'glucose_coefficient', 'biomass_glucose'
)
oleic_acid_biomass_reaction = create_biomass_reaction(
    gsm_biomass_reaction_df, 'oleic_acid_coefficient', 'biomass_oleic_acid'
)

### Add new biomass reactions to the model

In [10]:
# Add the glucose and oleic acid biomass reactions to the model
model.add_reactions([glucose_biomass_reaction, oleic_acid_biomass_reaction])

# Save the extended model under a new file name (iYLI647_corr_3.json)
cobra.io.save_json_model(model, "../genome_scale_models/iYLI647_corr_3.json")

### Check carbohydrate sources

In [17]:
# Keep only the rows classified as carbohydrates
is_carbohydrate = gsm_biomass_reaction_df['metabolite_type'] == 'Carbohydrate'
carb_df = gsm_biomass_reaction_df[is_carbohydrate]

# Print each carbohydrate's default coefficient x molar weight, one value per row
for coefficient, weight in zip(carb_df['default_coefficient'], carb_df['molar_weight']):
    print(coefficient * weight)
carb_df

-152.96294378193622
-176.44393745175108
-38.2407359860192
-2.34811688478016


Unnamed: 0,Full Name,gsm_metabolite_id,metabolite_type,molar_weight,default_coefficient,glucose_coefficient,oleic_acid_coefficient
0,"1,3-beta-D-Glucan",13BDglcn[c],Carbohydrate,162.1406,-0.943397,-0.878365,-1.065877
9,Chitin Monomer,chitin[c],Carbohydrate,203.19252,-0.868358,-0.808499,-0.981097
27,Mannan,mannan[c],Carbohydrate,162.1406,-0.235849,-0.219591,-0.266469
40,Trehalose,tre[c],Carbohydrate,342.29648,-0.00686,-0.006387,-0.007751


### Investigate sources of 13BDglcn[c]

In [20]:
# Show every reaction involving 13BDglcn[c], skipping those whose id contains 'biomass'
glucan_reactions = model.metabolites.get_by_id('13BDglcn[c]').reactions
for reaction in glucan_reactions:
    if 'biomass' in reaction.id:
        continue
    display(reaction)

0,1
Reaction identifier,13BGH
Name,Endo 1 3 beta glucan glucohydrase
Memory address,0x134a1a680
Stoichiometry,13BDglcn[c] + h2o[c] --> glc_D[c]  1_3_beta_D_Glucan + H2O --> D_Glucose
GPR,YALI0F05390g
Lower bound,0.0
Upper bound,1000.0


0,1
Reaction identifier,13GS
Name,1 3 beta glucan synthase
Memory address,0x134a1bfd0
Stoichiometry,udpg[c] --> 13BDglcn[c] + h[c] + udp[c]  UDPglucose --> 1_3_beta_D_Glucan + H + UDP
GPR,YALI0C01411g and YALI0E21021g
Lower bound,0.0
Upper bound,1000.0


### Investigate sources of chitin[c]

In [21]:
# Show every reaction involving chitin[c], skipping those whose id contains 'biomass'
chitin_reactions = model.metabolites.get_by_id('chitin[c]').reactions
for reaction in chitin_reactions:
    if 'biomass' in reaction.id:
        continue
    display(reaction)

0,1
Reaction identifier,CHTNS
Name,chitin synthase
Memory address,0x134ba6dd0
Stoichiometry,udpacgal[c] --> chitin[c] + h[c] + udp[c]  UDP_N_acetyl_D_galactosamine --> Chitin__monomer_ + H + UDP
GPR,YALI0D25938g or YALI0B16324g or YALI0C24354g or YALI0D03179g or YALI0E22198g
Lower bound,0.0
Upper bound,1000.0


0,1
Reaction identifier,CHTNDA
Name,chitin deacetylase
Memory address,0x134ba71f0
Stoichiometry,chitin[c] + h2o[c] --> ac[c] + chitos[c] + h[c]  Chitin__monomer_ + H2O --> Acetate + Chitosan + H
GPR,YALI0F30833g
Lower bound,0.0
Upper bound,1000.0


In [22]:
# Show every reaction involving udpacgal[c], skipping those whose id contains 'biomass'
udpacgal_reactions = model.metabolites.get_by_id('udpacgal[c]').reactions
for reaction in udpacgal_reactions:
    if 'biomass' in reaction.id:
        continue
    display(reaction)

0,1
Reaction identifier,UDPACGLP
Name,UDP N acetylglucosamine diphosphorylase
Memory address,0x1350dbca0
Stoichiometry,acgam1p[c] + h[c] + utp[c] <=> ppi[c] + udpacgal[c]  N_Acetyl_D_glucosamine_1_phosphate + H + UTP <=> Diphosphate + UDP_N_acetyl_D_galactosamine
GPR,YALI0E03146g
Lower bound,-1000.0
Upper bound,1000.0


0,1
Reaction identifier,CHTNS
Name,chitin synthase
Memory address,0x134ba6dd0
Stoichiometry,udpacgal[c] --> chitin[c] + h[c] + udp[c]  UDP_N_acetyl_D_galactosamine --> Chitin__monomer_ + H + UDP
GPR,YALI0D25938g or YALI0B16324g or YALI0C24354g or YALI0D03179g or YALI0E22198g
Lower bound,0.0
Upper bound,1000.0


In [23]:
# Show every reaction involving acgam1p[c], skipping those whose id contains 'biomass'
acgam1p_reactions = model.metabolites.get_by_id('acgam1p[c]').reactions
for reaction in acgam1p_reactions:
    if 'biomass' in reaction.id:
        continue
    display(reaction)


0,1
Reaction identifier,ACGAMPM
Name,phosphoacetylglucosamine mutase
Memory address,0x134a70610
Stoichiometry,acgam6p[c] <=> acgam1p[c]  N_Acetyl_D_glucosamine_6_phosphate <=> N_Acetyl_D_glucosamine_1_phosphate
GPR,YALI0E29579g
Lower bound,-1000.0
Upper bound,1000.0


0,1
Reaction identifier,UDPACGLP
Name,UDP N acetylglucosamine diphosphorylase
Memory address,0x1350dbca0
Stoichiometry,acgam1p[c] + h[c] + utp[c] <=> ppi[c] + udpacgal[c]  N_Acetyl_D_glucosamine_1_phosphate + H + UTP <=> Diphosphate + UDP_N_acetyl_D_galactosamine
GPR,YALI0E03146g
Lower bound,-1000.0
Upper bound,1000.0


In [24]:
# Show every reaction involving acgam6p[c], skipping those whose id contains 'biomass'
acgam6p_reactions = model.metabolites.get_by_id('acgam6p[c]').reactions
for reaction in acgam6p_reactions:
    if 'biomass' in reaction.id:
        continue
    display(reaction)

0,1
Reaction identifier,ACGAMPM
Name,phosphoacetylglucosamine mutase
Memory address,0x134a70610
Stoichiometry,acgam6p[c] <=> acgam1p[c]  N_Acetyl_D_glucosamine_6_phosphate <=> N_Acetyl_D_glucosamine_1_phosphate
GPR,YALI0E29579g
Lower bound,-1000.0
Upper bound,1000.0


0,1
Reaction identifier,ACGAM6PS
Name,N acetylglucosamine 6 phosphate synthase
Memory address,0x134a57fd0
Stoichiometry,accoa[c] + gam6p[c] <=> acgam6p[c] + coa[c] + h[c]  Acetyl_CoA + D_Glucosamine_6_phosphate <=> N_Acetyl_D_glucosamine_6_phosphate + Coenzyme_A + H
GPR,YALI0D20152g
Lower bound,-1000.0
Upper bound,1000.0


In [25]:
# Show every reaction involving gam6p[c], skipping those whose id contains 'biomass'
gam6p_reactions = model.metabolites.get_by_id('gam6p[c]').reactions
for reaction in gam6p_reactions:
    if 'biomass' in reaction.id:
        continue
    display(reaction)

0,1
Reaction identifier,GAM6Pt
Name,D glucosamine 6 phosphate reversible uniport
Memory address,0x134d9ae30
Stoichiometry,gam6p[e] <=> gam6p[c]  D_Glucosamine_6_phosphate <=> D_Glucosamine_6_phosphate
GPR,
Lower bound,-1000.0
Upper bound,1000.0


0,1
Reaction identifier,G6PDA
Name,glucosamine 6 phosphate deaminase
Memory address,0x134d98d30
Stoichiometry,gam6p[c] + h2o[c] --> f6p[c] + nh4[c]  D_Glucosamine_6_phosphate + H2O --> D_Fructose_6_phosphate + Ammonium
GPR,YALI0C01419g
Lower bound,0.0
Upper bound,1000.0


0,1
Reaction identifier,GF6PTA
Name,glutamine fructose 6 phosphate transaminase
Memory address,0x134dc8d30
Stoichiometry,f6p[c] + gln_L[c] --> gam6p[c] + glu_L[c]  D_Fructose_6_phosphate + L_Glutamine --> D_Glucosamine_6_phosphate + L_Glutamate
GPR,YALI0B21428g
Lower bound,0.0
Upper bound,1000.0


0,1
Reaction identifier,PGAMT
Name,phosphoglucosamine mutase
Memory address,0x134fb0a90
Stoichiometry,gam1p[c] <=> gam6p[c]  D_Glucosamine_1_phosphate <=> D_Glucosamine_6_phosphate
GPR,YALI0E29579g
Lower bound,-1000.0
Upper bound,1000.0


0,1
Reaction identifier,ACGAM6PS
Name,N acetylglucosamine 6 phosphate synthase
Memory address,0x134a57fd0
Stoichiometry,accoa[c] + gam6p[c] <=> acgam6p[c] + coa[c] + h[c]  Acetyl_CoA + D_Glucosamine_6_phosphate <=> N_Acetyl_D_glucosamine_6_phosphate + Coenzyme_A + H
GPR,YALI0D20152g
Lower bound,-1000.0
Upper bound,1000.0


In [26]:
# Show every reaction involving gam1p[c], skipping those whose id contains 'biomass'
gam1p_reactions = model.metabolites.get_by_id('gam1p[c]').reactions
for reaction in gam1p_reactions:
    if 'biomass' in reaction.id:
        continue
    display(reaction)

0,1
Reaction identifier,PGAMT
Name,phosphoglucosamine mutase
Memory address,0x134fb0a90
Stoichiometry,gam1p[c] <=> gam6p[c]  D_Glucosamine_1_phosphate <=> D_Glucosamine_6_phosphate
GPR,YALI0E29579g
Lower bound,-1000.0
Upper bound,1000.0


### Create new GSM biomass reactions that align with MFA biomass reactions

In [12]:
# # duplicate the biomass reaction
# c_limited_biomass_reaction = model.reactions.get_by_id("biomass_C").copy()
# biomass_reaction_glucose = cobra.Reaction()
# biomass_reaction_oleic_acid = cobra.Reaction()

# biomass_reaction_glucose.id = "biomass_C_glucose"
# biomass_reaction_oleic_acid.id = "biomass_C_oleic_acid"

# metabolites = []

# # update coefficients in new biomass reactions
# for metabolite in c_limited_biomass_reaction.metabolites:
#     metabolite_id = metabolite.id

#     if metabolite_id in list(mfa_biomass_composition_df['gsm_metabolite_id']):
#         # get the coefficients from the mfa biomass composition
#         glucose_coefficient = mfa_biomass_composition_df[mfa_biomass_composition_df['gsm_metabolite_id'] == metabolite_id]['mfa_coefficient_glucose'].values[0]
#         oil_coefficient = mfa_biomass_composition_df[mfa_biomass_composition_df['gsm_metabolite_id'] == metabolite_id]['mfa_coefficient_oil'].values[0]

#         # add the metabolite to the biomass reactions
#         biomass_reaction_glucose.add_metabolites({metabolite: glucose_coefficient})
#         biomass_reaction_oleic_acid.add_metabolites({metabolite: oil_coefficient})

#     else:
#         coefficient = c_limited_biomass_reaction.metabolites[metabolite]
     
#         # add the metabolite to the biomass reactions
#         biomass_reaction_glucose.add_metabolites({metabolite: coefficient})
#         biomass_reaction_oleic_acid.add_metabolites({metabolite: coefficient})

#     metabolites.append({
#         'full_name': metabolite.name,
#         'gsm_metabolite_id': metabolite_id,
#         'biomass_c_iYLI647': c_limited_biomass_reaction.metabolites[metabolite],	
#         'gsm_coefficient_glucose': biomass_reaction_glucose.metabolites[metabolite],
#         'gsm_coefficient_oil': biomass_reaction_oleic_acid.metabolites[metabolite]
#     })
     

# gsm_biomass_composition_df = pd.DataFrame(metabolites)

# # save as csv
# gsm_biomass_composition_df.to_csv("../results/biomass_composition/gsm_biomass_composition.csv", index=False)

# gsm_biomass_composition_df


### Add biomass reactions to model and save new model

In [13]:
# # add biomass reactions to model
# model.add_reactions([biomass_reaction_glucose, biomass_reaction_oleic_acid])

# # save model
# cobra.io.save_json_model(model, "../genome_scale_models/iYLI647_corr_3.json")

### Save the updated model

In [14]:
# save the updated model
# cobra.io.save_json_model(model, "../genome_scale_models/iYLI647_corr_2.json")
# model