# Custom Yarrowia Biomass Reaction
Update the biomass reaction with data from our strain

In [1]:
import pandas as pd
import cobra

### Load genome scale model

In [3]:
# Starting genome-scale model; the path is relative to the notebook's working directory.
model_path = "../genome_scale_models/iYLI647_corr_2.json"
model = cobra.io.json.load_json_model(model_path)
model

FileNotFoundError: [Errno 2] No such file or directory: '../genome_scale_models/iYLI647_corr_2.json'

### Load 13C-MFA biomass reaction data

In [None]:
# 13C-MFA biomass table; later cells read its gsm_metabolite_id,
# mfa_coefficient_glucose and mfa_coefficient_oil columns.
mfa_biomass_path = "../data/biomass_composition/yarrowia_mfa_biomass.csv"
mfa_biomass_composition_df = pd.read_csv(mfa_biomass_path)
mfa_biomass_composition_df

### Calculate protein mass (mg) for the glucose and oil conditions

In [None]:
# Sum coefficient * molar mass over the MFA biomass precursors for each
# condition, leaving out the precursors listed here.
excluded_precursors = ('accoa[c]', 'dhap[c]', 'atp[c]', 'nadph[c]', 'nadh[c]')

glucose_protein_mg = 0
oil_protein_mg = 0

for _, row in mfa_biomass_composition_df.iterrows():
    metabolite_id = row["gsm_metabolite_id"]
    # Resolved for every row (excluded ones too), so an id that is missing
    # from the model fails here regardless of the exclusion list.
    molar_mass = model.metabolites.get_by_id(metabolite_id).formula_weight

    if metabolite_id in excluded_precursors:
        continue

    glucose_protein_mg += row.mfa_coefficient_glucose * molar_mass
    oil_protein_mg += row.mfa_coefficient_oil * molar_mass

print("Glucose protein mg: ", glucose_protein_mg)
print("Oil protein mg: ", oil_protein_mg)

### Load GSM biomass reaction data with metabolite classification

In [None]:
# load the biomass reaction
# Biomass reaction table with a metabolite_type classification per metabolite
gsm_biomass_path = "../data/biomass_composition/yarrowia_gsm_biomass.csv"
gsm_biomass_reaction_df = pd.read_csv(gsm_biomass_path)
gsm_biomass_reaction_df

### Calculate the mg of protein, lipid, and carbohydrate in the default biomass reaction

In [None]:
# Split the default "biomass_C" reaction into protein, lipid and carbohydrate
# totals by summing (sign-flipped coefficient) * molar mass per class.
c_limited_biomass_reaction = model.reactions.get_by_id("biomass_C")

protein_mg = 0
lipid_mg = 0
carbohydrate_mg = 0

for _, row in gsm_biomass_reaction_df.iterrows():
    metabolite_id = row.gsm_metabolite_id
    metabolite = model.metabolites.get_by_id(metabolite_id)
    # Named metabolite_type rather than `type` so the builtin is not
    # shadowed for the rest of the notebook session.
    metabolite_type = row.metabolite_type

    # Flip the sign so that consumed metabolites get a positive amount.
    coefficient = -1 * c_limited_biomass_reaction.metabolites[metabolite]
    molar_mass = metabolite.formula_weight

    # ignore products of biomass reaction
    if coefficient < 0:
        continue
    elif metabolite_type == "Protein":
        protein_mg += coefficient * molar_mass
    elif metabolite_type == "Lipid":
        lipid_mg += coefficient * molar_mass
    elif metabolite_type == "Carbohydrate":
        carbohydrate_mg += coefficient * molar_mass

print(f'Protein: {protein_mg:.2f}')
print(f'Lipid: {lipid_mg:.2f}')
print(f'Carbohydrate: {carbohydrate_mg:.2f}')
print(f'Total: {protein_mg + lipid_mg + carbohydrate_mg:.2f}')

### Add columns for molar weight and the default, glucose, and oleic acid coefficients

In [None]:
# Target lipid totals per condition (mg, going by the variable names).
glucose_lipid_mg = 150
oleic_acid_lipid_mg = 250

# Carbohydrate takes whatever remains of 1000 after lipid and MFA protein.
glucose_carbohydrate_mg = 1000 - glucose_lipid_mg - glucose_protein_mg
oleic_acid_carbohydrate_mg = 1000 - oleic_acid_lipid_mg - oil_protein_mg

# Factors that rescale the default lipid / carbohydrate coefficients so each
# class reaches its target total.
glucose_lipid_scale_factor = glucose_lipid_mg / lipid_mg
oleic_acid_lipid_scale_factor = oleic_acid_lipid_mg / lipid_mg

glucose_carbohydrate_scale_factor = glucose_carbohydrate_mg / carbohydrate_mg
oleic_acid_carbohydrate_scale_factor = oleic_acid_carbohydrate_mg / carbohydrate_mg

print(glucose_lipid_scale_factor, oleic_acid_lipid_scale_factor)

# Index the MFA coefficients by metabolite id once instead of re-filtering the
# whole table for every protein row. Keeping only the first row per id matches
# the previous `.values[0]` lookup. A protein id that is absent from the MFA
# table now raises KeyError naming the id (previously a bare IndexError).
mfa_coefficients_by_id = (
    mfa_biomass_composition_df
    .drop_duplicates(subset='gsm_metabolite_id')
    .set_index('gsm_metabolite_id')
)

molar_weights = []
default_coefficients = []
glucose_coefficients = []
oleic_acid_coefficients = []

for _, row in gsm_biomass_reaction_df.iterrows():
    metabolite_id = row.gsm_metabolite_id
    # Named metabolite_type rather than `type` so the builtin is not shadowed.
    metabolite_type = row.metabolite_type
    metabolite = model.metabolites.get_by_id(metabolite_id)
    molar_weight = metabolite.formula_weight

    default_coefficient = c_limited_biomass_reaction.metabolites[metabolite]

    if metabolite_type == "Protein":
        # Protein precursors take the MFA coefficients directly, negated.
        glucose_coefficient = -1 * mfa_coefficients_by_id.at[metabolite_id, 'mfa_coefficient_glucose']
        oleic_acid_coefficient = -1 * mfa_coefficients_by_id.at[metabolite_id, 'mfa_coefficient_oil']
    elif metabolite_type == "Lipid":
        glucose_coefficient = glucose_lipid_scale_factor * default_coefficient
        oleic_acid_coefficient = oleic_acid_lipid_scale_factor * default_coefficient
    elif metabolite_type == "Carbohydrate":
        glucose_coefficient = glucose_carbohydrate_scale_factor * default_coefficient
        oleic_acid_coefficient = oleic_acid_carbohydrate_scale_factor * default_coefficient
    else:
        # Everything else keeps its default coefficient in both conditions.
        glucose_coefficient = default_coefficient
        oleic_acid_coefficient = default_coefficient

    glucose_coefficients.append(glucose_coefficient)
    oleic_acid_coefficients.append(oleic_acid_coefficient)
    molar_weights.append(molar_weight)
    default_coefficients.append(default_coefficient)

# add columns to dataframe
gsm_biomass_reaction_df['molar_weight'] = molar_weights
gsm_biomass_reaction_df['default_coefficient'] = default_coefficients
gsm_biomass_reaction_df['glucose_coefficient'] = glucose_coefficients
gsm_biomass_reaction_df['oleic_acid_coefficient'] = oleic_acid_coefficients

gsm_biomass_reaction_df

### Check biomass reactions

In [None]:
def check_total_mg(df):
    """Print protein, lipid and carbohydrate totals for each biomass reaction.

    For each of the 'default', 'glucose' and 'oleic_acid' coefficient columns,
    the rows of every metabolite class are multiplied by 'molar_weight',
    summed, and printed with the sign flipped, followed by the three-class
    total.

    Args:
        df: table with 'metabolite_type', 'molar_weight' and
            '<condition>_coefficient' columns.

    Returns:
        None; the report is written to stdout.
    """
    kinds = df['metabolite_type']
    molar_weight = df['molar_weight']

    def class_sum(kind, column):
        # coefficient * molar weight summed over the rows of one class
        return df.loc[kinds == kind, column].mul(molar_weight).sum()

    for condition in ('default', 'glucose', 'oleic_acid'):
        column = f'{condition}_coefficient'
        protein_sum = class_sum('Protein', column)
        lipid_sum = class_sum('Lipid', column)
        carbohydrate_sum = class_sum('Carbohydrate', column)
        total = -1 * protein_sum + -1 * lipid_sum + -1 * carbohydrate_sum

        report_lines = [
            f'{condition} biomass composition:',
            f'Protein: {-1 * protein_sum:.2f}',
            f'Lipid: {-1 * lipid_sum:.2f}',
            f'Carbohydrate: {-1 * carbohydrate_sum:.2f}',
            f'Total: {total:.2f}',
            '',  # blank separator line after each condition
        ]
        print('\n'.join(report_lines))

# Print the protein / lipid / carbohydrate totals for the default, glucose
# and oleic acid coefficient columns
check_total_mg(gsm_biomass_reaction_df)


### Create glucose and oleic acid biomass reactions from dataframe

In [None]:
def create_biomass_reaction(df, coefficient_column, reaction_id):
    """Build a cobra Reaction from one coefficient column of a biomass table.

    Args:
        df: table with a 'gsm_metabolite_id' column and the coefficient column.
        coefficient_column: name of the column holding the coefficients.
        reaction_id: id given to the new reaction.

    Returns:
        The new cobra.Reaction. It is not added to the model here.

    Metabolites are looked up in the notebook-level `model`, so every id in
    the table must already exist there.
    """
    new_reaction = cobra.Reaction(reaction_id)

    for _, record in df.iterrows():
        metabolite_id = record['gsm_metabolite_id']
        stoichiometry = record[coefficient_column]
        # One add_metabolites call per row, as rows are visited in table order
        new_reaction.add_metabolites(
            {model.metabolites.get_by_id(metabolite_id): stoichiometry}
        )

    return new_reaction

# Build one biomass reaction per condition from its coefficient column
glucose_biomass_reaction = create_biomass_reaction(
    df=gsm_biomass_reaction_df,
    coefficient_column='glucose_coefficient',
    reaction_id='biomass_glucose',
)
oleic_acid_biomass_reaction = create_biomass_reaction(
    df=gsm_biomass_reaction_df,
    coefficient_column='oleic_acid_coefficient',
    reaction_id='biomass_oleic_acid',
)

### Add new biomass reactions to the model

In [None]:
# Register both condition-specific biomass reactions on the model
new_biomass_reactions = [glucose_biomass_reaction, oleic_acid_biomass_reaction]
model.add_reactions(new_biomass_reactions)

# Write the extended model out under a new file name
updated_model_path = "../genome_scale_models/iYLI647_corr_3.json"
cobra.io.save_json_model(model, updated_model_path)

### Check carbohydrate sources

In [None]:
is_carbohydrate = gsm_biomass_reaction_df['metabolite_type'] == 'Carbohydrate'
carb_df = gsm_biomass_reaction_df[is_carbohydrate]

# Default coefficient * molar weight for each carbohydrate row
for _, row in carb_df.iterrows():
    total_mg = row['default_coefficient'] * row['molar_weight']
    print(total_mg)
carb_df

### Investigate sources of 13BDglcn[c]

In [None]:
# Show every reaction involving 13BDglcn[c], skipping the biomass reactions
queried_metabolite = model.metabolites.get_by_id('13BDglcn[c]')
for r in queried_metabolite.reactions:
    if 'biomass' in r.id:
        continue
    display(r)

### Investigate sources of chitin[c]

In [None]:
# Show every reaction involving chitin[c], skipping the biomass reactions
queried_metabolite = model.metabolites.get_by_id('chitin[c]')
for r in queried_metabolite.reactions:
    if 'biomass' in r.id:
        continue
    display(r)

In [None]:
# Show every reaction involving udpacgal[c], skipping the biomass reactions
queried_metabolite = model.metabolites.get_by_id('udpacgal[c]')
for r in queried_metabolite.reactions:
    if 'biomass' in r.id:
        continue
    display(r)

In [None]:
# Show every reaction involving acgam1p[c], skipping the biomass reactions
queried_metabolite = model.metabolites.get_by_id('acgam1p[c]')
for r in queried_metabolite.reactions:
    if 'biomass' in r.id:
        continue
    display(r)


In [None]:
# Show every reaction involving acgam6p[c], skipping the biomass reactions
queried_metabolite = model.metabolites.get_by_id('acgam6p[c]')
for r in queried_metabolite.reactions:
    if 'biomass' in r.id:
        continue
    display(r)

In [None]:
# Show every reaction involving gam6p[c], skipping the biomass reactions
queried_metabolite = model.metabolites.get_by_id('gam6p[c]')
for r in queried_metabolite.reactions:
    if 'biomass' in r.id:
        continue
    display(r)

In [None]:
# Show every reaction involving gam1p[c], skipping the biomass reactions
queried_metabolite = model.metabolites.get_by_id('gam1p[c]')
for r in queried_metabolite.reactions:
    if 'biomass' in r.id:
        continue
    display(r)

### Create new GSM biomass reactions that align with MFA biomass reactions

In [None]:
# # duplicate the biomass reaction
# c_limited_biomass_reaction = model.reactions.get_by_id("biomass_C").copy()
# biomass_reaction_glucose = cobra.Reaction()
# biomass_reaction_oleic_acid = cobra.Reaction()

# biomass_reaction_glucose.id = "biomass_C_glucose"
# biomass_reaction_oleic_acid.id = "biomass_C_oleic_acid"

# metabolites = []

# # update coefficients in new biomass reactions
# for metabolite in c_limited_biomass_reaction.metabolites:
#     metabolite_id = metabolite.id

#     if metabolite_id in list(mfa_biomass_composition_df['gsm_metabolite_id']):
#         # get the coefficients from the mfa biomass composition
#         glucose_coefficient = mfa_biomass_composition_df[mfa_biomass_composition_df['gsm_metabolite_id'] == metabolite_id]['mfa_coefficient_glucose'].values[0]
#         oil_coefficient = mfa_biomass_composition_df[mfa_biomass_composition_df['gsm_metabolite_id'] == metabolite_id]['mfa_coefficient_oil'].values[0]

#         # add the metabolite to the biomass reactions
#         biomass_reaction_glucose.add_metabolites({metabolite: glucose_coefficient})
#         biomass_reaction_oleic_acid.add_metabolites({metabolite: oil_coefficient})

#     else:
#         coefficient = c_limited_biomass_reaction.metabolites[metabolite]
     
#         # add the metabolite to the biomass reactions
#         biomass_reaction_glucose.add_metabolites({metabolite: coefficient})
#         biomass_reaction_oleic_acid.add_metabolites({metabolite: coefficient})

#     metabolites.append({
#         'full_name': metabolite.name,
#         'gsm_metabolite_id': metabolite_id,
#         'biomass_c_iYLI647': c_limited_biomass_reaction.metabolites[metabolite],	
#         'gsm_coefficient_glucose': biomass_reaction_glucose.metabolites[metabolite],
#         'gsm_coefficient_oil': biomass_reaction_oleic_acid.metabolites[metabolite]
#     })
     

# gsm_biomass_composition_df = pd.DataFrame(metabolites)

# # save as csv
# gsm_biomass_composition_df.to_csv("../results/biomass_composition/gsm_biomass_composition.csv", index=False)

# gsm_biomass_composition_df


### Add biomass reactions to model and save new model

In [None]:
# # add biomass reactions to model
# model.add_reactions([biomass_reaction_glucose, biomass_reaction_oleic_acid])

# # save model
# cobra.io.save_json_model(model, "../genome_scale_models/iYLI647_corr_3.json")

### Save the updated model

In [None]:
# save the updated model
# cobra.io.save_json_model(model, "../genome_scale_models/iYLI647_corr_2.json")
# model