In [None]:
import cobra
from cobra.io import read_sbml_model
from cobra.flux_analysis import pfba
import os, re
import pandas as pd

# Show the working directory; the model path below is built from it.
print(os.getcwd())

# NOTE(review): ext_dir is not referenced anywhere else in the visible notebook.
ext_dir = '/../../data/external'
salb_gem_dir = '../3. pathway_engineering'

# Load the SBML model that the later cells simulate.
model = read_sbml_model(f"{os.getcwd()}/{salb_gem_dir}/Salb-GEM-PTA-Gapfilled-Biosustain.xml")


# 1. Test a medium in the model

In [None]:
# Base medium for the notebook: EX_glc__D_e set to 0.8 and EX_glu__L_e to 0.
# This assignment is not wrapped in `with model:`.
medium = model.medium
medium['EX_glc__D_e'] = 0.8
medium['EX_glu__L_e'] = 0
model.medium = medium

In [None]:
# Snapshot of the medium currently set on the model.
# NOTE(review): default_medium is not read again in the visible notebook.
default_medium = model.medium

# Solve the same model with pFBA and with plain FBA; the solver switch to
# 'cplex' is made inside the `with model:` block.
with model:
    model.solver = 'cplex'
    solution_pfba = pfba(model)
    solution_fba = model.optimize()


In [None]:
# Biomass flux from the pFBA solution next to the FBA objective value.
print(solution_pfba.fluxes['BIOMASS_SALB'], solution_fba.objective_value)

# 2. Iterate over each medium and plot growth

In [None]:
import matplotlib.pyplot as plt

# Table of media to screen; the loop reads its 'Reaction' and 'Concentration' columns.
df_medium = pd.read_csv('medium.csv')

# One pFBA biomass flux per row of df_medium, in row order.
growth_rates = []

for index, row in df_medium.iterrows():
    # Bounds are changed inside a context block, once per row.
    with model:
        # Glucose lower bound set to 0 first; the row's own exchange is set
        # afterwards, so a glucose row still gets its bound from the table.
        model.reactions.EX_glc__D_e.bounds = (0, 1000)
        exchange_rxn = model.reactions.get_by_id(row["Reaction"])
        exchange_rxn.bounds = (-row["Concentration"], 1000)
        print(exchange_rxn)

        solution = cobra.flux_analysis.pfba(model)

        # Flux through the biomass reaction is taken as the growth rate.
        growth_rate = solution.fluxes['BIOMASS_SALB']
        growth_rates.append(growth_rate)

# Attach the results as a new column, aligned with the rows iterated above.
df_medium['Growth'] = growth_rates

In [None]:
# Mixed-substrate test: glucose (0.4) and glutamate (0.48) supplied together.
# The medium change is made inside `with model:`.
with model: 
    medium = model.medium
    medium['EX_glc__D_e'] = 0.4
    medium['EX_glu__L_e'] = 0.48
    model.medium = medium
    solution = cobra.flux_analysis.pfba(model)

# Biomass flux for the mixed medium; growth_rate is reused when the
# 'Glutamate+Glucose' row is added to df_medium.
growth_rate = solution.fluxes['BIOMASS_SALB']  # flux through the BIOMASS_SALB reaction
growth_rates.append(growth_rate)


In [None]:
# Append the mixed-medium result as a new row, filled positionally.
# NOTE(review): assumes df_medium's columns are, in order, medium name, reaction,
# concentration and growth (medium.csv is not visible here; confirm).
# 0.88 is the sum of the two uptake values used above (0.4 + 0.48).
# Running this cell again appends another row.
df_medium.loc[len(df_medium.index)] = ['Glutamate+Glucose', 'EX_glu__L_e+EX_glc__D_e', 0.88, growth_rate] 


In [None]:
# Rank media from highest to lowest growth, then write the ranked table to disk.
df_medium = df_medium.sort_values('Growth', ascending=False)
df_medium.to_csv('medium_comparison.csv', index=False)

In [None]:
import seaborn as sns
sns.set_style('whitegrid')
plt.figure(figsize=(6, 6))

# One colour per bar from the reversed viridis map. The palette is sized from
# the table so that any number of media can be plotted (a fixed size of 6
# raised IndexError as soon as the table had more rows).
n_media = len(df_medium)
palette = sns.color_palette("viridis_r", n_media)

# x-tick labels: medium name plus its concentration value. Columns are read by
# name rather than by unpacking whole rows, so extra columns do not break this.
labels = [
    f"{name} ({concentration}mmol/gDW/h)"
    for name, concentration in zip(df_medium['Medium'], df_medium['Concentration'])
]

# Bars are placed at integer positions and labelled afterwards, so two rows
# sharing a medium name still get separate bars.
positions = list(range(n_media))
bars = plt.bar(positions, df_medium['Growth'])

# Colour each bar and print its growth value on top.
for bar, colour, growth in zip(bars, palette, df_medium['Growth']):
    bar.set_color(colour)
    plt.text(bar.get_x() + bar.get_width() / 2., growth,
             f'{growth:.4f}', ha='center', va='bottom')

plt.xticks(ticks=positions, labels=labels, rotation=45, ha='right')

plt.xlabel('Medium (Concentration)', fontsize=14, fontweight='bold')
plt.ylabel('Growth Rate (h^-1)', fontsize=14, fontweight='bold')

plt.show()

In [None]:
# Flux distribution on glutamate only: glucose 0, glutamate 9.6, solved with pFBA.
with model:
    medium = model.medium
    medium['EX_glc__D_e'] = 0  # glucose switched off
    medium['EX_glu__L_e'] = 9.6
    model.medium = medium
    solution = pfba(model)

# Save the fluxes of that solution.
solution.fluxes.to_csv('flux_glu__L.csv')
    

In [None]:
# Flux distribution on mannitol only: glucose 0, mannitol 8.
# NOTE(review): solved with model.optimize() here, whereas the glutamate cell
# uses pfba(model); confirm the difference is intended.
with model:
    medium = model.medium
    medium['EX_glc__D_e'] = 0  # glucose switched off
    medium['EX_mnl_e'] = 8
    model.medium = medium
    solution = model.optimize()
# Save the fluxes of that solution.
solution.fluxes.to_csv('flux_mnl.csv')

In [None]:
# Flux distribution on glucose: glucose set to 8, solved with model.optimize().
with model:
    medium = model.medium
    medium['EX_glc__D_e'] = 8  # glucose is the carbon source being set in this cell
    model.medium = medium
    solution = model.optimize()

# Save the fluxes of that solution.
solution.fluxes.to_csv('flux_glc__D.csv')

# 3. Find out the best medium for growth

In [None]:
# Folder holding the table that is read below.
salb_mass_dir = '../2. gapfilling/4.mass_balance'

# Table whose 'activity' and 'model_simulation_0.05' columns are compared in the next cell.
agreed_bio_data = pd.read_csv(f"{os.getcwd()}/{salb_mass_dir}/agreed_bio_data_Salb_Biosustain.csv")
# Cast to the 'boolean' dtype; the next cell drops rows where this column is missing.
agreed_bio_data['model_simulation_0.05'] = agreed_bio_data['model_simulation_0.05'].astype('boolean')

In [None]:
print("\nTrue Positive (actual true, predicted true):\n")

# Rows without a simulation result cannot be classified, so they are dropped first.
agreed_bio_data_nan_drop = agreed_bio_data[agreed_bio_data['model_simulation_0.05'].notna()]

# True positive = 'activity' is true AND the simulation column is true.
is_true_positive = (
    agreed_bio_data_nan_drop["activity"]
    & agreed_bio_data_nan_drop["model_simulation_0.05"]
)

# Later cells add columns to true_positive ('carbon_number', 'media', ...).
# Taking an explicit copy makes it an independent frame, so those assignments
# neither trigger SettingWithCopyWarning nor depend on view/copy semantics.
true_positive = agreed_bio_data_nan_drop[is_true_positive][
    ["bigg", "chemical", "moa", "exchange", "model_simulation_0.05"]
].copy()

print(true_positive)

In [None]:
# Distinct chemical names among the true positives.
print(true_positive['chemical'].unique())

In [None]:
# Add carbon number

def get_carbon_count(met_id, model):
    """Return the number of carbon atoms of the metabolite ``<met_id>_e``.

    Parameters
    ----------
    met_id : str
        Metabolite ID without the ``_e`` suffix; the suffix is appended before
        the lookup.
    model : object
        Model exposing ``metabolites.get_by_id``; the returned metabolite must
        provide an ``elements`` mapping of element symbol to count.

    Returns
    -------
    int or float
        The count stored under ``'C'`` in ``metabolite.elements``, or 0 when
        the metabolite has no carbon entry or no usable element mapping.

    Notes
    -----
    The count is read from the parsed element mapping rather than by testing
    whether the formula string starts with ``'C'``. That prefix test raised
    KeyError for formulas such as ``Cl`` or ``Ca`` (they start with 'C' but
    contain no carbon), failed on a missing formula, and reported 0 for any
    carbon-containing formula not written carbon-first.
    Whatever ``model.metabolites.get_by_id`` raises for an unknown ID propagates.
    """
    metabolite = model.metabolites.get_by_id(f"{met_id}_e")
    # `elements` may be empty or None when there is no parsable formula.
    elements = metabolite.elements or {}
    return elements.get('C', 0)

# Carbon count per row, looked up from the model via each row's 'bigg' ID.
true_positive['carbon_number'] = true_positive['bigg'].apply(lambda x: get_carbon_count(x, model))

In [None]:
def calculate_media(carbon_number, reference_uptake=0.8, reference_carbons=6):
    """Return the amount of a compound that supplies the same carbon as the reference.

    The result is ``reference_uptake * reference_carbons / carbon_number``.
    With the defaults this is ``0.8 * 6 / carbon_number``, matching the
    previously hard-coded reference (0.8 of a six-carbon compound).

    Parameters
    ----------
    carbon_number : int or float
        Number of carbon atoms in the compound.
    reference_uptake : float, optional
        Amount of the reference compound (default 0.8).
    reference_carbons : int or float, optional
        Carbon atoms in the reference compound (default 6).

    Returns
    -------
    int or float
        The carbon-equivalent amount, or 0 when ``carbon_number`` is 0
        (avoids a division by zero for carbon-free compounds).
    """
    if carbon_number == 0:
        return 0
    return reference_uptake * reference_carbons / carbon_number

# Add a 'media' column: the value calculate_media returns for each row's carbon_number
# (0 where carbon_number is 0).
true_positive['media'] = true_positive['carbon_number'].apply(calculate_media)

# Display the table with the new columns.
true_positive

# 4. Test production of PTA with glutamate, mannitol and glycerol as base carbon sources

In [None]:
# Add a sink boundary reaction for pta_f_c so the product has an outlet in the model.
# Later cells use the reaction ID 'SK_pta_f_c' as the objective.
# This call is made outside any `with model:` block.
dm = model.add_boundary(metabolite=model.metabolites.pta_f_c, type="sink")

# Display the new boundary reaction.
dm

In [None]:
# Display the medium currently set on the model.
model.medium

In [None]:
# Display the true-positive table before the production screens are run.
true_positive

In [None]:
def growth_on_new_medium_with_objective(true_positive, model, objective, base_C_exchange, base_C_exchange_concentration, growth_bound):
    """
    Maximise ``objective`` at a fixed growth rate for every supplement in ``true_positive``.

    For each row the medium is rebuilt from ``model.medium`` with
    ``EX_glc__D_e`` and ``EX_glu__L_e`` set to 0 and ``base_C_exchange`` set to
    ``base_C_exchange_concentration``. The row's own exchange is then added:

    * the base row itself gets nothing extra,
    * rows with ``carbon_number > 0`` get ``row['media'] * 0.2``,
    * remaining rows whose ``moa`` starts with "N", "P" or "S" get 10,
    * any other row leaves the medium at the base composition.

    Parameters
    ----------
    true_positive : pandas.DataFrame
        Needs the columns 'bigg', 'exchange', 'carbon_number', 'media' and 'moa'.
    model : cobra model
        Model to simulate. All changes (biomass bounds, medium, objective) are
        made inside a ``with model:`` block, so the model is left as it was
        passed in. Previously the biomass bounds were set outside the block
        and stayed pinned to ``growth_bound`` after the call.
    objective : str
        Reaction ID assigned to ``model.objective``.
    base_C_exchange : str
        Exchange reaction ID of the base carbon source.
    base_C_exchange_concentration : float
        Medium value assigned to ``base_C_exchange``.
    growth_bound : float
        Value used as both lower and upper bound of ``BIOMASS_SALB``.

    Returns
    -------
    pandas.DataFrame
        The same ``true_positive`` object, with a new column
        ``PTA_Production_<base_C_exchange>`` holding one objective value per row
        (the input frame is modified in place).
    """
    production = []

    for _, row in true_positive.iterrows():
        # Base composition, identical for every row.
        medium = model.medium
        medium['EX_glc__D_e'] = 0.0
        medium['EX_glu__L_e'] = 0.0
        medium[base_C_exchange] = base_C_exchange_concentration

        # Supplement for this row; the base row keeps the base composition.
        exchange = row['exchange']
        if exchange != base_C_exchange:
            if row['carbon_number'] > 0:
                medium[exchange] = row['media'] * 0.2
            elif row['moa'].startswith(("N", "P", "S")):
                medium[exchange] = 10

        with model:
            # Fix growth, apply the medium and switch the objective only
            # within this context.
            model.reactions.BIOMASS_SALB.bounds = (growth_bound, growth_bound)
            model.medium = medium
            model.objective = objective
            solution = model.optimize()

        # Record the objective value reached on this medium.
        print(row['bigg'], solution.objective_value)
        production.append(solution.objective_value)

    true_positive[f'PTA_Production_{base_C_exchange}'] = production

    return true_positive

In [None]:
def growth_with_20_more(true_positive, model, objective, base_C_exchange, base_C_exchange_concentration, growth_bound):
    """
    Append a control row: the base carbon source supplemented with 20 % more of itself.

    The row(s) of ``true_positive`` whose 'exchange' equals ``base_C_exchange``
    are copied, relabelled ('Extra ' / 'EXTRA_' prefixes on 'chemical', 'bigg'
    and 'exchange'), and given the objective value obtained with the base
    source at ``base_C_exchange_concentration * 1.2``. Every other
    ``PTA_Production_*`` column of the new row is set to 0.0.

    Parameters
    ----------
    true_positive : pandas.DataFrame
        Needs the columns 'chemical', 'bigg' and 'exchange'.
    model : cobra model
        Model to simulate; bounds, medium and objective are changed only
        inside a ``with model:`` block.
    objective : str
        Reaction ID assigned to ``model.objective``.
    base_C_exchange : str
        Exchange reaction ID of the base carbon source.
    base_C_exchange_concentration : float
        Base medium value; the simulation uses 1.2 times this value.
    growth_bound : float
        Value used as both lower and upper bound of ``BIOMASS_SALB``.

    Returns
    -------
    pandas.DataFrame
        A new frame made of ``true_positive`` followed by the control row(s),
        with a fresh 0..n-1 index. The input frame is not modified.
    """
    # Explicit copy: the selection is edited below and must not alias the input.
    extra_row = true_positive[true_positive['exchange'] == base_C_exchange].copy()

    extra_row['chemical'] = 'Extra ' + extra_row['chemical']
    extra_row['bigg'] = 'EXTRA_' + extra_row['bigg']
    extra_row['exchange'] = 'EXTRA_' + extra_row['exchange']

    medium = model.medium
    medium['EX_glc__D_e'] = 0.0
    medium['EX_glu__L_e'] = 0.0
    medium[base_C_exchange] = base_C_exchange_concentration * 1.2

    with model:
        model.reactions.BIOMASS_SALB.bounds = (growth_bound, growth_bound)
        model.medium = medium
        model.objective = objective
        production = model.optimize().objective_value

    # The control row only has a value for its own base medium; the other
    # production columns are zeroed.
    pta_columns = extra_row.filter(like='PTA_Production_').columns
    extra_row.loc[:, pta_columns] = 0.0
    extra_row[f'PTA_Production_{base_C_exchange}'] = production

    print(extra_row)

    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    return pd.concat([true_positive, extra_row], ignore_index=True)


In [None]:
# Display the rows whose chemical name contains 'Xylose'.
true_positive[true_positive['chemical'].str.contains('Xylose')]

In [None]:
import seaborn as sns
def plot_PTA_production(true_positive, base, base_C_exchange, base_C_exchange_concentration, n_top=13):
    """
    Draw and save a horizontal bar chart of the objective value per supplement for one base medium.

    Rows whose ``PTA_Production_<base_C_exchange>`` value is 0 are left out;
    the rest are sorted ascending so the best supplements end up at the top.
    The figure is written to ``supplement_<base>.png``.

    Colours (later rules override earlier ones):

    * default bar colour for every row,
    * the ``n_top`` highest bars highlighted,
    * 'glyc', 'xyl__D' and 'ile__L' in their own colour,
    * the base row and every 'EXTRA...' row in the colour of the baseline marker.

    Parameters
    ----------
    true_positive : pandas.DataFrame
        Needs 'bigg', 'chemical', 'exchange' and the production column.
        Its index is replaced by the 'bigg' column (kept from the original
        implementation; this is visible to the caller).
    base : str
        Display name of the base medium; used in the title and the file name.
    base_C_exchange : str
        Exchange reaction ID of the base carbon source.
    base_C_exchange_concentration : float
        Not used by the plot; kept so existing calls keep working.
    n_top : int, optional
        How many of the highest bars to highlight (default 13). Fewer bars
        than ``n_top`` is fine: all of them are highlighted.

    Returns
    -------
    None

    Raises
    ------
    IndexError
        If no row with a non-zero production value has ``exchange`` equal to
        ``base_C_exchange`` (no baseline to draw).
    """
    palette4 = sns.color_palette("Spectral", 6)
    pta_col = f'PTA_Production_{base_C_exchange}'

    true_positive.index = true_positive['bigg']

    nonzero = true_positive[true_positive[pta_col] != 0]
    base_line = nonzero[nonzero.exchange == base_C_exchange].iloc[0].at[pta_col]
    # Sort into a new frame rather than in place on a filtered selection.
    plotted = nonzero.sort_values(pta_col)

    plt.figure(figsize=(12, 18))
    bars = plt.barh(plotted['chemical'], plotted[pta_col], color=palette4[-1])

    # Highlight the n_top highest bars; slicing copes with fewer bars than n_top.
    if n_top > 0:
        for bar in bars.patches[-n_top:]:
            bar.set_color(palette4[4])

    # The base row is identified by an exact match on its exchange ID. A
    # substring test of the bigg ID against the exchange ID also matched
    # unrelated compounds (e.g. 'gly' inside 'EX_glyc_e').
    for bar, bigg_id, exchange in zip(bars, plotted['bigg'], plotted['exchange']):
        if exchange == base_C_exchange or bigg_id.startswith("EXTRA"):
            bar.set_color(palette4[0])
        elif bigg_id in ("glyc", "xyl__D", "ile__L"):
            bar.set_color(palette4[1])

    plt.xlabel('PTA Flux')
    plt.ylabel('Added Medium and Supplements')
    plt.title(f'PTA production after adding 20% of chemical into medium of {base}')
    # Dashed line at the value of the unsupplemented base medium.
    plt.axvline(x=base_line, color=palette4[0], linestyle='--')
    plt.xlim(base_line - 0.01,)
    plt.savefig(f'supplement_{base}.png', dpi=300, bbox_inches='tight')


In [None]:
# Screen every true-positive compound as a supplement on three base media, each
# call using 'SK_pta_f_c' as objective and 0.03 as the fixed BIOMASS_SALB bound.
# Each call adds one PTA_Production_<exchange> column.
true_positive = growth_on_new_medium_with_objective(true_positive, model, "SK_pta_f_c", "EX_glu__L_e", 0.96, 0.03)

true_positive = growth_on_new_medium_with_objective(true_positive, model, "SK_pta_f_c", "EX_mnl_e", 0.8, 0.03)

true_positive = growth_on_new_medium_with_objective(true_positive, model, "SK_pta_f_c", "EX_glyc_e", 1.6, 0.03)



In [None]:
# Add one control row per base medium: the base source with 20 % more of itself,
# using the same objective and growth bound as the supplement screens.

true_positive = growth_with_20_more(true_positive, model, "SK_pta_f_c", "EX_glu__L_e", 0.96, 0.03)

true_positive = growth_with_20_more(true_positive, model, "SK_pta_f_c", "EX_mnl_e", 0.8, 0.03)

true_positive = growth_with_20_more(true_positive, model, "SK_pta_f_c", "EX_glyc_e", 1.6, 0.03)



In [None]:
# Display the table including the production columns and control rows.
true_positive

In [None]:
# Write the supplement screen results, index included, into the working directory.
true_positive.to_csv(f"{os.getcwd()}/Salb_supplement.csv", index=True)

In [None]:
# Make one chart per base medium; each call saves 'supplement_<base>.png'.
# plot_PTA_production has no return statement, so the three names below are bound to None.
# NOTE(review): "Glutamaic acid" looks like a typo for "Glutamic acid"; it is part of
# the plot title and the output file name, so it is left unchanged here.

plt_pta_glutamate = plot_PTA_production(true_positive, "Glutamaic acid", "EX_glu__L_e", 0.96)

plt_pta_mannitol = plot_PTA_production(true_positive, "Mannitol", "EX_mnl_e", 0.8)

plt_pta_glycerol = plot_PTA_production(true_positive, "Glycerol", "EX_glyc_e", 1.6)

In [None]:
# Write the final table, index included, into the working directory.
true_positive.to_csv(f"{os.getcwd()}/Salb_true_positive.csv", index=True)

In [None]:
# Flux distribution for PTA production on mannitol plus a glycerol supplement.
with model:
    # Growth fixed at 0.05 here (the supplement screens above used 0.03).
    model.reactions.BIOMASS_SALB.bounds = (0.05, 0.05)

    medium = model.medium
    medium['EX_glc__D_e'] = 0.0
    medium['EX_mnl_e'] = 0.8
    medium['EX_glyc_e'] = 0.1 * 0.8  # glycerol at 10 % of the mannitol value
    model.medium = medium
    model.objective = 'SK_pta_f_c'
    solution = model.optimize()

# Save the fluxes of that solution.
solution.fluxes.to_csv('flux_mnl_glyc.csv')


In [None]:
# Flux distribution for PTA production on glutamate plus a maltose supplement.
with model:
    # Growth fixed at 0.05 here (the supplement screens above used 0.03).
    model.reactions.BIOMASS_SALB.bounds = (0.05, 0.05)

    medium = model.medium
    medium['EX_glc__D_e'] = 0.0
    medium['EX_glu__L_e'] = 0.96
    # NOTE(review): presumably 20 % of the glutamate carbon, converted with
    # 5 carbons (glutamate) / 12 carbons (maltose); confirm.
    medium['EX_malt_e'] = 0.2 * 5 / 12 * 0.96
    model.medium = medium
    model.objective = 'SK_pta_f_c'
    solution = model.optimize()

# Save the fluxes of that solution.
solution.fluxes.to_csv('flux_glu_malt.csv')

In [None]:
# Display the model object.
model