In [None]:
import os
import sys

sys.path.append('../src')  # must precede the project-local imports (omg, plot_multiomics)

import cobra
import pandas as pd
import omg 
from plot_multiomics import *

# Imported after the star import so these names cannot be shadowed by it.
import numpy as np
from tqdm import tqdm

In [None]:
# Starting values for the external metabolites, keyed by model metabolite id.
# (Units are not stated in this notebook; confirm against the omg documentation.)
ext_metabolite_levels = {
    'glc__D_e': 22.203,
    'nh4_e': 18.695,
    'pi_e': 69.454,
    'so4_e': 2.0,
    'mg2_e': 2.0,
    'k_e': 21.883,
    'na1_e': 103.7,
    'cl_e': 27.25,
    'isoprenol_e': 0.0,
    'ac_e': 0.0,
    'for_e': 0.0,
    'lac__D_e': 0.0,
    'etoh_e': 0.0,
}

# Single configuration dictionary read by every later cell and passed to the omg helpers.
user_params = {
    # --- host organism and model files ---
    'host': 'ecoli',  # ecoli or ropacus
    'modelfile': '../data/models/iJO1366_MVA.json',
    'cerevisiae_modelfile': '../data/models/iMM904.json',
    # --- simulation time grid ---
    'timestart': 0.0,
    'timestop': 8.0,
    'numtimepoints': 9,
    # --- input / output locations ---
    # TODO: Update designs file & designs file path
    'designsfile': 'rec_strains.csv',
    'designsfilepath': '../data/art_output',
    'mapping_file': '../mapping/inchikey_to_cid.txt',
    'output_file_path': 'data/omg_output',
    'edd_omics_file_path': '../data/omg_output/edd/',
    # --- design-space size ---
    'numreactions': 8,
    'numinstances': 10,
    # --- culture set-up ---
    'ext_metabolites': ext_metabolite_levels,
    'initial_OD': 0.01,
    'BIOMASS_REACTION_ID': 'BIOMASS_Ec_iJO1366_core_53p95M',
}

### 1. Getting and preparing the metabolic model

In [None]:
# Load the metabolic model named in the configuration and select GLPK as its solver
file_name = user_params['modelfile']
model = cobra.io.load_json_model(file_name)
model.solver = 'glpk'

# Lower-bound the isoprenol and formate exchange fluxes (minimum production) and
# lower-bound the oxygen exchange flux at -8.0 (limits oxygen intake)
iso = 'EX_isoprenol_e'
iso_cons = model.problem.Constraint(model.reactions.get_by_id(iso).flux_expression, lb=0.20)
for_cons = model.problem.Constraint(model.reactions.get_by_id('EX_for_e').flux_expression, lb=0.10)
o2_cons = model.problem.Constraint(model.reactions.get_by_id('EX_o2_e').flux_expression, lb=-8.0)
for exchange_constraint in (iso_cons, for_cons, o2_cons):
    model.add_cons_vars(exchange_constraint)

# Keep several central carbon metabolism fluxes inside the more realistic window [-1, 1]
CC_rxn_names = ['ACCOAC','MDH','PTAr','CS','ACACT1r','PPC','PPCK','PFL']
for reaction in CC_rxn_names:
    reaction_flux = model.reactions.get_by_id(reaction).flux_expression
    reaction_constraint = model.problem.Constraint(reaction_flux, lb=-1.0, ub=1.0)
    model.add_cons_vars(reaction_constraint)

# Second model with a higher minimum isoprenol production; it is used later as the
# MOMA reference when simulating bioengineered strains
modelHI = model.copy()
iso_cons = modelHI.problem.Constraint(modelHI.reactions.get_by_id(iso).flux_expression, lb=0.25)
modelHI.add_cons_vars(iso_cons)


### 2. Obtaining time series for the WT

In [None]:
# Simulation grid: evenly spaced time points between start and stop, plus the step size
t0, tf = user_params['timestart'], user_params['timestop']
points = user_params['numtimepoints']
tspan, delt = np.linspace(t0, tf, num=points, retstep=True, dtype='float64')
grid = (tspan, delt)

# Run the wild-type model over the grid; the helper returns five objects, unpacked below
wt_time_series = omg.get_flux_time_series(model, user_params['ext_metabolites'], grid, user_params)
solution_TS, model_TS, cell, Emets, Erxn2Emet = wt_time_series


In [None]:
# Repeat the time-series calculation for the higher-isoprenol model created above
hi_time_series = omg.get_flux_time_series(modelHI, user_params['ext_metabolites'], grid, user_params)
solutionHI_TS, modelHI_TS, cellHI, EmetsHI, Erxn2EmetHI = hi_time_series


### 3. Getting bioengineered flux profiles through MOMA

In [None]:
# Read the file with suggested designs (i.e. reaction KOs and overexpressions),
# keep the top-ranked ones and label them 'Strain 1', 'Strain 2', ...
N_TOP_DESIGNS = 10  # number of designs kept after ranking by the 'pred' column
RANDOM_SEED = 42    # fixed seed so the randomized design below is identical on every run

designs_path = f'{user_params["designsfilepath"]}/{user_params["designsfile"]}'
rec_df = (
    pd.read_csv(designs_path, index_col=0)
    .sort_values(by='pred', ascending=False)  # best predicted design first
    .head(N_TOP_DESIGNS)
    .reset_index(drop=True)                   # rows are now labelled 0..n-1 by rank
)
rec_df['Line Name'] = [f'Strain {i}' for i in range(1, rec_df.shape[0] + 1)]

# Move the freshly added 'Line Name' column to the front
cols = rec_df.columns.tolist()
cols = cols[-1:] + cols[:-1]
rec_df = rec_df[cols].copy()  # explicit copy: the frame is modified in place just below

# Replace the last (lowest-ranked) design with random values drawn from {0, 1, 2}.
# NOTE(review): every column except 'Line Name' is overwritten, which includes 'pred'
# and any other non-design column in the file -- confirm that this is intended.
num_columns_to_update = len(rec_df.columns) - 1  # Excluding the first column
if not rec_df.empty:
    rng = np.random.default_rng(RANDOM_SEED)
    rec_df.iloc[-1, 1:] = rng.integers(0, 3, size=num_columns_to_update)

print(rec_df.shape)
rec_df

We then use MOMA to calculate flux profiles at each time point for the recommended strains. Instead of using the solution time series of the initial model as the reference, we use the solution time series of the model with higher isoprenol production. The reason is that, otherwise, we would never see an increase in isoprenol production, since MOMA by design minimizes the change in fluxes with respect to its reference. Remember that our goal here is just to create realistic flux profiles that can be used to showcase our straindesignXAI; this approach is good enough for that purpose:

In [None]:
%%time
solutionsMOMA_TS = {}
cols = rec_df.columns[:-2]

if user_params['numinstances'] not in [None, 0]:
    num_strains = user_params['numinstances']
else:
    num_strains = rec_df.shape[0]

for i in tqdm(range(num_strains)):
    design = rec_df[cols].loc[i]
    if design['Line Name']=='WT':
        solutionsMOMA_TS[i] = omg.getBEFluxes(model_TS, design, solution_TS, grid)
    else:
        solutionsMOMA_TS[i] = omg.getBEFluxes(model_TS, design, solutionHI_TS, grid)

### 4. Producing the external metabolite concentrations for each recommendation

In [None]:
# Integrate the MOMA flux time series of every simulated strain and store the
# resulting (cell, Emets) pair under the strain's index.
# Note: `cell` and `Emets` are notebook-level names that are rebound on every
# iteration, so after this loop they hold the results of the LAST strain.
cellsEmetsBE = {}
for i in range(num_strains):
    cell, Emets = omg.integrate_fluxes(solutionsMOMA_TS[i], model_TS, user_params['ext_metabolites'], grid, user_params)
    cellsEmetsBE[i] = (cell, Emets) 



We can visualize the obtained concentrations for a recommendation:


In [None]:
# Pick one recommendation (index 0 is the top-ranked design) and plot its results
i = 0
metabolites_to_plot = ['glc__D_e','isoprenol_e','ac_e','for_e','lac__D_e','etoh_e']
cellBE, EmetsBE = cellsEmetsBE[i]
plot_DO_extmets(cellBE, EmetsBE[metabolites_to_plot])

In [None]:
# Display the external-metabolite table of the recommendation selected in the previous cell
EmetsBE

### Visualizing actual vs predicted isoprenol concentrations for recommendations


In [None]:
# Collect the isoprenol value at the final row of each simulated strain's
# external-metabolite table and attach it to the corresponding design.
# NOTE(review): the row label 9 is hard-coded; presumably it is the last integrated
# time point for the grid in user_params -- confirm if the grid settings change.
FINAL_TIME_LABEL = 9

# Iterate over `num_strains` (the strains that were actually simulated) rather than
# user_params['numinstances'], which may be None/0 or differ from the number of designs.
production = [
    cellsEmetsBE[i][1].loc[FINAL_TIME_LABEL, 'isoprenol_e']
    for i in range(num_strains)
]

# Keep only the designs that were simulated so both lengths always match
production_df = rec_df.head(num_strains).copy()
production_df['Actual Isoprenol [mM]'] = production
production_df

In [None]:
# Persist the designs together with their simulated isoprenol production
results_csv = '../data/recommendations_with_production.csv'
production_df.to_csv(results_csv)

### Generate omics data for the best recommendation

In [None]:
# One dictionary per omics type, keyed by time point
proteomics_timeseries = {}
transcriptomics_timeseries = {}
metabolomics_timeseries = {}
metabolomics_oldids_timeseries = {}
fluxomics_timeseries = {}

# Strain index 0 is the top-ranked recommendation
best_solution_TS = solutionsMOMA_TS[0]
for t in tspan:
    solution_t = best_solution_TS[t]
    fluxomics_timeseries[t] = solution_t.fluxes.to_dict()

    # With old_ids=True the helper returns four objects, stored per time point below
    multiomics_t = omg.get_multiomics(model, solution_t, user_params['mapping_file'], old_ids=True)
    (proteomics_timeseries[t],
     transcriptomics_timeseries[t],
     metabolomics_timeseries[t],
     metabolomics_oldids_timeseries[t]) = multiomics_t

In [None]:
# Write the files for the best recommendation ('Strain 1', i.e. strain index 0).
# The OD and external-metabolite data are read explicitly from cellsEmetsBE[0]:
# the notebook-level names `cell` / `Emets` are rebound by earlier loops and hold the
# LAST simulated strain, which would not match the omics written here (all of which
# come from solutionsMOMA_TS[0]).
best_cell, best_Emets = cellsEmetsBE[0]

omg.write_experiment_description_file(user_params['edd_omics_file_path'], line_name='Strain 1', label='_BT')
omg.write_OD_data(best_cell, user_params['edd_omics_file_path'], line_name='Strain 1', label='_BT')
omg.write_external_metabolite(best_Emets, user_params['edd_omics_file_path'], line_name='Strain 1', label='_BT')

# Full omics data sets
for omics_data, omics_type in [
    (fluxomics_timeseries, 'fluxomics'),
    (proteomics_timeseries, 'proteomics'),
    (transcriptomics_timeseries, 'transcriptomics'),
    (metabolomics_timeseries, 'metabolomics'),
]:
    omg.write_omics_files(omics_data, omics_type, user_params, line_name='Strain 1', label='_BT')

# Reduced ("SM") data sets restricted to a hand-picked list of genes, proteins and metabolites.
# NOTE(review): 'P77580' appears twice in proteinsSM, so the proteomics subset ends up with
# 9 distinct entries rather than 10 -- confirm whether a different protein was intended.
genesSM       = ['b0180','b2708','b3197','b1094','b2224','b3256','b2316','b3255','b0185','b1101'] 
proteinsSM    = ['P17115','P45395','P0A6A8','P76461','P77580','P24182','P0A9Q5','P0ABD5','P77580','P00893']
metabolitesSM = ['CID:1549101','CID:175','CID:164533','CID:15938965','CID:21604863','CID:15939608','CID:27284','CID:1038','CID:16741146','CID:1778309']

transcriptomics_timeseriesSM = {
    t: {gene: transcriptomics_timeseries[t][gene] for gene in genesSM} for t in tspan
}
proteomics_timeseriesSM = {
    t: {protein: proteomics_timeseries[t][protein] for protein in proteinsSM} for t in tspan
}
metabolomics_timeseriesSM = {
    t: {metab: metabolomics_timeseries[t][metab] for metab in metabolitesSM} for t in tspan
}

for omics_data, omics_type in [
    (proteomics_timeseriesSM, 'proteomics'),
    (transcriptomics_timeseriesSM, 'transcriptomics'),
    (metabolomics_timeseriesSM, 'metabolomics'),
]:
    omg.write_omics_files(omics_data, omics_type, user_params, line_name='Strain 1', label='_BTSM')
    