# Test dataset model generation
This notebook tests the dataset generation originally implemented in the Build_dataset notebook. It gives one example for each case: simulated data and experimental data.

### Simulated data

In [5]:
import os
import numpy as np
from simulatedDataset import SimulatedDataset
from metabolicDataset import MetabolicDataset

# Fix NumPy's global seed so repeated runs of this cell start from the same state.
np.random.seed(seed=10)

DIRECTORY = "./"
cobra_name = 'e_coli_core_duplicated'
medium_name = 'e_coli_core'
cobra_file = os.path.join(DIRECTORY, "Dataset_input", cobra_name)
medium_file = os.path.join(DIRECTORY, "Dataset_input", medium_name)
sample_size = 50

# Run cobra to build the simulated dataset.
# Original author's note (unconfirmed, marked "?" in the first version):
# leaving `objective` and `measure` as empty lists makes the default objective
# reaction of the SBML model both the objective and the measure (Y).
parameter = SimulatedDataset(cobra_name=cobra_file,
                             medium_name=medium_file,
                             medium_bound='UB',  # alternative: 'EB'
                             method='pFBA',
                             objective=[],
                             measure=[],
                             sample_size=sample_size)

# Save the dataset. The name is built once and reused for loading, so the
# save and load steps cannot drift apart.
training_file = medium_name + '_' + str(parameter.medium_bound) + '_' + str(sample_size)
# NOTE(review): reduce=False presumably skips model reduction - confirm against save().
parameter.save(DIRECTORY, training_file, reduce=False)

# Load the dataset back under the same name.
# `parameter` is rebound here from the SimulatedDataset to the reloaded MetabolicDataset.
parameter = MetabolicDataset(training_file=training_file)
# parameter.printout()  # uncomment to inspect the reloaded dataset




In [6]:

import os
import time
import numpy as np
import tensorflow as tf
from aMNWtModel import AMNWtModel
from tools import printout

DIRECTORY = './'
SAVE_RESERVOIR = False

# Seed both NumPy and TensorFlow with the same value.
seed = 10
np.random.seed(seed=seed)
tf.random.set_seed(seed)

# Create, train and evaluate an AMN_Wt model on the FBA-simulated training set
# for E. coli core, generated with an upper bound (UB) or exact bound (EB) medium.
# Original author's note: not working with M1 chips (cause not understood).
train_name = 'e_coli_core_UB_50'
objective = ['BIOMASS_Ecoli_core_w_GAM']
reservoir_name = train_name + "_AMN_Wt"
# NOTE(review): this path is not used below - AMNWtModel receives `train_name`.
# Confirm whether AMNWtModel resolves the Dataset_model/ directory on its own.
training_file = os.path.join(DIRECTORY,'Dataset_model/',train_name)

print("---------------------------------------- model ----------------------------------------")

model = AMNWtModel(training_file=train_name,
                   objective=objective,
                   model_type='AMN_Wt',
                   timestep=4,
                   n_hidden=1,
                   hidden_dim=50,
                   scaler=True,
                   train_rate=1e-2,
                   epochs=10,
                   xfold=5,
                   verbose=True,
                   batch_size=7)

# Hold out 10% of the samples as a test set, then print the model settings.
model.train_test_split(test_size=0.1, random_state=seed)
model.printout()

print("---------------------------------------- train and evaluate ----------------------------------------")
# perf_counter() is a monotonic clock meant for measuring elapsed time;
# time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()
_, stats, _ = model.train_evaluate(verbose=False)
delta_time = time.perf_counter() - start_time

# `reservoir` is an alias of `model` (same object, no copy).
reservoir = model

print("---------------------------------------- printing cross-validation results ----------------------------------------")
stats.printout(reservoir_name, delta_time)


print("---------------------------------------- evaluate model on test set ----------------------------------------")
if SAVE_RESERVOIR:
    reservoir_file = os.path.join(DIRECTORY,'Reservoir/',reservoir_name)
    reservoir.save(reservoir_file)

reservoir.printout()

start_time = time.perf_counter()

# Because `reservoir` is `model`, the next line overwrites model.X / model.Y
# with the held-out test split.
# TODO (original author): investigate whether this assignment and the
# model_input() call below are both required.
reservoir.X, reservoir.Y = model.X_test, model.Y_test
X, Y = reservoir.model_input(model.X_test, model.Y_test, verbose=False)
pred, obj, loss = reservoir.evaluate_model(X, Y, verbose=False)
delta_time = time.perf_counter() - start_time
printout('Test set', delta_time, obj, loss)

---------------------------------------- model ----------------------------------------
number of reactions:  154 154
number of metabolites:  72
filtered measurements size:  1
training file: e_coli_core_UB_50
model type: AMN_Wt
model scaler: 1.0
model input dim: 0
model output dim: 0
model medium bound: UB
timestep: 4
nbr hidden layer: 1
hidden layer size: 50
activation function: relu
training epochs: 10
training regression: True
training learn rate: 0.01
training droP_out: 0.25
training batch size: 7
training validation iter: 0
training xfold: 5
training early stopping: False
---------------------------------------- train and evaluate ----------------------------------------
train = -25.47 test = -41.32 loss-train = 0.028367 loss-test = 0.027396
train = -34.05 test = -21.94 loss-train = 0.023299 loss-test = 0.024118
train = -25.56 test = -42.41 loss-train = 0.023264 loss-test = 0.022864
train = -27.96 test = -18.93 loss-train = 0.034178 loss-test = 0.034799
train = -27.78 test = -66.8

### Experimental data

In [7]:
import os
import numpy as np
from metabolicDataset import MetabolicDataset
from experimentalDataset import ExperimentalDataset



# Fix NumPy's global seed so repeated runs of this cell start from the same state.
np.random.seed(seed=10)

DIRECTORY = "./"
cobra_name = 'iML1515_EXP'  # reduced iML1515 model
medium_name = 'iML1515_EXP'
cobra_file = os.path.join(DIRECTORY, "Dataset_input", cobra_name)
medium_file = os.path.join(DIRECTORY, "Dataset_input", medium_name)

# Get data: build the dataset from the experimental input files.
parameter = ExperimentalDataset(cobra_name=cobra_file,
                                medium_name=medium_file,
                                medium_bound='UB',
                                medium_size=38,
                                method='EXP',
                                verbose=False)

# Save the dataset. The name is built once and reused for loading, so the
# save and load steps cannot drift apart.
training_file = medium_name + '_' + str(parameter.medium_bound)
parameter.save(DIRECTORY, training_file, reduce=False)

# Load the dataset back under the same name.
# `parameter` is rebound here from the ExperimentalDataset to the reloaded MetabolicDataset.
parameter = MetabolicDataset(training_file=training_file)
# parameter.printout()  # uncomment to inspect the reloaded dataset






In [8]:
import os
import time
import numpy as np
import tensorflow as tf
from aMNWtModel import AMNWtModel
from tools import printout

DIRECTORY = './'
SAVE_RESERVOIR = False

# Seed both NumPy and TensorFlow with the same value.
seed = 10
np.random.seed(seed=seed)
tf.random.set_seed(seed)

# Create, train and evaluate an AMN_Wt model on the experimental iML1515
# training set ('iML1515_EXP_UB').
# Original author's note: not working with M1 chips (cause not understood).
train_name = 'iML1515_EXP_UB'
objective = ['BIOMASS_Ec_iML1515_core_75p37M']
reservoir_name = train_name + "_AMN_Wt"

# NOTE(review): this path is not used below - AMNWtModel receives `train_name`.
# Confirm whether AMNWtModel resolves the Dataset_model/ directory on its own.
training_file = os.path.join(DIRECTORY,'Dataset_model/',train_name)

print("---------------------------------------- model ----------------------------------------")

model = AMNWtModel(training_file=train_name,
                   objective=objective,
                   model_type='AMN_Wt',
                   timestep=4,
                   n_hidden=1,
                   hidden_dim=50,
                   scaler=True,
                   train_rate=1e-2,
                   epochs=10,
                   xfold=5,
                   verbose=True,
                   batch_size=7)

# Hold out 10% of the samples as a test set, then print the model settings.
model.train_test_split(test_size=0.1, random_state=seed)
model.printout()

print("---------------------------------------- train and evaluate ----------------------------------------")
# perf_counter() is a monotonic clock meant for measuring elapsed time;
# time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()
_, stats, _ = model.train_evaluate(verbose=False)
delta_time = time.perf_counter() - start_time

# `reservoir` is an alias of `model` (same object, no copy).
reservoir = model

print("---------------------------------------- printing cross-validation results ----------------------------------------")
stats.printout(reservoir_name, delta_time)


print("---------------------------------------- evaluate model on test set ----------------------------------------")
if SAVE_RESERVOIR:
    reservoir_file = os.path.join(DIRECTORY,'Reservoir/',reservoir_name)
    reservoir.save(reservoir_file)

reservoir.printout()

start_time = time.perf_counter()

# Because `reservoir` is `model`, the next line overwrites model.X / model.Y
# with the held-out test split.
# TODO (original author): investigate whether this assignment and the
# model_input() call below are both required.
reservoir.X, reservoir.Y = model.X_test, model.Y_test
X, Y = reservoir.model_input(model.X_test, model.Y_test, verbose=False)
pred, obj, loss = reservoir.evaluate_model(X, Y, verbose=False)
delta_time = time.perf_counter() - start_time
printout('Test set', delta_time, obj, loss)

---------------------------------------- model ----------------------------------------
number of reactions:  543 1
number of metabolites:  1080
filtered measurements size:  1
training file: iML1515_EXP_UB
model type: AMN_Wt
model scaler: 1.0
model input dim: 0
model output dim: 0
model medium bound: UB
timestep: 4
nbr hidden layer: 1
hidden layer size: 50
activation function: relu
training epochs: 10
training regression: True
training learn rate: 0.01
training droP_out: 0.25
training batch size: 7
training validation iter: 0
training xfold: 5
training early stopping: False
---------------------------------------- train and evaluate ----------------------------------------
train = -518.07 test = -346.42 loss-train = 0.044226 loss-test = 0.044243
train = -0.09 test = -0.13 loss-train = 0.002123 loss-test = 0.002107
train = -0.05 test = -0.01 loss-train = 0.000436 loss-test = 0.000444
train = -0.01 test = -0.10 loss-train = 0.000334 loss-test = 0.000333
train = -1.48 test = -2.09 loss-tr