# Test dataset model generation
This notebook tests the dataset generation originally implemented in the Build_dataset notebook, then trains and evaluates a model on each generated dataset. One example is given for each case: simulated data and experimental data.

### Simulated data

In [5]:
import os
import numpy as np
from simulatedDataset import SimulatedDataset
from metabolicDataset import MetabolicDataset

# Seed NumPy's global RNG before the dataset is built.
np.random.seed(10)

# Input files: the cobra model and the medium description live in the same folder.
medium_dir = "./Dataset_input"
cobra_dir = "./Dataset_input"
medium_file = 'e_coli_core.csv'
cobra_file = 'e_coli_core_duplicated.xml'
cobra_path = os.path.join(cobra_dir, cobra_file)
medium_path = os.path.join(medium_dir, medium_file)

# Generate the dataset using cobra (pFBA method, 50 samples).
parameter = SimulatedDataset(
    input_cobra_file=cobra_path,
    medium_file=medium_path,
    medium_bound='UB',  # the original author noted 'EB' as an alternative
    method='pFBA',
    objective=[],
    measure=[],
    sample_size=50,
)

# Save the dataset into a npz file and the cobra model in the given directory.
dataset_dir = "./Dataset"
dataset_name = 'e_coli_core_UB_50'
parameter.save(dataset_dir=dataset_dir, dataset_name=dataset_name, reduce=False)

# Reload the dataset from the npz file that was just written;
# `parameter` now refers to the reloaded MetabolicDataset.
dataset_file = os.path.join(dataset_dir, f"{dataset_name}.npz")
parameter = MetabolicDataset(dataset_file=dataset_file)
# Uncomment to inspect the loaded dataset:
# parameter.printout()

In [6]:
import time
import numpy as np
import tensorflow as tf
from aMNWtModel import AMNWtModel
from tools import printout

# Seed both NumPy and TensorFlow with the same value.
seed = 10
np.random.seed(seed=seed)
tf.random.set_seed(seed)

print("---------------------------------------- model ----------------------------------------")
# Build the AMN-Wt model on the simulated e_coli_core dataset saved to ./Dataset.
model = AMNWtModel(dataset_file="./Dataset/e_coli_core_UB_50.npz",
                   objective=['BIOMASS_Ecoli_core_w_GAM'],
                   timestep=4,
                   n_hidden=1,
                   hidden_dim=50,
                   scaler=True,
                   train_rate=1e-2,
                   epochs=10,
                   xfold=5,
                   verbose=True,
                   batch_size=7)

# Split off a test set (test_size=0.1), reusing the seed for the split.
model.train_test_split(test_size=0.1, random_state=seed)
model.printout()

print("---------------------------------------- train and evaluate ----------------------------------------")
# time.perf_counter() is a monotonic, high-resolution clock, so the measured
# duration cannot be skewed by system clock adjustments (unlike time.time()).
start_time = time.perf_counter()
_, stats, _ = model.train_evaluate(verbose=False)
delta_time = time.perf_counter() - start_time

# `reservoir` is only a second name for the same object as `model`.
reservoir = model

print("---------------------------------------- printing cross-validation results ----------------------------------------")
reservoir_name = "e_coli_core_UB_50_AMN_Wt"
stats.printout(reservoir_name, delta_time)

# reservoir.save("./Reservoir/e_coli_core_UB_50_AMN_Wt")
# reservoir.printout()

print("---------------------------------------- evaluate model on test set ----------------------------------------")

start_time = time.perf_counter()
# NOTE(review): the original author flagged the next two lines as needing
# investigation. Because `reservoir` is `model`, the first line overwrites
# model.X / model.Y with the test split.
reservoir.X, reservoir.Y = model.X_test, model.Y_test
X, Y = reservoir.model_input(model.X_test, model.Y_test, verbose=False)
pred, obj, loss = reservoir.evaluate_model(X, Y, verbose=False)
delta_time = time.perf_counter() - start_time
printout('Test set', delta_time, obj, loss)

---------------------------------------- model ----------------------------------------
number of reactions:  154 154
number of metabolites:  72
filtered measurements size:  1
dataset file: ./Dataset/e_coli_core_UB_50.npz
model type: AMNWt
model scaler: 1.0
model medium bound: UB
timestep: 4
training set size (50, 20) (50, 1)
nbr hidden layer: 1
hidden layer size: 50
activation function: relu
training epochs: 10
training regression: True
training learn rate: 0.01
training droP_out: 0.25
training batch size: 7
training validation iter: 0
training xfold: 5
training early stopping: False
---------------------------------------- train and evaluate ----------------------------------------
train = -25.47 test = -41.32 loss-train = 0.028367 loss-test = 0.027396
train = -34.05 test = -21.94 loss-train = 0.023299 loss-test = 0.024118
train = -25.56 test = -42.41 loss-train = 0.023264 loss-test = 0.022864
train = -27.96 test = -18.93 loss-train = 0.034178 loss-test = 0.034799
train = -27.78 test

### Experimental data

In [7]:
import os
import numpy as np
from metabolicDataset import MetabolicDataset
from experimentalDataset import ExperimentalDataset

# Seed NumPy's global RNG before the dataset is built.
np.random.seed(10)

# Input files: the cobra model and the experimental medium data live in the same folder.
medium_dir = "./Dataset_input"
cobra_dir = "./Dataset_input"
medium_file = 'iML1515_EXP.csv'
cobra_file = 'iML1515_EXP.xml'
cobra_path = os.path.join(cobra_dir, cobra_file)
medium_path = os.path.join(medium_dir, medium_file)

# Generate the dataset using the cobra model and experimental data.
parameter = ExperimentalDataset(
    input_cobra_file=cobra_path,
    medium_file=medium_path,
    medium_bound='UB',
    medium_size=38,
    method='EXP',
    verbose=False,
)

# Save the dataset into a npz file and the cobra model in the given directory.
dataset_dir = "./Dataset"
dataset_name = 'iML1515_EXP_UB'
parameter.save(dataset_dir=dataset_dir, dataset_name=dataset_name, reduce=False)

# Reload the dataset from the npz file that was just written;
# `parameter` now refers to the reloaded MetabolicDataset.
dataset_file = os.path.join(dataset_dir, f"{dataset_name}.npz")
parameter = MetabolicDataset(dataset_file=dataset_file)
# Uncomment to inspect the loaded dataset:
# parameter.printout()

In [8]:
import time
import numpy as np
import tensorflow as tf
from aMNWtModel import AMNWtModel
from tools import printout

# Seed both NumPy and TensorFlow with the same value.
seed = 10
np.random.seed(seed=seed)
tf.random.set_seed(seed)

print("---------------------------------------- model ----------------------------------------")
# Build the AMN-Wt model on the experimental iML1515 dataset saved to ./Dataset.
model = AMNWtModel(dataset_file="./Dataset/iML1515_EXP_UB.npz",
                   objective=['BIOMASS_Ec_iML1515_core_75p37M'],
                   timestep=4,
                   n_hidden=1,
                   hidden_dim=50,
                   scaler=True,
                   train_rate=1e-2,
                   epochs=10,
                   xfold=5,
                   verbose=True,
                   batch_size=7)

# Split off a test set (test_size=0.1), reusing the seed for the split.
model.train_test_split(test_size=0.1, random_state=seed)
model.printout()

print("---------------------------------------- train and evaluate ----------------------------------------")
# time.perf_counter() is a monotonic, high-resolution clock, so the measured
# duration cannot be skewed by system clock adjustments (unlike time.time()).
start_time = time.perf_counter()
_, stats, _ = model.train_evaluate(verbose=False)
delta_time = time.perf_counter() - start_time

# `reservoir` is only a second name for the same object as `model`.
reservoir = model

print("---------------------------------------- printing cross-validation results ----------------------------------------")
# Label passed to the cross-validation printout: a plain "<dataset>_AMN_Wt"
# name rather than the dataset file path with a suffix appended.
reservoir_name = "iML1515_EXP_UB_AMN_Wt"
stats.printout(reservoir_name, delta_time)

# reservoir.save("./Reservoir/iML1515_EXP_UB_AMN_Wt")
# reservoir.printout()

print("---------------------------------------- evaluate model on test set ----------------------------------------")

start_time = time.perf_counter()
# NOTE(review): the original author flagged the next two lines as needing
# investigation. Because `reservoir` is `model`, the first line overwrites
# model.X / model.Y with the test split.
reservoir.X, reservoir.Y = model.X_test, model.Y_test
X, Y = reservoir.model_input(model.X_test, model.Y_test, verbose=False)
pred, obj, loss = reservoir.evaluate_model(X, Y, verbose=False)
delta_time = time.perf_counter() - start_time
printout('Test set', delta_time, obj, loss)



---------------------------------------- model ----------------------------------------
number of reactions:  543 1
number of metabolites:  1080
filtered measurements size:  1
dataset file: ./Dataset/iML1515_EXP_UB.npz
model type: AMNWt
model scaler: 1.0
model medium bound: UB
timestep: 4
training set size (110, 38) (110, 1)
nbr hidden layer: 1
hidden layer size: 50
activation function: relu
training epochs: 10
training regression: True
training learn rate: 0.01
training droP_out: 0.25
training batch size: 7
training validation iter: 0
training xfold: 5
training early stopping: False
---------------------------------------- train and evaluate ----------------------------------------
train = -518.07 test = -346.42 loss-train = 0.044226 loss-test = 0.044243
train = -0.09 test = -0.13 loss-train = 0.002123 loss-test = 0.002107
train = -0.05 test = -0.01 loss-train = 0.000436 loss-test = 0.000444
train = -0.01 test = -0.10 loss-train = 0.000334 loss-test = 0.000333
train = -1.48 test = -2.