## Train and evaluate the model using scikit-learn and the Keras wrapper

In [25]:
import tensorflow as tf
from aMNWtModel import AMNWtModel
from aMRNNModel import AMRNNModel


# Model class under test. Swap in AMRNNModel (imported above) to run the
# recurrent variant instead.
model_class = AMNWtModel

# Seed TensorFlow; the same seed is reused for the train/test split below.
seed = 10
# np.random.seed(seed=seed)
tf.random.set_seed(seed)

# --- Alternative dataset configurations (kept for reference) ---
# dataset_file = "./Dataset/IJN1463_EXP_UB_Anne.npz"
# objective=['BIOMASS_KT2440_WT3']

# dataset_file = "./Dataset/IJN1463_10_UB.npz"
# objective=['BIOMASS_KT2440_WT3']

# dataset_file = "./Dataset/e_coli_core_UB_100.npz"
# objective=['BIOMASS_Ecoli_core_w_GAM']
# epoch = 200
# batch_size = 7
# uptake_max_index = None

# --- Active configuration ---
# `epoch` and `batch_size` are consumed by the later training cells; the
# constructor below receives its own literal epochs/batch_size values.
dataset_file = "./Dataset/biolog_iML1515_EXP_UB.npz"
objective = ['BIOMASS_Ec_iML1515_core_75p37M']
epoch = 1  # 20
batch_size = 30
uptake_max_index = 151

print("---------------------------------------- model ----------------------------------------")
model_kwargs = dict(
    dataset_file=dataset_file,
    objective=objective,
    timestep=4,
    # n_hidden=1,
    hidden_dim=50,
    epochs=50,
    verbose=True,
    batch_size=7,
    uptake_max_index=uptake_max_index,
)
model = model_class(**model_kwargs)
model.printout()

# Preprocessing: split first, then scale, then apply the model-specific step.
from sklearn.preprocessing import MinMaxScaler, StandardScaler, MaxAbsScaler
from tools import MaxScaler

scaler = MaxScaler()
model.train_test_split(test_size=0.1, random_state=seed)
model.preprocess(scaler)
model.preprocessing_for_specific_model()

---------------------------------------- model ----------------------------------------
number of metabolites:  1877
filtered measurements size:  1
dataset file: ./Dataset/biolog_iML1515_EXP_UB.npz
model type: AMNWt
model medium bound: UB
timestep: 4
training set size (17400, 430) (17400, 1)
training epochs: 50
training regression: True
training batch size: 7
training validation iter: 0
training early stopping: False


In [28]:
# cross validation
from sklearn.metrics import make_scorer
from sklearn.model_selection import KFold, cross_validate
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor

# Wrap the model's Keras builder so scikit-learn can drive it during
# cross-validation.
estimator = KerasRegressor(
    build_fn=model.build_model,
    epochs=epoch,
    batch_size=batch_size,
    verbose=0,
)

# Metrics reported for every fold; the keys become the suffixes of the
# `test_*` / `train_*` entries in the results dict.
scoring = {
    name: make_scorer(metric)
    for name, metric in (
        ("loss_constraint", model.loss_constraint),
        ("mse", model.mse),
        ("R2", model.R2),
    )
}

# The early-stopping callback is created but deliberately not passed to fit():
# replace the empty dict with {'callbacks': [callback]} to enable it.
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)
fit_params = {}

kfold = KFold(n_splits=5, shuffle=True, random_state=seed)

results = cross_validate(
    estimator,
    model.X_train,
    model.Y_train,
    cv=kfold,
    n_jobs=5,
    scoring=scoring,
    fit_params=fit_params,
    return_train_score=True,
)
results

{'fit_time': array([244.11587715, 244.32231021, 242.90950251, 238.71664691,
        243.06635141]),
 'score_time': array([27.58643961, 20.34151244, 27.91285682, 27.53911853, 30.4492178 ]),
 'test_loss_constraint': array([0.00044196, 0.00039969, 0.00042471, 0.00053706, 0.0004263 ]),
 'train_loss_constraint': array([0.00044196, 0.0003997 , 0.0004247 , 0.00053706, 0.0004263 ]),
 'test_mse': array([0.11339866, 0.10475939, 0.10861337, 0.16975524, 0.10407544]),
 'train_mse': array([0.11052044, 0.10287896, 0.10883499, 0.17306297, 0.10495039]),
 'test_R2': array([-8.59873990e-02, -1.69030148e-04, -5.63728762e-02, -6.76422977e-01,
        -2.16543083e-02]),
 'train_R2': array([-0.0759675 , -0.00260751, -0.05543267, -0.67322491, -0.01536882])}

In [29]:
# Configurations tried so far:
# AMRNN with 1 layer as RNNCell input
# AMRNN with 2 layer as RNNCell input, hidden layer 50
# AMNWt with 2 layer as RNNCell input hidden layer 500
# AMRNN with 2 layer as RNNCell input, hidden layer 50
import pandas as pd

# Tabulate the cross-validation results (one column per timing/score entry)
# and show summary statistics across the folds.
df = pd.DataFrame(data=results)
df.describe()

Unnamed: 0,fit_time,score_time,test_loss_constraint,train_loss_constraint,test_mse,train_mse,test_R2,train_R2
count,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0
mean,242.626138,26.765829,0.000446,0.000446,0.12012,0.12005,-0.168121,-0.16452
std,2.272363,3.789005,5.3e-05,5.3e-05,0.027994,0.02979,0.286035,0.285913
min,238.716647,20.341512,0.0004,0.0004,0.104075,0.102879,-0.676423,-0.673225
25%,242.909503,27.539119,0.000425,0.000425,0.104759,0.10495,-0.085987,-0.075967
50%,243.066351,27.58644,0.000426,0.000426,0.108613,0.108835,-0.056373,-0.055433
75%,244.115877,27.912857,0.000442,0.000442,0.113399,0.11052,-0.021654,-0.015369
max,244.32231,30.449218,0.000537,0.000537,0.169755,0.173063,-0.000169,-0.002608


In [33]:
# Train a single model on the training split, then score it on both splits.
AMNWt_model = model.build_model()
history = AMNWt_model.fit(
    model.X_train,
    model.Y_train,
    epochs=epoch,
    batch_size=batch_size,
    verbose=0,
)

# R2 is measured on the training split, Q2 on the held-out test split.
train_pred = AMNWt_model.predict(model.X_train)
print("R2 :", model.R2(model.Y_train, train_pred))
test_pred = AMNWt_model.predict(model.X_test)
print("Q2 :", model.R2(model.Y_test, test_pred))

R2 : -0.011620220954988492
Q2 : -0.018134945292827664


## Search for hyperparameters

In [34]:
from sklearn.model_selection import RandomizedSearchCV

# Fresh wrapper for the search. The epochs/batch_size given here are only the
# starting values; each sampled candidate overrides them.
estimator = KerasRegressor(build_fn=model.build_model,
                          epochs=epoch,
                          batch_size=batch_size,
                          verbose=0)

# Candidate values to sample from. The key must be `epochs`, the name of the
# Keras fit() argument: the legacy `nb_epoch` spelling is not an argument of
# fit(), so it is never forwarded and every candidate would train for the same
# number of epochs.
distributions = dict(batch_size=[7, 20],
                     epochs=[2, 100],
                    #  hidden_dim=[1,2],
                     )

# Same metrics as the cross-validation cell.
scoring = {"loss_constraint": make_scorer(model.loss_constraint),
           "mse": make_scorer(model.mse),
           "R2": make_scorer(model.R2),
           }

# With several scorers, `refit` must name the metric used to pick and refit
# the best candidate; R2 is the one where larger is better.
clf = RandomizedSearchCV(estimator,
                         distributions,
                         scoring=scoring,
                         refit="R2",
                         random_state=0)

# Tune on the training split only, so the test split remains an unbiased
# hold-out for the final Q2 evaluation.
search = clf.fit(model.X_train, model.Y_train)



## Save and load the model

In [35]:
import tensorflow as tf

# Alternative: run this section with the AMNWt variant instead.
# from aMNWtModel import AMNWtModel, RNNCell
# model_class = AMNWtModel
# model_file = "Models/AMNWt_model.keras"

# NOTE(review): assigned here but not passed to the constructor below —
# confirm whether the model should receive it.
uptake_max_index = -1

# Active choice: the AMRNN variant and the file it is saved to.
from aMRNNModel import AMRNNModel, RNNCell
model_class = AMRNNModel
model_file = "Models/AMRNN_model.keras"

seed = 10
tf.random.set_seed(seed)

# Active dataset and training settings used by the fit() calls in this section.
dataset_file = "./Dataset/e_coli_core_UB_100.npz"
objective = ['BIOMASS_Ecoli_core_w_GAM']
epoch = 200
batch_size = 7

# Alternative dataset configuration (kept for reference):
# dataset_file = "./Dataset/biolog_iML1515_EXP_UB.npz"
# objective=['BIOMASS_Ec_iML1515_core_75p37M']
# epoch = 1 #20
# batch_size = 30
# uptake_max_index=151

# Dataset plus model structure
print("---------------------------------------- model ----------------------------------------")
model_kwargs = dict(
    dataset_file=dataset_file,
    objective=objective,
    timestep=4,
    hidden_dim=50,
    epochs=50,
    verbose=True,
    batch_size=7,
)
model = model_class(**model_kwargs)
model.printout()

# Preprocessing: split first, then scale, then apply the model-specific step.
from sklearn.preprocessing import MinMaxScaler, StandardScaler, MaxAbsScaler
from tools import MaxScaler

scaler = MaxScaler()
model.train_test_split(test_size=0.1, random_state=seed)
model.preprocess(scaler)
model.preprocessing_for_specific_model()

# Build and train the selected model. The variable keeps the name AMNWt_model
# because the following cells refer to it, whichever model class is selected.
AMNWt_model = model.build_model()
history = AMNWt_model.fit(
    model.X_train,
    model.Y_train,
    epochs=epoch,
    batch_size=batch_size,
    verbose=0,
)

# R2 is measured on the training split, Q2 on the held-out test split.
train_pred = AMNWt_model.predict(model.X_train)
print("R2 :", model.R2(model.Y_train, train_pred))
test_pred = AMNWt_model.predict(model.X_test)
print("Q2 :", model.R2(model.Y_test, test_pred))

---------------------------------------- model ----------------------------------------
number of metabolites:  72
filtered measurements size:  1
dataset file: ./Dataset/e_coli_core_UB_100.npz
model type: AMNWt
model medium bound: UB
timestep: 4
training set size (100, 20) (100, 1)
training epochs: 50
training regression: True
training batch size: 7
training validation iter: 0
training early stopping: False


R2 : 0.43442647328897727
Q2 : 0.3337210888765758


In [36]:
from tools import custom_loss

# Re-seed TensorFlow before training the rebuilt model.
seed = 10
tf.random.set_seed(seed)

# First round-trip test: recreate a new model from the trained model's config,
# then compile and train it.
config = AMNWt_model.get_config()
AMNWt_model_ = tf.keras.Model.from_config(
    config,
    custom_objects={"RNNCell": RNNCell},
)

# The custom loss is used both as the training loss and as the tracked metric.
my_mse = custom_loss(model.S, model.P_out, model.P_in)
AMNWt_model_.compile(loss=my_mse, optimizer='adam', metrics=[my_mse])
history = AMNWt_model_.fit(
    model.X_train,
    model.Y_train,
    epochs=epoch,
    batch_size=batch_size,
    verbose=0,
)

# R2 is measured on the training split, Q2 on the held-out test split.
rebuilt_train_pred = AMNWt_model_.predict(model.X_train)
print("R2 :", model.R2(model.Y_train, rebuilt_train_pred))
rebuilt_test_pred = AMNWt_model_.predict(model.X_test)
print("Q2 :", model.R2(model.Y_test, rebuilt_test_pred))

R2 : 0.43442647328897727
Q2 : 0.3337210888765758


In [37]:
# Save the trained model to `model_file`, overwriting any existing file.
tf.keras.models.save_model(
    AMNWt_model,
    model_file,
    overwrite=True,
    save_format=None,
    save_traces=True,
)

In [38]:
from tools import custom_loss

# Custom objects handed to the loader: the RNN cell class and the custom loss
# registered under the name "my_mse".
custom_objects = {
    "RNNCell": RNNCell,
    "my_mse": custom_loss(model.S, model.P_out, model.P_in),
}
AMNWt_model_ = tf.keras.models.load_model(model_file, custom_objects=custom_objects)

# NOTE(review): the loaded model is trained for another `epoch` epochs before
# it is scored, so the numbers printed below are not those of the model as
# saved — confirm that continued training is the intent.
history = AMNWt_model_.fit(
    model.X_train,
    model.Y_train,
    epochs=epoch,
    batch_size=batch_size,
    verbose=0,
)

loaded_train_pred = AMNWt_model_.predict(model.X_train)
print("R2 :", model.R2(model.Y_train, loaded_train_pred))
loaded_test_pred = AMNWt_model_.predict(model.X_test)
print("Q2 :", model.R2(model.Y_test, loaded_test_pred))

R2 : 0.7781931445961526
Q2 : 0.6904411610096228
