## Train and evaluate a model using scikit-learn and the Keras wrapper

In [1]:
import tensorflow as tf
from aMNWtModel import AMNWtModel
from aMRNNModel import AMRNNModel


# Model class used throughout the notebook.
# Swap in AMRNNModel here to run the other imported model class instead.
model_class = AMNWtModel

# Seed TensorFlow; the same value is passed as random_state to the
# train/test split at the bottom of this cell.
seed = 10
# np.random.seed(seed=seed)
tf.random.set_seed(seed)

# ---------------------------------------------------------------------------
# Dataset configuration: keep exactly one block uncommented.
# ---------------------------------------------------------------------------

# dataset_file = "./Dataset/IJN1463_EXP_UB_Anne.npz"
# objective=['BIOMASS_KT2440_WT3']

# dataset_file = "./Dataset/IJN1463_10_UB.npz"
# objective=['BIOMASS_KT2440_WT3']

# dataset_file = "./Dataset/e_coli_core_UB_100.npz"
# objective=['BIOMASS_Ecoli_core_w_GAM']
# epoch = 200
# batch_size = 7
# uptake_max_index = None

dataset_file = "./Dataset/e_coli_core_UB.npz"
objective = ['BIOMASS_Ecoli_core_w_GAM']
epoch = 200
batch_size = 7
uptake_max_index = None


# dataset_file = "./Dataset/biolog_iML1515_EXP_UB.npz"
# objective=['BIOMASS_Ec_iML1515_core_75p37M']
# epoch = 1 #20
# batch_size = 30
# uptake_max_index=151


# Build the model object from the dataset file and print its settings.
print("---------------------------------------- model ----------------------------------------")
model = model_class(dataset_file=dataset_file,
                    objective=objective,
                    timestep=4,
                    # n_hidden=1,
                    hidden_dim=50,
                    # NOTE(review): the fits in this notebook pass epochs=epoch
                    # (200) explicitly, so this value differs from the epoch
                    # count actually used for training -- confirm which one is
                    # intended before linking it to `epoch`.
                    epochs=50,
                    verbose=True,
                    # Use the configured batch size so that switching the
                    # dataset block above also updates the model object.
                    batch_size=batch_size,
                    uptake_max_index=uptake_max_index)
model.printout()

# Preprocessing: split first, then scale, then apply the model-specific step.
from sklearn.preprocessing import MinMaxScaler,StandardScaler,MaxAbsScaler
from tools import MaxScaler
scaler = MaxScaler()
model.train_test_split(test_size=0.1, random_state=seed)
model.preprocess(scaler)
model.preprocessing_for_specific_model()

---------------------------------------- model ----------------------------------------
number of metabolites:  72
filtered measurements size:  1
dataset file: ./Dataset/e_coli_core_UB.npz
model type: AMNWt
model medium bound: UB
timestep: 4
training set size (1000, 20) (1000, 1)
training epochs: 50
training regression: True
training batch size: 7
training validation iter: 0
training early stopping: False


In [2]:
# 5-fold cross-validation of the Keras model through the scikit-learn API
from sklearn.metrics import make_scorer
from sklearn.model_selection import KFold, cross_validate
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor

# Wrap the model builder so scikit-learn can drive fitting and prediction.
estimator = KerasRegressor(
    build_fn=model.build_model,
    epochs=epoch,
    batch_size=batch_size,
    verbose=0,
)

# One scorer per metric method exposed by the model object.
scoring = {
    metric_name: make_scorer(getattr(model, metric_name))
    for metric_name in ("loss_constraint", "mse", "R2")
}

# The early-stopping callback is created but not used: fit_params stays empty.
# Set fit_params = {'callbacks': [callback]} to switch it on.
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)
fit_params = {}

kfold = KFold(n_splits=5, shuffle=True, random_state=seed)

# Train and test scores are both returned for every fold.
results = cross_validate(
    estimator,
    model.X_train,
    model.Y_train,
    cv=kfold,
    n_jobs=5,
    scoring=scoring,
    fit_params=fit_params,
    return_train_score=True,
)
results

2023-10-11 10:02:47.184293: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-10-11 10:02:47.184304: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-10-11 10:02:47.186651: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 5. Tune using inter_op_parallelism_threads for best performance.
2023-10-11 10:02:47.186729: I tensorflow/core/common_runtime/process_util.cc:

{'fit_time': array([139.88623023, 132.04725814, 142.88333154, 140.65977454,
        142.53933716]),
 'score_time': array([0.46417737, 0.64663649, 0.37417245, 0.4068861 , 0.4509871 ]),
 'test_loss_constraint': array([0.00112894, 0.00111523, 0.00111331, 0.00115773, 0.00109726]),
 'train_loss_constraint': array([0.00110981, 0.00111597, 0.00113845, 0.00111186, 0.0011276 ]),
 'test_mse': array([0.00090884, 0.00089563, 0.00089477, 0.00093087, 0.0008814 ]),
 'train_mse': array([0.00090603, 0.00089245, 0.00091395, 0.00088425, 0.00090205]),
 'test_R2': array([0.98458044, 0.98677159, 0.98552257, 0.98646931, 0.98546767]),
 'train_R2': array([0.98295185, 0.98683045, 0.98601447, 0.98792604, 0.98696528])}

In [3]:
# Free-form notes left by the author (presumably the configurations that were
# tried -- TODO confirm):
#   AMRNN with 1 layer as RNNCell input
#   AMRNN with 2 layer as RNNCell input, hidden layer 50
#   AMNWt with 2 layer as RNNCell input hidden layer 500
#   AMRNN with 2 layer as RNNCell input, hidden layer 50
import pandas as pd

# Tabulate the per-fold cross-validation results and show summary statistics.
df = pd.DataFrame(data=results)
df.describe()

Unnamed: 0,fit_time,score_time,test_loss_constraint,train_loss_constraint,test_mse,train_mse,test_R2,train_R2
count,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0
mean,139.603186,0.468572,0.001122,0.001121,0.000902,0.0009,0.985762,0.986138
std,4.406497,0.105773,2.3e-05,1.2e-05,1.9e-05,1.2e-05,0.000875,0.001906
min,132.047258,0.374172,0.001097,0.00111,0.000881,0.000884,0.98458,0.982952
25%,139.88623,0.406886,0.001113,0.001112,0.000895,0.000892,0.985468,0.986014
50%,140.659775,0.450987,0.001115,0.001116,0.000896,0.000902,0.985523,0.98683
75%,142.539337,0.464177,0.001129,0.001128,0.000909,0.000906,0.986469,0.986965
max,142.883332,0.646636,0.001158,0.001138,0.000931,0.000914,0.986772,0.987926


In [4]:
# Build a fresh network and train it on the whole training split.
AMNWt_model = model.build_model()
history = AMNWt_model.fit(
    x=model.X_train,
    y=model.Y_train,
    epochs=epoch,
    batch_size=batch_size,
    verbose=0,
)

# R2: score on the training split.
Y_train_pred = AMNWt_model.predict(model.X_train)
r2_train = model.R2(model.Y_train, Y_train_pred)
print("R2 :", r2_train)

# Q2: same metric on the held-out test split.
Y_test_pred = AMNWt_model.predict(model.X_test)
q2_test = model.R2(model.Y_test, Y_test_pred)
print("Q2 :", q2_test)

2023-10-11 10:05:41.941927: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-10-11 10:05:41.944587: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.
2023-10-11 10:05:42.141710: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


R2 : 0.9888228666362848
Q2 : 0.9861056804729627


## Search for hyperparameters

In [34]:
from sklearn.model_selection import RandomizedSearchCV

# Base estimator; `epochs` and `batch_size` given here are only defaults,
# the search overrides them for every sampled candidate.
estimator = KerasRegressor(build_fn=model.build_model,
                           epochs=epoch,
                           batch_size=batch_size,
                           verbose=0)

# Candidate values. Lists are sampled as discrete choices (7 or 20, 2 or 100),
# not as ranges.
# The key has to be `epochs`: the Keras scikit-learn wrapper tolerates the
# legacy name `nb_epoch` in its parameter check, but fit() has no such
# argument, so searching over `nb_epoch` silently had no effect.
distributions = dict(batch_size=[7, 20],
                     epochs=[2, 100],
                     # hidden_dim=[1,2],
                     )

# loss_constraint and mse are losses (lower is better), so they are declared
# with greater_is_better=False; scikit-learn then reports them sign-flipped in
# cv_results_ and ranks candidates correctly.
scoring = {"loss_constraint": make_scorer(model.loss_constraint, greater_is_better=False),
           "mse": make_scorer(model.mse, greater_is_better=False),
           "R2": make_scorer(model.R2),
           }

# With several metrics, `refit` names the one that selects the best candidate.
clf = RandomizedSearchCV(estimator,
                         distributions,
                         scoring=scoring,
                         refit="R2",
                         random_state=0)

# Tune on the training split only: the test split must stay unseen so that the
# Q2 reported elsewhere in the notebook remains an honest estimate.
search = clf.fit(model.X_train, model.Y_train)
search.best_params_



## Save and load the model

In [4]:
import tensorflow as tf

from aMNWtModel import AMNWtModel, RNNCell
model_class = AMNWtModel



# from aMRNNModel import AMRNNModel, RNNCell
# model_class = AMRNNModel
# model_file = "Models/AMRNN_model.keras"


seed = 10
tf.random.set_seed(seed)

# ---------------------------------------------------------------------------
# Dataset configuration: keep exactly one block uncommented.
# ---------------------------------------------------------------------------

# dataset_file = "./Dataset/IJN1463_10_UB.npz"
# objective=['BIOMASS_KT2440_WT3']
# epoch = 20
# batch_size = 30
# uptake_max_index = None
# model_file = "Models/AMNWt_IJN1463_10_UB.keras"
# model_file = "Models/AMNWt_IJN1463_10_UB_no_scaling.keras"


dataset_file = "./Dataset/e_coli_core_UB.npz"
objective = ['BIOMASS_Ecoli_core_w_GAM']
epoch = 200
batch_size = 7
uptake_max_index = None
# Scaling is disabled further down (model.preprocess is commented out), so the
# model is stored under the "no_scaling" name. The other name is presumably
# meant for the scaled variant -- TODO confirm.
# model_file = "Models/AMNWt_e_coli_core_UB.keras"
model_file = "Models/AMNWt_e_coli_core_UB_no_scaling.keras"


# dataset_file = "./Dataset/biolog_iML1515_EXP_UB.npz"
# objective=['BIOMASS_Ec_iML1515_core_75p37M']
# epoch = 1 #20
# batch_size = 30
# uptake_max_index=151

# Dataset plus model structure
print("---------------------------------------- model ----------------------------------------")
model = model_class(dataset_file=dataset_file,
                    objective=objective,
                    timestep=4,
                    hidden_dim=50,
                    # NOTE(review): training below runs for `epoch` (200)
                    # epochs, not 50 -- confirm which value is intended here.
                    epochs=50,
                    verbose=True,
                    # Forward the configured values so that switching the
                    # dataset block above actually reaches the model object
                    # (uptake_max_index was previously defined but never passed).
                    batch_size=batch_size,
                    uptake_max_index=uptake_max_index)
model.printout()

# Preprocessing (scaling intentionally skipped for the "no_scaling" model)
from sklearn.preprocessing import MinMaxScaler,StandardScaler,MaxAbsScaler
from tools import MaxScaler
scaler = MaxScaler()
model.train_test_split(test_size=0.1, random_state=seed)
# model.preprocess(scaler)
model.preprocessing_for_specific_model()


# Construct and train an AMNWt model
AMNWt_model = model.build_model()
history = AMNWt_model.fit(model.X_train, model.Y_train, epochs=epoch, batch_size=batch_size, verbose=0)

# R2 is computed on the training split, Q2 on the held-out test split.
print("R2 :", model.R2(model.Y_train, AMNWt_model.predict(model.X_train)))
print("Q2 :", model.R2(model.Y_test, AMNWt_model.predict(model.X_test)))

---------------------------------------- model ----------------------------------------
number of metabolites:  72
filtered measurements size:  1
dataset file: ./Dataset/e_coli_core_UB.npz
model type: AMNWt
model medium bound: UB
timestep: 4
training set size (1000, 20) (1000, 1)
training epochs: 50
training regression: True
training batch size: 7
training validation iter: 0
training early stopping: False
R2 : 0.9886600582518922
Q2 : 0.9857406892294839


In [2]:
from tools import custom_loss

# Reset the TensorFlow global seed before rebuilding the network.
seed = 10
tf.random.set_seed(seed)

# First check of the config round trip: recreate the network from the trained
# model's config, then compile and train the copy from scratch.
config = AMNWt_model.get_config()
AMNWt_model_ = tf.keras.Model.from_config(
    config,
    custom_objects={"RNNCell": RNNCell},
)

# The same custom loss object serves as training loss and as reported metric.
my_mse = custom_loss(model.S, model.P_out, model.P_in)
AMNWt_model_.compile(optimizer='adam', loss=my_mse, metrics=[my_mse])

history = AMNWt_model_.fit(
    x=model.X_train,
    y=model.Y_train,
    epochs=epoch,
    batch_size=batch_size,
    verbose=0,
)

# R2 on the training split, Q2 on the test split, for the rebuilt model.
Y_train_pred_rebuilt = AMNWt_model_.predict(model.X_train)
print("R2 :", model.R2(model.Y_train, Y_train_pred_rebuilt))
Y_test_pred_rebuilt = AMNWt_model_.predict(model.X_test)
print("Q2 :", model.R2(model.Y_test, Y_test_pred_rebuilt))

KeyboardInterrupt: 

In [5]:
# Write the trained AMNWt_model to `model_file`, overwriting an existing file.
tf.keras.models.save_model(
    model=AMNWt_model,
    filepath=model_file,
    overwrite=True,
    save_format=None,
    save_traces=True,
)

In [4]:
from tools import custom_loss

# Objects passed to Keras for deserialization: the custom cell class and the
# custom loss, registered under the name "my_mse".
custom_objects = {
    "RNNCell": RNNCell,
    "my_mse": custom_loss(model.S, model.P_out, model.P_in),
}
AMNWt_model_ = tf.keras.models.load_model(model_file, custom_objects=custom_objects)


def print_r2_q2(keras_model):
    """Print R2 (training split) and Q2 (test split) for `keras_model`."""
    print("R2 :", model.R2(model.Y_train, keras_model.predict(model.X_train)))
    print("Q2 :", model.R2(model.Y_test, keras_model.predict(model.X_test)))


# Scores of the model exactly as loaded from disk.
# NOTE(review): the recorded output of this cell shows R2 = 0.78 right after
# loading, while the training cell printed R2 = 0.989 for the model that was
# saved -- confirm that the saved file and the restored weights match.
print_r2_q2(AMNWt_model_)

# Continue training the loaded model, then score it again.
history = AMNWt_model_.fit(
    x=model.X_train,
    y=model.Y_train,
    epochs=epoch,
    batch_size=batch_size,
    verbose=0,
)
print_r2_q2(AMNWt_model_)

R2 : 0.7816972835567102
Q2 : 0.7742851205067021


KeyboardInterrupt: 