## Train and evaluate model using Scikit-learn and Keras wrapper 

In [3]:
import tensorflow as tf
from amn.model import AMNWtModel
from amn.model import AMRNNModel

model_class = AMRNNModel
# model_class = AMNWtModel

data_dir = "../data"
seed = 10
# np.random.seed(seed=seed)  
tf.random.set_seed(seed)

# dataset_file = "/Dataset/IJN1463_EXP_UB_Anne.npz"
# objective=['BIOMASS_KT2440_WT3']

# dataset_file = "/Dataset/IJN1463_10_UB.npz"
# objective=['BIOMASS_KT2440_WT3']

# dataset_file = "/Dataset/e_coli_core_UB_100.npz"
# objective=['BIOMASS_Ecoli_core_w_GAM']
# epochs = 200
# batch_size = 7
# uptake_max_index = None

dataset_file = "/Dataset/e_coli_core_UB.npz"
objective=['BIOMASS_Ecoli_core_w_GAM']
uptake_max_index = None
epochs = 20 #200
batch_size = 7


# dataset_file = "/Dataset/biolog_iML1515_EXP_UB.npz"
# objective=['BIOMASS_Ec_iML1515_core_75p37M']
# epochs = 1 #20
# batch_size = 30
# uptake_max_index=151




print("---------------------------------------- model ----------------------------------------")
model = model_class(dataset_file=data_dir + dataset_file, 
                   objective=objective,
                   timestep=4,
                   hidden_dim=50,
                   verbose=True,
                   uptake_max_index = uptake_max_index)
model.printout()

# Preprocessing
from amn.tools import MaxScaler
scaler= MaxScaler()
model.train_test_split(test_size=0.1, random_state=seed)
model.preprocess(scaler)
model.preprocessing_for_specific_model()

---------------------------------------- model ----------------------------------------
['CYTDK2' 'XPPT' 'HXPRT' ... 'FORCT_rev' 'EX_3hpp_e_i' 'HADPCOADH3_rev']
number of metabolites:  1877
filtered measurements size:  1
dataset file: ../data/Dataset/iML1515_EXP_paul_UB.npz
model type: AMNWt
model medium bound: UB
timestep: 4
training set size (186, 49) (186, 1)
nbr hidden layer: 1
hidden layer size: 50
activation function: relu


In [6]:


# cross validation
from sklearn.metrics import make_scorer
from sklearn.model_selection import KFold, cross_validate
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor

estimator= KerasRegressor(build_fn=model.build_model, 
                          epochs=epochs, 
                          batch_size=batch_size, 
                          verbose=0)

scoring = {"loss_constraint":make_scorer(model.loss_constraint),
           "mse":make_scorer(model.mse),
           "R2":make_scorer(model.R2),
           }



callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)
fit_params = {'callbacks': [callback]}
fit_params = {}

kfold= KFold(n_splits=5,shuffle=True, random_state=seed)

results=cross_validate(estimator, 
                       model.X_train, 
                       model.Y_train, 
                       cv=kfold, 
                       n_jobs=5, 
                       scoring=scoring, 
                       fit_params=fit_params,
                       return_train_score=True)
results

2023-11-09 14:40:35.991803: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-11-09 14:40:35.993384: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 5. Tune using inter_op_parallelism_threads for best performance.
2023-11-09 14:40:36.007946: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-11-09 14:40:36.010370: I tensorflow/core/common_runtime/process_util.cc:

{'fit_time': array([14.91294599, 22.20490575, 13.78981233, 15.43258643, 15.91683006]),
 'score_time': array([0.94125652, 0.27923036, 1.28827071, 0.64941287, 0.49342775]),
 'test_loss_constraint': array([0.00376069, 0.00415281, 0.00352208, 0.00386676, 0.00399523]),
 'train_loss_constraint': array([0.00383015, 0.00399166, 0.00358072, 0.00375434, 0.00407531]),
 'test_mse': array([0.00366208, 0.00394042, 0.00519764, 0.00351517, 0.00373788]),
 'train_mse': array([0.00372008, 0.00384605, 0.00536005, 0.00332458, 0.00387086]),
 'test_R2': array([0.7911341 , 0.81550174, 0.38100968, 0.86698227, 0.81566853]),
 'train_R2': array([0.80389084, 0.79766492, 0.37770605, 0.87800637, 0.811624  ])}

In [8]:
import pandas as pd
df = pd.DataFrame(results)
df.describe()

Unnamed: 0,fit_time,score_time,test_loss_constraint,train_loss_constraint,test_mse,train_mse,test_R2,train_R2
count,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0
mean,16.451416,0.73032,0.00386,0.003846,0.004011,0.004024,0.734059,0.733778
std,3.312003,0.394319,0.000239,0.000195,0.000681,0.000778,0.199288,0.201647
min,13.789812,0.27923,0.003522,0.003581,0.003515,0.003325,0.38101,0.377706
25%,14.912946,0.493428,0.003761,0.003754,0.003662,0.00372,0.791134,0.797665
50%,15.432586,0.649413,0.003867,0.00383,0.003738,0.003846,0.815502,0.803891
75%,15.91683,0.941257,0.003995,0.003992,0.00394,0.003871,0.815669,0.811624
max,22.204906,1.288271,0.004153,0.004075,0.005198,0.00536,0.866982,0.878006


In [9]:
AMNWt_model = model.build_model()
history = AMNWt_model.fit(model.X_train, model.Y_train, epochs=epochs, batch_size=batch_size, verbose=0)

print("R2 :", model.R2(model.Y_train, AMNWt_model.predict(model.X_train)))
print("Q2 :", model.R2(model.Y_test, AMNWt_model.predict(model.X_test)))

2023-11-09 14:41:08.009841: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-11-09 14:41:08.013006: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.
2023-11-09 14:41:08.209271: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


R2 : 0.8440483586310867
Q2 : 0.8478538192871721


## Search for hyperparameters

In [10]:
from sklearn.model_selection import RandomizedSearchCV

estimator= KerasRegressor(build_fn=model.build_model, 
                          epochs=epochs, 
                          batch_size=batch_size, 
                          verbose=0)

distributions = dict(batch_size=[7,20],
                     nb_epoch=[2,100],
                    #  hidden_dim=[1,2],
                     )

scoring = {"loss_constraint":make_scorer(model.loss_constraint),
           "mse":make_scorer(model.mse),
           "R2":make_scorer(model.R2),
           }

clf = RandomizedSearchCV(estimator, distributions, random_state=0)
search = clf.fit(model.X_test, model.Y_test)





In [11]:
search

RandomizedSearchCV(estimator=<keras.wrappers.scikit_learn.KerasRegressor object at 0x7f36a01583d0>,
                   param_distributions={'batch_size': [7, 20],
                                        'nb_epoch': [2, 100]},
                   random_state=0)