# Install conda on your Colab environment

Ignore this first cell if you are running the notebook in a local environment.

One can still run it locally but it will have no effect.

In [1]:
# Run this cell first - it will install a conda distribution (mamba)
# on your Drive then restart the kernel automatically 
# (don't worry about the crashing/restarting kernel messages)
# It HAS to be runned FIRST everytime you use the notebook in colab

import os
import sys
RunningInCOLAB  = 'google.colab' in str(get_ipython())

if RunningInCOLAB:
    !pip install -q condacolab
    import condacolab
    condacolab.install()

# Set up your Colab or local environment
# Then import libraries

Run this cell in both cases of use (local or Colab)

In [4]:
import os
import sys
RunningInCOLAB  = 'google.colab' in str(get_ipython())

if RunningInCOLAB:
    
    # Check everything is fine with conda in Colab
    import condacolab
    condacolab.check()
    
    # Mount your drive environment in the colab runtime
    from google.colab import drive
    drive.mount('/content/drive',force_remount=True)
    
    # Change this variable to your path on Google Drive to which the repo has been cloned
    # If you followed the colab notebook 'repo_cloning.ipynb', nothing to change here
    repo_path_in_drive = '/content/drive/My Drive/Github/amn_release/'
    # Change directory to your repo cloned in your drive
    DIRECTORY = repo_path_in_drive
    os.chdir(repo_path_in_drive)
    # Copy the environment given in the environment_amn_light.yml
    !mamba env update -n base -f environment_amn_light.yml
    
    # This is one of the few Colab-compatible font
    font = 'Liberation Sans'
    
else:
    
    # In this case the local root of the repo is our working directory
    DIRECTORY = './'
    font = 'arial'

# printing the working directory files. One can check you see the same folders and files as in the git webpage.
print(os.listdir(DIRECTORY))

from Build_Model import *

# We declare this function here and not in the
# function-storing python file to modify it easily
# as it can change the printouts of the methods
def printout(filename, Stats, model, time): 
    # printing Stats
    print('Stats for %s CPU-time %.4f' % (filename, time))
    print('R2 = %.4f (+/- %.4f) Constraint = %.4f (+/- %.4f)' % \
          (Stats.train_objective[0], Stats.train_objective[1],
           Stats.train_loss[0], Stats.train_loss[1]))
    print('Q2 = %.4f (+/- %.4f) Constraint = %.4f (+/- %.4f)' % \
          (Stats.test_objective[0], Stats.test_objective[1],
           Stats.test_loss[0], Stats.test_loss[1]))

['README.md', 'Duplicate_Model.ipynb', 'Build_Model_Dense.ipynb', 'Build_Dataset.py', 'Dataset_experimental', '.ipynb_checkpoints', '.git', 'Build_Experimental.ipynb', 'Reservoir', 'Dataset_model', 'Figures.ipynb', 'Result', 'Figures', '.gitignore', 'Duplicate_Model.py', 'LICENSE', 'Build_Dataset.ipynb', 'Dataset_input', '__pycache__', 'Build_Experimental.py', 'old', 'environment_amn.yml', 'Build_Model.py', 'Build_Model.ipynb', '.DS_Store']


## (2) Neural model: examples of trainable ANN model (neural only) with FBA simulated training set or experimental datasets

### FBA simulated training set

In [21]:
# Create, train and evaluate ANN models with FBA simulated training set for E. coli core

# What you can change 
seed = 10
ratio_test = 0.1 # part of the training set removed for test
np.random.seed(seed=seed)  
trainname = 'e_coli_core_UB'
# End of What you can change

# Create model
trainingfile = DIRECTORY+'Dataset_model/'+trainname
model = Neural_Model(trainingfile = trainingfile, 
              objective=['BIOMASS_Ecoli_core_w_GAM'], 
              model_type = 'ANN_Dense',
              n_hidden = 1, hidden_dim = 50, 
              epochs = 500, xfold = 5)
ID = np.random.choice(model.X.shape[0], 
                      size=int(model.X.shape[0]*ratio_test), replace=False)
Xtest,  Ytest  = model.X[ID,:], model.Y[ID,:]
Xtrain, Ytrain = np.delete(model.X, ID, axis=0), np.delete(model.Y, ID, axis=0) 
model.printout()

# Train and evaluate
reservoirname = trainname +'_'+model.model_type
reservoirfile = DIRECTORY+'Reservoir/'+reservoirname
start_time = time.time()
model.X, model.Y = Xtrain, Ytrain
reservoir, pred, stats, _ = train_evaluate_model(model, verbose=False)
delta_time = time.time() - start_time

# Printing cross-validation results
printout(reservoirname, stats, model, delta_time)

# Save, reload and run idependent test set
reservoir.save(reservoirfile)
reservoir.load(reservoirfile)
reservoir.printout()
if len(Xtest) > 0:
    start_time = time.time()
    reservoir.X, reservoir.Y = Xtest, Ytest
    X, Y = model_input(reservoir,verbose=False)
    pred, stats = evaluate_model(reservoir.model, X, Y, reservoir, verbose=False)
    delta_time = time.time() - start_time
    printout('Test set', stats, model, delta_time)
"""
Stats for e_coli_core_UB_ANN_Dense CPU-time 149.2822
R2 = 0.9587 (+/- 0.0329) Constraint = -1.0000 (+/- 0.0000)
Q2 = 0.9582 (+/- 0.0327) Constraint = -1.0000 (+/- 0.0000)
Stats for Test set CPU-time 0.0347
R2 = 0.9104 (+/- 0.0000) Constraint = -1.0000 (+/- 0.0000)
"""

training file: ./Dataset_model/e_coli_core_UB
model type: ANN_Dense
model scaler: 0.0
model input dim: 20
model output dim: 1
model medium bound: UB
timestep: 0
training set size (1000, 20) (1000, 1)
nbr hidden layer: 1
hidden layer size: 50
activation function: relu
training epochs: 500
training regression: True
training learn rate: 0.001
training dropout: 0.25
training batch size: 5
training validation iter: 0
training xfold: 5
training early stopping: False
nbr parameters: 1101
train = 0.99 test = 0.99 loss-train = -1.000000 loss-test = -1.000000 iter=0
nbr parameters: 1101
train = 0.93 test = 0.93 loss-train = -1.000000 loss-test = -1.000000 iter=0
nbr parameters: 1101
train = 0.98 test = 0.98 loss-train = -1.000000 loss-test = -1.000000 iter=0
nbr parameters: 1101
train = 0.99 test = 0.99 loss-train = -1.000000 loss-test = -1.000000 iter=0
nbr parameters: 1101
train = 0.91 test = 0.91 loss-train = -1.000000 loss-test = -1.000000 iter=0
Stats for e_coli_core_UB_ANN_Dense CPU-time 1

In [3]:
# Create, train and evaluate ANN models with FBA simulated training set for E. coli core

# What you can change 
seed = 10
np.random.seed(seed=seed)  
trainname = 'e_coli_core_UB_5000'
# End of What you can change

# Create model
trainingfile = DIRECTORY+'Dataset_model/'+trainname
cobramodel = cobra.io.read_sbml_model(trainingfile+'.xml')
OBJ = [r.id for r in cobramodel.reactions]
parameter = TrainingSet()
parameter.load(trainingfile)
Y, i = {}, 0
for obj in OBJ:
    objective = [obj]
    print(objective)
    model = Neural_Model(trainingfile = trainingfile, 
                         objective=objective, 
                         model_type = 'ANN_Dense',
                         n_hidden = 1, hidden_dim = 50, 
                         epochs = 500, xfold = 5)
        
    # Train and evaluate
    start_time = time.time()
    try:
        reservoir, pred, stats, _ = train_evaluate_model(model, verbose=False)
    except:
        pred = 0 * parameter.Y # zero vector
    delta_time = time.time() - start_time

    # Printing cross-validation results
    print(objective,'----------------------------------------------------------------------', i)
    printout(objective, stats, reservoir, delta_time)
    Y[i] = pred[:,0]
    i = i+1

# Collate all predicted Y and get stats and constraints
Y = np.transpose(np.asarray(list(Y.values())))
print(Y.shape, parameter.Y.shape)
print('Q2=', r2_score(parameter.Y, Y, multioutput='variance_weighted'))
X = tf.convert_to_tensor(np.float32(model.X)) # Loss computed of tf tensors
Y = tf.convert_to_tensor(np.float32(Y))
L2, _ = Loss_SV(Y, model.S)
L2 = np.mean(L2.numpy())
print('Loss_SV =', L2)
L3, _ = Loss_Vin(Y, model.Pin, X, 'UB')
L3 = np.mean(L3.numpy())
print('Loss_Vin =', L3)
L = (L2+L3)/2
print('Constraints =', L)

"""
size 10
hidden_dim = 50 no scaler
(10, 154) (10, 154)
Q2= -0.07538283773845325
Loss_SV 0.48731223
Loss_Vin 0.2028226
Loss_cst 0.34506741166114807
hidden_dim = 50 with scaler : no better
Q2= -0.04873539601415515
Loss_SV = 0.45491654
Loss_Vin = 0.20206484
Constraints = 0.328490674495697

size 100
hidden_dim = 50 no scaler
(100, 154) (100, 154)
Q2= 0.8483248084564434
Loss_SV 0.36500132
Loss_Vin 0.019197455
Loss_cst 0.19209939241409302
hidden_dim = 100 : no better
(100, 154) (100, 154)
Q2= 0.7824626824411614
Loss_SV 0.46771127
Loss_Vin 0.02226068
Loss_cst 0.24498596787452698

size 500 no scaler

size 1000 no scaler
(1000, 154) (1000, 154)
Q2= 0.9507007593125759
Loss_SV 0.19790417
Loss_Vin 0.013121797
Loss_cst 0.10551298409700394

size 10000 no scaler

"""

['PFK']
nbr parameters: 1101
train = 1.00 test = 1.00 loss-train = -1.000000 loss-test = -1.000000 iter=0
nbr parameters: 1101
train = 1.00 test = 1.00 loss-train = -1.000000 loss-test = -1.000000 iter=0
nbr parameters: 1101
train = 1.00 test = 1.00 loss-train = -1.000000 loss-test = -1.000000 iter=0
nbr parameters: 1101
train = 1.00 test = 1.00 loss-train = -1.000000 loss-test = -1.000000 iter=0
nbr parameters: 1101
train = 1.00 test = 1.00 loss-train = -1.000000 loss-test = -1.000000 iter=0
['PFK'] ---------------------------------------------------------------------- 0
Stats for ['PFK'] CPU-time 720.0392
R2 = 0.9976 (+/- 0.0007) Constraint = -1.0000 (+/- 0.0000)
Q2 = 0.9976 (+/- 0.0009) Constraint = -1.0000 (+/- 0.0000)
['PFL']
nbr parameters: 1101
train = 0.93 test = 0.93 loss-train = -1.000000 loss-test = -1.000000 iter=0
nbr parameters: 1101
train = 0.93 test = 0.92 loss-train = -1.000000 loss-test = -1.000000 iter=0
nbr parameters: 1101
train = 0.92 test = 0.92 loss-train = -1.0

'\nsize 10\nhidden_dim = 50 no scaler\n(10, 154) (10, 154)\nQ2= -0.07538283773845325\nLoss_SV 0.48731223\nLoss_Vin 0.2028226\nLoss_cst 0.34506741166114807\nhidden_dim = 50 with scaler : no better\nQ2= -0.04873539601415515\nLoss_SV = 0.45491654\nLoss_Vin = 0.20206484\nConstraints = 0.328490674495697\n\nsize 100\nhidden_dim = 50 no scaler\n(100, 154) (100, 154)\nQ2= 0.8483248084564434\nLoss_SV 0.36500132\nLoss_Vin 0.019197455\nLoss_cst 0.19209939241409302\nhidden_dim = 100 : no better\n(100, 154) (100, 154)\nQ2= 0.7824626824411614\nLoss_SV 0.46771127\nLoss_Vin 0.02226068\nLoss_cst 0.24498596787452698\n\nsize 500 no scaler\n\nsize 1000 no scaler\n(1000, 154) (1000, 154)\nQ2= 0.9507007593125759\nLoss_SV 0.19790417\nLoss_Vin 0.013121797\nLoss_cst 0.10551298409700394\n\nsize 10000 no scaler\n\n'