# Install conda on your Colab environment

Ignore this first cell if you are running the notebook in a local environment.

You can still run it locally, but it will have no effect.

In [1]:
# Run this cell first - it will install a conda distribution (mamba)
# and then restart the kernel automatically
# (don't worry about the crashing/restarting kernel messages).
# It HAS to be run FIRST every time you use the notebook in Colab.

import os
import sys
# True when the string form of the IPython shell object mentions 'google.colab';
# used as the "are we running on Colab?" switch.
RunningInCOLAB  = 'google.colab' in str(get_ipython())

if RunningInCOLAB:
    # Shell escape: quietly install the condacolab helper, then let it set up conda.
    !pip install -q condacolab
    import condacolab
    condacolab.install()

# Set up your Colab or local environment
# Then import libraries

Run this cell in both cases (local or Colab).

In [1]:
import os
import sys
# True when the string form of the IPython shell object mentions 'google.colab'.
RunningInCOLAB  = 'google.colab' in str(get_ipython())

if RunningInCOLAB:
    
    # Check that everything is fine with conda in Colab
    import condacolab
    condacolab.check()
    
    # Mount your Drive in the Colab runtime
    from google.colab import drive
    drive.mount('/content/drive',force_remount=True)
    
    # Change this variable to the path on Google Drive where the repo has been cloned.
    # If you followed the Colab notebook 'repo_cloning.ipynb', there is nothing to change here.
    repo_path_in_drive = '/content/drive/My Drive/Github/amn_release/'
    # Use the cloned repo both as DIRECTORY and as the current working directory
    DIRECTORY = repo_path_in_drive
    os.chdir(repo_path_in_drive)
    # Update the 'base' conda environment from environment_amn_light.yml
    !mamba env update -n base -f environment_amn_light.yml
    
    # This is one of the few Colab-compatible fonts
    font = 'Liberation Sans'
    
else:
    
    # In this case the local root of the repo is our working directory
    DIRECTORY = './'
    font = 'arial'

# Print the files of the working directory. Check that you see the same
# folders and files as on the git web page.
print(os.listdir(DIRECTORY))

# NOTE(review): later cells use names such as np, tf, cobra, time and r2_score
# without importing them, so they presumably come from this star import - confirm.
from Library.Build_Model import *

# We declare this function here and not in the
# function-storing python file to modify it easily
# as it can change the printouts of the methods
def printout(filename, Stats, model, time):
    """Print the train/test statistics of one run, or a failure notice.

    Parameters
    ----------
    filename : label inserted in the printed header line.
    Stats : None when the run failed; otherwise an object exposing
        ``train_objective``, ``train_loss``, ``test_objective`` and
        ``test_loss``, each indexable at [0] and [1] (printed as the value
        and its "+/-" term - presumably mean and standard deviation).
    model : not used by this function; kept so existing calls still work.
    time : elapsed time to report (shadows the ``time`` module inside this
        function only).

    Returns
    -------
    None. Everything is written to standard output.
    """
    # Identity test: `== None` would call a user-defined __eq__ on Stats and
    # could wrongly report a valid stats object as a failed run.
    if Stats is None:
        print('Stats for %s failed CPU-time %.4f' % (filename, time))
        return
    print('Stats for %s CPU-time %.4f' % (filename, time))
    print('R2 = %.4f (+/- %.4f) Constraint = %.4f (+/- %.4f)' % \
          (Stats.train_objective[0], Stats.train_objective[1],
           Stats.train_loss[0], Stats.train_loss[1]))
    print('Q2 = %.4f (+/- %.4f) Constraint = %.4f (+/- %.4f)' % \
          (Stats.test_objective[0], Stats.test_objective[1],
           Stats.test_loss[0], Stats.test_loss[1]))
    
# Get R2/Q2 and constraints
def collate_stats(model, parameter, measurement, Y, verbose=False):
    """Score predictions on the selected measurement and average three losses.

    Parameters
    ----------
    model : object providing ``X``, ``S``, ``Pin`` and ``mediumbound``.
    parameter : object providing the reference values ``Y``.
    measurement : column selector applied to both ``parameter.Y`` and ``Y``.
    Y : predicted values, indexed as ``Y[:, measurement]``.
    verbose : when true, print the shapes and each intermediate quantity.

    Returns
    -------
    (RQ2, L) : the variance-weighted ``r2_score`` on the selected column(s)
        and the arithmetic mean of the three loss terms computed below.
    """
    if verbose:
        print(Y.shape, parameter.Y.shape)

    observed = parameter.Y[:, measurement]
    predicted = Y[:, measurement]
    score = r2_score(observed, predicted, multioutput='variance_weighted')
    if verbose:
        print('RQ2 =', score)

    # The SV and Vin losses are evaluated on float32 tf tensors
    inputs_tf = tf.convert_to_tensor(np.float32(model.X))
    fluxes_tf = tf.convert_to_tensor(np.float32(Y))

    # Mean squared error between reference and prediction on the measurement
    loss_vout = np.square(observed - predicted).mean(axis=0)
    if verbose:
        print('Loss_Vout =', loss_vout)

    # First value returned by Loss_SV, averaged (second value is ignored)
    sv_values, _ = Loss_SV(fluxes_tf, model.S)
    loss_sv = np.mean(sv_values.numpy())
    if verbose:
        print('Loss_SV =', loss_sv)

    # First value returned by Loss_Vin, averaged (second value is ignored)
    vin_values, _ = Loss_Vin(fluxes_tf, model.Pin, inputs_tf, model.mediumbound)
    loss_vin = np.mean(vin_values.numpy())
    if verbose:
        print('Loss_Vin =', loss_vin)

    constraint = (loss_vout + loss_sv + loss_vin) / 3
    if verbose:
        print('Constraints =', constraint)
    return score, constraint

['README.md', 'Duplicate_Model.ipynb', 'Dataset_experimental', 'Tutorial.ipynb', '.ipynb_checkpoints', '.git', 'Build_Model_RC.ipynb', 'biolog_simulations.npy', 'environment_amn_light.yml', 'Build_Experimental.ipynb', 'Reservoir', 'Build_Model_MM.ipynb', 'Dataset_model', 'Figures.ipynb', 'Result', 'Figures', '.gitignore', 'LICENSE', 'Build_Model_ANN_Dense.ipynb', 'Build_Dataset.ipynb', 'Build_Model_RF.ipynb', 'Library', 'Dataset_input', 'Functions', 'environment_amn.yml', 'Build_Model_AMN.ipynb', '.DS_Store']


# Create and Train ANN




In [2]:
# What you can change
seed = 2
np.random.seed(seed=seed)
# trainname = 'e_coli_core_UB_1540'
# trainname = 'e_coli_core_UB_500'
# trainname = 'e_coli_core_UB_10'
# trainname = 'e_coli_core_UB_100'
trainname = 'e_coli_core_UB_50'
xfold = 5
Maxloop = 3
# End of what you can change

# Load training set
trainingfile = DIRECTORY+'Dataset_model/'+trainname
cobramodel = cobra.io.read_sbml_model(trainingfile+'.xml')
parameter = TrainingSet()
parameter.load(trainingfile)
RQ2, Loss = [], []

# The biomass reaction index does not depend on the iteration,
# so it is looked up once instead of on every pass of the loop.
biomass_index = get_index_from_id('BIOMASS_Ecoli_core_w_GAM',cobramodel.reactions)

for Nloop in range(Maxloop):
    # Alternative configuration kept for reference:
    # model = Neural_Model(trainingfile = trainingfile,
    #                      model_type = 'ANN_Dense',
    #                      scaler=False, batch_size=50,
    #                      train_rate=1e-2, activation="linear",
    #                      n_hidden = 1, hidden_dim = 500,
    #                      epochs = 200, xfold = xfold)
    model = Neural_Model(trainingfile = trainingfile,
                         model_type = 'ANN_Dense',
                         scaler=True, batch_size=5,
                         train_rate=1e-3, activation="relu",
                         n_hidden = 1, hidden_dim = 50,
                         epochs = 500, xfold = xfold)
    # Train and evaluate
    start_time = time.time()
    try:
        reservoir, pred, stats, _ = train_evaluate_model(model, verbose=False)
    except Exception as err:
        # Best effort: a failed training run falls back to all-zero predictions
        # so the loop can continue. Only Exception is caught, so that
        # KeyboardInterrupt / SystemExit still stop the cell, and the cause
        # is reported instead of being silently discarded.
        print('Training failed (%s: %s)' % (type(err).__name__, err))
        reservoir, pred, stats, _ = None, np.zeros(model.Y.shape), None, None
    # Printing cross-validation results
    delta_time = time.time() - start_time
    printout('All fluxes', stats, reservoir, delta_time)

    # Collate all predicted Y and get stats and constraints
    rq2, l = collate_stats(model, parameter, biomass_index, pred, verbose=True)
    RQ2.append(rq2)
    Loss.append(l)

# Print stats averaged over all iterations
rqt = 'R2 (biomass)' if xfold < 2 else 'Q2 (biomass)'
print(rqt, '= %.4f (+/- %.4f) Loss = %.4f (+/- %.4f)' \
      % (np.mean(RQ2), np.std(RQ2), np.mean(Loss), np.std(Loss)))

nbr parameters: 8904
train = 0.71 test = -0.10 loss-train = -1.000000 loss-test = -1.000000 iter=0
nbr parameters: 8904
train = 0.67 test = 0.54 loss-train = -1.000000 loss-test = -1.000000 iter=0
nbr parameters: 8904
train = 0.64 test = 0.60 loss-train = -1.000000 loss-test = -1.000000 iter=0
nbr parameters: 8904
train = 0.72 test = 0.46 loss-train = -1.000000 loss-test = -1.000000 iter=0
nbr parameters: 8904
train = 0.68 test = 0.48 loss-train = -1.000000 loss-test = -1.000000 iter=0
Stats for All fluxes CPU-time 416.7927
R2 = 0.6856 (+/- 0.0293) Constraint = -1.0000 (+/- 0.0000)
Q2 = 0.3978 (+/- 0.2509) Constraint = -1.0000 (+/- 0.0000)
(50, 154) (50, 154)
RQ2 = 0.8113039196567445
Loss_Vout = 0.002969156360921121
Loss_SV = 0.16527662
Loss_Vin = 0.2739699
Constraints = 0.14740522061993575
nbr parameters: 8904
train = 0.73 test = -0.01 loss-train = -1.000000 loss-test = -1.000000 iter=0
nbr parameters: 8904
train = 0.60 test = 0.39 loss-train = -1.000000 loss-test = -1.000000 iter=0
n

In [3]:
# Some exploration of the results
from sklearn.metrics import mean_squared_error

# Column inspected below: 15 is ATPM, always at 8.39 (its lower bound value)
# in the training set. Another constant column is 72, an uptake always at 10.
ATPM_INDEX = 15

# R2 on all fluxes, variance weighted or not, or all individual values
print(r2_score(model.Y, pred, multioutput='variance_weighted'), r2_score(model.Y, pred, multioutput='uniform_average'))
# print(r2_score(model.Y, pred, multioutput='raw_values'))

# Some fluxes make the R2 score bad because they always have the same value
# in the training set; the ATPM column is one example.
print(r2_score(model.Y[:,ATPM_INDEX], pred[:,ATPM_INDEX]))

# Random Forests can predict a constant value; the ANN has trouble doing so
print(model.Y[:,ATPM_INDEX], pred[:,ATPM_INDEX])

# Looking at the MSE makes more sense (R2 is probably not the best metric
# when the true fluxes are constant)
print(mean_squared_error(model.Y[:,ATPM_INDEX], pred[:,ATPM_INDEX]))

0.48614344477986793 -1.2180439571858431e+26
0.0
[8.39 8.39 8.39 8.39 8.39 8.39 8.39 8.39 8.39 8.39 8.39 8.39 8.39 8.39
 8.39 8.39 8.39 8.39 8.39 8.39 8.39 8.39 8.39 8.39 8.39 8.39 8.39 8.39
 8.39 8.39 8.39 8.39 8.39 8.39 8.39 8.39 8.39 8.39 8.39 8.39 8.39 8.39
 8.39 8.39 8.39 8.39 8.39 8.39 8.39 8.39] [10.39412975  8.57355118  9.88906574 10.21946144  7.51073647  7.57185173
  9.16762066  8.76229858  7.47470617  6.65266848  9.75116158  9.57881832
  9.89524555  7.70101881  8.34381294  8.96830845 10.93606377  6.22824049
  7.8714118   7.28311253  6.42173862  8.47281075  7.37325764  7.04062748
  7.79740715  8.56834507  9.06470966  7.90090179  7.27300882  7.94634581
  8.16026115  7.83511114  7.31363678  6.20226717  9.84066486  7.28922367
  7.69981194  8.13519859  9.15987682 10.77380276  8.08413792  6.53094149
  7.67902803  7.42254543  7.61065483  6.87567663  7.30626917  7.1021719
  6.94989586  7.48793745]
1.4797355783434958


In [None]:
"""
e_coli_core_UB_10
RQ2 = 0.5093122289792134
Loss_Vout = 0.009020895545336305
Loss_SV = 0.16743167
Loss_Vin = 0.3259917
Constraints = 0.16748141771709016
R2 (biomass) = 0.8226 (+/- 0.0483) Loss = 0.1144 (+/- 0.0082)
Q2 (biomass) = 0.3874 (+/- 0.2477) Loss = 0.2167 (+/- 0.0745)

e_coli_core_EB_10
RQ2 = 0.8126782171521297
Loss_Vout = 0.003443758610330688
Loss_SV = 0.05173849
Loss_Vin = 0.49004155
Constraints = 0.18174126733456805
R2 (biomass) = 0.8301 (+/- 0.0130) Loss = 0.1838 (+/- 0.0022)
Q2 (biomass) = 0.6621 (+/- 0.1461) Loss = 0.2656 (+/- 0.0210)

e_coli_core_UB_50
Q2 (biomass) = 0.8664 (+/- 0.0081) Loss = 0.1489 (+/- 0.0206)

e_coli_core_UB_100
Q2 (biomass) = 0.8512 (+/- 0.0546) Loss = 0.1108 (+/- 0.0046)

e_coli_core_UB_500
Q2 (biomass) = 0.9323 (+/- 0.0113) Loss = 0.0559 (+/- 0.0068)

e_coli_core_UB_1000
Q2 (biomass) = 0.9496 (+/- 0.0069) Loss = 0.0494 (+/- 0.0038)

e_coli_core_UB_5000
RQ2 = 0.9654334994082111
Loss_Vout = 0.0005803402165036408
Loss_SV = 0.087595426
Loss_Vin = 0.05468053
Constraints = 0.04761876524817484
Q2 (biomass) = 0.9629 (+/- 0.0041) Loss = 0.0479 (+/- 0.0026)
"""