# Install conda on your Colab environment

Skip this first cell if you are running the notebook in a local environment.

You can still run it locally, but it will have no effect.

In [1]:
# Run this cell first - it installs a conda distribution (mamba) through
# condacolab, then the kernel restarts automatically
# (don't worry about the crashing/restarting kernel messages).
# It HAS to be run FIRST every time you use the notebook in Colab.

import os
import sys
# True when the string form of the IPython shell object mentions 'google.colab'
RunningInCOLAB  = 'google.colab' in str(get_ipython())

if RunningInCOLAB:
    # Colab only: nothing below is executed in a local environment
    !pip install -q condacolab
    import condacolab
    condacolab.install()

# Set up your Colab or local environment
# Then import libraries

Run this cell in both cases (local or Colab).

In [8]:
# Environment setup shared by the Colab and local use cases.
# After this cell, DIRECTORY holds the repo root used to build file paths
# and `font` holds a font name (not used in this cell itself).
import os
import sys
# True when the string form of the IPython shell object mentions 'google.colab'
RunningInCOLAB  = 'google.colab' in str(get_ipython())

if RunningInCOLAB:
    
    # Check everything is fine with conda in Colab
    import condacolab
    condacolab.check()
    
    # Mount your Drive in the Colab runtime
    from google.colab import drive
    drive.mount('/content/drive',force_remount=True)
    
    # Change this variable to the path on Google Drive where the repo has been cloned.
    # If you followed the Colab notebook 'repo_cloning.ipynb', there is nothing to change here.
    repo_path_in_drive = '/content/drive/My Drive/Github/amn_release/'
    # Make the cloned repo the working directory
    DIRECTORY = repo_path_in_drive
    os.chdir(repo_path_in_drive)
    # Update the base conda environment from environment_amn_light.yml
    # (the relative file name resolves against the repo root set by os.chdir above)
    !mamba env update -n base -f environment_amn_light.yml
    
    # This is one of the few Colab-compatible fonts
    font = 'Liberation Sans'
    
else:
    
    # In this case the local root of the repo is our working directory
    DIRECTORY = './'
    font = 'arial'

# Print the files of the working directory. You should see the same folders
# and files as on the git webpage of the repo.
print(os.listdir(DIRECTORY))

from Library.Build_Model import *

# This function is declared here rather than in the Python file that
# stores the other functions, so that it can be modified easily:
# editing it changes the printouts of the methods.
def printout(filename, Stats, model, time):
    """Print the statistics of one training/evaluation run.

    Parameters
    ----------
    filename : label inserted in the printed header line.
    Stats : object exposing ``train_objective``, ``train_loss``,
        ``test_objective`` and ``test_loss``, each indexable as
        ``[0]`` and ``[1]`` (printed as "value (+/- spread)"),
        or ``None`` when the run failed.
    model : unused; kept so that existing calls keep working.
    time : elapsed time, printed with four decimals.

    Returns
    -------
    None. Everything is written to standard output.
    """
    # Identity test: `== None` can be hijacked by a custom __eq__ and would
    # then report a valid Stats object as a failed run.
    if Stats is None:
        print('Stats for %s failed CPU-time %.4f' % (filename, time))
        return
    print('Stats for %s CPU-time %.4f' % (filename, time))
    print('R2 = %.4f (+/- %.4f) Constraint = %.4f (+/- %.4f)' %
          (Stats.train_objective[0], Stats.train_objective[1],
           Stats.train_loss[0], Stats.train_loss[1]))
    print('Q2 = %.4f (+/- %.4f) Constraint = %.4f (+/- %.4f)' %
          (Stats.test_objective[0], Stats.test_objective[1],
           Stats.test_loss[0], Stats.test_loss[1]))

def collate_stats(model, parameter, measurement, Y, verbose=False):
    """Score predictions on one measured column and average four loss terms.

    Parameters
    ----------
    model : object providing ``X``, ``S``, ``Pin`` and ``mediumbound``,
        which are forwarded to the loss helpers.
    parameter : object providing the reference array ``Y``; it is also
        passed to ``Loss_Vin`` and ``Loss_Vpos``.
    measurement : column index selecting the measured quantity in both
        ``parameter.Y`` and ``Y``.
        NOTE(review): the reshape of ``err`` below only works when this
        selects a single column - confirm against callers.
    Y : predicted array, indexed as ``Y[:, measurement]``.
    verbose : when True, print shapes and intermediate values.

    Returns
    -------
    (RQ2, L) : the variance-weighted ``r2_score`` between reference and
        predicted column, and the mean over samples of the average of
        the four squared loss terms.
    """
    if verbose: print(Y.shape, parameter.Y.shape)
    Y_true = parameter.Y[:, measurement]
    Y_pred = Y[:, measurement]
    RQ2 = r2_score(Y_true, Y_pred, multioutput='variance_weighted')
    if verbose: print('RQ2 =', RQ2)

    # The loss helpers are given tf tensors
    X = tf.convert_to_tensor(np.float32(model.X))
    Y = tf.convert_to_tensor(np.float32(Y))

    err = Y_true - Y_pred
    L1 = np.linalg.norm(err.reshape(err.shape[0], 1), axis=1)
    L2, _ = Loss_SV(Y, model.S)
    L3, _ = Loss_Vin(Y, model.Pin, X, model.mediumbound, parameter)
    L4, _ = Loss_Vpos(Y, parameter)

    # Column vector with one row per sample. The sample count is inferred
    # (-1) instead of being hard-coded to 1000, so any dataset size works.
    L1 = np.square(L1.reshape(-1, 1))
    L2 = np.square(L2.numpy())
    L3 = np.square(L3.numpy())
    L4 = np.square(L4.numpy())

    if verbose:
        print('Loss_Vout =', np.mean(L1))
        print('Loss_SV =', np.mean(L2))
        print('Loss_Vin =', np.mean(L3))
        print('Loss_Vpos =', np.mean(L4))

    # Assumes L2, L3 and L4 broadcast against the (n_samples, 1) shape of L1
    # - TODO confirm the shapes returned by the Loss_* helpers.
    L = (L1+L2+L3+L4)/4
    L = np.mean(L, axis=0)[0]
    if verbose: print('Constraints =', L)
    return RQ2, L

['.git', '.gitignore', '.ipynb_checkpoints', 'Build_Dataset.ipynb', 'Build_Experimental.ipynb', 'Build_Model_AMN.ipynb', 'Build_Model_ANN_Dense.ipynb', 'Build_Model_MM.ipynb', 'Build_Model_RC.ipynb', 'Build_Model_RF.ipynb', 'Dataset_experimental', 'Dataset_input', 'Dataset_model', 'Duplicate_Model.ipynb', 'environment_amn.yml', 'environment_amn_light.yml', 'Figures', 'Figures.ipynb', 'Library', 'LICENSE', 'README.md', 'Reservoir', 'Result', 'Tutorial.ipynb']


In [10]:
# Create and train an ANN regression model (dense architecture) for E. coli 

# What you can change
seed = 2
np.random.seed(seed=seed)
# trainname = 'e_coli_core_EB'
trainname = 'iML1515_UB'
xfold = 5
Maxloop = 3
SIZE = 1000  # number of rows drawn (with replacement) for each run
# End of what you can change

# Load training set
trainingfile = DIRECTORY+'Dataset_model/'+trainname
cobramodel = cobra.io.read_sbml_model(trainingfile+'.xml')
parameter = TrainingSet()
parameter.load(trainingfile)

# Index of the biomass reaction in the cobra model. It does not depend on
# the run, so it is looked up once instead of once per loop iteration.
# (for the e_coli_core model the reaction id is 'BIOMASS_Ecoli_core_w_GAM')
biomass_index = get_index_from_id('BIOMASS_Ec_iML1515_core_75p37M', cobramodel.reactions)

for epochs in [100]: # range(0, 101, 5):
    # RQ2_ori and Loss_ori are not used in this cell
    RQ2, Loss, RQ2_ori, Loss_ori = [], [], [], []
    for Nloop in range(Maxloop):
        model = Neural_Model(trainingfile = trainingfile,
                         model_type = 'ANN_Dense',
                         #objective=['BIOMASS_Ecoli_core_w_GAM'],  
                         scaler=True,
                         n_hidden = 1, hidden_dim = 500,
                         epochs = epochs, xfold = xfold)

        # Draw SIZE rows with replacement and use them both as the model
        # data and as the reference data read by collate_stats
        ID = np.random.choice(model.X.shape[0], size=SIZE, replace=True)
        Xtrain,  Ytrain  = model.X[ID,:], model.Y[ID,:]
        model.X, model.Y = Xtrain, Ytrain
        parameter.X, parameter.Y = Xtrain, Ytrain

        # Train and evaluate
        start_time = time.time()
        reservoir, pred, stats, _ = train_evaluate_model(model, verbose=False)
        # Printing cross-validation results
        delta_time = time.time() - start_time
        printout('All fluxes', stats, reservoir, delta_time)

        # Collate all predicted Y and get stats and constraints
        rq2, l = collate_stats(model, parameter, biomass_index, pred, verbose=True)
        RQ2.append(rq2)
        Loss.append(l)

    # Print stats averaged over all iterations
    rqt = 'R2 (biomass)' if xfold < 2 else 'Q2 (biomass)'
    print('**** Epoch', epochs, rqt, '= %.4f (+/- %.4f) Loss = %.4f (+/- %.4f) epochs = %d' \
          % (np.mean(RQ2), np.std(RQ2), np.mean(Loss), np.std(Loss), epochs))

nbr parameters: 295050
---------- 550
---------- 550
train = 0.99 test = 0.99 loss-train = -1.000000 loss-test = -1.000000 iter=0
nbr parameters: 295050
---------- 550
---------- 550
train = 0.99 test = 0.99 loss-train = -1.000000 loss-test = -1.000000 iter=0
nbr parameters: 295050
---------- 550
---------- 550
train = 0.98 test = 0.98 loss-train = -1.000000 loss-test = -1.000000 iter=0
nbr parameters: 295050
---------- 550
---------- 550
train = 0.99 test = 0.98 loss-train = -1.000000 loss-test = -1.000000 iter=0
nbr parameters: 295050
---------- 550
---------- 550
train = 0.98 test = 0.98 loss-train = -1.000000 loss-test = -1.000000 iter=0
---------- 550
Stats for All fluxes CPU-time 326.2052
R2 = 0.9850 (+/- 0.0031) Constraint = -1.0000 (+/- 0.0000)
Q2 = 0.9844 (+/- 0.0023) Constraint = -1.0000 (+/- 0.0000)
(1000, 550) (1000, 550)
RQ2 = 0.23296355664280544
New Loss_Vout = 0.030904789038871533
New Loss_SV = 0.0011422953
New Loss_Vin = 5.8182195e-05
New Loss_Vpos = 0.0
Constraints = 0

# Create and Train ANN




In [2]:
# What you can change
seed = 2
np.random.seed(seed=seed)
trainname = 'e_coli_core_UB_50'
xfold = 5
Maxloop = 3
# End of what you can change

# Load training set
trainingfile = DIRECTORY+'Dataset_model/'+trainname
cobramodel = cobra.io.read_sbml_model(trainingfile+'.xml')
parameter = TrainingSet()
parameter.load(trainingfile)
RQ2, Loss = [], []

# Index of the biomass reaction in the cobra model. It does not depend on
# the run, so it is looked up once instead of once per loop iteration.
biomass_index = get_index_from_id('BIOMASS_Ecoli_core_w_GAM',cobramodel.reactions)

for Nloop in range(Maxloop):
    model = Neural_Model(trainingfile = trainingfile,
                         model_type = 'ANN_Dense',
                         scaler=True, batch_size=5,
                         train_rate=1e-3, activation="relu",
                         n_hidden = 1, hidden_dim = 50,
                         epochs = 500, xfold = xfold)
    # Train and evaluate
    start_time = time.time()
    try:
        reservoir, pred, stats, _ = train_evaluate_model(model, verbose=False)
    except Exception as error:
        # Best effort: a failed run is reported and scored with all-zero
        # predictions. Only Exception is caught, so that interrupting the
        # kernel (KeyboardInterrupt) still stops a long training run.
        print('Training failed for loop %d: %r' % (Nloop, error))
        reservoir, pred, stats, _ = None, np.zeros(model.Y.shape), None, None
    # Printing cross-validation results
    delta_time = time.time() - start_time
    printout('All fluxes', stats, reservoir, delta_time)
 
    # Collate all predicted Y and get stats and constraints
    rq2, l = collate_stats(model, parameter, biomass_index, pred, verbose=True)
    RQ2.append(rq2)
    Loss.append(l)

# Print stats averaged over all iterations
rqt = 'R2 (biomass)' if xfold < 2 else 'Q2 (biomass)'
print(rqt, '= %.4f (+/- %.4f) Loss = %.4f (+/- %.4f)' \
      % (np.mean(RQ2), np.std(RQ2), np.mean(Loss), np.std(Loss)))

nbr parameters: 8904
train = 0.71 test = -0.10 loss-train = -1.000000 loss-test = -1.000000 iter=0
nbr parameters: 8904
train = 0.67 test = 0.54 loss-train = -1.000000 loss-test = -1.000000 iter=0
nbr parameters: 8904
train = 0.64 test = 0.60 loss-train = -1.000000 loss-test = -1.000000 iter=0
nbr parameters: 8904
train = 0.72 test = 0.46 loss-train = -1.000000 loss-test = -1.000000 iter=0
nbr parameters: 8904
train = 0.68 test = 0.48 loss-train = -1.000000 loss-test = -1.000000 iter=0
Stats for All fluxes CPU-time 416.7927
R2 = 0.6856 (+/- 0.0293) Constraint = -1.0000 (+/- 0.0000)
Q2 = 0.3978 (+/- 0.2509) Constraint = -1.0000 (+/- 0.0000)
(50, 154) (50, 154)
RQ2 = 0.8113039196567445
Loss_Vout = 0.002969156360921121
Loss_SV = 0.16527662
Loss_Vin = 0.2739699
Constraints = 0.14740522061993575
nbr parameters: 8904
train = 0.73 test = -0.01 loss-train = -1.000000 loss-test = -1.000000 iter=0
nbr parameters: 8904
train = 0.60 test = 0.39 loss-train = -1.000000 loss-test = -1.000000 iter=0
n

In [None]:
# Create and train an ANN classification model (dense architecture) for P. putida

# What you can change 
seed = 10
np.random.seed(seed)
trainname = 'IJN1463_EXP'  # original note: "can change EB by UB"
# End of What you can change

# Read the X and Y arrays of the training set from Dataset_input/
trainingfile = ''.join([DIRECTORY, 'Dataset_input/', trainname])
X, Y = read_XY(trainingfile, 196)

# Settings of the dense classifier, gathered in one place before the
# model is built
model_options = dict(
    model_type='ANN_Dense',
    n_hidden=1,
    hidden_dim=100,
    batch_size=5,
    regression=False,
    activation='sigmoid',
    input_dim=196,
    output_dim=1,
    epochs=500,
    xfold=10,
    verbose=True,
)
model = Neural_Model(**model_options)

# Attach the data to the model and display the model settings
model.X = X
model.Y = Y
model.printout()

# Train, measuring the elapsed time
start_time = time.time()
reservoir, pred, stats, _ = train_evaluate_model(model, verbose=False)
delta_time = time.time() - start_time

# Print the cross-validation results
printout('dump', stats, model, delta_time)