# Regression / Classification with sklearn

In [1]:
# Regression with various learners

from Library.Import import *
from Library.Utilities import Linear, MLP, XGB, LeaveXout, read_XY
from sklearn.metrics import r2_score, accuracy_score, f1_score, matthews_corrcoef

# Cross-validated regression benchmark: each learner is evaluated with
# LeaveXout on the selected problem(s) and one summary line is printed per run.
DIRECTORY = './'
problempath = f'{DIRECTORY}Dataset_input/Dataset_open_ML/Regression/'
xfold = 5  # reported in the summary line as "{xfold}-fold-CV"
niter = 5  # reported in the summary line as "{niter} iter"

# The problem names are listed explicitly; the trailing comments give the list
# index used by the inner loop below. (No directory listing is needed: the
# list is the single source of problem names for this cell.)
problems = [
    'raw_geographical_origin_of_music', # 0
    'raw_pumadyn32nh',
    'raw_white_wine',
    'raw_kin8nm',
    'raw_cars', # 4
    'raw_airfoil_self_noise', #5
    'raw_QSAR_fish_toxicity', # 6
    'raw_space_ga',
    'raw_concrete_compressive_strength', # 8
    'raw_grid_stability',
    'raw_miami_housing',
    'raw_cpu_activity', # 11
    'raw_naval_propulsion_plant',
    'raw_energy_efficiency' # 13
]
scoring_function = r2_score
for learner in [Linear, MLP, XGB]:
    # Only problem 11 is run here; use list(range(14)) to run all of them.
    for j in [11]: # list(range(14)):
        problem = problems[j].split('.')[0]  # keep the part before the first '.'
        filename = f'{problempath}{problem}'
        feature, X, y = read_XY(filename, scaling='XY')
        # LeaveXout returns (average score, deviation, <ignored third value>).
        r2_avr, r2_dev, _ = LeaveXout(X, y.ravel(), feature,
                                      learner=learner,
                                      scoring_function=scoring_function,
                                      xfold=xfold, niter=niter, verbose=False)
        print(f'{problem} Size: {X.shape} Method: {learner.__name__} Score: {scoring_function.__name__} '
              f'R2: {r2_avr:.3f}±{r2_dev:.3f} '
              f'for {xfold}-fold-CV and {niter} iter')


raw_cpu_activity Size: (8192, 21) Method: Linear Score: r2_score R2: 0.721±0.003 for 5-fold-CV and 5 iter
raw_cpu_activity Size: (8192, 21) Method: MLP Score: r2_score R2: 0.980±0.000 for 5-fold-CV and 5 iter
raw_cpu_activity Size: (8192, 21) Method: XGB Score: r2_score R2: 0.984±0.001 for 5-fold-CV and 5 iter


In [2]:
# Classification with various learners

from Library.Import import *
from Library.Utilities import bayes_classifier, svm_classifier, XGB, GP, MLP
from Library.Utilities import LeaveXout, read_XY
from sklearn.metrics import r2_score, accuracy_score, f1_score, matthews_corrcoef

# Cross-validated classification benchmark: each learner is evaluated with
# LeaveXout on the selected problems and one summary line is printed per run.
DIRECTORY = './'
problempath = f'{DIRECTORY}Dataset_input/Dataset_classification/'
xfold = 5  # reported in the summary line as "{xfold}-fold-CV"
niter = 5  # reported in the summary line as "{niter} iter"

# The problem names are listed explicitly; the trailing comments give the list
# index used by the inner loop below. (No directory listing is needed: the
# list is the single source of problem names for this cell.)
problems = [
    'and', #0
    'or',  
    'linear', #2
    'triangle', #3
    'xor',    
    'checkerboard', #5
    'circle',
    'sine',  #7
    'concentric', #8
    'dots' #9
]

scoring_function = f1_score
for learner in [svm_classifier, bayes_classifier, MLP, XGB]:
    # NOTE(review): range(9) covers indices 0-8 only, so 'dots' (index 9) is
    # never evaluated here - confirm this is intentional.
    for j in range(9):
        problem = problems[j].split('.')[0]  # keep the part before the first '.'
        filename = f'{problempath}{problem}'
        feature, X, y = read_XY(filename, scaling='XY')
        # LeaveXout returns (average score, deviation, <ignored third value>).
        acc_avr, acc_dev, _ = LeaveXout(X, y.ravel(), feature, 
                                      learner=learner, scoring_function=scoring_function, 
                                      xfold=xfold, niter=niter, verbose=False)
        # NOTE: the value printed under the 'Acc:' label is whatever
        # scoring_function computes (f1_score here); the 'Score:' field of the
        # same line names the metric actually used.
        print(f'{problem} Size: {X.shape} Method: {learner.__name__} Score: {scoring_function.__name__} '
              f'Acc: {acc_avr:.3f}±{acc_dev:.3f} '
              f'for {xfold}-fold-CV and {niter} iter')

and Size: (500, 2) Method: svm_classifier Score: f1_score Acc: 0.791±0.006 for 5-fold-CV and 5 iter
or Size: (500, 2) Method: svm_classifier Score: f1_score Acc: 0.947±0.002 for 5-fold-CV and 5 iter
linear Size: (500, 2) Method: svm_classifier Score: f1_score Acc: 0.975±0.003 for 5-fold-CV and 5 iter
triangle Size: (500, 2) Method: svm_classifier Score: f1_score Acc: 0.870±0.005 for 5-fold-CV and 5 iter
xor Size: (500, 2) Method: svm_classifier Score: f1_score Acc: 0.546±0.039 for 5-fold-CV and 5 iter
checkerboard Size: (500, 2) Method: svm_classifier Score: f1_score Acc: 0.707±0.019 for 5-fold-CV and 5 iter
circle Size: (500, 2) Method: svm_classifier Score: f1_score Acc: 0.000±0.000 for 5-fold-CV and 5 iter
sine Size: (500, 2) Method: svm_classifier Score: f1_score Acc: 0.794±0.006 for 5-fold-CV and 5 iter
concentric Size: (500, 2) Method: svm_classifier Score: f1_score Acc: 0.808±0.000 for 5-fold-CV and 5 iter
and Size: (500, 2) Method: bayes_classifier Score: f1_score Acc: 0.887±0.

# Reservoir Computing

## AMN-QP to create a reservoir for various species

In [None]:
# Create, train and evaluate AMN_QP models
# on GR FBA simulated training set
# Repeat the process with different seeds
# This cell takes several hours to execute
# Save the best model in a reservoir

from Library.Import import *
from Library.Build_Model import Neural_Model, model_input
from Library.Build_Model import evaluate_model, train_evaluate_model
from sklearn.metrics import r2_score

# Train an AMN_QP model Maxloop times per selected species, save the model
# with the best collated Q2 as the reservoir, then reload and re-evaluate it.
DIRECTORY = './'
seed = 10
np.random.seed(seed=seed)
# Species names; trailing comments give the list index used by the loop below.
species = [ 
    'e_coli_core', #0
    'iEK1008', 
    'iIT341', #2
    'iJN1463', 
    'iML1515', #4
    'iMM904',
    'iPC815', #6
    'iYO844',
    'iYS1720', 
    'iYS854', # 9
    'iML1515EXP' #10
]

model_type = 'AMN_QP' # 'AMN_QP' 'ANN_Dense'

for i in [4]: # list(range(11))    
    trainname = f'{species[i]}_train'
    trainingfile = f'{DIRECTORY}Dataset_input/{trainname}'
    reservoirname = f'{trainname}_{model_type}'
    reservoirfile = f'{DIRECTORY}Reservoir/{reservoirname}'
    # Training sets whose name contains 'EXP' use a different set of
    # hyper-parameters from the other training sets.
    if 'EXP' in trainname:
        n_hidden, hidden_dim, epochs, niter = 1, 500, 1000, 0 # AMN 1, 500, 1000 / 500, 0 ANN Dense 0, 0, 100, 0
    else:
        n_hidden, hidden_dim, epochs, niter = 1, 100, 100, 1
    # Q2 collects one score per repetition, PRED the first prediction column.
    Maxloop, Q2, PRED = 3, [], [] # 5, [], []
    
    # Training
    for Nloop in range(Maxloop):
        model = Neural_Model(trainingfile=trainingfile,
                             model_type=model_type,
                             scaler=True,
                             n_hidden=n_hidden, hidden_dim=hidden_dim,
                             output_dim=1,
                             epochs=epochs, 
                             xfold=5, niter=niter,
                             verbose=False)
        # Larger batches for training sets with more than 1000 rows.
        model.batch_size = 100 if model.X.shape[0] > 1000 else 10
        print(f'{species[i]} Y {model.Y.shape} minY {np.min(model.Y):.2f} '
              f'maxY {np.max(model.Y):.2f} Objective {model.objective} '
              f'Unique {len(set(list(model.Y[:,0])))}')
        
        # Train and evaluate
        start_time = time.time()
        reservoir, pred, stats, _ = train_evaluate_model(model, verbose=True)
        delta_time = time.time() - start_time

        # Printing cross-validation results
        stats.printout(reservoirname, delta_time)
        # Score on the first output column only ...
        r2 = r2_score(model.Y[:,0], pred[:,0], multioutput='variance_weighted')
        print(f'Iter {Nloop} Collated Q2 growth {r2:.4f}')
        # ... then on all columns of model.Y; this second value is the one
        # stored in Q2 and used to select the best model.
        r2 = r2_score(model.Y, pred[:,:model.Y.shape[1]], multioutput='variance_weighted')
        print(f'Iter {Nloop} Collated Q2 all {r2:.4f}')
        Q2.append(r2)
        PRED.append(pred[:, 0])
        if r2 == max(Q2):  # save the best model
            reservoir.save(reservoirfile)

    # Some printing
    Q2, PRED = np.asarray(Q2), np.asarray(PRED)
    # The mean uses '.4f' like the other values on this line (it was '4f',
    # i.e. a minimum width of 4 with the default 6-decimal precision).
    print(f'{trainname} Averaged Q2 = {np.mean(Q2):.4f} ± {np.std(Q2):.4f} Best Q2 = {np.max(Q2):.4f}')
    # `reservoir` still refers to the last repetition; load the saved file so
    # that the following evaluation uses the best model written above.
    reservoir.load(reservoirfile, output_dim=1)
    reservoir.printout()
    X, Y = model_input(reservoir, verbose=False)
    print(X.shape, Y.shape)
    pred, _ = evaluate_model(reservoir.model, X, Y, reservoir, verbose=False)
    y = pred[:,:model.Y.shape[1]]     
    r2 = r2_score(model.Y[:,0], y[:,0], multioutput='variance_weighted')
    print(f'Final R2 growth {r2:.4f}')


## RC for Regression / Classification

### Regression

In [3]:
# Create, train and evaluate RC models
# on ML regression problems

from Library.Import import *
from Library.Utilities import read_XY
from Library.Build_Reservoir import RC_run, RC_write_multiple

# Configuration for the RC regression runs: dataset / species names, default
# hyper-parameters, and the per-`run` overrides selected at the bottom.
DIRECTORY = './'
# Regression problems; trailing comments give the list index.
problems = [
    'raw_geographical_origin_of_music', # 0
    'raw_pumadyn32nh',
    'raw_white_wine', # 2 *
    'raw_kin8nm', # 3 *
    'raw_cars', # 4 *
    'raw_airfoil_self_noise', # 5 *
    'raw_QSAR_fish_toxicity', # 6 *
    'raw_space_ga', #7 *
    'raw_concrete_compressive_strength', # 8 *
    'raw_grid_stability', # 9 *
    'raw_miami_housing', # 10
    'raw_cpu_activity', # 11 *
    'raw_naval_propulsion_plant', # 12
    'raw_energy_efficiency' # 13 *
]
# Species whose trained reservoirs can be used; trailing comments give the index.
species = [ 
    'e_coli_core', #0
    'iEK1008', 
    'iIT341', #2
    'iJN1463', 
    'iML1515', #4
    'iMM904',
    'iPC815', #6
    'iYO844',
    'iYS1720', 
    'iYS854', #9
    'iML1515EXP', #10
]

# Default settings; the selected `run` below overrides some of them.
seed = 1
np.random.seed(seed=seed)
xfold = 5
repeat = 3 # 3
precision = 0
train_rate = 1.0e-4
n_hidden_prior = -1 # -1 binary input,  >0 ANN 
hidden_dim_prior = 0
activation_prior='' # '' or 'sharp_sigmoid' or 'relu'
n_hidden_post = -1 #  -1 a scaler is applied, >0 a ANN is used
hidden_dim_post = 0
temperature = False
multiple = -1 # -1 no stats > 0 nbr of reservoirs to get stats 
weight_pred_true_media = 0 # Loss to collect already generated media
failure = 10

run = 'selective_obj' # (fixed, generative, selective) x (obj, phe)

# Exactly one branch applies. Only the four combinations below are configured;
# any other value is rejected explicitly so that `mode` / `epochs` can never be
# left undefined (or silently reused from a previously executed cell).
if run == 'generative_phe': 
    mode = 'AMN_phenotype' 
    epochs = 500
    n_hidden_prior = 1 
    hidden_dim_prior = 28 
    activation_prior='sharp_sigmoid'
    n_hidden_post = 1 
    hidden_dim_post = 500

elif run == 'fixed_phe':
    mode = 'AMN_phenotype' 
    epochs = 500
    n_hidden_post = 1 #  0
    hidden_dim_post = 500

elif run == 'generative_obj': 
    mode = 'AMN_objective'
    epochs = 500
    n_hidden_prior = 1
    hidden_dim_prior = 28 
    activation_prior='sharp_sigmoid' 
    weight_pred_true_media = 0.5

elif run == 'selective_obj': # For iML1515EXP only
    mode = 'AMN_objective'
    epochs = 1000
    n_hidden_prior = 3 
    hidden_dim_prior = 280
    activation_prior='gumbel_softmax' 
    weight_pred_true_media = 0
    n_hidden_post = -1 # 0 or -1
    temperature = True

else:
    raise ValueError(
        f"Unsupported run '{run}': expected one of 'generative_phe', "
        "'fixed_phe', 'generative_obj', 'selective_obj'")
    
# Keyword options forwarded unchanged to every RC_run call below.
rc_options = dict(
    mode=mode,
    n_hidden_prior=n_hidden_prior,
    n_hidden_post=n_hidden_post,
    hidden_dim_prior=hidden_dim_prior,
    hidden_dim_post=hidden_dim_post,
    activation_prior=activation_prior,
    train_rate=train_rate,
    precision=precision,
    temperature=temperature,
    failure=failure,
    weight_pred_true_media=weight_pred_true_media,
    repeat=repeat,
    xfold=xfold,
    epochs=epochs,
    verbose=True,
)

# Outer loop: reservoir species; inner loop: regression problem.
for i in [10]: # range(11)
    s = species[i]
    # for iML1515EXP : _train_AMN_QP_10
    reservoirname = f'{s}_train_AMN_QP'
    reservoirfile = f'{DIRECTORY}Reservoir/{reservoirname}'
    for j in [13]: # range(14)
        problem = problems[j].partition('.')[0]
        # 'fixed' runs read the per-species binary variant of the dataset.
        binary_suffix = f'_{s}_binary' if 'fixed' in run else ''
        trainingfile = (f'{DIRECTORY}Dataset_input/Dataset_open_ML/Regression/{problem}'
                        f'{binary_suffix}')
        resultfile = f'{DIRECTORY}Result/{problem}_{s}_{run}'
        H, X, Y = read_XY(trainingfile, nY=1, scaling='XY')

        # Time the complete RC training / evaluation call.
        start_time = time.time()
        (model, pred,
         R2_avr, R2_dev,
         Q2_avr, Q2_dev, Q2_best,
         Med) = RC_run(reservoirfile, X, Y, **rc_options)
        delta_time = time.time() - start_time

        summary = (
            f'{problem} {s} {run} hidden-size: {hidden_dim_prior} '
            f'R2: {R2_avr:.4f} ± {R2_dev:.4f} '
            f'Q2: {Q2_avr:.4f} ± {Q2_dev:.4f} '
            f'Q2-best: {Q2_best:.4f} '
            f'cpu time {delta_time:.2f} '
            f'Media found {Med:.2f}'
        )
        print(summary)

        # Hand the model and its predictions to RC_write_multiple together
        # with the result file path.
        RC_write_multiple(reservoirfile, resultfile, model,
                          Y, pred,
                          multiple=multiple,
                          precision_X=True, verbose=False)


number of reactions: 3682=1
number of metabolites: 1877
filtered measurements size: 1
RC reservoir file: ./Reservoir/iML1515EXP_train_AMN_QP
RC model type: RC
RC number constraint: 1
RC precsion: 0
RC model input dim: 8
RC model output dim: 1
training set size (768, 8) (768, 1)
reservoir S, Pin, Pout matrices (1877, 3682) (28, 3682) (1, 3682)
RC training epochs: 1000
RC training scoring: r2_score
RC training learn rate: 0.0001
RC training dropout: 0.25
RC training batch size: 10
RC training validation iter: 0
RC training xfold: 5
--------prior network --------
training file: None
model type: ANN_Dense
model number constraints: 0
model scaler: 0.0
model input dim: 8
model output dim: 280
no training set provided
nbr hidden layer: 3
hidden layer size: 280
activation function: gumbel_softmax
--------reservoir network-----
training file: ./Dataset_input/iML1515EXP_train
model type: AMN_QP
model number constraints: 3
model scaler: 1.0
model input dim: 28
model output dim: 1
training set siz

### Classification

In [1]:
# Create, train and evaluate RC models
# on ML classification problems

from Library.Import import *
from Library.Utilities import read_XY
from Library.Build_Reservoir import RC_run, RC_write_multiple
from sklearn.metrics import r2_score, accuracy_score, f1_score, matthews_corrcoef

# Configuration for the RC classification runs: dataset / species names,
# shared settings, and the per-`run` hyper-parameters selected at the bottom.
DIRECTORY = './'
# Classification problems; trailing comments give the list index.
problems = [
    'and', #0
    'or',  
    'linear', #2
    'triangle', #3
    'xor',    
    'checkerboard', #5
    'circle',
    'sine',  #7
    'concentric', #8
    'dots' #9
]

# Species whose trained reservoirs can be used; trailing comments give the index.
species = [ 
    'e_coli_core', #0
    'iEK1008', 
    'iIT341', #2
    'iJN1463', 
    'iML1515', #4
    'iMM904',
    'iPC815', #6
    'iYO844',
    'iYS1720', #8
    'iYS854', 
    'iML1515EXP' #10
]

seed = 1
np.random.seed(seed=seed)
xfold = 5 # 10 # 25 gives 5 in test set
repeat = 3 # 3
precision = 0
train_rate = 1.0e-4
temperature = False
multiple = 1 # -1 no stats > 0 nbr of reservoirs to get stats 
weight_pred_true_media = 0 # Loss to collect already generated media
failure = 10

print("TensorFlow version:", tf.__version__)

run = 'selective_obj' # (fixed, generative, selective) x (obj, phe)
scoring_function = accuracy_score

# Exactly one branch applies. This cell defines no defaults for the
# n_hidden_* / hidden_dim_* / activation_prior settings, so every branch sets
# them all; any other `run` value is rejected explicitly instead of failing
# later with a NameError (or silently reusing values from another cell).
if run == 'generative_phe': 
    mode = 'AMN_phenotype' 
    epochs = 500
    n_hidden_prior = 1 
    hidden_dim_prior = 28 
    activation_prior='sharp_sigmoid'
    n_hidden_post = 1 
    hidden_dim_post = 500

elif run == 'fixed_phe':
    mode = 'AMN_phenotype' 
    epochs = 500
    n_hidden_prior = -1
    hidden_dim_prior = -1 
    activation_prior=''
    n_hidden_post = 1 
    hidden_dim_post = 500

elif run == 'generative_obj': 
    mode = 'AMN_objective'
    epochs = 500
    n_hidden_prior = 1
    hidden_dim_prior = 28 
    activation_prior='sharp_sigmoid' 
    n_hidden_post = -1 
    hidden_dim_post = 0
    weight_pred_true_media = 0.5

elif run == 'selective_obj': # For iML1515EXP only
    mode = 'AMN_objective'
    epochs = 1000
    n_hidden_prior = 3 
    hidden_dim_prior = 280 # 280
    activation_prior='gumbel_softmax' 
    n_hidden_post = -1 
    hidden_dim_post = 0
    weight_pred_true_media = 1
    temperature = True

else:
    raise ValueError(
        f"Unsupported run '{run}': expected one of 'generative_phe', "
        "'fixed_phe', 'generative_obj', 'selective_obj'")

# Keyword options forwarded unchanged to every RC_run call below.
rc_options = dict(
    mode=mode,
    scoring_function=scoring_function,
    n_hidden_prior=n_hidden_prior,
    n_hidden_post=n_hidden_post,
    hidden_dim_prior=hidden_dim_prior,
    hidden_dim_post=hidden_dim_post,
    activation_prior=activation_prior,
    train_rate=train_rate,
    precision=precision,
    temperature=temperature,
    failure=failure,
    weight_pred_true_media=weight_pred_true_media,
    repeat=repeat,
    xfold=xfold,
    epochs=epochs,
    verbose=True,
)

# Outer loop: reservoir species; inner loop: classification problem.
for i in [10]: # range(11)
    s = species[i]
    reservoirname = f'{s}_train_AMN_QP'
    reservoirfile = f'{DIRECTORY}Reservoir/{reservoirname}'
    for j in [9]: # range(10):
        problem = problems[j].partition('.')[0]
        # 'fixed' runs read the per-species binary variant of the dataset.
        binary_suffix = f'_{s}_binary' if 'fixed' in run else ''
        trainingfile = (f'{DIRECTORY}Dataset_input/Dataset_classification/{problem}'
                        f'{binary_suffix}')
        resultfile = f'{DIRECTORY}Result/{problem}_{s}_{run}'
        H, X, Y = read_XY(trainingfile, nY=1, scaling='XY')

        # Time the complete RC training / evaluation call.
        start_time = time.time()
        (model, pred,
         Acc_train_avr, Acc_train_dev,
         Acc_test_avr, Acc_test_dev, Acc_best,
         Med) = RC_run(reservoirfile, X, Y, **rc_options)
        delta_time = time.time() - start_time

        summary = (
            f'{problem} {s} {run} hidden-size: {hidden_dim_prior} '
            f'Acc-train: {Acc_train_avr:.4f} ± {Acc_train_dev:.4f} '
            f'Acc-test: {Acc_test_avr:.4f} ± {Acc_test_dev:.4f} '
            f'Acc-best: {Acc_best:.4f} '
            f'cpu time {delta_time:.2f} '
            f'Media found {Med:.2f}'
        )
        print(summary)

        # Hand the model and its predictions to RC_write_multiple together
        # with the result file path.
        RC_write_multiple(reservoirfile, resultfile, model,
                          Y, pred,
                          multiple=multiple,
                          precision_X=True, verbose=False)

TensorFlow version: 2.13.0
number of reactions: 3682=1
number of metabolites: 1877
filtered measurements size: 1
RC reservoir file: ./Reservoir/iML1515EXP_train_AMN_QP
RC model type: RC
RC number constraint: 1
RC precsion: 0
RC model input dim: 2
RC model output dim: 1
training set size (500, 2) (500, 1)
reservoir S, Pin, Pout matrices (1877, 3682) (28, 3682) (1, 3682)
RC training epochs: 1000
RC training scoring: accuracy_score
RC training learn rate: 0.0001
RC training dropout: 0.25
RC training batch size: 10
RC training validation iter: 0
RC training xfold: 5
--------prior network --------
training file: None
model type: ANN_Dense
model number constraints: 0
model scaler: 0.0
model input dim: 2
model output dim: 280
no training set provided
nbr hidden layer: 3
hidden layer size: 280
activation function: gumbel_softmax
--------reservoir network-----
training file: ./Dataset_input/iML1515EXP_train
model type: AMN_QP
model number constraints: 3
model scaler: 1.0
model input dim: 28
mod