# Regression with sklearn

In [1]:
# test with various learner
from Library.Import import *
from Library.Utilities import Linear, MLP, XGB, LeaveXout, read_XY
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score

DIRECTORY = './'
problempath = f'{DIRECTORY}Dataset_input/Dataset_open_ML/Regression/'
problems = os.listdir(problempath)
xfold = 5
niter = 5
regression = True

problems = [
    'raw_geographical_origin_of_music', # 0
    'raw_pumadyn32nh',
    'raw_white_wine',
    'raw_kin8nm',
    'raw_cars', # 4
    'raw_airfoil_self_noise', #5
    'raw_QSAR_fish_toxicity', # 6
    'raw_space_ga',
    'raw_concrete_compressive_strength', # 8
    'raw_grid_stability',
    'raw_miami_housing',
    'raw_cpu_activity', # 11
    'raw_naval_propulsion_plant',
    'raw_energy_efficiency' # 13
]

for j in [11]: # list(range(14)):
    problem = problems[j].split('.')[0]
    filename = f'{problempath}{problem}'
    feature, X, y = read_XY(filename, scaling='XY')

    # Train X, y for regression
    for learner in [Linear, MLP, XGB]:
        r2_avr, r2_dev, _ = LeaveXout(X, y.ravel(), feature, 
                                      learner=learner, regression=regression, 
                                      xfold=xfold, niter=niter, 
                                      selection=False, verbose=True)
        print(f'{problem} Size: {X.shape} Method: {learner.__name__} '
              f'R2: {r2_avr:.3f}±{r2_dev:.3f} '
              f'for {xfold}-fold-CV and {niter} iter')

raw_cpu_activity Size: (8192, 21) Method: Linear R2: 0.721±0.003 for 5-fold-CV and 5 iter




In [None]:
# Some printing
print(y_preds.shape)
print(r2)
yt, yp = y, y_preds[2]
print(r2_score(yt, yp))
for i in range(len(yt)):
    print(yt[i,0], yp[i])

(5, 8192)
[0.9825056217864363, 0.9846748362963192, 0.9842124251817258, 0.9841775772253266, 0.9775207845832263]
0.9842124251817258
0.9090909090909092 0.9013500809669495
0.888888888888889 0.8783550262451172
0.8585858585858587 0.8369823694229126
0.8181818181818182 0.814479410648346
0.797979797979798 0.7668014168739319
0.9292929292929294 0.9268829822540283
0.8282828282828284 0.8223488330841064
0.9090909090909092 0.8988349437713623
0.8787878787878789 0.8578177094459534
0.8686868686868687 0.8673478364944458
0.8787878787878789 0.9107837080955505
0.9393939393939394 0.9301302433013916
0.8484848484848485 0.8636117577552795
0.9292929292929294 0.9134025573730469
0.0 -0.003780284198001027
0.9494949494949496 0.9001536965370178
0.8181818181818182 0.8199779987335205
0.9191919191919192 0.8971131443977356
0.8383838383838385 0.8674895167350769
0.9090909090909092 0.9234932661056519
0.888888888888889 0.8911862373352051
0.8989898989898991 0.9003933668136597
0.8787878787878789 0.8476260900497437
0.7676767676

# Reservoir Computing

## AMN-QP to create a reservoir for various species

In [None]:
# Create, train and evaluate AMN_QP models
# on GR FBA simulated training set
# Repeat the process with different seeds
# This cell takes several hours to execute
# Save the best model in a reservoir

from Library.Import import *
from Library.Build_Model import Neural_Model, model_input
from Library.Build_Model import evaluate_model, train_evaluate_model
from sklearn.metrics import r2_score

DIRECTORY = './'
seed = 10
np.random.seed(seed=seed)
species = [ 
    'e_coli_core', #0
    'iEK1008', 
    'iIT341', #2
    'iJN1463', 
    'iML1515', #4
    'iMM904',
    'iPC815', #6
    'iYO844',
    'iYS1720', 
    'iYS854', # 9
    'iML1515EXP', #10
    'iML1515EXP_2800', 
]

model_type = 'AMN_QP' # 'AMN_QP' 'ANN_Dense'

for i in [4]: # list(range(11))    
    trainname = f'{species[i]}_train'
    trainingfile = f'{DIRECTORY}Dataset_input/{trainname}'
    reservoirname = f'{trainname}_{model_type}'
    reservoirfile = f'{DIRECTORY}Reservoir/{reservoirname}'
    if 'EXP' in trainname:
        n_hidden, hidden_dim, epochs, niter = 1, 500, 1000, 0 # AMN 1, 500, 1000 / 500, 0 ANN Dense 0, 0, 100, 0
    else:
        n_hidden, hidden_dim, epochs, niter = 1, 100, 100, 1
    Maxloop, Q2, PRED = 3, [], [] # 5, [], []
    
    # Training
    for Nloop in range(Maxloop):
        model = Neural_Model(trainingfile=trainingfile,
                             model_type=model_type,
                             scaler=True,
                             n_hidden=n_hidden, hidden_dim=hidden_dim,
                             output_dim=1,
                             epochs=epochs, 
                             xfold=5, niter=niter,
                             verbose=False)
        model.batch_size = 100 if model.X.shape[0] > 1000 else 10
        print(f'{species[i]} Y {model.Y.shape} minY {np.min(model.Y):.2f} '
              f'maxY {np.max(model.Y):.2f} Objective {model.objective} '
              f'Unique {len(set(list(model.Y[:,0])))}')
        
        # Train and evaluate
        start_time = time.time()
        reservoir, pred, stats, _ = train_evaluate_model(model, verbose=True)
        delta_time = time.time() - start_time

        # Printing cross-validation results
        stats.printout(reservoirname, delta_time)
        r2 = r2_score(model.Y[:,0], pred[:,0], multioutput='variance_weighted')
        print(f'Iter {Nloop} Collated Q2 growth {r2:.4f}')
        r2 = r2_score(model.Y, pred[:,:model.Y.shape[1]], multioutput='variance_weighted')
        print(f'Iter {Nloop} Collated Q2 all {r2:.4f}')
        Q2.append(r2)
        PRED.append(pred[:, 0])
        if r2 == max(Q2):  # save the best model
            reservoir.save(reservoirfile)

    # Some printing
    Q2, PRED = np.asarray(Q2), np.asarray(PRED)
    print(f'{trainname} Averaged Q2 = {np.mean(Q2):4f} ± {np.std(Q2):.4f} Best Q2 = {np.max(Q2):.4f}')
    reservoir.load(reservoirfile, output_dim=1)
    reservoir.printout()
    X, Y = model_input(reservoir, verbose=False)
    print(X.shape, Y.shape)
    pred, _ = evaluate_model(reservoir.model, X, Y, reservoir, verbose=False)
    y = pred[:,:model.Y.shape[1]]     
    r2 = r2_score(model.Y[:,0], y[:,0], multioutput='variance_weighted')
    print(f'Final R2 growth {r2:.4f}')


iML1515 Y (500, 1) minY 0.00 maxY 0.00 Objective None Unique 1
AMN scaler: 6.5
QP input shape: (500, 28) (500, 4)
-------train (400, 28) (400, 4)
-------test  (100, 28) (100, 4)
----------------------------------- AMN_QP
Dense layer n_hidden: 1 hidden_dim: 100 input_dim: 28 output_dim: 3682 activation: relu trainable: True
Instructions for updating:
Use `tf.linalg.matmul` instead
AMN output shapes for PoutV: (None, 3682) SV: (None, 1) Pin: (None, 1) Pko: (None, 1)  V: (None, 3682) outputs: (None, 7367)
nbr parameters: 374782
Loss Vout: 3.8E-04
Loss SV:   1.0E-02
Loss Vin:  2.6E-03
Loss Vko:  0.0E+00
Loss Vout: 5.3E-08
Loss SV:   7.1E-07
Loss Vin:  7.3E-08
Loss Vko:  0.0E+00
Loss Vout: 5.4E-08
Loss SV:   7.6E-07
Loss Vin:  6.0E-08
Loss Vko:  0.0E+00
train = 1.00 test = 1.00 loss-train = 0.000000 loss-test = 0.000000 Media found = 0 / 0
Loss Vout: 5.3E-08
Loss SV:   7.2E-07
Loss Vin:  7.0E-08
Loss Vko:  0.0E+00
Stats for iML1515_train_AMN_QP CPU-time 73.3550
iML1515_train_AMN_QP R2 = 1.0

## RC for regression

In [5]:
# Create, train and evaluate RC models
# on ML regression problems

from Library.Import import *
from Library.Utilities import read_XY
from Library.Build_Reservoir import RC_run, RC_write_multiple

DIRECTORY = './'
problems = [
    'raw_geographical_origin_of_music', # 0
    'raw_pumadyn32nh',
    'raw_white_wine', # 2 *
    'raw_kin8nm', # 3 *
    'raw_cars', # 4 *
    'raw_airfoil_self_noise', # 5 *
    'raw_QSAR_fish_toxicity', # 6 *
    'raw_space_ga', #7 *
    'raw_concrete_compressive_strength', # 8 *
    'raw_grid_stability', # 9 *
    'raw_miami_housing', # 10
    'raw_cpu_activity', # 11 *
    'raw_naval_propulsion_plant', # 12
    'raw_energy_efficiency' # 13 *
]
species = [ 
    'e_coli_core', #0
    'iEK1008', 
    'iIT341', #2
    'iJN1463', 
    'iML1515', #4
    'iMM904',
    'iPC815', #6
    'iYO844',
    'iYS1720', 
    'iYS854', # 9
    'iML1515EXP', #10
]

seed = 1
np.random.seed(seed=seed)
xfold = 5
repeat = 3 # 3
precision = 0
train_rate = 1.0e-4
n_hidden_prior = -1 # -1 binary input,  >0 ANN 
hidden_dim_prior = 0
activation_prior='' # '' or 'sharp_sigmoid' or 'relu'
n_hidden_post = -1 #  -1 a scaler is applied, >0 a ANN is used
hidden_dim_post = 0
temperature = False
multiple = -1 # -1 no stats > 0 nbr of reservoirs to get stats 
weight_pred_true_media = 0 # Loss to collect already generated media
failure = 10

run = 'selective-obj' # (fixed, generative, selective) x (obj, phe)
    
if run == 'generative-phe': 
    mode = 'AMN_phenotype' 
    epochs = 500
    n_hidden_prior = 1 
    hidden_dim_prior = 28 
    activation_prior='sharp_sigmoid'
    n_hidden_post = 1 
    hidden_dim_post = 500

if run == 'fixed-phe':
    mode = 'AMN_phenotype' 
    epochs = 500
    n_hidden_post = 0 #  
    hidden_dim_post = 500

if run == 'generative-obj': 
    mode = 'AMN_objective'
    epochs = 500
    n_hidden_prior = 1
    hidden_dim_prior = 28 
    activation_prior= 'sharp_sigmoid'
    weight_pred_true_media = 0.5

if run == 'selective-obj': # For iML1515EXP only
    mode = 'AMN_objective'
    epochs = 1000
    n_hidden_prior = 3 
    hidden_dim_prior = 280
    activation_prior='gumbel_softmax' 
    weight_pred_true_media = 1 
    temperature = True
    
for i in [10]:
    s = species[i]
    reservoirname = f'{s}_train_AMN_QP' # for iML1515EXP : _train_AMN_QP_10
    reservoirfile = f'{DIRECTORY}Reservoir/{reservoirname}' 
    for j in [13]:
        problem = problems[j].split('.')[0]
        trainingfile = f'{DIRECTORY}Dataset_input/Dataset_open_ML/Regression/{problem}'
        if n_hidden_prior == -1: # no prior
            trainingfile = f'{trainingfile}_{s}_binary'
        resultfile = f'{DIRECTORY}Result/{problem}_{s}_{mode}_{str(precision)}'
        H, X, Y = read_XY(trainingfile, nY=1, scaling='XY')
        start_time = time.time()
        model, pred, R2_avr, R2_dev, Q2_avr, Q2_dev, Med = \
        RC_run(reservoirfile, X, Y,
               mode=mode, 
               n_hidden_prior=n_hidden_prior, 
               n_hidden_post=n_hidden_post,
               hidden_dim_prior=hidden_dim_prior,
               hidden_dim_post=hidden_dim_post, 
               activation_prior=activation_prior,
               train_rate=train_rate, 
               precision=precision,
               temperature=temperature,
               failure=failure,
               weight_pred_true_media=weight_pred_true_media,
               repeat=repeat, xfold=xfold, epochs=epochs, verbose=True)
        delta_time = time.time() - start_time
        print(f'{problem} {s} {mode} precision: {precision:.2f} hidden-size: {hidden_dim_prior} \
R2: {R2_avr:.2f} ± {R2_dev:.4f} Q2: {Q2_avr:.2f} ± {Q2_dev:.4f} cpu time {delta_time:.2f} Media found {Med:.2f}')
        RC_write_multiple(reservoirfile, resultfile, model, 
                          Y, pred, 
                          multiple=multiple, 
                          precision_X=True, verbose=False)


Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x0000026126AF5070>>
Traceback (most recent call last):
  File "c:\Users\tnhoang\AppData\Local\anaconda3\envs\reservoir\lib\site-packages\ipykernel\ipkernel.py", line 790, in _clean_thread_parent_frames
    active_threads = {thread.ident for thread in threading.enumerate()}
KeyboardInterrupt: 


In [1]:
from Library.Import import *
from Library.Utilities import read_XY
from Library.Build_Reservoir import RC_run, RC_write_multiple
import time
import numpy as np

# Configuration and Input Section
DIRECTORY = './'
species = 'iML1515EXP'
run = 'selective-obj'
# Hyperparameters and settings 
seed = 1
np.random.seed(seed=seed)
xfold = 5
repeat = 3 # 3
precision = 0
train_rate = 1.0e-4
n_hidden_prior = -1 # -1 binary input,  >0 ANN 
hidden_dim_prior = 0
activation_prior='' # '' or 'sharp_sigmoid' or 'relu'
n_hidden_post = -1 #  -1 a scaler is applied, >0 a ANN is used
hidden_dim_post = 0
temperature = False
multiple = -1 # -1 no stats > 0 nbr of reservoirs to get stats 
weight_pred_true_media = 0 # Loss to collect already generated media
failure = 10

if run == 'generative-obj': 
    mode = 'AMN_objective'
    epochs = 500
    n_hidden_prior = 1
    hidden_dim_prior = 28 
    activation_prior= 'sharp_sigmoid'
    weight_pred_true_media = 0.5

if run == 'selective-obj': # For iML1515EXP only
    mode = 'AMN_objective'
    epochs = 1000
    n_hidden_prior = 3 
    hidden_dim_prior = 280
    activation_prior='gumbel_softmax' 
    weight_pred_true_media = 1 
    temperature = True

In [3]:
# Data loading function
def load_data(dataset_path):
    H, X, Y = read_XY(dataset_path, nY=1, scaling='XY')
    return X, Y

In [None]:
AMN_RC_v9\AMN_RC_v9\test\data\and.csv