In [4]:
import numpy as np
import pickle
import pandas as pd
import os
from os.path import join
import warnings
warnings.filterwarnings("ignore")
from sklearn.metrics import r2_score
from sklearn.linear_model import ElasticNet, LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn import preprocessing
from scipy import stats
import xgboost as xgb
from hyperopt import fmin, tpe, rand, hp, Trials

from tensorflow.keras import regularizers, initializers, optimizers, models, layers
from tensorflow.keras.losses import MSE
from tensorflow.keras.activations import relu

import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import matplotlib as mpl
# plt.style.use('CCB_plot_style_0v4.mplstyle')
c_styles      = mpl.rcParams['axes.prop_cycle'].by_key()['color']   # fetch the defined color styles
high_contrast = ['#004488', '#DDAA33', '#BB5566', '#000000']

datasets_dir = "../../data"

## Loading training and test data:

In [21]:
split = "Brassicaceae"

data_train = pd.read_pickle(join(datasets_dir, "splits", split, "training_data.pkl"))
data_test = pd.read_pickle(join(datasets_dir, "splits", split, "test_data.pkl"))
data_val = pd.read_pickle(join(datasets_dir, "splits", split, "val_data.pkl"))

# data_train["geomean_kcat"] = np.log10(data_train["geomean_kcat"])
# data_test["geomean_kcat"] = np.log10(data_test["geomean_kcat"])

data_train["log10_kcat"] = np.log10(data_train["kcat"])
data_test["log10_kcat"] = np.log10(data_test["kcat"])
data_val["log10_kcat"] = np.log10(data_val["kcat"])

data_train.rename(columns = {"Enzyme rep" : "ESM2"}, inplace = True)
data_test.rename(columns = {"Enzyme rep" : "ESM2"}, inplace = True)
data_val.rename(columns = {"Enzyme rep" : "ESM2"}, inplace = True)

data_train['Temperature'] = data_train['Temperature'].replace('-', np.nan)
data_test['Temperature'] = data_test['Temperature'].replace('-', np.nan)
data_val['Temperature'] = data_val['Temperature'].replace('-', np.nan)
data_train['pH'] = data_train['pH'].replace('-', np.nan)
data_test['pH'] = data_test['pH'].replace('-', np.nan)
data_val['pH'] = data_val['pH'].replace('-', np.nan)
data_train['Type'] = data_train['Type'].replace('wildtype', 1)
data_train['Type'] = data_train['Type'].replace('mutant', 2)
data_test['Type'] = data_test['Type'].replace('wildtype', 1)
data_test['Type'] = data_test['Type'].replace('mutant', 2)
data_val['Type'] = data_val['Type'].replace('wildtype', 1)
data_val['Type'] = data_val['Type'].replace('mutant', 2)

data_train['MACCS FP'] = data_train['MACCS FP'].astype(str)
data_test['MACCS FP'] = data_test['MACCS FP'].astype(str)
data_val['MACCS FP'] = data_val['MACCS FP'].astype(str)

len(data_train), len(data_test), len(data_val)

(341, 113, 7)

In [22]:
train_indices = list(np.load(join(datasets_dir, "splits", split, "CV_train_indices_Seed plants.npy"), allow_pickle = True))
test_indices = list(np.load(join(datasets_dir, "splits", split, "CV_test_indices_Seed plants.npy"), allow_pickle = True))

In [24]:
data_train[data_train['GNN FP'].apply(lambda x: not isinstance(x, np.ndarray))]


Unnamed: 0,ECs,Organism,Uniprot IDs,PMID,Type,kcat,Temperature,pH,Substrates,Products,...,max_kcat_for_RID,difference_fp,structural_fp,max_kcat_for_EC,MACCS FP,frac_of_max_UID,frac_of_max_RID,frac_of_max_EC,GNN FP,log10_kcat
35,2.1.2.1,Arabidopsis thaliana,Q94JQ3,20518745,wildtype AtSHMT3,3.8,30.0,8.5,Tetrahydropteroyl hepta-L-glutamate;L-Serine;,"5,10-Methylenetetrahydropteroyl hepta-L-glutam...",...,3.8,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -10.0, 0.0, 0.0...",1100110100000001001000110110000001101101111010...,21.8,0000000000000000000001000110000000000010000101...,0.240506,1.0,0.174312,,0.579784


In [25]:
data_test = data_test[~data_test['GNN FP'].isnull()]

nan_rows = data_train[data_train['GNN FP'].apply(lambda x: not isinstance(x, np.ndarray))]

# Get the indices of these rows
indices_with_nan = nan_rows.index.tolist()
indices_with_nan.reverse()

for ind, sub_list in enumerate(train_indices):
    for elem in sub_list:
        if elem in indices_with_nan:
            sub_list.remove(elem)

for ind, sub_list in enumerate(train_indices):
    for num in indices_with_nan:
        train_indices[ind] = [elem - 1 if elem > num else elem for elem in sub_list]

removed = {}

for ind, sub_list in enumerate(test_indices):
    for elem in sub_list:
        if elem in indices_with_nan:
            sub_list.remove(elem)

for ind, sub_list in enumerate(test_indices):
    for num in indices_with_nan:
        test_indices[ind] = [elem - 1 if elem > num else elem for elem in sub_list]      


data_train = data_train[data_train['GNN FP'].apply(lambda x: isinstance(x, np.ndarray))]

In [28]:
data_train.head(40)

Unnamed: 0,ECs,Organism,Uniprot IDs,PMID,Type,kcat,Temperature,pH,Substrates,Products,...,max_kcat_for_RID,difference_fp,structural_fp,max_kcat_for_EC,MACCS FP,frac_of_max_UID,frac_of_max_RID,frac_of_max_EC,GNN FP,log10_kcat
0,1.1.1.22,Arabidopsis thaliana,Q9LIA8,18057039,1,1.92,22.0,8.7,UDPglucose;NAD+;H2O;,UDPglucuronate;NADH;H+;,...,2.52,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",1100111100000001001000110110010001001101111100...,2.52,0000000000000000000000000000010000000100000001...,1.0,0.761905,0.761905,"[0.72350174, 39.56145, 221.54413, 12.396297, 1...",0.283301
1,1.1.1,Arabidopsis thaliana,Q9FZ42,21169366,wildtype GST-tag,1.7,,7.0,NADPH;(E)-2-Pentenal;H+;,NADP+;(E)-2-Penten-1-ol;,...,9.5,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",1100111100000001001000110110010001001111111100...,44.0,0000000000000000000001000010010000000010000101...,0.232877,0.178947,0.038636,"[0.63306403, 28.175425, 283.89398, 11.895383, ...",0.230449
2,2.5.1.18,Arabidopsis thaliana,O80852,29732642,wildtype reduced,0.85,,,"Reduced glutathione;1-Chloro-2,4-dinitrobenzene;","S-(2,4-Dinitrophenyl)glutathione;HCl;",...,0.98,"[0.0, 0.0, 0.0, 0.0, 0.0, 10.0, 0.0, 0.0, 0.0,...",1100010000000000100000000000000001001001001000...,0.98,0000000000000000000000001000000000000000000001...,0.867347,0.867347,0.867347,"[0.0, 9.202225, 101.13074, 0.07061178, 29.3235...",-0.070581
3,2.5.1.47,Arabidopsis thaliana,P47998,16166087,1,1780.0,25.0,7.0,O-Acetyl-L-serine;Sulfide;,Acetate;L-Cysteine;,...,2170.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",1000000000000000000000000000000001000001001000...,2170.0,0000000000000000000000000000000000000000000000...,0.820276,0.820276,0.820276,"[0.014854667, 0.0, 0.018850282, 0.0, 0.0, 0.0,...",3.25042
4,1.1.1.40,Arabidopsis thaliana,Q9LYG3,18288573,wildtype isoform NADP-ME2,75.0,30.0,7.0,NADPH;Pyruvate;CO2;H+;,NADP+;L-Malate;,...,284.1,"[0.0, 0.0, 0.0, 0.0, 20.0, 0.0, 0.0, 0.0, 0.0,...",1100111100000001001000110110010001001111111100...,324.1,0000000000000000000000000000000000000000000000...,0.23141,0.263992,0.23141,"[0.0, 0.0, 0.0, 0.092052005, 0.0, 0.0, 0.0, 0....",1.875061
5,1.1.1.25,Arabidopsis thaliana,Q9SQT8,16784230,"mutant S338A, delta1-88, C-terminal his-tag",33.2,22.0,8.8,NADP+;Shikimate;,NADPH;3-Dehydroshikimate;H+;,...,428.0,"[0.0, 0.0, 0.0, 0.0, 20.0, 0.0, 0.0, 0.0, 0.0,...",1100111100000001001000110110010001001101111100...,428.0,0000000000000000000001000010010000000010000101...,1.0,0.07757,0.07757,"[0.27131316, 8.377903, 80.6544, 0.0, 55.43959,...",1.521138
6,2.6.1.1,Arabidopsis thaliana,P46248,9535706,1,176.0,25.0,8.0,L-Aspartate;2-Oxoglutarate;,Oxaloacetate;L-Glutamate;,...,217.0,"[0.0, 0.0, 0.0, 0.0, -20.0, 0.0, 0.0, 0.0, 0.0...",1000000000000000000000000000000001000001001000...,574.0,0000000000000000000000000000000000000000000000...,0.630824,0.81106,0.30662,"[0.0, 3.11121, 43.91027, 0.00024537742, 18.954...",2.245513
7,1.1.1,Cochlearia officinalis,A7DY56,24583623,wildtype C-terminal His6-tag,0.57,30.0,5.0,"NADPH;2,4-Dimethyl-3-pentanone;H+;","NADP+;2,4-Dimethyl-3-pentanol;",...,0.57,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",1100111100000001001000110110010001001111111100...,44.0,0000000000000000000001000010010000000010000101...,0.048305,1.0,0.012955,"[0.0, 2.1905894, 41.609715, 0.24987578, 14.393...",-0.244125
8,4.2.1.91,Arabidopsis thaliana,Q9FNJ8,17726025,wildtype Nus-fused ADT5,1.1,37.0,7.5,L-Arogenate;,L-Phenylalanine;CO2;H2O;,...,6.1,"[0.0, 0.0, 0.0, 0.0, 10.0, 0.0, 0.0, 0.0, 0.0,...",1100000000000000000000000000000001000001001000...,6.1,0000000000000000000000000000000000000000000000...,1.0,0.180328,0.180328,"[0.09043772, 3.4662678, 88.56494, 0.0002453774...",0.041393
9,4.4.1.14,Arabidopsis thaliana,Q06402,12968022,wildtype ACS2 Isozyme,0.5,30.0,8.2,S-Adenosyl-L-methionine;,5'-Deoxy-5'-(methylthio)adenosine;1-Aminocyclo...,...,9.6,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",1100110100000000000000100010010001100001001100...,9.6,0000000000000000000001000010000000000010000001...,1.0,0.052083,0.052083,"[0.18087544, 14.578674, 135.77676, 0.062591635...",-0.30103


In [8]:
train_X = np.array(list(data_train["ESM2"]))
train_X = np.concatenate([train_X, np.array(list(data_train["Temperature"]))[:, np.newaxis], np.array(list(data_train["pH"]))[:, np.newaxis]], axis = 1)
train_Y = np.array(list(data_train["log10_kcat"]))

test_X = np.array(list(data_test["ESM2"]))
test_X = np.concatenate([test_X, np.array(list(data_test["Temperature"]))[:, np.newaxis], np.array(list(data_test["pH"]))[:, np.newaxis]], axis = 1)
test_Y = np.array(list(data_test["log10_kcat"]))

train_X = train_X.astype(float)
test_X = test_X.astype(float)

In [9]:
mean_y, std_y = np.mean(train_Y), np.std(train_Y)
train_Y = (train_Y-mean_y)/std_y
test_Y = (test_Y-mean_y)/std_y

scaler = preprocessing.StandardScaler().fit(train_X[:, 2048:])
train_X[:, 2048:] = scaler.transform(train_X[:, 2048:])
test_X[:, 2048:] = scaler.transform(test_X[:, 2048:])

ValueError: Found array with 0 feature(s) (shape=(340, 0)) while a minimum of 1 is required by StandardScaler.

## 1. Training and validation machine learning models

### (a) Linear Regression

#### (i) Performing hyperparameter optimization

In [6]:
def cross_validation_neg_r2_linear_regression(param):
    R2 = []
    for i in range(5):
        train_index, test_index  = train_indices[i], test_indices[i]

        reg = ElasticNet(alpha = param["alpha"], l1_ratio = param["l1_ratio"]).fit(train_X[train_index], train_Y[train_index])
        y_valid_pred = reg.predict(train_X[test_index])
        R2.append(r2_score(np.reshape(train_Y[test_index], (-1)),  y_valid_pred))
    return(-np.mean(R2))


#Defining search space for hyperparameter optimizationhp.uniform("reg_alpha", 0, 5)
space_linear_regression = {'alpha': hp.uniform('alpha', 0,5),
                            'l1_ratio': hp.uniform('l1_ratio', 0,1)}
    

In [7]:
'''trials = Trials()
best = fmin(fn = cross_validation_neg_r2_linear_regression, space = space_linear_regression,
            algo=rand.suggest, max_evals = 2000, trials=trials)''';

#### Best set of hyperparameters:

In [8]:
#param = trials.argmin

In [9]:
param = {'alpha': 0.3960857176137572, 'l1_ratio': 0.003735725013911728}

#### (ii) Training and validating the final model
Training the model and validating it on the test set:

In [10]:
test_Y = (test_Y+mean_y)*std_y

In [11]:
reg = ElasticNet(alpha = param["alpha"], l1_ratio = param["l1_ratio"]).fit(train_X, train_Y)
y_test_pred = reg.predict(test_X)
y_test_pred = (y_test_pred+mean_y)*std_y

MSE_dif_fp_test = np.mean(abs(np.reshape(test_Y, (-1)) - y_test_pred)**2)
R2_dif_fp_test = r2_score(np.reshape(test_Y, (-1)), y_test_pred)
Pearson = stats.pearsonr(np.reshape(test_Y, (-1)), y_test_pred)

print(np.round(Pearson[0],3) , np.round(MSE_dif_fp_test,3), np.round(R2_dif_fp_test,3))

0.542 1.014 0.293


### (b) Random forest

In [12]:
#create input matrices:
train_X = np.array(list(data_train["DRFP"]))
train_X = np.concatenate([train_X, np.array(list(data_train["ESM1b_ts"]))], axis = 1)
train_Y = np.array(list(data_train["log10_kcat"]))

test_X = np.array(list(data_test["DRFP"]))
test_X = np.concatenate([test_X, np.array(list(data_test["ESM1b_ts"]))], axis = 1)
test_Y = np.array(list(data_test["log10_kcat"]))


scaler = preprocessing.StandardScaler().fit(train_X)
train_X = scaler.transform(train_X)
test_X = scaler.transform(test_X)

In [13]:
def cross_validation_neg_r2_random_forest(param):
    R2 = []
    for i in range(5):
        train_index, test_index  = train_indices[i], test_indices[i]

        reg = RandomForestRegressor(max_depth = param["max_depth"],
                                    min_samples_leaf = param["min_samples_leaf"],
                                    n_estimators = param["n_estimators"]).fit(train_X[train_index], train_Y[train_index])
        y_valid_pred = reg.predict(train_X[test_index])
        R2.append(r2_score(np.reshape(train_Y[test_index], (-1)),  y_valid_pred))
    return(-np.mean(R2))

#Defining search space for hyperparameter optimization
space_random_forest = {'n_estimators': hp.choice('n_estimators', [50, 100, 200]),
                      'max_depth': hp.choice('max_depth', [5,6,7,8,9,10,11,12,13,14,15,16]),
                       'min_samples_leaf': hp.choice('min_samples_leaf', [1,2,5,10,20])}

In [14]:
'''trials = Trials()
best = fmin(fn = cross_validation_neg_r2_random_forest, space = space_random_forest,
            algo=rand.suggest, max_evals = 2000, trials=trials)''';

Best set of hyperparameters:

In [15]:
#trials.argmin

In [16]:
param = {'max_depth': 15, 'min_samples_leaf': 1, 'n_estimators': 100}

#### (ii) Training and validating the final model
Training the model and validating it on the test set:

In [17]:
reg = RandomForestRegressor(max_depth = param["max_depth"],
                                    min_samples_leaf = param["min_samples_leaf"],
                                    n_estimators = param["n_estimators"]).fit(train_X, train_Y)
y_test_pred = reg.predict(test_X)

MSE_dif_fp_test = np.mean(abs(np.reshape(test_Y, (-1)) - y_test_pred)**2)
R2_dif_fp_test = r2_score(np.reshape(test_Y, (-1)), y_test_pred)
Pearson = stats.pearsonr(np.reshape(test_Y, (-1)), y_test_pred)

print(np.round(Pearson[0],3) ,np.round(MSE_dif_fp_test,3), np.round(R2_dif_fp_test,3))

0.622 0.911 0.364


### (c) Neural Network

In [18]:
train_X = np.array(list(data_train["DRFP"]))
train_X = np.concatenate([train_X, np.array(list(data_train["ESM1b_ts"]))], axis = 1)
train_Y = np.array(list(data_train["log10_kcat"]))

test_X = np.array(list(data_test["DRFP"]))
test_X = np.concatenate([test_X, np.array(list(data_test["ESM1b_ts"]))], axis = 1)
test_Y = np.array(list(data_test["log10_kcat"]))

mean_y, std_y = np.mean(train_Y), np.std(train_Y)
train_Y = (train_Y-mean_y)/std_y
test_Y = (test_Y-mean_y)/std_y

scaler = preprocessing.StandardScaler().fit(train_X[:, 2048:])
train_X[:, 2048:] = scaler.transform(train_X[:, 2048:])
test_X[:, 2048:] = scaler.transform(test_X[:, 2048:])

In [13]:
def build_model(learning_rate=0.001, decay =10e-6, momentum=0.9, l2_parameter= 0.1, hidden_layer_size1 = 256,
               hidden_layer_size2 = 64, input_dim = 1280, third_layer = True): 
    model = models.Sequential()
    model.add(layers.Dense(units = hidden_layer_size1,
                           kernel_regularizer=regularizers.l2(l2_parameter),
                           kernel_initializer = initializers.TruncatedNormal(
                               mean=0.0, stddev= np.sqrt(2./ input_dim), seed=None),
                           activation='relu', input_shape=(input_dim,)))
    model.add(layers.BatchNormalization())
    model.add(layers.Dense(units= hidden_layer_size2,
                           kernel_regularizer=regularizers.l2(l2_parameter),
                           kernel_initializer = initializers.TruncatedNormal(
                               mean=0.0, stddev = np.sqrt(2./ hidden_layer_size1), seed=None),
                           activation='relu'))
    model.add(layers.BatchNormalization())
    if third_layer == True:
        model.add(layers.Dense(units= 16,
                               kernel_regularizer=regularizers.l2(l2_parameter),
                               kernel_initializer = initializers.TruncatedNormal(
                                   mean=0.0, stddev = np.sqrt(2./ hidden_layer_size2), seed=None),
                               activation='relu'))
        model.add(layers.BatchNormalization())
     
    model.add(layers.Dense(1, kernel_regularizer=regularizers.l2(l2_parameter),
                           kernel_initializer = initializers.TruncatedNormal(
                               mean=0.0, stddev = np.sqrt(2./ 16), seed=None)))
    model.compile(optimizer=optimizers.SGD(learning_rate=learning_rate,  momentum=momentum, nesterov=True),
                  loss='mse',  metrics=['mse'])
    return model



def cross_validation_neg_r2_fcnn(param):
    
    param["num_epochs"] = int(np.round(param["num_epochs"]))

    
    R2 = []
    for i in range(5):
        train_index, test_index  = train_indices[i], test_indices[i]
        model = build_model(input_dim = 1280+2048, 
                            learning_rate= param["learning_rate"],
                            decay = param["decay"],
                            momentum = param["momentum"], 
                            l2_parameter = param["l2_parameter"],
                            hidden_layer_size1 = param["hidden_layer_size1"],
                            hidden_layer_size2 = param["hidden_layer_size2"]) 

        model.fit(np.array(train_X[train_index]), np.array(train_Y[train_index]),
                            epochs = param["num_epochs"],
                            batch_size = param["batch_size"],
                            verbose=0)

        R2.append(r2_score( np.reshape(train_Y[test_index], (-1)),
                           model.predict(np.array(train_X[test_index])).reshape(-1) ))
    return(-np.mean(R2))

In [18]:
space = {"learning_rate": hp.uniform("learning_rate", 1e-6, 1e-2),
        "hidden_layer_size1": hp.choice("hidden_layer_size1", [256,128,64]),
        "hidden_layer_size2": hp.choice("hidden_layer_size2", [128,64,32]),
        "batch_size": hp.choice("batch_size", [8,16,32,64,96]),
        "decay": hp.uniform("decay", 1e-9, 1e-5),
        "l2_parameter": hp.uniform("l2_parameter", 0, 0.01),
        "momentum": hp.uniform("momentum", 0.1, 1),
        "num_epochs": hp.uniform("num_epochs", 20, 100)}
    
trials = Trials()
best = fmin(fn = cross_validation_neg_r2_fcnn, space = space, algo=rand.suggest, max_evals= 10, trials=trials);

  0%|          | 0/10 [00:00<?, ?trial/s, best loss=?]

job exception: index 340 is out of bounds for axis 0 with size 340



  0%|          | 0/10 [00:00<?, ?trial/s, best loss=?]


IndexError: index 340 is out of bounds for axis 0 with size 340

In [20]:
test_indices

[[0,
  258,
  260,
  5,
  134,
  264,
  10,
  15,
  20,
  284,
  30,
  288,
  171,
  44,
  174,
  303,
  49,
  53,
  54,
  314,
  189,
  319,
  64,
  196,
  69,
  324,
  199,
  328,
  329,
  74,
  330,
  79,
  338,
  84,
  341,
  345,
  222,
  94,
  224,
  350,
  99,
  229,
  104,
  109,
  237,
  114,
  244,
  124,
  125,
  254],
 [1,
  6,
  11,
  16,
  21,
  29,
  34,
  37,
  43,
  46,
  51,
  57,
  61,
  65,
  66,
  71,
  72,
  76,
  81,
  86,
  89,
  90,
  95,
  100,
  110,
  120,
  121,
  123,
  127,
  139,
  142,
  146,
  149,
  150,
  158,
  160,
  162,
  164,
  169,
  173,
  175,
  176,
  182,
  187,
  188,
  193,
  194,
  198,
  203,
  207,
  212,
  215,
  218,
  219,
  220,
  223,
  233,
  236,
  238,
  239,
  242,
  243,
  250,
  252,
  255,
  256,
  257,
  270,
  271,
  274,
  275,
  276,
  295,
  297,
  298,
  299,
  301,
  302,
  310,
  315,
  318,
  325,
  337,
  340],
 [2,
  7,
  12,
  17,
  22,
  25,
  28,
  36,
  39,
  40,
  45,
  50,
  55,
  56,
  60,
  67,
  70,
  73

In [21]:
param = {'batch_size': 96,
         'decay': 8.925865617547346e-06,
         'hidden_layer_size1': 128,
         'hidden_layer_size2': 64,
         'l2_parameter': 0.0033008915899278156,
         'learning_rate': 0.006808549614442447,
         'momentum': 0.9054104435951468,
         'num_epochs': 62.68663708309369}

In [26]:
model = build_model(input_dim = 1280+2048, 
                            learning_rate = param["learning_rate"],
                            decay = param["decay"],
                            momentum = param["momentum"], 
                            l2_parameter = param["l2_parameter"], 
                            hidden_layer_size1 = param["hidden_layer_size1"],
                            hidden_layer_size2 = param["hidden_layer_size2"]) 

model.fit(np.array(train_X), np.array(train_Y),
                    epochs = 50,# int(np.round(param["num_epochs"])),
                    batch_size = param["batch_size"],
                    verbose=1)

y_test_pred = model.predict(np.array(test_X))
r2_score(test_Y, y_test_pred.reshape(-1))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


0.3237192981547061

In [46]:
y_test_pred = (y_test_pred.reshape(-1) + mean_y)*std_y
test_Y = (test_Y + mean_y)*std_y

MSE_dif_fp_test = np.mean(abs(np.reshape(test_Y, (-1)) - y_test_pred.reshape(-1))**2)
R2_dif_fp_test = r2_score(np.reshape(test_Y, (-1)), y_test_pred.reshape(-1))
Pearson = stats.pearsonr(np.reshape(test_Y, (-1)), y_test_pred.reshape(-1))

print(np.round(Pearson[0],3) ,np.round(MSE_dif_fp_test,3), np.round(R2_dif_fp_test,3))

0.593 0.969 0.324
