### E Karvelis | 5/24/2023
### Purpose
Test the performance of trained models
## How to use
1. Copy this notebook to the working directory where your model was trained.
2. Make sure all outputs, if any, have been cleared.
3. Run the blocks (needs transformer_1_config.txt or lstm_1_config.txt in the working directory).

In [None]:
# import modules
from transformer_1 import *

import sys
sys.path.append('/data/karvelis03/dl_kcat/scripts/')
from prep_data import *
from pred_kcat import *
from glob import glob

# Device configuration: run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load every '*_1_config.txt' file found in the working directory by executing
# its contents as Python in this notebook's namespace. Later cells use
# `data_file` and `meta_file`, which are not assigned anywhere in this notebook
# (presumably the config file defines them -- TODO confirm).
# The matches are sorted because glob() returns them in arbitrary order; with
# more than one config present, sorting makes the "last one wins" outcome
# reproducible between runs and machines.
# SECURITY NOTE(review): exec() runs arbitrary code. Only run this notebook
# with config files you wrote yourself or otherwise trust.
for f in sorted(glob('./*_1_config.txt')):
    with open(f, 'r') as fo:
        exec(fo.read())

# Spearman rank and Pearson correlation

In [None]:
# Run the correlation test once per saved 'best_model*' checkpoint found in the
# working directory and keep each result for the summary below.
results = []
for model_file in glob('./best_model*'):
    test = ModelTest(model_file, data_file, meta_file=meta_file)
    test.path_set_size = 10
    results.append(test.corr_test(verbose=True))

# Per-fold report: statistic and p-value for both correlation measures.
print('\n\nResults for each fold:\n')
print(f'              Stat   |   p-value')
for res in results:
    print(f"Spearman: {res['spearman'].correlation:.3e}  |  {res['spearman'].pvalue:.3e}")
    print(f"Pearson:  {res['pearson'][0]:.3e}  |  {res['pearson'][1]:.3e}")

# Gather the correlation statistics (one entry per fold) as arrays.
spearman_rs = np.array([fold_res['spearman'].correlation for fold_res in results])
pearson_rs = np.array([fold_res['pearson'][0] for fold_res in results])

# Across-fold summary: mean and standard error of the mean.
n_folds = spearman_rs.shape[0]
print(f'\n\nAVG +/- 1SEM, (n={n_folds} folds):')
print(f"Spearman: {np.mean(spearman_rs):.3f} +/- {sem(spearman_rs):.3f}")
print(f"Pearson:  {np.mean(pearson_rs):.3f} +/- {sem(pearson_rs):.3f}")

In [None]:
# Build one ModelTest per saved 'best_model*' checkpoint, keyed by the
# checkpoint's path, and call its plot_preds() method. The `tests` dict is
# reused by the next cell.
tests = {}
for model_file in glob('./best_model*'):
    fold_test = tests[model_file] = ModelTest(model_file, data_file, meta_file=meta_file)
    fold_test.path_set_size = 10
    fold_test.plot_preds()

In [None]:
# Per-fold report of Spearman and Pearson correlations between the stored
# predictions and targets, separately for the 'train' and 'test' entries of
# each ModelTest in `tests`.
print('\n\nResults for each fold:\n')
print(f'                      Stat   |   p-value')

fold_stats = []
for model_file, test in tests.items():
    res = {
        'Spearman (test)':  spearmanr(test.preds['test'],  test.targets['test']),
        'Spearman (train)': spearmanr(test.preds['train'], test.targets['train']),
        'Pearson (test)':   pearsonr(test.preds['test'],   test.targets['test']),
        'Pearson (train)':  pearsonr(test.preds['train'],  test.targets['train']),
    }

    print(f"Spearman (train): {res['Spearman (train)'].correlation:.3e}  |  {res['Spearman (train)'].pvalue:.3e}")
    print(f"Spearman  (test): {res['Spearman (test)'].correlation:.3e}  |  {res['Spearman (test)'].pvalue:.3e}")
    print(f"Pearson (train):  {res['Pearson (train)'][0]:.3e}  |  {res['Pearson (train)'][1]:.3e}")
    print(f"Pearson  (test):  {res['Pearson (test)'][0]:.3e}  |  {res['Pearson (test)'][1]:.3e}\n")

    fold_stats.append(res)

# One correlation statistic per fold, as arrays, for the across-fold summary.
spearman_rs_train = np.array([s['Spearman (train)'].correlation for s in fold_stats])
pearson_rs_train = np.array([s['Pearson (train)'][0] for s in fold_stats])
spearman_rs_test = np.array([s['Spearman (test)'].correlation for s in fold_stats])
pearson_rs_test = np.array([s['Pearson (test)'][0] for s in fold_stats])

# Across-fold summary: mean and standard error of the mean.
n_folds = spearman_rs_train.shape[0]
print(f'\n\nAVG +/- 1SEM, (n={n_folds} folds):')
print(f"Spearman (train): {np.mean(spearman_rs_train):.3f} +/- {sem(spearman_rs_train):.3f}")
print(f"Pearson  (train):  {np.mean(pearson_rs_train):.3f} +/- {sem(pearson_rs_train):.3f}\n")
print(f"Spearman  (test): {np.mean(spearman_rs_test):.3f} +/- {sem(spearman_rs_test):.3f}")
print(f"Pearson   (test):  {np.mean(pearson_rs_test):.3f} +/- {sem(pearson_rs_test):.3f}")

# Loop through each CV Fold

In [None]:
# For every cross-validation checkpoint in the working directory: print a
# banner with the fold label, draw the train and test plots, then run and
# print the Spearman test.
# The matches are sorted because glob() returns them in arbitrary order.
for model_file in sorted(glob('./best_model_cvfold*.pt')):
    # Fold label = the text between 'cvfold' and the next '.' in the path.
    # Computed outside the f-string: reusing the same quote character inside
    # an f-string expression is a SyntaxError before Python 3.12.
    fold = model_file.split('cvfold')[-1].split('.')[0]

    print('-----------------------------------------------------')
    print(f'FOLD: {fold}')
    print('-----------------------------------------------------\n')

    test = ModelTest(model_file, data_file, meta_file=meta_file)

    # Give each fold and each split its own figure name. Previously both calls
    # used the fixed name 'best_model_cvfold1_train.png' for every fold
    # (presumably `figname` is the output file, so figures overwrote each
    # other -- TODO confirm against ModelTest).
    _ = test.plot_train_var_pred(figname=f'best_model_cvfold{fold}_train.png')
    _ = test.plot_test_var_pred(figname=f'best_model_cvfold{fold}_test.png')

    test.path_set_size = 10
    res = test.spearman_test()
    print(res)

# CV Fold 2

In [None]:
# Checkpoint for CV fold 2 (absolute, machine-specific path), wrapped in a
# ModelTest that uses the data/meta files named by the loaded config.
model_file = (
    '/data/karvelis03/dl_kcat/transformer_1s/denseweight/job12-1/'
    'stoch_labels/test/best_model_cvfold2.pt'
)
test = ModelTest(
    model_file,
    data_file,
    meta_file=meta_file,
)

In [None]:
# Train-set plot for CV fold 2. The return value is bound to `_` so the
# notebook does not echo it as the cell's output.
train_figname = 'best_model_cvfold2_train.png'
_ = test.plot_train_var_pred(figname=train_figname)

In [None]:
# Test-set plot for CV fold 2. Uses its own '_test' figure name: this call
# previously passed 'best_model_cvfold2_train.png', the same name as the
# train-set plot (presumably `figname` is the output file, so the train figure
# was being overwritten -- TODO confirm against ModelTest).
_ = test.plot_test_var_pred(figname='best_model_cvfold2_test.png')

In [None]:
# Set path_set_size on the fold-2 ModelTest, then run its Spearman test and
# print whatever it returns.
test.path_set_size = 10
res = test.spearman_test()
print(res)

# CV Fold 3

In [None]:
# Checkpoint for CV fold 3 (absolute, machine-specific path), wrapped in a
# ModelTest that uses the data/meta files named by the loaded config.
model_file = (
    '/data/karvelis03/dl_kcat/transformer_1s/denseweight/job12-1/'
    'stoch_labels/test/best_model_cvfold3.pt'
)
test = ModelTest(
    model_file,
    data_file,
    meta_file=meta_file,
)

In [None]:
# Train-set plot for CV fold 3. The return value is bound to `_` so the
# notebook does not echo it as the cell's output.
train_figname = 'best_model_cvfold3_train.png'
_ = test.plot_train_var_pred(figname=train_figname)

In [None]:
# Test-set plot for CV fold 3. Uses its own '_test' figure name: this call
# previously passed 'best_model_cvfold3_train.png', the same name as the
# train-set plot (presumably `figname` is the output file, so the train figure
# was being overwritten -- TODO confirm against ModelTest).
_ = test.plot_test_var_pred(figname='best_model_cvfold3_test.png')

In [None]:
# Set path_set_size on the fold-3 ModelTest, then run its Spearman test and
# print whatever it returns.
test.path_set_size = 10
res = test.spearman_test()
print(res)