In [14]:
import math
import torch
import numpy as np
import pandas as pd
import seaborn as sns
import os

import shutil
import sklearn
from sklearn.model_selection import KFold
import gpytorch
from gpytorch.models import ExactGP
from gpytorch.likelihoods import DirichletClassificationLikelihood
from gpytorch.means import ConstantMean
from gpytorch.kernels import ScaleKernel, RBFKernel, MaternKernel

from sklearn.metrics import confusion_matrix
import itertools
from sklearn.metrics import precision_score, recall_score, roc_auc_score, matthews_corrcoef, balanced_accuracy_score, confusion_matrix, f1_score, roc_curve,precision_recall_curve, auc

import sys
sys.path.append('/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/')
from RF_GSCV import * # RF_GSCV contains the calculate metrics function to get the TP, TN, FP, FN scores 
from RF_atomver import prediction_type

In [15]:

class DirichletGPModel(ExactGP):
    """
    A Dirichlet Gaussian Process (GP) model for multi-class classification.
    This model uses a Gaussian Process with a Dirichlet prior to handle multi-class classification tasks.
    It extends the ExactGP class from GPyTorch, a library for Gaussian Processes in PyTorch.
    Attributes:
        mean_module (gpytorch.means.ConstantMean): The mean module for the GP, initialized with a constant mean function for each class.
        covar_module (gpytorch.kernels.ScaleKernel): The covariance module for the GP, using a scaled RBF kernel for each class.

    Args:
        train_x (torch.Tensor): Training data features.
        train_y (torch.Tensor): Training data labels.
        likelihood (gpytorch.likelihoods.Likelihood): The likelihood function.
        num_classes (int): The number of classes for the classification task.
    """
    def __init__(self, train_x, train_y, likelihood, num_classes):
        super(DirichletGPModel, self).__init__(train_x, train_y, likelihood)
        self.mean_module = ConstantMean(batch_shape=torch.Size((num_classes,)))
        self.covar_module = ScaleKernel(MaternKernel(nu=0.5, batch_shape=torch.Size((num_classes,))),
            batch_shape=torch.Size((num_classes,))
        )

    def forward(self, x):
        """
        Forward pass through the GP model.
        Args:
            x (torch.Tensor): Input data features.
        Returns:
            gpytorch.distributions.MultivariateNormal: The multivariate normal distribution representing the GP posterior.
        """
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)


In [16]:
class Trainer: 
    def __init__(self,model, likelihood, iterations): 
        self.model = model
        self.likelihood = likelihood 
        smoke_test = ('CI' in os.environ)
        self.n_iterations = 2 if smoke_test else iterations
        self.optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
        self.loss_fn = gpytorch.mlls.ExactMarginalLogLikelihood(self.likelihood, self.model)
        
    def train(self, train_x, train_y): 
        self.model.train()
        self.likelihood.train()
        predictions = [] 
        for i in range(self.n_iterations): 
            self.optimizer.zero_grad()
            output = self.model(train_x)
            loss = -self.loss_fn(output, self.likelihood.transformed_targets).sum()
            loss.backward()
            if (i%10==0): 
                print('Iter %d/%d - Loss: %.3f   lengthscale: %.3f   noise: %.3f' % (
                    i + 1, self.n_iterations, loss.item(),
                    self.model.covar_module.base_kernel.lengthscale.mean().item(),
                    self.model.likelihood.second_noise_covar.noise.mean().item()
                ))
             
            self.optimizer.step() 
    def predict(self, input): 
        """
        Make predictions using the GP model.

        Args:
            input (torch.Tensor): The input data for making predictions.
        
        Returns:
            dist (gpytorch.distributions.MultivariateNormal): The distribution representing the GP posterior.
            observed_pred (gpytorch.distributions.MultivariateNormal): The predicted distribution considering the likelihood.
            pred_means (torch.Tensor): The means of the predicted distributions.
            class_pred (torch.Tensor): The predicted class labels.
        """
        self.model.eval()
        self.likelihood.eval()

        with gpytorch.settings.fast_pred_var(), torch.no_grad():
            dist = self.model(input)     # output distribution
            pred_means = dist.loc          # means of distributino 
            observed_pred = self.likelihood(self.model(input))    # likelihood predictions mean and var  

            class_pred = self.model(input).loc.max(0)[1]
            
        return dist, observed_pred, pred_means, class_pred
    

    def evaluate(self, x_input, y_true): 
        """
        Evaluate the GP model.

        Args:
            x_input (torch.Tensor): The input data features.
            y_true (torch.Tensor): The true labels for the input data.
        
        Returns:
            y_pred (numpy.ndarray): The predicted class labels.
        """
        y_pred = self.model(x_input).loc.max(0)[1].numpy()
        
        return y_pred

    def gp_results(self, x_input, y_true, plot_title=None): 
        """
        Calculate evaluation metrics and print results.

        Args:
            x_input (torch.Tensor): The input data features.
            y_true (torch.Tensor or numpy.ndarray): The true labels for the input data.
            plot_title (str, optional): The title for the confusion matrix plot.
        
        Returns:
            dict: A dictionary containing evaluation metrics and confusion matrix components.
        """
        y_pred = self.evaluate(x_input, y_true) 
        if isinstance(y_true, torch.Tensor):
            y_true = y_true.numpy().reshape(-1)
        # plot_confusion_matrix(y_true, y_pred, ['0','1'], title=plot_title)
        accuracy = accuracy_score(y_true, y_pred)
        precision = precision_score(y_true, y_pred)
        dist = self.model(x_input)     # get predicted distributions 
        pred_means = dist.loc          # means for predicted dist  

        recall = recall_score(y_true, y_pred)
        tp, tn, fp, fn = calculate_metrics(y_true, y_pred) 
        sensitivity = tp / (tp + fn) 
        specificity = tn / (tn + fp) 
        cm = confusion_matrix(y_true, y_pred)
        print(f'accuracy: {accuracy:.4f}, precision: {precision:.4f}, recall: {recall:.4f}, specificity: {specificity:.4f}, cm: {cm}')
        return {'accuracy': accuracy, 'precision': precision,  'recall':recall, 'specificity':specificity, 'TN': tn, 'FN': fn, 'FP': fp, 'TP': tp }

       

In [17]:
def make_torch_tens_float(filepath, filename): 
    trainX_df = pd.read_csv(filepath+filename+'_trainX.csv')
    trainy_df = pd.read_csv(filepath+filename+'_train_y.csv')
    testX_df = pd.read_csv(filepath+filename+'_testX.csv')
    testy_df = pd.read_csv(filepath+filename+'_test_y.csv')

    train_x_temp = trainX_df.to_numpy().astype("double") # double 
    test_x_temp = testX_df.to_numpy().astype("double") #double 
    
    train_y_temp = trainy_df.to_numpy().flatten().astype("double") #double 
    test_y_temp = testy_df.to_numpy().flatten().astype("double") #double 
    
    # trainX = torch.from_numpy(train_x_temp)
    # trainy = torch.from_numpy(train_y_temp)
    # testX = torch.from_numpy(test_x_temp)
    # testy = torch.from_numpy(test_y_temp)
    trainX = torch.as_tensor(train_x_temp, dtype=torch.float32)
    trainy = torch.as_tensor(train_y_temp, dtype=torch.float32)
    testX = torch.as_tensor(test_x_temp, dtype=torch.float32)
    testy = torch.as_tensor(test_y_temp, dtype=torch.float32)
    return trainX, trainy, testX, testy

In [18]:
GP_path= '/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/GP/GP_matern_kernel/'
def save_results(trainX, trainy, testX, testy, root_name, n_iterations=300, n_samples=100):
    """
    Train a Dirichlet Gaussian Process model and save the training and test performance results.

    This function trains a Dirichlet GP model on the given training data, evaluates it on both the training
    and test data, and saves various performance metrics and predictions to pandas DataFrames.

    Args:
        trainX (torch.Tensor): The training data features.
        trainy (torch.Tensor): The training data labels.
        testX (torch.Tensor): The test data features.
        testy (torch.Tensor): The test data labels.
        root_name (str): The root name used for labeling the model in the results.
        n_iterations (int, optional): The number of training iterations. Default is 300.
        n_samples (int, optional): The number of samples for prediction. Default is 100.

    Returns:
        train_perf_df (pd.DataFrame): DataFrame containing performance metrics and predictions for the training data.
        test_perf_df (pd.DataFrame): DataFrame containing performance metrics and predictions for the test data.
    """
    likelihood = DirichletClassificationLikelihood(trainy.long(), learn_additional_noise=True)
    model = DirichletGPModel(trainX, likelihood.transformed_targets, likelihood, num_classes=likelihood.num_classes)
    # n_iterations = 300
    trainer = Trainer(model, likelihood, n_iterations)
    trainer.train(trainX, trainy) 
  
    train_dist, train_observed_pred, train_pred_means, train_pred  = trainer.predict(trainX)
    train_results = trainer.gp_results(trainX, trainy)
    test_dist, test_observed_pred, test_pred_means, test_pred  = trainer.predict(testX)
    test_results = trainer.gp_results(testX, testy)
    
    train_observed_pred.mean.numpy()
    train_pred_variance2D = train_observed_pred.variance.numpy()
    test_observed_pred.mean.numpy()
    test_pred_variance2D=test_observed_pred.variance.numpy()
    
    train_pred_samples = train_dist.sample(torch.Size((256,))).exp()
    train_probabilities = (train_pred_samples / train_pred_samples.sum(-2, keepdim=True)).mean(0)

    train_prob_stds = (train_pred_samples / train_pred_samples.sum(-2, keepdim=True)).std(0)

    test_pred_samples = test_dist.sample(torch.Size((100,))).exp()

    test_probabilities = (test_pred_samples / test_pred_samples.sum(-2, keepdim=True)).mean(0)
    test_prob_stds = (test_pred_samples / test_pred_samples.sum(-2, keepdim=True)).std(0)

 
    train_perf_df = pd.DataFrame()
    test_perf_df = pd.DataFrame()
    train_perf_df['mean_pred_class0'] = train_observed_pred.mean.numpy()[0,]
    train_perf_df['mean_pred_class1'] = train_observed_pred.mean.numpy()[1,]
    train_perf_df['y'] = trainy
    train_perf_df['y_pred'] = train_pred_means.max(0)[1]
    train_perf_df['var_pred_class0']=train_observed_pred.variance.numpy()[0,]
    train_perf_df['var_pred_class1']=train_observed_pred.variance.numpy()[1,]
    train_perf_df['pred_prob_class0'] = train_probabilities.numpy()[0,]
    train_perf_df['pred_prob_class1'] = train_probabilities.numpy()[1,]
    train_perf_df['pred_prob_std_class0'] = train_prob_stds.numpy()[0,]
    train_perf_df['pred_prob_std_class1'] = train_prob_stds.numpy()[1,]
    train_perf_df['model'] = f'{root_name}_GP_Dirichlet_matern'
    train_perf_df['subset'] = 'train' 
    train_cm = confusion_matrix(trainy, train_perf_df['y_pred'])
    cm_flattened = train_cm.flatten().tolist()
    train_perf_df['cm']= [cm_flattened]* len(train_perf_df)
    train_perf_df['prediction_type'] = train_perf_df.apply(lambda x: prediction_type(x['y'], x['y_pred']), axis=1)
    train_perf_df['ROC-AUC'] = roc_auc_score(trainy, train_perf_df['y_pred'])
    train_perf_df['MCC'] = matthews_corrcoef(trainy, train_perf_df['y_pred'])
    train_perf_df['Balanced Accuracy'] = balanced_accuracy_score(trainy, train_perf_df['y_pred'])
    train_perf_df['f1'] = f1_score(trainy, train_perf_df['y_pred'])
    
    test_perf_df['mean_pred_class0'] = test_observed_pred.mean.numpy()[0,]
    test_perf_df['mean_pred_class1'] = test_observed_pred.mean.numpy()[1,]
    test_perf_df['y'] = testy
    test_perf_df['y_pred'] = test_pred_means.max(0)[1]
    test_perf_df['var_pred_class0']=test_observed_pred.variance.numpy()[0,]
    test_perf_df['var_pred_class1']=test_observed_pred.variance.numpy()[1,]
    test_perf_df['pred_prob_class0'] = test_probabilities.numpy()[0,]
    test_perf_df['pred_prob_class1'] = test_probabilities.numpy()[1,]
    test_perf_df['pred_prob_std_class0'] =test_prob_stds.numpy()[0,]
    test_perf_df['pred_prob_std_class1'] = test_prob_stds.numpy()[1,]
    test_perf_df['model'] = f'{root_name}_GP_Dirichlet_matern'
    test_perf_df['subset'] = 'test' 
    test_cm = confusion_matrix(testy, test_perf_df['y_pred'])
    test_cm_flattened = test_cm.flatten().tolist()
    test_perf_df['cm']= [test_cm_flattened]* len(test_perf_df)
    test_perf_df['prediction_type'] = test_perf_df.apply(lambda x: prediction_type(x['y'], x['y_pred']), axis=1)
    test_perf_df['ROC-AUC'] = roc_auc_score(testy, test_perf_df['y_pred'])
    test_perf_df['MCC'] = matthews_corrcoef(testy, test_perf_df['y_pred'])
    test_perf_df['Balanced Accuracy'] = balanced_accuracy_score(testy, test_perf_df['y_pred'])
    test_perf_df['f1'] = f1_score(testy, test_perf_df['y_pred'])
    with open(f'{GP_path}{root_name}_GP_Dirichlet_matern_model.pkl', 'wb') as f: 
        pickle.dump(model,f)
    with open(f'{GP_path}{root_name}_GP_Dirichlet_matern_likelihood.pkl', 'wb') as f: 
        pickle.dump(likelihood,f)
    for k, val in train_results.items(): 
        train_perf_df[k] = val
    for k, val in test_results.items():
        test_perf_df[k] = val
    return train_perf_df, test_perf_df


In [19]:
data_dir = '/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/'
samplings = ['scaled', 'UNDER', 'SMOTE', 'ADASYN'] 
feat_types = ['moe', 'mfp']
neks = ['2', '3', '5', '9']
GP_path= '/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/GP/GP_matern_kernel/' 
for nek in neks:
    print(f'NEK{nek}')
    bind_inhib = ['binding', 'inhibition']
    if nek in ['3','5']: 
        bind_inhib = ['binding']
    else: 
        bind_inhib = ['binding', 'inhibition']
    for bi in bind_inhib: 
        if bi == 'binding': 
            this_bi = 'bind' 
        if bi == 'inhibition': 
            this_bi = 'inhib'
        nek_path= f'{data_dir}NEK{nek}/{this_bi}/'
        for feat in feat_types: 
            for samp in samplings: 
                print(f'NEK{nek} {bi} {feat} {samp}')
                file_root = f'NEK{nek}_{bi}_{feat}_{samp}'
                trainX, trainy, testX, testy = make_torch_tens_float(nek_path,file_root) 
                print(f'trainX:{trainX.shape}, train y: {trainy.shape}, testX: {testX.shape}, test y: {testy.shape}')
                train_perf_df, test_perf_df = save_results(trainX, trainy, testX, testy,file_root, n_iterations=300)
                train_perf_df['NEK'] = f'NEK{nek}_{bi}'
                train_perf_df['strategy'] = f'{samp}'
                train_perf_df['feat_type'] = f'{feat}'
                test_perf_df['NEK'] = f'NEK{nek}_{bi}'
                test_perf_df['strategy'] = f'{samp}'
                test_perf_df['feat_type'] = f'{feat}'
                train_perf_df.to_csv(f'{GP_path}{file_root}_train_GP_matern.csv',index=False) 
                test_perf_df.to_csv(f'{GP_path}{file_root}_test_GP_matern.csv',index=False) 
                print() 

NEK2
NEK2 binding moe scaled
trainX:torch.Size([1125, 306]), train y: torch.Size([1125]), testX: torch.Size([283, 306]), test y: torch.Size([283])
Iter 1/300 - Loss: 7.108   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.483   lengthscale: 1.119   noise: 0.809
Iter 21/300 - Loss: 4.469   lengthscale: 1.801   noise: 1.072
Iter 31/300 - Loss: 3.758   lengthscale: 2.796   noise: 1.316
Iter 41/300 - Loss: 3.394   lengthscale: 4.012   noise: 1.427
Iter 51/300 - Loss: 3.283   lengthscale: 5.080   noise: 1.419
Iter 61/300 - Loss: 3.238   lengthscale: 5.995   noise: 1.345
Iter 71/300 - Loss: 3.212   lengthscale: 6.756   noise: 1.240
Iter 81/300 - Loss: 3.197   lengthscale: 7.395   noise: 1.121
Iter 91/300 - Loss: 3.169   lengthscale: 7.970   noise: 0.998
Iter 101/300 - Loss: 3.152   lengthscale: 8.453   noise: 0.876
Iter 111/300 - Loss: 3.145   lengthscale: 8.898   noise: 0.761
Iter 121/300 - Loss: 3.131   lengthscale: 9.313   noise: 0.659
Iter 131/300 - Loss: 3.116   lengthscale: 9.7

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


accuracy: 0.9600, precision: 0.0000, recall: 0.0000, specificity: 1.0000, cm: [[1080    0]
 [  45    0]]
accuracy: 0.9576, precision: 0.0000, recall: 0.0000, specificity: 1.0000, cm: [[271   0]
 [ 12   0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



NEK2 binding moe UNDER
trainX:torch.Size([90, 306]), train y: torch.Size([90]), testX: torch.Size([283, 306]), test y: torch.Size([283])
Iter 1/300 - Loss: 7.107   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.931   lengthscale: 1.264   noise: 1.297
Iter 21/300 - Loss: 5.437   lengthscale: 2.051   noise: 1.982
Iter 31/300 - Loss: 5.260   lengthscale: 3.155   noise: 2.571
Iter 41/300 - Loss: 5.202   lengthscale: 4.419   noise: 3.001
Iter 51/300 - Loss: 5.183   lengthscale: 5.572   noise: 3.285
Iter 61/300 - Loss: 5.175   lengthscale: 6.518   noise: 3.455
Iter 71/300 - Loss: 5.170   lengthscale: 7.291   noise: 3.539
Iter 81/300 - Loss: 5.167   lengthscale: 7.937   noise: 3.563
Iter 91/300 - Loss: 5.164   lengthscale: 8.492   noise: 3.546
Iter 101/300 - Loss: 5.162   lengthscale: 8.981   noise: 3.504
Iter 111/300 - Loss: 5.160   lengthscale: 9.417   noise: 3.444
Iter 121/300 - Loss: 5.158   lengthscale: 9.811   noise: 3.372
Iter 131/300 - Loss: 5.157   lengthscale: 10.167   noi



trainX:torch.Size([2160, 306]), train y: torch.Size([2160]), testX: torch.Size([283, 306]), test y: torch.Size([283])
Iter 1/300 - Loss: 6.847   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.479   lengthscale: 1.319   noise: 1.237
Iter 21/300 - Loss: 4.810   lengthscale: 2.171   noise: 1.450
Iter 31/300 - Loss: 4.417   lengthscale: 3.128   noise: 1.284
Iter 41/300 - Loss: 4.110   lengthscale: 4.067   noise: 0.875
Iter 51/300 - Loss: 3.919   lengthscale: 4.853   noise: 0.456
Iter 61/300 - Loss: 3.780   lengthscale: 5.460   noise: 0.201
Iter 71/300 - Loss: 3.705   lengthscale: 5.940   noise: 0.095
Iter 81/300 - Loss: 3.660   lengthscale: 6.339   noise: 0.056
Iter 91/300 - Loss: 3.621   lengthscale: 6.685   noise: 0.038
Iter 101/300 - Loss: 3.603   lengthscale: 6.991   noise: 0.029
Iter 111/300 - Loss: 3.596   lengthscale: 7.268   noise: 0.024
Iter 121/300 - Loss: 3.581   lengthscale: 7.521   noise: 0.020
Iter 131/300 - Loss: 3.574   lengthscale: 7.757   noise: 0.017
Iter 141/30



accuracy: 0.9995, precision: 0.9991, recall: 1.0000, specificity: 0.9991, cm: [[1079    1]
 [   0 1080]]
accuracy: 0.9647, precision: 0.7500, recall: 0.2500, specificity: 0.9963, cm: [[270   1]
 [  9   3]]

NEK2 binding moe ADASYN
trainX:torch.Size([2158, 306]), train y: torch.Size([2158]), testX: torch.Size([283, 306]), test y: torch.Size([283])
Iter 1/300 - Loss: 6.890   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.538   lengthscale: 1.321   noise: 1.253
Iter 21/300 - Loss: 4.850   lengthscale: 2.183   noise: 1.532
Iter 31/300 - Loss: 4.445   lengthscale: 3.165   noise: 1.371
Iter 41/300 - Loss: 4.142   lengthscale: 4.127   noise: 0.937
Iter 51/300 - Loss: 3.935   lengthscale: 4.931   noise: 0.492
Iter 61/300 - Loss: 3.815   lengthscale: 5.554   noise: 0.218
Iter 71/300 - Loss: 3.737   lengthscale: 6.046   noise: 0.103
Iter 81/300 - Loss: 3.687   lengthscale: 6.455   noise: 0.060
Iter 91/300 - Loss: 3.652   lengthscale: 6.808   noise: 0.041
Iter 101/300 - Loss: 3.624   len



accuracy: 0.9995, precision: 0.9991, recall: 1.0000, specificity: 0.9991, cm: [[1079    1]
 [   0 1078]]
accuracy: 0.9682, precision: 1.0000, recall: 0.2500, specificity: 1.0000, cm: [[271   0]
 [  9   3]]

NEK2 binding mfp scaled
trainX:torch.Size([1125, 2048]), train y: torch.Size([1125]), testX: torch.Size([283, 2048]), test y: torch.Size([283])
Iter 1/300 - Loss: 7.093   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.102   lengthscale: 1.270   noise: 0.806
Iter 21/300 - Loss: 3.525   lengthscale: 2.048   noise: 0.978
Iter 31/300 - Loss: 3.258   lengthscale: 2.768   noise: 1.018
Iter 41/300 - Loss: 3.195   lengthscale: 3.268   noise: 0.968
Iter 51/300 - Loss: 3.164   lengthscale: 3.597   noise: 0.878
Iter 61/300 - Loss: 3.139   lengthscale: 3.823   noise: 0.771
Iter 71/300 - Loss: 3.127   lengthscale: 3.993   noise: 0.663
Iter 81/300 - Loss: 3.113   lengthscale: 4.129   noise: 0.562
Iter 91/300 - Loss: 3.099   lengthscale: 4.247   noise: 0.475
Iter 101/300 - Loss: 3.092   l

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


accuracy: 0.9600, precision: 0.0000, recall: 0.0000, specificity: 1.0000, cm: [[1080    0]
 [  45    0]]
accuracy: 0.9576, precision: 0.0000, recall: 0.0000, specificity: 1.0000, cm: [[271   0]
 [ 12   0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



NEK2 binding mfp UNDER
trainX:torch.Size([90, 2048]), train y: torch.Size([90]), testX: torch.Size([283, 2048]), test y: torch.Size([283])
Iter 1/300 - Loss: 7.106   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.922   lengthscale: 1.294   noise: 1.297
Iter 21/300 - Loss: 5.419   lengthscale: 2.156   noise: 1.983
Iter 31/300 - Loss: 5.285   lengthscale: 2.930   noise: 2.605
Iter 41/300 - Loss: 5.247   lengthscale: 3.127   noise: 3.113
Iter 51/300 - Loss: 5.231   lengthscale: 2.952   noise: 3.489
Iter 61/300 - Loss: 5.222   lengthscale: 2.618   noise: 3.746
Iter 71/300 - Loss: 5.216   lengthscale: 2.232   noise: 3.909
Iter 81/300 - Loss: 5.212   lengthscale: 1.868   noise: 4.007
Iter 91/300 - Loss: 5.209   lengthscale: 1.571   noise: 4.062
Iter 101/300 - Loss: 5.207   lengthscale: 1.337   noise: 4.092
Iter 111/300 - Loss: 5.206   lengthscale: 1.148   noise: 4.108
Iter 121/300 - Loss: 5.205   lengthscale: 0.994   noise: 4.117
Iter 131/300 - Loss: 5.204   lengthscale: 0.870   no



accuracy: 1.0000, precision: 1.0000, recall: 1.0000, specificity: 1.0000, cm: [[45  0]
 [ 0 45]]
accuracy: 0.0565, precision: 0.0430, recall: 1.0000, specificity: 0.0148, cm: [[  4 267]
 [  0  12]]

NEK2 binding mfp SMOTE
trainX:torch.Size([2160, 2048]), train y: torch.Size([2160]), testX: torch.Size([283, 2048]), test y: torch.Size([283])
Iter 1/300 - Loss: 5.986   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 4.619   lengthscale: 1.297   noise: 0.786
Iter 21/300 - Loss: 3.852   lengthscale: 2.016   noise: 0.668
Iter 31/300 - Loss: 3.576   lengthscale: 2.629   noise: 0.352
Iter 41/300 - Loss: 3.449   lengthscale: 3.034   noise: 0.153
Iter 51/300 - Loss: 3.377   lengthscale: 3.302   noise: 0.072
Iter 61/300 - Loss: 3.350   lengthscale: 3.499   noise: 0.042
Iter 71/300 - Loss: 3.326   lengthscale: 3.658   noise: 0.029
Iter 81/300 - Loss: 3.328   lengthscale: 3.797   noise: 0.022
Iter 91/300 - Loss: 3.305   lengthscale: 3.923   noise: 0.018
Iter 101/300 - Loss: 3.302   lengthscal



accuracy: 0.9954, precision: 0.9991, recall: 0.9917, specificity: 0.9991, cm: [[1079    1]
 [   9 1071]]
accuracy: 0.9647, precision: 1.0000, recall: 0.1667, specificity: 1.0000, cm: [[271   0]
 [ 10   2]]





NEK2 binding mfp ADASYN
trainX:torch.Size([2168, 2048]), train y: torch.Size([2168]), testX: torch.Size([283, 2048]), test y: torch.Size([283])
Iter 1/300 - Loss: 6.020   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 4.624   lengthscale: 1.296   noise: 0.785
Iter 21/300 - Loss: 3.864   lengthscale: 2.007   noise: 0.659
Iter 31/300 - Loss: 3.578   lengthscale: 2.610   noise: 0.343
Iter 41/300 - Loss: 3.448   lengthscale: 3.007   noise: 0.149
Iter 51/300 - Loss: 3.389   lengthscale: 3.270   noise: 0.070
Iter 61/300 - Loss: 3.350   lengthscale: 3.462   noise: 0.041
Iter 71/300 - Loss: 3.334   lengthscale: 3.617   noise: 0.029
Iter 81/300 - Loss: 3.323   lengthscale: 3.753   noise: 0.022
Iter 91/300 - Loss: 3.314   lengthscale: 3.875   noise: 0.018
Iter 101/300 - Loss: 3.322   lengthscale: 3.989   noise: 0.015
Iter 111/300 - Loss: 3.309   lengthscale: 4.097   noise: 0.013
Iter 121/300 - Loss: 3.305   lengthscale: 4.200   noise: 0.012
Iter 131/300 - Loss: 3.302   lengthscale: 4.299



accuracy: 0.9954, precision: 0.9991, recall: 0.9917, specificity: 0.9991, cm: [[1079    1]
 [   9 1079]]
accuracy: 0.9647, precision: 1.0000, recall: 0.1667, specificity: 1.0000, cm: [[271   0]
 [ 10   2]]

NEK2 inhibition moe scaled
trainX:torch.Size([1635, 306]), train y: torch.Size([1635]), testX: torch.Size([409, 306]), test y: torch.Size([409])
Iter 1/300 - Loss: 7.102   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.534   lengthscale: 1.286   noise: 0.809
Iter 21/300 - Loss: 4.523   lengthscale: 2.111   noise: 1.070
Iter 31/300 - Loss: 3.813   lengthscale: 3.224   noise: 1.298
Iter 41/300 - Loss: 3.524   lengthscale: 4.419   noise: 1.387
Iter 51/300 - Loss: 3.447   lengthscale: 5.378   noise: 1.365
Iter 61/300 - Loss: 3.410   lengthscale: 6.082   noise: 1.285
Iter 71/300 - Loss: 3.379   lengthscale: 6.613   noise: 1.174
Iter 81/300 - Loss: 3.362   lengthscale: 7.036   noise: 1.050
Iter 91/300 - Loss: 3.345   lengthscale: 7.387   noise: 0.922
Iter 101/300 - Loss: 3.321   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


accuracy: 0.9315, precision: 0.0000, recall: 0.0000, specificity: 1.0000, cm: [[1523    0]
 [ 112    0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


accuracy: 0.9315, precision: 0.0000, recall: 0.0000, specificity: 1.0000, cm: [[381   0]
 [ 28   0]]

NEK2 inhibition moe UNDER
trainX:torch.Size([224, 306]), train y: torch.Size([224]), testX: torch.Size([409, 306]), test y: torch.Size([409])
Iter 1/300 - Loss: 7.107   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.930   lengthscale: 1.287   noise: 1.297
Iter 21/300 - Loss: 5.428   lengthscale: 2.098   noise: 1.980
Iter 31/300 - Loss: 5.207   lengthscale: 3.203   noise: 2.552
Iter 41/300 - Loss: 5.074   lengthscale: 4.519   noise: 2.888
Iter 51/300 - Loss: 4.989   lengthscale: 5.815   noise: 2.947
Iter 61/300 - Loss: 4.934   lengthscale: 6.931   noise: 2.771
Iter 71/300 - Loss: 4.892   lengthscale: 7.848   noise: 2.440
Iter 81/300 - Loss: 4.856   lengthscale: 8.610   noise: 2.031
Iter 91/300 - Loss: 4.825   lengthscale: 9.264   noise: 1.607
Iter 101/300 - Loss: 4.798   lengthscale: 9.844   noise: 1.218
Iter 111/300 - Loss: 4.776   lengthscale: 10.368   noise: 0.899
Iter 121/3



trainX:torch.Size([3046, 306]), train y: torch.Size([3046]), testX: torch.Size([409, 306]), test y: torch.Size([409])
Iter 1/300 - Loss: 6.910   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.563   lengthscale: 1.320   noise: 1.261
Iter 21/300 - Loss: 4.840   lengthscale: 2.176   noise: 1.573
Iter 31/300 - Loss: 4.387   lengthscale: 3.171   noise: 1.382
Iter 41/300 - Loss: 4.105   lengthscale: 4.105   noise: 0.904
Iter 51/300 - Loss: 3.928   lengthscale: 4.844   noise: 0.464
Iter 61/300 - Loss: 3.810   lengthscale: 5.412   noise: 0.209
Iter 71/300 - Loss: 3.725   lengthscale: 5.872   noise: 0.103
Iter 81/300 - Loss: 3.675   lengthscale: 6.265   noise: 0.062
Iter 91/300 - Loss: 3.646   lengthscale: 6.612   noise: 0.043
Iter 101/300 - Loss: 3.625   lengthscale: 6.926   noise: 0.033
Iter 111/300 - Loss: 3.610   lengthscale: 7.214   noise: 0.027
Iter 121/300 - Loss: 3.596   lengthscale: 7.480   noise: 0.023
Iter 131/300 - Loss: 3.586   lengthscale: 7.729   noise: 0.019
Iter 141/30



accuracy: 1.0000, precision: 1.0000, recall: 1.0000, specificity: 1.0000, cm: [[1523    0]
 [   0 1523]]
accuracy: 0.9633, precision: 0.8421, recall: 0.5714, specificity: 0.9921, cm: [[378   3]
 [ 12  16]]





NEK2 inhibition moe ADASYN
trainX:torch.Size([3037, 306]), train y: torch.Size([3037]), testX: torch.Size([409, 306]), test y: torch.Size([409])
Iter 1/300 - Loss: 6.922   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.587   lengthscale: 1.320   noise: 1.264
Iter 21/300 - Loss: 4.878   lengthscale: 2.175   noise: 1.605
Iter 31/300 - Loss: 4.431   lengthscale: 3.176   noise: 1.433
Iter 41/300 - Loss: 4.142   lengthscale: 4.125   noise: 0.952
Iter 51/300 - Loss: 3.967   lengthscale: 4.886   noise: 0.496
Iter 61/300 - Loss: 3.835   lengthscale: 5.476   noise: 0.224
Iter 71/300 - Loss: 3.758   lengthscale: 5.954   noise: 0.110
Iter 81/300 - Loss: 3.709   lengthscale: 6.361   noise: 0.065
Iter 91/300 - Loss: 3.683   lengthscale: 6.720   noise: 0.045
Iter 101/300 - Loss: 3.661   lengthscale: 7.045   noise: 0.034
Iter 111/300 - Loss: 3.639   lengthscale: 7.342   noise: 0.028
Iter 121/300 - Loss: 3.631   lengthscale: 7.615   noise: 0.023
Iter 131/300 - Loss: 3.618   lengthscale: 7.87



accuracy: 1.0000, precision: 1.0000, recall: 1.0000, specificity: 1.0000, cm: [[1523    0]
 [   0 1514]]
accuracy: 0.9584, precision: 0.7619, recall: 0.5714, specificity: 0.9869, cm: [[376   5]
 [ 12  16]]





NEK2 inhibition mfp scaled
trainX:torch.Size([1635, 2048]), train y: torch.Size([1635]), testX: torch.Size([409, 2048]), test y: torch.Size([409])
Iter 1/300 - Loss: 7.078   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 4.885   lengthscale: 1.296   noise: 0.802
Iter 21/300 - Loss: 3.573   lengthscale: 2.100   noise: 0.936
Iter 31/300 - Loss: 3.392   lengthscale: 2.755   noise: 0.947
Iter 41/300 - Loss: 3.339   lengthscale: 3.189   noise: 0.880
Iter 51/300 - Loss: 3.301   lengthscale: 3.472   noise: 0.776
Iter 61/300 - Loss: 3.285   lengthscale: 3.669   noise: 0.661
Iter 71/300 - Loss: 3.275   lengthscale: 3.816   noise: 0.551
Iter 81/300 - Loss: 3.261   lengthscale: 3.935   noise: 0.453
Iter 91/300 - Loss: 3.248   lengthscale: 4.039   noise: 0.372
Iter 101/300 - Loss: 3.240   lengthscale: 4.133   noise: 0.308
Iter 111/300 - Loss: 3.233   lengthscale: 4.219   noise: 0.258
Iter 121/300 - Loss: 3.228   lengthscale: 4.300   noise: 0.220
Iter 131/300 - Loss: 3.232   lengthscale: 4.



accuracy: 0.9339, precision: 1.0000, recall: 0.0357, specificity: 1.0000, cm: [[1523    0]
 [ 108    4]]
accuracy: 0.9340, precision: 1.0000, recall: 0.0357, specificity: 1.0000, cm: [[381   0]
 [ 27   1]]





NEK2 inhibition mfp UNDER
trainX:torch.Size([224, 2048]), train y: torch.Size([224]), testX: torch.Size([409, 2048]), test y: torch.Size([409])
Iter 1/300 - Loss: 7.106   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.892   lengthscale: 1.291   noise: 1.296
Iter 21/300 - Loss: 5.388   lengthscale: 2.134   noise: 1.978
Iter 31/300 - Loss: 5.277   lengthscale: 2.763   noise: 2.611
Iter 41/300 - Loss: 5.244   lengthscale: 2.972   noise: 3.126
Iter 51/300 - Loss: 5.231   lengthscale: 2.962   noise: 3.502
Iter 61/300 - Loss: 5.225   lengthscale: 2.863   noise: 3.759
Iter 71/300 - Loss: 5.222   lengthscale: 2.732   noise: 3.928
Iter 81/300 - Loss: 5.220   lengthscale: 2.587   noise: 4.035
Iter 91/300 - Loss: 5.218   lengthscale: 2.434   noise: 4.100
Iter 101/300 - Loss: 5.215   lengthscale: 2.271   noise: 4.138
Iter 111/300 - Loss: 5.213   lengthscale: 2.098   noise: 4.158
Iter 121/300 - Loss: 5.211   lengthscale: 1.915   noise: 4.167
Iter 131/300 - Loss: 5.208   lengthscale: 1.727



accuracy: 1.0000, precision: 1.0000, recall: 1.0000, specificity: 1.0000, cm: [[112   0]
 [  0 112]]
accuracy: 0.3007, precision: 0.0757, recall: 0.8214, specificity: 0.2625, cm: [[100 281]
 [  5  23]]

NEK2 inhibition mfp SMOTE
trainX:torch.Size([3046, 2048]), train y: torch.Size([3046]), testX: torch.Size([409, 2048]), test y: torch.Size([409])
Iter 1/300 - Loss: 6.187   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 4.573   lengthscale: 1.302   noise: 0.774
Iter 21/300 - Loss: 3.825   lengthscale: 2.023   noise: 0.597
Iter 31/300 - Loss: 3.595   lengthscale: 2.588   noise: 0.302
Iter 41/300 - Loss: 3.484   lengthscale: 2.959   noise: 0.133
Iter 51/300 - Loss: 3.427   lengthscale: 3.216   noise: 0.065
Iter 61/300 - Loss: 3.392   lengthscale: 3.414   noise: 0.039
Iter 71/300 - Loss: 3.376   lengthscale: 3.583   noise: 0.028
Iter 81/300 - Loss: 3.368   lengthscale: 3.736   noise: 0.021
Iter 91/300 - Loss: 3.361   lengthscale: 3.877   noise: 0.017
Iter 101/300 - Loss: 3.358   len



accuracy: 0.9961, precision: 0.9974, recall: 0.9947, specificity: 0.9974, cm: [[1519    4]
 [   8 1515]]
accuracy: 0.9633, precision: 0.8824, recall: 0.5357, specificity: 0.9948, cm: [[379   2]
 [ 13  15]]

NEK2 inhibition mfp ADASYN
trainX:torch.Size([3027, 2048]), train y: torch.Size([3027]), testX: torch.Size([409, 2048]), test y: torch.Size([409])
Iter 1/300 - Loss: 6.062   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 4.545   lengthscale: 1.297   noise: 0.772
Iter 21/300 - Loss: 3.816   lengthscale: 2.002   noise: 0.585
Iter 31/300 - Loss: 3.586   lengthscale: 2.558   noise: 0.293
Iter 41/300 - Loss: 3.473   lengthscale: 2.926   noise: 0.129
Iter 51/300 - Loss: 3.420   lengthscale: 3.182   noise: 0.064
Iter 61/300 - Loss: 3.385   lengthscale: 3.379   noise: 0.039
Iter 71/300 - Loss: 3.367   lengthscale: 3.545   noise: 0.028
Iter 81/300 - Loss: 3.366   lengthscale: 3.693   noise: 0.022
Iter 91/300 - Loss: 3.351   lengthscale: 3.830   noise: 0.018
Iter 101/300 - Loss: 3.343 



accuracy: 0.9967, precision: 0.9973, recall: 0.9960, specificity: 0.9974, cm: [[1519    4]
 [   6 1498]]
accuracy: 0.9633, precision: 0.8824, recall: 0.5357, specificity: 0.9948, cm: [[379   2]
 [ 13  15]]

NEK3
NEK3 binding moe scaled
trainX:torch.Size([1122, 306]), train y: torch.Size([1122]), testX: torch.Size([282, 306]), test y: torch.Size([282])
Iter 1/300 - Loss: 7.107   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.518   lengthscale: 1.062   noise: 0.809
Iter 21/300 - Loss: 4.522   lengthscale: 1.602   noise: 1.072
Iter 31/300 - Loss: 3.856   lengthscale: 2.323   noise: 1.317
Iter 41/300 - Loss: 3.515   lengthscale: 3.159   noise: 1.432
Iter 51/300 - Loss: 3.408   lengthscale: 3.899   noise: 1.435
Iter 61/300 - Loss: 3.374   lengthscale: 4.490   noise: 1.377
Iter 71/300 - Loss: 3.356   lengthscale: 5.185   noise: 1.291
Iter 81/300 - Loss: 3.341   lengthscale: 5.912   noise: 1.192
Iter 91/300 - Loss: 3.319   lengthscale: 6.673   noise: 1.089
Iter 101/300 - Loss: 3.311 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


accuracy: 0.9430, precision: 0.0000, recall: 0.0000, specificity: 1.0000, cm: [[1058    0]
 [  64    0]]
accuracy: 0.9397, precision: 0.0000, recall: 0.0000, specificity: 1.0000, cm: [[265   0]
 [ 17   0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



NEK3 binding moe UNDER
trainX:torch.Size([128, 306]), train y: torch.Size([128]), testX: torch.Size([282, 306]), test y: torch.Size([282])
Iter 1/300 - Loss: 7.107   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.931   lengthscale: 1.259   noise: 1.297
Iter 21/300 - Loss: 5.439   lengthscale: 2.052   noise: 1.982
Iter 31/300 - Loss: 5.263   lengthscale: 3.154   noise: 2.573
Iter 41/300 - Loss: 5.206   lengthscale: 4.437   noise: 3.009
Iter 51/300 - Loss: 5.188   lengthscale: 5.570   noise: 3.302
Iter 61/300 - Loss: 5.182   lengthscale: 6.404   noise: 3.483
Iter 71/300 - Loss: 5.180   lengthscale: 6.993   noise: 3.582
Iter 81/300 - Loss: 5.178   lengthscale: 7.424   noise: 3.625
Iter 91/300 - Loss: 5.177   lengthscale: 7.759   noise: 3.630
Iter 101/300 - Loss: 5.177   lengthscale: 8.032   noise: 3.611
Iter 111/300 - Loss: 5.176   lengthscale: 8.263   noise: 3.576
Iter 121/300 - Loss: 5.176   lengthscale: 8.462   noise: 3.531
Iter 131/300 - Loss: 5.175   lengthscale: 8.634   no



accuracy: 1.0000, precision: 1.0000, recall: 1.0000, specificity: 1.0000, cm: [[64  0]
 [ 0 64]]
accuracy: 0.6064, precision: 0.1017, recall: 0.7059, specificity: 0.6000, cm: [[159 106]
 [  5  12]]

NEK3 binding moe SMOTE
trainX:torch.Size([2116, 306]), train y: torch.Size([2116]), testX: torch.Size([282, 306]), test y: torch.Size([282])
Iter 1/300 - Loss: 6.937   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.616   lengthscale: 1.319   noise: 1.270
Iter 21/300 - Loss: 4.951   lengthscale: 2.186   noise: 1.660
Iter 31/300 - Loss: 4.580   lengthscale: 3.182   noise: 1.575
Iter 41/300 - Loss: 4.318   lengthscale: 4.161   noise: 1.138
Iter 51/300 - Loss: 4.118   lengthscale: 4.998   noise: 0.637
Iter 61/300 - Loss: 3.983   lengthscale: 5.667   noise: 0.295
Iter 71/300 - Loss: 3.906   lengthscale: 6.207   noise: 0.138
Iter 81/300 - Loss: 3.864   lengthscale: 6.660   noise: 0.077
Iter 91/300 - Loss: 3.828   lengthscale: 7.052   noise: 0.052
Iter 101/300 - Loss: 3.820   lengthscale:



accuracy: 1.0000, precision: 1.0000, recall: 1.0000, specificity: 1.0000, cm: [[1058    0]
 [   0 1058]]
accuracy: 0.9255, precision: 0.2500, recall: 0.1176, specificity: 0.9774, cm: [[259   6]
 [ 15   2]]

NEK3 binding moe ADASYN
trainX:torch.Size([2113, 306]), train y: torch.Size([2113]), testX: torch.Size([282, 306]), test y: torch.Size([282])
Iter 1/300 - Loss: 6.942   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.620   lengthscale: 1.321   noise: 1.270
Iter 21/300 - Loss: 4.958   lengthscale: 2.186   noise: 1.661
Iter 31/300 - Loss: 4.592   lengthscale: 3.181   noise: 1.578
Iter 41/300 - Loss: 4.318   lengthscale: 4.160   noise: 1.144
Iter 51/300 - Loss: 4.129   lengthscale: 5.001   noise: 0.641
Iter 61/300 - Loss: 4.001   lengthscale: 5.674   noise: 0.297
Iter 71/300 - Loss: 3.920   lengthscale: 6.219   noise: 0.139
Iter 81/300 - Loss: 3.875   lengthscale: 6.677   noise: 0.078
Iter 91/300 - Loss: 3.831   lengthscale: 7.074   noise: 0.052
Iter 101/300 - Loss: 3.812   len



accuracy: 1.0000, precision: 1.0000, recall: 1.0000, specificity: 1.0000, cm: [[1058    0]
 [   0 1055]]
accuracy: 0.9184, precision: 0.2000, recall: 0.1176, specificity: 0.9698, cm: [[257   8]
 [ 15   2]]

NEK3 binding mfp scaled
trainX:torch.Size([1122, 2048]), train y: torch.Size([1122]), testX: torch.Size([282, 2048]), test y: torch.Size([282])
Iter 1/300 - Loss: 7.086   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.163   lengthscale: 1.266   noise: 0.806
Iter 21/300 - Loss: 3.675   lengthscale: 2.041   noise: 0.988
Iter 31/300 - Loss: 3.417   lengthscale: 2.777   noise: 1.054
Iter 41/300 - Loss: 3.348   lengthscale: 3.288   noise: 1.036
Iter 51/300 - Loss: 3.324   lengthscale: 3.628   noise: 0.974
Iter 61/300 - Loss: 3.310   lengthscale: 3.862   noise: 0.894
Iter 71/300 - Loss: 3.296   lengthscale: 4.032   noise: 0.808
Iter 81/300 - Loss: 3.287   lengthscale: 4.172   noise: 0.725
Iter 91/300 - Loss: 3.278   lengthscale: 4.288   noise: 0.649
Iter 101/300 - Loss: 3.273   l

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


accuracy: 0.9430, precision: 0.0000, recall: 0.0000, specificity: 1.0000, cm: [[1058    0]
 [  64    0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


accuracy: 0.9397, precision: 0.0000, recall: 0.0000, specificity: 1.0000, cm: [[265   0]
 [ 17   0]]

NEK3 binding mfp UNDER
trainX:torch.Size([128, 2048]), train y: torch.Size([128]), testX: torch.Size([282, 2048]), test y: torch.Size([282])
Iter 1/300 - Loss: 7.106   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.914   lengthscale: 1.285   noise: 1.296
Iter 21/300 - Loss: 5.395   lengthscale: 2.143   noise: 1.976
Iter 31/300 - Loss: 5.261   lengthscale: 2.951   noise: 2.579
Iter 41/300 - Loss: 5.226   lengthscale: 3.325   noise: 3.053
Iter 51/300 - Loss: 5.213   lengthscale: 3.419   noise: 3.388
Iter 61/300 - Loss: 5.209   lengthscale: 3.394   noise: 3.608
Iter 71/300 - Loss: 5.207   lengthscale: 3.328   noise: 3.745
Iter 81/300 - Loss: 5.206   lengthscale: 3.248   noise: 3.824
Iter 91/300 - Loss: 5.205   lengthscale: 3.163   noise: 3.867
Iter 101/300 - Loss: 5.205   lengthscale: 3.074   noise: 3.886
Iter 111/300 - Loss: 5.204   lengthscale: 2.980   noise: 3.891
Iter 121/300



accuracy: 1.0000, precision: 1.0000, recall: 1.0000, specificity: 1.0000, cm: [[64  0]
 [ 0 64]]
accuracy: 0.4929, precision: 0.0909, recall: 0.8235, specificity: 0.4717, cm: [[125 140]
 [  3  14]]

NEK3 binding mfp SMOTE
trainX:torch.Size([2116, 2048]), train y: torch.Size([2116]), testX: torch.Size([282, 2048]), test y: torch.Size([282])
Iter 1/300 - Loss: 6.221   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 4.724   lengthscale: 1.299   noise: 0.790
Iter 21/300 - Loss: 3.990   lengthscale: 2.019   noise: 0.717
Iter 31/300 - Loss: 3.719   lengthscale: 2.634   noise: 0.403
Iter 41/300 - Loss: 3.583   lengthscale: 3.043   noise: 0.179
Iter 51/300 - Loss: 3.521   lengthscale: 3.314   noise: 0.081
Iter 61/300 - Loss: 3.490   lengthscale: 3.513   noise: 0.045
Iter 71/300 - Loss: 3.459   lengthscale: 3.673   noise: 0.030
Iter 81/300 - Loss: 3.446   lengthscale: 3.814   noise: 0.023
Iter 91/300 - Loss: 3.444   lengthscale: 3.941   noise: 0.019
Iter 101/300 - Loss: 3.438   lengthscal



accuracy: 0.9981, precision: 1.0000, recall: 0.9962, specificity: 1.0000, cm: [[1058    0]
 [   4 1054]]
accuracy: 0.9574, precision: 0.8571, recall: 0.3529, specificity: 0.9962, cm: [[264   1]
 [ 11   6]]

NEK3 binding mfp ADASYN
trainX:torch.Size([2130, 2048]), train y: torch.Size([2130]), testX: torch.Size([282, 2048]), test y: torch.Size([282])
Iter 1/300 - Loss: 6.194   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 4.720   lengthscale: 1.297   noise: 0.788
Iter 21/300 - Loss: 3.990   lengthscale: 2.006   noise: 0.709
Iter 31/300 - Loss: 3.712   lengthscale: 2.613   noise: 0.394
Iter 41/300 - Loss: 3.581   lengthscale: 3.016   noise: 0.173
Iter 51/300 - Loss: 3.515   lengthscale: 3.283   noise: 0.079
Iter 61/300 - Loss: 3.474   lengthscale: 3.479   noise: 0.044
Iter 71/300 - Loss: 3.460   lengthscale: 3.636   noise: 0.030
Iter 81/300 - Loss: 3.443   lengthscale: 3.774   noise: 0.023
Iter 91/300 - Loss: 3.429   lengthscale: 3.900   noise: 0.018
Iter 101/300 - Loss: 3.431   l



accuracy: 0.9981, precision: 1.0000, recall: 0.9963, specificity: 1.0000, cm: [[1058    0]
 [   4 1068]]
accuracy: 0.9539, precision: 0.8333, recall: 0.2941, specificity: 0.9962, cm: [[264   1]
 [ 12   5]]

NEK5
NEK5 binding moe scaled
trainX:torch.Size([989, 306]), train y: torch.Size([989]), testX: torch.Size([248, 306]), test y: torch.Size([248])
Iter 1/300 - Loss: 7.106   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.561   lengthscale: 1.284   noise: 0.809
Iter 21/300 - Loss: 4.609   lengthscale: 2.118   noise: 1.072
Iter 31/300 - Loss: 3.972   lengthscale: 3.221   noise: 1.317
Iter 41/300 - Loss: 3.640   lengthscale: 4.467   noise: 1.437
Iter 51/300 - Loss: 3.546   lengthscale: 5.522   noise: 1.445
Iter 61/300 - Loss: 3.502   lengthscale: 6.294   noise: 1.391
Iter 71/300 - Loss: 3.490   lengthscale: 6.858   noise: 1.307
Iter 81/300 - Loss: 3.471   lengthscale: 7.301   noise: 1.210
Iter 91/300 - Loss: 3.455   lengthscale: 7.669   noise: 1.109
Iter 101/300 - Loss: 3.453   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


accuracy: 0.9221, precision: 0.0000, recall: 0.0000, specificity: 1.0000, cm: [[912   0]
 [ 77   0]]
accuracy: 0.9194, precision: 0.0000, recall: 0.0000, specificity: 1.0000, cm: [[228   0]
 [ 20   0]]

NEK5 binding moe UNDER
trainX:torch.Size([154, 306]), train y: torch.Size([154]), testX: torch.Size([248, 306]), test y: torch.Size([248])
Iter 1/300 - Loss: 7.107   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.931   lengthscale: 1.264   noise: 1.297
Iter 21/300 - Loss: 5.437   lengthscale: 2.063   noise: 1.982


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Iter 31/300 - Loss: 5.251   lengthscale: 3.161   noise: 2.569
Iter 41/300 - Loss: 5.168   lengthscale: 4.490   noise: 2.979
Iter 51/300 - Loss: 5.123   lengthscale: 5.840   noise: 3.202
Iter 61/300 - Loss: 5.099   lengthscale: 7.022   noise: 3.262
Iter 71/300 - Loss: 5.083   lengthscale: 7.984   noise: 3.208
Iter 81/300 - Loss: 5.072   lengthscale: 8.761   noise: 3.080
Iter 91/300 - Loss: 5.064   lengthscale: 9.402   noise: 2.913
Iter 101/300 - Loss: 5.057   lengthscale: 9.946   noise: 2.727
Iter 111/300 - Loss: 5.050   lengthscale: 10.420   noise: 2.534
Iter 121/300 - Loss: 5.045   lengthscale: 10.841   noise: 2.342
Iter 131/300 - Loss: 5.039   lengthscale: 11.219   noise: 2.156
Iter 141/300 - Loss: 5.035   lengthscale: 11.561   noise: 1.977
Iter 151/300 - Loss: 5.030   lengthscale: 11.874   noise: 1.807
Iter 161/300 - Loss: 5.026   lengthscale: 12.161   noise: 1.646
Iter 171/300 - Loss: 5.023   lengthscale: 12.427   noise: 1.493
Iter 181/300 - Loss: 5.019   lengthscale: 12.674   nois



trainX:torch.Size([1824, 306]), train y: torch.Size([1824]), testX: torch.Size([248, 306]), test y: torch.Size([248])
Iter 1/300 - Loss: 6.965   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.647   lengthscale: 1.320   noise: 1.273
Iter 21/300 - Loss: 4.995   lengthscale: 2.184   noise: 1.699
Iter 31/300 - Loss: 4.625   lengthscale: 3.191   noise: 1.658
Iter 41/300 - Loss: 4.367   lengthscale: 4.186   noise: 1.231
Iter 51/300 - Loss: 4.181   lengthscale: 5.032   noise: 0.711
Iter 61/300 - Loss: 4.051   lengthscale: 5.708   noise: 0.338
Iter 71/300 - Loss: 3.965   lengthscale: 6.257   noise: 0.158
Iter 81/300 - Loss: 3.931   lengthscale: 6.719   noise: 0.087
Iter 91/300 - Loss: 3.898   lengthscale: 7.122   noise: 0.057
Iter 101/300 - Loss: 3.866   lengthscale: 7.479   noise: 0.043
Iter 111/300 - Loss: 3.856   lengthscale: 7.802   noise: 0.034
Iter 121/300 - Loss: 3.834   lengthscale: 8.097   noise: 0.028
Iter 131/300 - Loss: 3.824   lengthscale: 8.370   noise: 0.024
Iter 141/30



accuracy: 0.9995, precision: 0.9989, recall: 1.0000, specificity: 0.9989, cm: [[911   1]
 [  0 912]]
accuracy: 0.9516, precision: 0.7857, recall: 0.5500, specificity: 0.9868, cm: [[225   3]
 [  9  11]]

NEK5 binding moe ADASYN
trainX:torch.Size([1831, 306]), train y: torch.Size([1831]), testX: torch.Size([248, 306]), test y: torch.Size([248])
Iter 1/300 - Loss: 6.974   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.672   lengthscale: 1.321   noise: 1.278
Iter 21/300 - Loss: 5.025   lengthscale: 2.189   noise: 1.738
Iter 31/300 - Loss: 4.662   lengthscale: 3.209   noise: 1.738
Iter 41/300 - Loss: 4.406   lengthscale: 4.221   noise: 1.317
Iter 51/300 - Loss: 4.225   lengthscale: 5.093   noise: 0.766
Iter 61/300 - Loss: 4.083   lengthscale: 5.795   noise: 0.365
Iter 71/300 - Loss: 3.996   lengthscale: 6.367   noise: 0.171
Iter 81/300 - Loss: 3.943   lengthscale: 6.849   noise: 0.094
Iter 91/300 - Loss: 3.920   lengthscale: 7.267   noise: 0.062
Iter 101/300 - Loss: 3.892   lengths



accuracy: 0.9995, precision: 0.9989, recall: 1.0000, specificity: 0.9989, cm: [[911   1]
 [  0 919]]
accuracy: 0.9435, precision: 0.7143, recall: 0.5000, specificity: 0.9825, cm: [[224   4]
 [ 10  10]]

NEK5 binding mfp scaled
trainX:torch.Size([989, 2048]), train y: torch.Size([989]), testX: torch.Size([248, 2048]), test y: torch.Size([248])
Iter 1/300 - Loss: 7.091   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.223   lengthscale: 1.291   noise: 0.806
Iter 21/300 - Loss: 3.783   lengthscale: 2.103   noise: 0.988
Iter 31/300 - Loss: 3.526   lengthscale: 2.808   noise: 1.052
Iter 41/300 - Loss: 3.468   lengthscale: 3.279   noise: 1.029
Iter 51/300 - Loss: 3.449   lengthscale: 3.578   noise: 0.960
Iter 61/300 - Loss: 3.429   lengthscale: 3.772   noise: 0.872
Iter 71/300 - Loss: 3.419   lengthscale: 3.913   noise: 0.778
Iter 81/300 - Loss: 3.405   lengthscale: 4.021   noise: 0.686
Iter 91/300 - Loss: 3.400   lengthscale: 4.112   noise: 0.602
Iter 101/300 - Loss: 3.386   lengths

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


accuracy: 0.9221, precision: 0.0000, recall: 0.0000, specificity: 1.0000, cm: [[912   0]
 [ 77   0]]
accuracy: 0.9194, precision: 0.0000, recall: 0.0000, specificity: 1.0000, cm: [[228   0]
 [ 20   0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



NEK5 binding mfp UNDER
trainX:torch.Size([154, 2048]), train y: torch.Size([154]), testX: torch.Size([248, 2048]), test y: torch.Size([248])
Iter 1/300 - Loss: 7.106   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.907   lengthscale: 1.287   noise: 1.296
Iter 21/300 - Loss: 5.385   lengthscale: 2.144   noise: 1.974
Iter 31/300 - Loss: 5.254   lengthscale: 2.924   noise: 2.573
Iter 41/300 - Loss: 5.219   lengthscale: 3.299   noise: 3.038
Iter 51/300 - Loss: 5.207   lengthscale: 3.424   noise: 3.360
Iter 61/300 - Loss: 5.203   lengthscale: 3.443   noise: 3.567
Iter 71/300 - Loss: 5.201   lengthscale: 3.426   noise: 3.690
Iter 81/300 - Loss: 5.200   lengthscale: 3.398   noise: 3.757
Iter 91/300 - Loss: 5.200   lengthscale: 3.368   noise: 3.788
Iter 101/300 - Loss: 5.199   lengthscale: 3.338   noise: 3.796
Iter 111/300 - Loss: 5.199   lengthscale: 3.306   noise: 3.791
Iter 121/300 - Loss: 5.199   lengthscale: 3.272   noise: 3.777
Iter 131/300 - Loss: 5.199   lengthscale: 3.235   



trainX:torch.Size([1824, 2048]), train y: torch.Size([1824]), testX: torch.Size([248, 2048]), test y: torch.Size([248])
Iter 1/300 - Loss: 6.302   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 4.800   lengthscale: 1.302   noise: 0.792
Iter 21/300 - Loss: 4.059   lengthscale: 2.040   noise: 0.740
Iter 31/300 - Loss: 3.782   lengthscale: 2.678   noise: 0.430
Iter 41/300 - Loss: 3.654   lengthscale: 3.108   noise: 0.194
Iter 51/300 - Loss: 3.584   lengthscale: 3.400   noise: 0.088
Iter 61/300 - Loss: 3.554   lengthscale: 3.617   noise: 0.048
Iter 71/300 - Loss: 3.518   lengthscale: 3.797   noise: 0.031
Iter 81/300 - Loss: 3.513   lengthscale: 3.956   noise: 0.023
Iter 91/300 - Loss: 3.514   lengthscale: 4.102   noise: 0.019
Iter 101/300 - Loss: 3.505   lengthscale: 4.237   noise: 0.016
Iter 111/300 - Loss: 3.492   lengthscale: 4.365   noise: 0.014
Iter 121/300 - Loss: 3.487   lengthscale: 4.485   noise: 0.012
Iter 131/300 - Loss: 3.484   lengthscale: 4.601   noise: 0.011
Iter 141/



accuracy: 0.9984, precision: 0.9989, recall: 0.9978, specificity: 0.9989, cm: [[911   1]
 [  2 910]]
accuracy: 0.9556, precision: 0.9091, recall: 0.5000, specificity: 0.9956, cm: [[227   1]
 [ 10  10]]

NEK5 binding mfp ADASYN
trainX:torch.Size([1805, 2048]), train y: torch.Size([1805]), testX: torch.Size([248, 2048]), test y: torch.Size([248])
Iter 1/300 - Loss: 6.306   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 4.794   lengthscale: 1.299   noise: 0.792
Iter 21/300 - Loss: 4.064   lengthscale: 2.016   noise: 0.751
Iter 31/300 - Loss: 3.794   lengthscale: 2.633   noise: 0.445
Iter 41/300 - Loss: 3.662   lengthscale: 3.049   noise: 0.203
Iter 51/300 - Loss: 3.582   lengthscale: 3.331   noise: 0.092
Iter 61/300 - Loss: 3.547   lengthscale: 3.541   noise: 0.049
Iter 71/300 - Loss: 3.537   lengthscale: 3.713   noise: 0.032
Iter 81/300 - Loss: 3.511   lengthscale: 3.865   noise: 0.024
Iter 91/300 - Loss: 3.494   lengthscale: 4.004   noise: 0.019
Iter 101/300 - Loss: 3.515   lengt



accuracy: 0.9983, precision: 0.9989, recall: 0.9978, specificity: 0.9989, cm: [[911   1]
 [  2 891]]
accuracy: 0.9516, precision: 1.0000, recall: 0.4000, specificity: 1.0000, cm: [[228   0]
 [ 12   8]]

NEK9
NEK9 binding moe scaled
trainX:torch.Size([1126, 306]), train y: torch.Size([1126]), testX: torch.Size([283, 306]), test y: torch.Size([283])
Iter 1/300 - Loss: 7.107   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.486   lengthscale: 0.930   noise: 0.809
Iter 21/300 - Loss: 4.471   lengthscale: 1.399   noise: 1.072
Iter 31/300 - Loss: 3.763   lengthscale: 2.014   noise: 1.314
Iter 41/300 - Loss: 3.401   lengthscale: 2.740   noise: 1.422
Iter 51/300 - Loss: 3.297   lengthscale: 3.345   noise: 1.410
Iter 61/300 - Loss: 3.255   lengthscale: 3.784   noise: 1.336
Iter 71/300 - Loss: 3.233   lengthscale: 4.216   noise: 1.232
Iter 81/300 - Loss: 3.209   lengthscale: 4.834   noise: 1.115
Iter 91/300 - Loss: 3.190   lengthscale: 5.590   noise: 0.994
Iter 101/300 - Loss: 3.177   le

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


accuracy: 0.9574, precision: 0.0000, recall: 0.0000, specificity: 1.0000, cm: [[1078    0]
 [  48    0]]
accuracy: 0.9541, precision: 0.0000, recall: 0.0000, specificity: 1.0000, cm: [[270   0]
 [ 13   0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



NEK9 binding moe UNDER
trainX:torch.Size([96, 306]), train y: torch.Size([96]), testX: torch.Size([283, 306]), test y: torch.Size([283])
Iter 1/300 - Loss: 7.106   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.929   lengthscale: 1.303   noise: 1.297
Iter 21/300 - Loss: 5.434   lengthscale: 2.161   noise: 1.980
Iter 31/300 - Loss: 5.253   lengthscale: 3.292   noise: 2.565
Iter 41/300 - Loss: 5.186   lengthscale: 4.597   noise: 2.981
Iter 51/300 - Loss: 5.156   lengthscale: 5.902   noise: 3.233
Iter 61/300 - Loss: 5.138   lengthscale: 7.081   noise: 3.347
Iter 71/300 - Loss: 5.127   lengthscale: 8.093   noise: 3.358
Iter 81/300 - Loss: 5.118   lengthscale: 8.951   noise: 3.298
Iter 91/300 - Loss: 5.112   lengthscale: 9.683   noise: 3.195
Iter 101/300 - Loss: 5.106   lengthscale: 10.318   noise: 3.066
Iter 111/300 - Loss: 5.101   lengthscale: 10.877   noise: 2.923
Iter 121/300 - Loss: 5.097   lengthscale: 11.376   noise: 2.773
Iter 131/300 - Loss: 5.093   lengthscale: 11.825   



Iter 1/300 - Loss: 6.889   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.537   lengthscale: 1.319   noise: 1.252
Iter 21/300 - Loss: 4.859   lengthscale: 2.177   noise: 1.529
Iter 31/300 - Loss: 4.458   lengthscale: 3.154   noise: 1.372
Iter 41/300 - Loss: 4.166   lengthscale: 4.114   noise: 0.940
Iter 51/300 - Loss: 3.970   lengthscale: 4.922   noise: 0.496
Iter 61/300 - Loss: 3.853   lengthscale: 5.552   noise: 0.220
Iter 71/300 - Loss: 3.763   lengthscale: 6.055   noise: 0.104
Iter 81/300 - Loss: 3.726   lengthscale: 6.475   noise: 0.061
Iter 91/300 - Loss: 3.684   lengthscale: 6.839   noise: 0.042
Iter 101/300 - Loss: 3.669   lengthscale: 7.162   noise: 0.032
Iter 111/300 - Loss: 3.645   lengthscale: 7.454   noise: 0.026
Iter 121/300 - Loss: 3.641   lengthscale: 7.721   noise: 0.022
Iter 131/300 - Loss: 3.626   lengthscale: 7.968   noise: 0.019
Iter 141/300 - Loss: 3.616   lengthscale: 8.200   noise: 0.016
Iter 151/300 - Loss: 3.612   lengthscale: 8.419   noise: 0.014
Ite



accuracy: 1.0000, precision: 1.0000, recall: 1.0000, specificity: 1.0000, cm: [[1078    0]
 [   0 1078]]
accuracy: 0.9470, precision: 0.0000, recall: 0.0000, specificity: 0.9926, cm: [[268   2]
 [ 13   0]]

NEK9 binding moe ADASYN
trainX:torch.Size([2164, 306]), train y: torch.Size([2164]), testX: torch.Size([283, 306]), test y: torch.Size([283])
Iter 1/300 - Loss: 6.896   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.552   lengthscale: 1.320   noise: 1.256
Iter 21/300 - Loss: 4.880   lengthscale: 2.178   noise: 1.557
Iter 31/300 - Loss: 4.485   lengthscale: 3.160   noise: 1.404
Iter 41/300 - Loss: 4.188   lengthscale: 4.128   noise: 0.960
Iter 51/300 - Loss: 3.996   lengthscale: 4.946   noise: 0.504
Iter 61/300 - Loss: 3.865   lengthscale: 5.587   noise: 0.224
Iter 71/300 - Loss: 3.778   lengthscale: 6.100   noise: 0.106
Iter 81/300 - Loss: 3.732   lengthscale: 6.529   noise: 0.062
Iter 91/300 - Loss: 3.700   lengthscale: 6.900   noise: 0.042
Iter 101/300 - Loss: 3.680   len



accuracy: 1.0000, precision: 1.0000, recall: 1.0000, specificity: 1.0000, cm: [[1078    0]
 [   0 1086]]
accuracy: 0.9470, precision: 0.0000, recall: 0.0000, specificity: 0.9926, cm: [[268   2]
 [ 13   0]]

NEK9 binding mfp scaled
trainX:torch.Size([1126, 2048]), train y: torch.Size([1126]), testX: torch.Size([283, 2048]), test y: torch.Size([283])
Iter 1/300 - Loss: 7.090   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.111   lengthscale: 1.273   noise: 0.806
Iter 21/300 - Loss: 3.543   lengthscale: 2.043   noise: 0.979
Iter 31/300 - Loss: 3.285   lengthscale: 2.746   noise: 1.023
Iter 41/300 - Loss: 3.215   lengthscale: 3.232   noise: 0.979
Iter 51/300 - Loss: 3.187   lengthscale: 3.568   noise: 0.893
Iter 61/300 - Loss: 3.168   lengthscale: 3.797   noise: 0.790
Iter 71/300 - Loss: 3.151   lengthscale: 3.969   noise: 0.686
Iter 81/300 - Loss: 3.140   lengthscale: 4.108   noise: 0.589
Iter 91/300 - Loss: 3.130   lengthscale: 4.226   noise: 0.504
Iter 101/300 - Loss: 3.125   l

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


accuracy: 0.9574, precision: 0.0000, recall: 0.0000, specificity: 1.0000, cm: [[1078    0]
 [  48    0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


accuracy: 0.9541, precision: 0.0000, recall: 0.0000, specificity: 1.0000, cm: [[270   0]
 [ 13   0]]

NEK9 binding mfp UNDER
trainX:torch.Size([96, 2048]), train y: torch.Size([96]), testX: torch.Size([283, 2048]), test y: torch.Size([283])
Iter 1/300 - Loss: 7.107   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.919   lengthscale: 1.280   noise: 1.297
Iter 21/300 - Loss: 5.411   lengthscale: 2.137   noise: 1.979
Iter 31/300 - Loss: 5.276   lengthscale: 2.927   noise: 2.591
Iter 41/300 - Loss: 5.239   lengthscale: 3.192   noise: 3.083
Iter 51/300 - Loss: 5.225   lengthscale: 3.113   noise: 3.440
Iter 61/300 - Loss: 5.218   lengthscale: 2.885   noise: 3.681
Iter 71/300 - Loss: 5.214   lengthscale: 2.599   noise: 3.833
Iter 81/300 - Loss: 5.210   lengthscale: 2.293   noise: 3.922
Iter 91/300 - Loss: 5.207   lengthscale: 1.992   noise: 3.968
Iter 101/300 - Loss: 5.205   lengthscale: 1.723   noise: 3.989
Iter 111/300 - Loss: 5.204   lengthscale: 1.506   noise: 3.996
Iter 121/300 -



accuracy: 1.0000, precision: 1.0000, recall: 1.0000, specificity: 1.0000, cm: [[48  0]
 [ 0 48]]
accuracy: 0.7208, precision: 0.0417, recall: 0.2308, specificity: 0.7444, cm: [[201  69]
 [ 10   3]]

NEK9 binding mfp SMOTE
trainX:torch.Size([2156, 2048]), train y: torch.Size([2156]), testX: torch.Size([283, 2048]), test y: torch.Size([283])
Iter 1/300 - Loss: 6.060   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 4.661   lengthscale: 1.297   noise: 0.786
Iter 21/300 - Loss: 3.902   lengthscale: 2.017   noise: 0.675
Iter 31/300 - Loss: 3.620   lengthscale: 2.632   noise: 0.358
Iter 41/300 - Loss: 3.483   lengthscale: 3.040   noise: 0.155
Iter 51/300 - Loss: 3.425   lengthscale: 3.311   noise: 0.073
Iter 61/300 - Loss: 3.390   lengthscale: 3.510   noise: 0.042
Iter 71/300 - Loss: 3.373   lengthscale: 3.672   noise: 0.029
Iter 81/300 - Loss: 3.361   lengthscale: 3.813   noise: 0.022
Iter 91/300 - Loss: 3.357   lengthscale: 3.941   noise: 0.018
Iter 101/300 - Loss: 3.355   lengthscal



accuracy: 0.9981, precision: 0.9991, recall: 0.9972, specificity: 0.9991, cm: [[1077    1]
 [   3 1075]]
accuracy: 0.9505, precision: 0.0000, recall: 0.0000, specificity: 0.9963, cm: [[269   1]
 [ 13   0]]

NEK9 binding mfp ADASYN
trainX:torch.Size([2166, 2048]), train y: torch.Size([2166]), testX: torch.Size([283, 2048]), test y: torch.Size([283])
Iter 1/300 - Loss: 6.022   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 4.640   lengthscale: 1.296   noise: 0.785
Iter 21/300 - Loss: 3.888   lengthscale: 2.007   noise: 0.665
Iter 31/300 - Loss: 3.609   lengthscale: 2.613   noise: 0.348
Iter 41/300 - Loss: 3.475   lengthscale: 3.014   noise: 0.151
Iter 51/300 - Loss: 3.425   lengthscale: 3.279   noise: 0.071
Iter 61/300 - Loss: 3.378   lengthscale: 3.474   noise: 0.042
Iter 71/300 - Loss: 3.366   lengthscale: 3.632   noise: 0.029
Iter 81/300 - Loss: 3.352   lengthscale: 3.769   noise: 0.022
Iter 91/300 - Loss: 3.346   lengthscale: 3.893   noise: 0.018
Iter 101/300 - Loss: 3.341   l



accuracy: 0.9982, precision: 0.9991, recall: 0.9972, specificity: 0.9991, cm: [[1077    1]
 [   3 1085]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


accuracy: 0.9541, precision: 0.0000, recall: 0.0000, specificity: 1.0000, cm: [[270   0]
 [ 13   0]]

NEK9 inhibition moe scaled
trainX:torch.Size([313, 306]), train y: torch.Size([313]), testX: torch.Size([80, 306]), test y: torch.Size([80])
Iter 1/300 - Loss: 7.107   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.616   lengthscale: 1.292   noise: 0.808
Iter 21/300 - Loss: 4.727   lengthscale: 2.124   noise: 1.072
Iter 31/300 - Loss: 4.210   lengthscale: 3.238   noise: 1.327
Iter 41/300 - Loss: 3.893   lengthscale: 4.531   noise: 1.479
Iter 51/300 - Loss: 3.750   lengthscale: 5.748   noise: 1.514
Iter 61/300 - Loss: 3.690   lengthscale: 6.699   noise: 1.470
Iter 71/300 - Loss: 3.659   lengthscale: 7.410   noise: 1.382
Iter 81/300 - Loss: 3.637   lengthscale: 7.952   noise: 1.269
Iter 91/300 - Loss: 3.619   lengthscale: 8.388   noise: 1.146
Iter 101/300 - Loss: 3.604   lengthscale: 8.755   noise: 1.018
Iter 111/300 - Loss: 3.590   lengthscale: 9.076   noise: 0.891
Iter 121/300

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



NEK9 inhibition moe UNDER
trainX:torch.Size([66, 306]), train y: torch.Size([66]), testX: torch.Size([80, 306]), test y: torch.Size([80])
Iter 1/300 - Loss: 7.107   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.931   lengthscale: 1.254   noise: 1.297
Iter 21/300 - Loss: 5.436   lengthscale: 2.023   noise: 1.982
Iter 31/300 - Loss: 5.243   lengthscale: 3.098   noise: 2.566
Iter 41/300 - Loss: 5.144   lengthscale: 4.422   noise: 2.959
Iter 51/300 - Loss: 5.080   lengthscale: 5.791   noise: 3.132
Iter 61/300 - Loss: 5.036   lengthscale: 7.026   noise: 3.103
Iter 71/300 - Loss: 5.001   lengthscale: 8.079   noise: 2.916
Iter 81/300 - Loss: 4.972   lengthscale: 8.972   noise: 2.621
Iter 91/300 - Loss: 4.945   lengthscale: 9.745   noise: 2.261
Iter 101/300 - Loss: 4.920   lengthscale: 10.431   noise: 1.874
Iter 111/300 - Loss: 4.896   lengthscale: 11.053   noise: 1.494
Iter 121/300 - Loss: 4.875   lengthscale: 11.626   noise: 1.149
Iter 131/300 - Loss: 4.856   lengthscale: 12.158  



trainX:torch.Size([560, 306]), train y: torch.Size([560]), testX: torch.Size([80, 306]), test y: torch.Size([80])
Iter 1/300 - Loss: 6.977   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.673   lengthscale: 1.320   noise: 1.278
Iter 21/300 - Loss: 5.013   lengthscale: 2.193   noise: 1.733
Iter 31/300 - Loss: 4.642   lengthscale: 3.226   noise: 1.718
Iter 41/300 - Loss: 4.355   lengthscale: 4.273   noise: 1.306
Iter 51/300 - Loss: 4.130   lengthscale: 5.218   noise: 0.768
Iter 61/300 - Loss: 3.966   lengthscale: 5.997   noise: 0.364
Iter 71/300 - Loss: 3.860   lengthscale: 6.630   noise: 0.166
Iter 81/300 - Loss: 3.796   lengthscale: 7.160   noise: 0.089
Iter 91/300 - Loss: 3.754   lengthscale: 7.616   noise: 0.057
Iter 101/300 - Loss: 3.726   lengthscale: 8.021   noise: 0.042
Iter 111/300 - Loss: 3.707   lengthscale: 8.387   noise: 0.033
Iter 121/300 - Loss: 3.692   lengthscale: 8.722   noise: 0.027
Iter 131/300 - Loss: 3.680   lengthscale: 9.034   noise: 0.023
Iter 141/300 - 



accuracy: 0.9982, precision: 0.9964, recall: 1.0000, specificity: 0.9964, cm: [[279   1]
 [  0 280]]
accuracy: 0.9500, precision: 0.8571, recall: 0.6667, specificity: 0.9859, cm: [[70  1]
 [ 3  6]]

NEK9 inhibition moe ADASYN
trainX:torch.Size([560, 306]), train y: torch.Size([560]), testX: torch.Size([80, 306]), test y: torch.Size([80])
Iter 1/300 - Loss: 6.968   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.665   lengthscale: 1.320   noise: 1.276
Iter 21/300 - Loss: 5.024   lengthscale: 2.188   noise: 1.728
Iter 31/300 - Loss: 4.667   lengthscale: 3.212   noise: 1.718
Iter 41/300 - Loss: 4.385   lengthscale: 4.260   noise: 1.311
Iter 51/300 - Loss: 4.157   lengthscale: 5.218   noise: 0.770
Iter 61/300 - Loss: 3.991   lengthscale: 6.017   noise: 0.363
Iter 71/300 - Loss: 3.885   lengthscale: 6.669   noise: 0.164
Iter 81/300 - Loss: 3.821   lengthscale: 7.212   noise: 0.088
Iter 91/300 - Loss: 3.780   lengthscale: 7.680   noise: 0.056
Iter 101/300 - Loss: 3.753   lengthscale:



trainX:torch.Size([313, 2048]), train y: torch.Size([313]), testX: torch.Size([80, 2048]), test y: torch.Size([80])
Iter 1/300 - Loss: 7.090   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.452   lengthscale: 1.305   noise: 0.807
Iter 21/300 - Loss: 4.132   lengthscale: 2.165   noise: 1.023
Iter 31/300 - Loss: 3.731   lengthscale: 3.029   noise: 1.120
Iter 41/300 - Loss: 3.637   lengthscale: 3.634   noise: 1.097
Iter 51/300 - Loss: 3.598   lengthscale: 4.018   noise: 1.008
Iter 61/300 - Loss: 3.574   lengthscale: 4.266   noise: 0.889
Iter 71/300 - Loss: 3.555   lengthscale: 4.443   noise: 0.760
Iter 81/300 - Loss: 3.538   lengthscale: 4.581   noise: 0.633
Iter 91/300 - Loss: 3.524   lengthscale: 4.699   noise: 0.518
Iter 101/300 - Loss: 3.512   lengthscale: 4.804   noise: 0.420
Iter 111/300 - Loss: 3.502   lengthscale: 4.900   noise: 0.340
Iter 121/300 - Loss: 3.493   lengthscale: 4.990   noise: 0.276
Iter 131/300 - Loss: 3.486   lengthscale: 5.074   noise: 0.227
Iter 141/300 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


accuracy: 0.8978, precision: 1.0000, recall: 0.0303, specificity: 1.0000, cm: [[280   0]
 [ 32   1]]
accuracy: 0.8875, precision: 0.0000, recall: 0.0000, specificity: 1.0000, cm: [[71  0]
 [ 9  0]]

NEK9 inhibition mfp UNDER
trainX:torch.Size([66, 2048]), train y: torch.Size([66]), testX: torch.Size([80, 2048]), test y: torch.Size([80])
Iter 1/300 - Loss: 7.106   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.932   lengthscale: 1.299   noise: 1.298
Iter 21/300 - Loss: 5.435   lengthscale: 2.168   noise: 1.993
Iter 31/300 - Loss: 5.298   lengthscale: 2.929   noise: 2.633
Iter 41/300 - Loss: 5.258   lengthscale: 3.092   noise: 3.164
Iter 51/300 - Loss: 5.241   lengthscale: 2.869   noise: 3.567
Iter 61/300 - Loss: 5.231   lengthscale: 2.491   noise: 3.853
Iter 71/300 - Loss: 5.224   lengthscale: 2.084   noise: 4.048
Iter 81/300 - Loss: 5.220   lengthscale: 1.728   noise: 4.179
Iter 91/300 - Loss: 5.218   lengthscale: 1.463   noise: 4.272
Iter 101/300 - Loss: 5.217   lengthscale: 



Iter 1/300 - Loss: 6.409   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 4.962   lengthscale: 1.307   noise: 0.844
Iter 21/300 - Loss: 4.193   lengthscale: 2.093   noise: 0.835
Iter 31/300 - Loss: 3.863   lengthscale: 2.836   noise: 0.526
Iter 41/300 - Loss: 3.705   lengthscale: 3.373   noise: 0.245
Iter 51/300 - Loss: 3.618   lengthscale: 3.748   noise: 0.107
Iter 61/300 - Loss: 3.574   lengthscale: 4.032   noise: 0.055
Iter 71/300 - Loss: 3.549   lengthscale: 4.267   noise: 0.035
Iter 81/300 - Loss: 3.532   lengthscale: 4.474   noise: 0.025
Iter 91/300 - Loss: 3.519   lengthscale: 4.664   noise: 0.020
Iter 101/300 - Loss: 3.509   lengthscale: 4.841   noise: 0.017
Iter 111/300 - Loss: 3.502   lengthscale: 5.007   noise: 0.014
Iter 121/300 - Loss: 3.495   lengthscale: 5.165   noise: 0.013
Iter 131/300 - Loss: 3.490   lengthscale: 5.316   noise: 0.011
Iter 141/300 - Loss: 3.485   lengthscale: 5.461   noise: 0.010
Iter 151/300 - Loss: 3.480   lengthscale: 5.600   noise: 0.009
Ite



accuracy: 0.9982, precision: 0.9964, recall: 1.0000, specificity: 0.9964, cm: [[279   1]
 [  0 280]]
accuracy: 0.9375, precision: 0.8333, recall: 0.5556, specificity: 0.9859, cm: [[70  1]
 [ 4  5]]

NEK9 inhibition mfp ADASYN
trainX:torch.Size([555, 2048]), train y: torch.Size([555]), testX: torch.Size([80, 2048]), test y: torch.Size([80])
Iter 1/300 - Loss: 6.238   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 4.909   lengthscale: 1.307   noise: 0.794
Iter 21/300 - Loss: 4.190   lengthscale: 2.092   noise: 0.797
Iter 31/300 - Loss: 3.857   lengthscale: 2.844   noise: 0.490
Iter 41/300 - Loss: 3.694   lengthscale: 3.392   noise: 0.222
Iter 51/300 - Loss: 3.610   lengthscale: 3.772   noise: 0.096
Iter 61/300 - Loss: 3.570   lengthscale: 4.056   noise: 0.050
Iter 71/300 - Loss: 3.549   lengthscale: 4.287   noise: 0.032
Iter 81/300 - Loss: 3.534   lengthscale: 4.487   noise: 0.024
Iter 91/300 - Loss: 3.525   lengthscale: 4.669   noise: 0.019
Iter 101/300 - Loss: 3.515   lengthscal



accuracy: 1.0000, precision: 1.0000, recall: 1.0000, specificity: 1.0000, cm: [[280   0]
 [  0 275]]
accuracy: 0.9375, precision: 0.8333, recall: 0.5556, specificity: 0.9859, cm: [[70  1]
 [ 4  5]]



In [20]:
data_dir = '/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/'
GP_path= '/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/GP/GP_matern_kernel/' 
train_results = [] 
test_results = [] 
for nek in neks:
    bind_inhib = ['binding', 'inhibition']
    if nek in ['3','5']: 
        bind_inhib = ['binding']
    for bi in bind_inhib: 
        if bi == 'binding': 
            this_bi = 'bind' 
        if bi == 'inhibition': 
            this_bi = 'inhib'
        nek_path= f'{data_dir}NEK{nek}/{this_bi}/'
        for feat in feat_types: 
            for samp in samplings: 
                print(f'NEK{nek} {this_bi} {feat} {samp}')
                file_root = f'NEK{nek}_{bi}_{feat}_{samp}'
                train = pd.read_csv(f'{GP_path}{file_root}_train_GP_matern.csv').iloc[0]
                test = pd.read_csv(f'{GP_path}{file_root}_test_GP_matern.csv').iloc[0]
                train_results.append(train)
                test_results.append(test) 

NEK2 bind moe scaled
NEK2 bind moe UNDER
NEK2 bind moe SMOTE
NEK2 bind moe ADASYN
NEK2 bind mfp scaled
NEK2 bind mfp UNDER
NEK2 bind mfp SMOTE
NEK2 bind mfp ADASYN
NEK2 inhib moe scaled
NEK2 inhib moe UNDER
NEK2 inhib moe SMOTE
NEK2 inhib moe ADASYN
NEK2 inhib mfp scaled
NEK2 inhib mfp UNDER
NEK2 inhib mfp SMOTE
NEK2 inhib mfp ADASYN
NEK3 bind moe scaled
NEK3 bind moe UNDER
NEK3 bind moe SMOTE
NEK3 bind moe ADASYN
NEK3 bind mfp scaled
NEK3 bind mfp UNDER
NEK3 bind mfp SMOTE
NEK3 bind mfp ADASYN
NEK5 bind moe scaled
NEK5 bind moe UNDER
NEK5 bind moe SMOTE
NEK5 bind moe ADASYN
NEK5 bind mfp scaled
NEK5 bind mfp UNDER
NEK5 bind mfp SMOTE
NEK5 bind mfp ADASYN
NEK9 bind moe scaled
NEK9 bind moe UNDER
NEK9 bind moe SMOTE
NEK9 bind moe ADASYN
NEK9 bind mfp scaled
NEK9 bind mfp UNDER
NEK9 bind mfp SMOTE
NEK9 bind mfp ADASYN
NEK9 inhib moe scaled
NEK9 inhib moe UNDER
NEK9 inhib moe SMOTE
NEK9 inhib moe ADASYN
NEK9 inhib mfp scaled
NEK9 inhib mfp UNDER
NEK9 inhib mfp SMOTE
NEK9 inhib mfp ADASYN


In [21]:
metric_cols = ['model','NEK', 'strategy', 'feat_type','cm','prediction_type', 'recall','ROC-AUC', 'MCC',
       'Balanced Accuracy', 'f1', 'accuracy', 'precision',
       'specificity', 'TN', 'FN', 'FP','TP']

train_df =  pd.DataFrame(train_results,columns=metric_cols)

train_df.to_csv(f'{GP_path}GP_maternkern_train_results.csv', index=False)
test_df =  pd.DataFrame(test_results,columns=metric_cols)
test_df.to_csv(f'{GP_path}GP_maternkern_test_results.csv',index=False)


In [22]:
test_df = pd.read_csv(GP_path+'GP_maternkern_test_results.csv')
test_df

Unnamed: 0,model,NEK,strategy,feat_type,cm,prediction_type,recall,ROC-AUC,MCC,Balanced Accuracy,f1,accuracy,precision,specificity,TN,FN,FP,TP
0,NEK2_binding_moe_scaled_GP_Dirichlet_matern,NEK2_binding,scaled,moe,"[271, 0, 12, 0]",TN,0.0,0.5,0.0,0.5,0.0,0.957597,0.0,1.0,271,12,0,0
1,NEK2_binding_moe_UNDER_GP_Dirichlet_matern,NEK2_binding,UNDER,moe,"[212, 59, 5, 7]",TN,0.583333,0.682811,0.174223,0.682811,0.179487,0.773852,0.106061,0.782288,212,5,59,7
2,NEK2_binding_moe_SMOTE_GP_Dirichlet_matern,NEK2_binding,SMOTE,moe,"[270, 1, 9, 3]",TN,0.25,0.623155,0.42046,0.623155,0.375,0.964664,0.75,0.99631,270,9,1,3
3,NEK2_binding_moe_ADASYN_GP_Dirichlet_matern,NEK2_binding,ADASYN,moe,"[271, 0, 9, 3]",TN,0.25,0.625,0.491899,0.625,0.4,0.968198,1.0,1.0,271,9,0,3
4,NEK2_binding_mfp_scaled_GP_Dirichlet_matern,NEK2_binding,scaled,mfp,"[271, 0, 12, 0]",TN,0.0,0.5,0.0,0.5,0.0,0.957597,0.0,1.0,271,12,0,0
5,NEK2_binding_mfp_UNDER_GP_Dirichlet_matern,NEK2_binding,UNDER,mfp,"[4, 267, 0, 12]",FP,1.0,0.50738,0.025196,0.50738,0.082474,0.056537,0.043011,0.01476,4,0,267,12
6,NEK2_binding_mfp_SMOTE_GP_Dirichlet_matern,NEK2_binding,SMOTE,mfp,"[271, 0, 10, 2]",TN,0.166667,0.583333,0.400918,0.583333,0.285714,0.964664,1.0,1.0,271,10,0,2
7,NEK2_binding_mfp_ADASYN_GP_Dirichlet_matern,NEK2_binding,ADASYN,mfp,"[271, 0, 10, 2]",TN,0.166667,0.583333,0.400918,0.583333,0.285714,0.964664,1.0,1.0,271,10,0,2
8,NEK2_inhibition_moe_scaled_GP_Dirichlet_matern,NEK2_inhibition,scaled,moe,"[381, 0, 28, 0]",TN,0.0,0.5,0.0,0.5,0.0,0.93154,0.0,1.0,381,28,0,0
9,NEK2_inhibition_moe_UNDER_GP_Dirichlet_matern,NEK2_inhibition,UNDER,moe,"[331, 50, 6, 22]",TN,0.785714,0.82724,0.433967,0.82724,0.44,0.863081,0.305556,0.868766,331,6,50,22
