In [40]:
import math
import torch
import numpy as np
import pandas as pd
import seaborn as sns
import os

import shutil
import sklearn
from sklearn.model_selection import KFold
import gpytorch
from gpytorch.models import ExactGP
from gpytorch.likelihoods import DirichletClassificationLikelihood
from gpytorch.means import ConstantMean
from gpytorch.kernels import ScaleKernel, RBFKernel

from sklearn.metrics import confusion_matrix
import itertools
from sklearn.metrics import precision_score, recall_score, roc_auc_score, matthews_corrcoef, balanced_accuracy_score, confusion_matrix, f1_score, roc_curve,precision_recall_curve, auc

import sys
sys.path.append('../../')
from RF_GSCV import * # RF_GSCV contains the calculate metrics function to get the TP, TN, FP, FN scores 


In [41]:

class DirichletGPModel(ExactGP):
    """
    A Dirichlet Gaussian Process (GP) model for multi-class classification.

    This model uses a Gaussian Process with a Dirichlet prior to handle multi-class classification tasks.
    It extends the ExactGP class from GPyTorch, a library for Gaussian Processes in PyTorch.

    Attributes:
        mean_module (gpytorch.means.ConstantMean): The mean module for the GP, initialized with a constant mean function for each class.
        covar_module (gpytorch.kernels.ScaleKernel): The covariance module for the GP, using a scaled RBF kernel for each class.

    Args:
        train_x (torch.Tensor): Training data features.
        train_y (torch.Tensor): Training data labels.
        likelihood (gpytorch.likelihoods.Likelihood): The likelihood function.
        num_classes (int): The number of classes for the classification task.
    """
    def __init__(self, train_x, train_y, likelihood, num_classes):
        super(DirichletGPModel, self).__init__(train_x, train_y, likelihood)
        self.mean_module = ConstantMean(batch_shape=torch.Size((num_classes,)))
        self.covar_module = ScaleKernel(
            RBFKernel(batch_shape=torch.Size((num_classes,))),
            batch_shape=torch.Size((num_classes,)),
        )

    def forward(self, x):
        """
        Forward pass through the GP model.

        Args:
            x (torch.Tensor): Input data features.

        Returns:
            gpytorch.distributions.MultivariateNormal: The multivariate normal distribution representing the GP posterior.
        """
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)


In [42]:
class Trainer: 
    def __init__(self,model, likelihood, iterations): 
        self.model = model
        self.likelihood = likelihood 
        smoke_test = ('CI' in os.environ)
        self.n_iterations = 2 if smoke_test else iterations
        self.optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
        self.loss_fn = gpytorch.mlls.ExactMarginalLogLikelihood(self.likelihood, self.model)
        
    def train(self, train_x, train_y): 
        self.model.train()
        self.likelihood.train()
        predictions = [] 
        for i in range(self.n_iterations): 
            self.optimizer.zero_grad()
            output = self.model(train_x)
            loss = -self.loss_fn(output, self.likelihood.transformed_targets).sum()
            loss.backward()
            if (i%10==0): 
                print('Iter %d/%d - Loss: %.3f   lengthscale: %.3f   noise: %.3f' % (
                    i + 1, self.n_iterations, loss.item(),
                    self.model.covar_module.base_kernel.lengthscale.mean().item(),
                    self.model.likelihood.second_noise_covar.noise.mean().item()
                ))
             
          
            self.optimizer.step() 


    def predict(self, input): 
        """
        Make predictions using the GP model.

        Args:
            input (torch.Tensor): The input data for making predictions.
        
        Returns:
            dist (gpytorch.distributions.MultivariateNormal): The distribution representing the GP posterior.
            observed_pred (gpytorch.distributions.MultivariateNormal): The predicted distribution considering the likelihood.
            pred_means (torch.Tensor): The means of the predicted distributions.
            class_pred (torch.Tensor): The predicted class labels.
        """
        self.model.eval()
        self.likelihood.eval()

        with gpytorch.settings.fast_pred_var(), torch.no_grad():
            dist = self.model(input)     # output distribution
            pred_means = dist.loc          # means of distributino 
            observed_pred = self.likelihood(self.model(input))    # likelihood predictions mean and var  

            class_pred = self.model(input).loc.max(0)[1]
            
        return dist, observed_pred, pred_means, class_pred
    

    def evaluate(self, x_input, y_true): 
        """
        Evaluate the GP model.

        Args:
            x_input (torch.Tensor): The input data features.
            y_true (torch.Tensor): The true labels for the input data.
        
        Returns:
            y_pred (numpy.ndarray): The predicted class labels.
        """
        y_pred = self.model(x_input).loc.max(0)[1].numpy()
        
        return y_pred

    def gp_results(self, x_input, y_true, plot_title=None): 
        """
        Calculate evaluation metrics and print results.

        Args:
            x_input (torch.Tensor): The input data features.
            y_true (torch.Tensor or numpy.ndarray): The true labels for the input data.
            plot_title (str, optional): The title for the confusion matrix plot.
        
        Returns:
            dict: A dictionary containing evaluation metrics and confusion matrix components.
        """
        y_pred = self.evaluate(x_input, y_true) 
        if isinstance(y_true, torch.Tensor):
            y_true = y_true.numpy().reshape(-1)
        # plot_confusion_matrix(y_true, y_pred, ['0','1'], title=plot_title)
        accuracy = accuracy_score(y_true, y_pred)
        precision = precision_score(y_true, y_pred)
        dist = self.model(x_input)     # get predicted distributions 
        pred_means = dist.loc          # means for predicted dist  

        recall = recall_score(y_true, y_pred)
        tp, tn, fp, fn = calculate_metrics(y_true, y_pred) 
        sensitivity = tp / (tp + fn) 
        specificity = tn / (tn + fp) 
        cm = confusion_matrix(y_true, y_pred)
        print(f'accuracy: {accuracy:.4f}, precision: {precision:.4f}, recall: {recall:.4f}, specificity: {specificity:.4f}, cm: {cm}')
        return {'accuracy': accuracy, 'precision': precision,  'recall':recall, 'specificity':specificity, 'TN': tn, 'FN': fn, 'FP': fp, 'TP': tp }

       

In [4]:
def make_torch_tens(filepath, filename): 
    trainX_df = pd.read_csv(filepath+filename+'_trainX.csv')
    trainy_df = pd.read_csv(filepath+filename+'_train_y.csv')
    testX_df = pd.read_csv(filepath+filename+'_testX.csv')
    testy_df = pd.read_csv(filepath+filename+'_test_y.csv')

    train_x_temp = trainX_df.to_numpy().astype("long")
    test_x_temp = testX_df.to_numpy().astype("long")
    
    train_y_temp = trainy_df.to_numpy().flatten().astype("long")
    test_y_temp = testy_df.to_numpy().flatten().astype("long")
    
    trainX = torch.from_numpy(train_x_temp)
    trainy = torch.from_numpy(train_y_temp)
    testX = torch.from_numpy(test_x_temp)
    testy = torch.from_numpy(test_y_temp)
    print(f'train X: {trainX.shape}, train y: {trainy.shape}, test X: {testX.shape}, test y: {testy.shape}')
    return trainX, trainy, testX, testy
    


In [44]:
def save_results(trainX, trainy, testX, testy, root_name, n_iterations=300, n_samples=100):
    """
    Train a Dirichlet Gaussian Process model and save the training and test performance results.

    This function trains a Dirichlet GP model on the given training data, evaluates it on both the training
    and test data, and saves various performance metrics and predictions to pandas DataFrames.

    Args:
        trainX (torch.Tensor): The training data features.
        trainy (torch.Tensor): The training data labels.
        testX (torch.Tensor): The test data features.
        testy (torch.Tensor): The test data labels.
        root_name (str): The root name used for labeling the model in the results.
        n_iterations (int, optional): The number of training iterations. Default is 300.
        n_samples (int, optional): The number of samples for prediction. Default is 100.

    Returns:
        train_perf_df (pd.DataFrame): DataFrame containing performance metrics and predictions for the training data.
        test_perf_df (pd.DataFrame): DataFrame containing performance metrics and predictions for the test data.
    """
    likelihood = DirichletClassificationLikelihood(trainy, learn_additional_noise=True)
    model = DirichletGPModel(trainX, likelihood.transformed_targets, likelihood, num_classes=likelihood.num_classes)
    # n_iterations = 300
    trainer = Trainer(model, likelihood, n_iterations)
    trainer.train(trainX, trainy) 
  
    train_dist, train_observed_pred, train_pred_means, train_pred  = trainer.predict(trainX)
    train_results = trainer.gp_results(trainX, trainy)
    test_dist, test_observed_pred, test_pred_means, test_pred  = trainer.predict(testX)
    test_results = trainer.gp_results(testX, testy)
    
    train_observed_pred.mean.numpy()
    train_pred_variance2D = train_observed_pred.variance.numpy()
    test_observed_pred.mean.numpy()
    test_pred_variance2D=test_observed_pred.variance.numpy()
    
    train_pred_samples = train_dist.sample(torch.Size((256,))).exp()
    train_probabilities = (train_pred_samples / train_pred_samples.sum(-2, keepdim=True)).mean(0)

    train_prob_stds = (train_pred_samples / train_pred_samples.sum(-2, keepdim=True)).std(0)

    

    test_pred_samples = test_dist.sample(torch.Size((100,))).exp()

    test_probabilities = (test_pred_samples / test_pred_samples.sum(-2, keepdim=True)).mean(0)
    test_prob_stds = (test_pred_samples / test_pred_samples.sum(-2, keepdim=True)).std(0)

 
    train_perf_df = pd.DataFrame()
    test_perf_df = pd.DataFrame()
    train_perf_df['mean_pred_class0'] = train_observed_pred.mean.numpy()[0,]
    train_perf_df['mean_pred_class1'] = train_observed_pred.mean.numpy()[1,]
    train_perf_df['y'] = trainy
    train_perf_df['y_pred'] = train_pred_means.max(0)[1]
    train_perf_df['var_pred_class0']=train_observed_pred.variance.numpy()[0,]
    train_perf_df['var_pred_class1']=train_observed_pred.variance.numpy()[1,]
    train_perf_df['pred_prob_class0'] = train_probabilities.numpy()[0,]
    train_perf_df['pred_prob_class1'] = train_probabilities.numpy()[1,]
    train_perf_df['pred_prob_std_class0'] = train_prob_stds.numpy()[0,]
    train_perf_df['pred_prob_std_class1'] = train_prob_stds.numpy()[1,]
    
    train_perf_df['model'] = f'{root_name}_GP'
    train_perf_df['subset'] = 'train' 
    
    
    test_perf_df['mean_pred_class0'] = test_observed_pred.mean.numpy()[0,]
    test_perf_df['mean_pred_class1'] = test_observed_pred.mean.numpy()[1,]
    test_perf_df['y'] = testy
    test_perf_df['y_pred'] = test_pred_means.max(0)[1]
    test_perf_df['var_pred_class0']=test_observed_pred.variance.numpy()[0,]
    test_perf_df['var_pred_class1']=test_observed_pred.variance.numpy()[1,]
    test_perf_df['pred_prob_class0'] = test_probabilities.numpy()[0,]
    test_perf_df['pred_prob_class1'] = test_probabilities.numpy()[1,]
    test_perf_df['pred_prob_std_class0'] =test_prob_stds.numpy()[0,]
    test_perf_df['pred_prob_std_class1'] = test_prob_stds.numpy()[1,]
    test_perf_df['model'] = f'{root_name}_GP'
    test_perf_df['subset'] = 'test' 
    for k, val in train_results.items(): 
        train_perf_df[k] = val
    for k, val in test_results.items():
        test_perf_df[k] = val
    return train_perf_df, test_perf_df


In [15]:
data_dir = '/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/'
nek_nums = [2,3,5,9]
samplings = ['scaled', 'UNDER', 'SMOTE', 'ADASYN'] 
features = ['moe', 'mfp']
NEK= 'NEK'
for i, n in enumerate(nek_nums):
    nek = str(n)
    print(f'NEK{nek}')
    nek_path= f'{data_dir}NEK{nek}/bind/'
 
    for k, feat in enumerate(features): 
        print()
        for j, samp in enumerate(samplings): 
        
            print(f'NEK{nek} {feat} {samp}')
            file_root = f'NEK{nek}_binding_{feat}_{samp}'
            # print(file_root)
            trainX, trainy, testX, testy = make_torch_tens(nek_path,file_root)
            # train_perf_df = save_results(trainX, trainy, file_root, 'train', n_iterations=300, n_samples=256)
            # test_perf_df = save_results(testX, testy, file_root, 'test', n_iterations=300, n_samples=100)
            train_perf_df, test_perf_df = save_results(trainX, trainy, testX, testy,file_root, n_iterations=300)
            train_perf_df.to_csv(f'{nek_path}{file_root}_train_GP.csv',index=False) 
            test_perf_df.to_csv(f'{nek_path}{file_root}_test_GP.csv',index=False) 
            print(f'{nek_path}{file_root}_train_GP.csv')
            print(f'{nek_path}{file_root}_test_GP.csv')
            print()
            
            
    print()
    
    if n == 2 or n == 9:

        nek_path= f'{data_dir}NEK{nek}/inhib/'
        for k, feat in enumerate(features): 
            print()
            for j, samp in enumerate(samplings): 
                file_root = f'NEK{nek}_inhibition_{feat}_{samp}'
                print(f'NEK{nek} {feat} {samp}')
                trainX, trainy, testX, testy = make_torch_tens(nek_path,file_root)
                train_perf_df, test_perf_df = save_results(trainX, trainy, testX, testy,file_root, n_iterations=300)
                train_perf_df.to_csv(f'{nek_path}{file_root}_train_GP.csv',index=False) 
                test_perf_df.to_csv(f'{nek_path}{file_root}_test_GP.csv',index=False) 
                print(f'{nek_path}{file_root}_train_GP.csv')
                print(f'{nek_path}{file_root}_test_GP.csv')
                print()
        
            
    print()

NEK2

moe scaled
train X: torch.Size([1125, 306]), train y: torch.Size([1125]), test X: torch.Size([283, 306]), test y: torch.Size([283])
Iter 1/300 - Loss: 7.106   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.484   lengthscale: 1.037   noise: 0.809
Iter 21/300 - Loss: 4.490   lengthscale: 1.398   noise: 1.073
Iter 31/300 - Loss: 3.896   lengthscale: 1.910   noise: 1.331
Iter 41/300 - Loss: 3.464   lengthscale: 2.568   noise: 1.482
Iter 51/300 - Loss: 3.290   lengthscale: 3.195   noise: 1.511
Iter 61/300 - Loss: 3.229   lengthscale: 3.633   noise: 1.458
Iter 71/300 - Loss: 3.199   lengthscale: 3.927   noise: 1.362
Iter 81/300 - Loss: 3.174   lengthscale: 4.136   noise: 1.250
Iter 91/300 - Loss: 3.159   lengthscale: 4.295   noise: 1.133
Iter 101/300 - Loss: 3.143   lengthscale: 4.421   noise: 1.020
Iter 111/300 - Loss: 3.140   lengthscale: 4.530   noise: 0.917
Iter 121/300 - Loss: 3.126   lengthscale: 4.622   noise: 0.829
Iter 131/300 - Loss: 3.118   lengthscale: 4.700   nois

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


accuracy: 0.9600, precision: 0.0000, recall: 0.0000, specificity: 1.0000, cm: [[1080    0]
 [  45    0]]
accuracy: 0.9576, precision: 0.0000, recall: 0.0000, specificity: 1.0000, cm: [[271   0]
 [ 12   0]]
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK2/bind/NEK2_binding_moe_scaled_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK2/bind/NEK2_binding_moe_scaled_test_GP.csv

moe UNDER
train X: torch.Size([90, 306]), train y: torch.Size([90]), test X: torch.Size([283, 306]), test y: torch.Size([283])
Iter 1/300 - Loss: 7.107   lengthscale: 0.693   noise: 0.693


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Iter 11/300 - Loss: 5.931   lengthscale: 0.887   noise: 1.297
Iter 21/300 - Loss: 5.439   lengthscale: 1.231   noise: 1.982
Iter 31/300 - Loss: 5.262   lengthscale: 1.918   noise: 2.574
Iter 41/300 - Loss: 5.199   lengthscale: 2.952   noise: 3.007
Iter 51/300 - Loss: 5.173   lengthscale: 4.160   noise: 3.297
Iter 61/300 - Loss: 5.154   lengthscale: 5.325   noise: 3.464
Iter 71/300 - Loss: 5.140   lengthscale: 6.303   noise: 3.524
Iter 81/300 - Loss: 5.133   lengthscale: 7.003   noise: 3.513
Iter 91/300 - Loss: 5.129   lengthscale: 7.395   noise: 3.463
Iter 101/300 - Loss: 5.125   lengthscale: 7.557   noise: 3.393
Iter 111/300 - Loss: 5.122   lengthscale: 7.603   noise: 3.306
Iter 121/300 - Loss: 5.119   lengthscale: 7.615   noise: 3.202
Iter 131/300 - Loss: 5.116   lengthscale: 7.636   noise: 3.083
Iter 141/300 - Loss: 5.114   lengthscale: 7.671   noise: 2.953
Iter 151/300 - Loss: 5.111   lengthscale: 7.708   noise: 2.816
Iter 161/300 - Loss: 5.109   lengthscale: 7.733   noise: 2.675
I



accuracy: 0.7385, precision: 0.1026, recall: 0.6667, specificity: 0.7417, cm: [[201  70]
 [  4   8]]
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK2/bind/NEK2_binding_moe_UNDER_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK2/bind/NEK2_binding_moe_UNDER_test_GP.csv

moe SMOTE
train X: torch.Size([2160, 306]), train y: torch.Size([2160]), test X: torch.Size([283, 306]), test y: torch.Size([283])
Iter 1/300 - Loss: 6.994   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.640   lengthscale: 1.321   noise: 1.278
Iter 21/300 - Loss: 4.908   lengthscale: 2.196   noise: 1.720
Iter 31/300 - Loss: 4.490   lengthscale: 3.216   noise: 1.657
Iter 41/300 - Loss: 4.153   lengthscale: 4.215   noise: 1.189
Iter 51/300 - Loss: 3.897   lengthscale: 5.062   noise: 0.638
Iter 61/300 - Loss: 3.729   lengthscale: 5.660   noise: 0.277
Iter 71/300 - Loss: 3.650   lengthscale: 6.038   noise: 0.125
Iter 81/300 - Loss: 3.607   lengthscale: 6.261   nois



accuracy: 0.9991, precision: 0.9982, recall: 1.0000, specificity: 0.9981, cm: [[1078    2]
 [   0 1080]]
accuracy: 0.9682, precision: 0.8000, recall: 0.3333, specificity: 0.9963, cm: [[270   1]
 [  8   4]]
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK2/bind/NEK2_binding_moe_SMOTE_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK2/bind/NEK2_binding_moe_SMOTE_test_GP.csv

moe ADASYN
train X: torch.Size([2158, 306]), train y: torch.Size([2158]), test X: torch.Size([283, 306]), test y: torch.Size([283])
Iter 1/300 - Loss: 7.026   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.724   lengthscale: 1.321   noise: 1.285
Iter 21/300 - Loss: 4.978   lengthscale: 2.204   noise: 1.793
Iter 31/300 - Loss: 4.539   lengthscale: 3.263   noise: 1.803
Iter 41/300 - Loss: 4.196   lengthscale: 4.296   noise: 1.351
Iter 51/300 - Loss: 3.955   lengthscale: 5.165   noise: 0.756
Iter 61/300 - Loss: 3.779   lengthscale: 5.780   noise: 0.338
Iter 71/3



accuracy: 0.9986, precision: 0.9972, recall: 1.0000, specificity: 0.9972, cm: [[1077    3]
 [   0 1078]]
accuracy: 0.9611, precision: 0.5714, recall: 0.3333, specificity: 0.9889, cm: [[268   3]
 [  8   4]]
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK2/bind/NEK2_binding_moe_ADASYN_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK2/bind/NEK2_binding_moe_ADASYN_test_GP.csv


mfp scaled
train X: torch.Size([1125, 2048]), train y: torch.Size([1125]), test X: torch.Size([283, 2048]), test y: torch.Size([283])
Iter 1/300 - Loss: 7.095   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.467   lengthscale: 1.253   noise: 0.809
Iter 21/300 - Loss: 4.324   lengthscale: 2.070   noise: 1.067
Iter 31/300 - Loss: 3.335   lengthscale: 3.153   noise: 1.261
Iter 41/300 - Loss: 3.187   lengthscale: 4.190   noise: 1.288
Iter 51/300 - Loss: 3.144   lengthscale: 4.945   noise: 1.222
Iter 61/300 - Loss: 3.121   lengthscale: 5.495   noise: 1.114
Iter

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


accuracy: 0.9600, precision: 0.0000, recall: 0.0000, specificity: 1.0000, cm: [[1080    0]
 [  45    0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


accuracy: 0.9576, precision: 0.0000, recall: 0.0000, specificity: 1.0000, cm: [[271   0]
 [ 12   0]]
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK2/bind/NEK2_binding_mfp_scaled_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK2/bind/NEK2_binding_mfp_scaled_test_GP.csv

mfp UNDER
train X: torch.Size([90, 2048]), train y: torch.Size([90]), test X: torch.Size([283, 2048]), test y: torch.Size([283])
Iter 1/300 - Loss: 7.107   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.932   lengthscale: 0.669   noise: 1.297
Iter 21/300 - Loss: 5.441   lengthscale: 0.343   noise: 1.983
Iter 31/300 - Loss: 5.272   lengthscale: 0.242   noise: 2.577
Iter 41/300 - Loss: 5.222   lengthscale: 0.210   noise: 3.023
Iter 51/300 - Loss: 5.208   lengthscale: 0.198   noise: 3.341
Iter 61/300 - Loss: 5.204   lengthscale: 0.194   noise: 3.562
Iter 71/300 - Loss: 5.203   lengthscale: 0.192   noise: 3.710
Iter 81/300 - Loss: 5.202   lengthscale: 0.192   nois



accuracy: 0.9541, precision: 0.0000, recall: 0.0000, specificity: 0.9963, cm: [[270   1]
 [ 12   0]]
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK2/bind/NEK2_binding_mfp_UNDER_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK2/bind/NEK2_binding_mfp_UNDER_test_GP.csv

mfp SMOTE
train X: torch.Size([2160, 2048]), train y: torch.Size([2160]), test X: torch.Size([283, 2048]), test y: torch.Size([283])
Iter 1/300 - Loss: 6.066   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 4.839   lengthscale: 1.312   noise: 0.795
Iter 21/300 - Loss: 4.192   lengthscale: 2.141   noise: 0.884
Iter 31/300 - Loss: 3.667   lengthscale: 2.992   noise: 0.621
Iter 41/300 - Loss: 3.421   lengthscale: 3.663   noise: 0.288
Iter 51/300 - Loss: 3.303   lengthscale: 4.036   noise: 0.115
Iter 61/300 - Loss: 3.254   lengthscale: 4.206   noise: 0.054
Iter 71/300 - Loss: 3.232   lengthscale: 4.268   noise: 0.032
Iter 81/300 - Loss: 3.220   lengthscale: 4.291   no



accuracy: 0.9931, precision: 0.9991, recall: 0.9870, specificity: 0.9991, cm: [[1079    1]
 [  14 1066]]
accuracy: 0.9647, precision: 1.0000, recall: 0.1667, specificity: 1.0000, cm: [[271   0]
 [ 10   2]]
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK2/bind/NEK2_binding_mfp_SMOTE_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK2/bind/NEK2_binding_mfp_SMOTE_test_GP.csv

mfp ADASYN




train X: torch.Size([2168, 2048]), train y: torch.Size([2168]), test X: torch.Size([283, 2048]), test y: torch.Size([283])
Iter 1/300 - Loss: 6.114   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 4.858   lengthscale: 1.307   noise: 0.795
Iter 21/300 - Loss: 4.199   lengthscale: 2.133   noise: 0.880
Iter 31/300 - Loss: 3.689   lengthscale: 2.982   noise: 0.613
Iter 41/300 - Loss: 3.434   lengthscale: 3.654   noise: 0.282
Iter 51/300 - Loss: 3.325   lengthscale: 4.029   noise: 0.112
Iter 61/300 - Loss: 3.265   lengthscale: 4.203   noise: 0.053
Iter 71/300 - Loss: 3.241   lengthscale: 4.267   noise: 0.032
Iter 81/300 - Loss: 3.234   lengthscale: 4.284   noise: 0.023
Iter 91/300 - Loss: 3.221   lengthscale: 4.300   noise: 0.018
Iter 101/300 - Loss: 3.220   lengthscale: 4.323   noise: 0.015
Iter 111/300 - Loss: 3.218   lengthscale: 4.342   noise: 0.013
Iter 121/300 - Loss: 3.214   lengthscale: 4.356   noise: 0.011
Iter 131/300 - Loss: 3.211   lengthscale: 4.368   noise: 0.010
Iter 1



accuracy: 0.9940, precision: 0.9991, recall: 0.9890, specificity: 0.9991, cm: [[1079    1]
 [  12 1076]]
accuracy: 0.9647, precision: 1.0000, recall: 0.1667, specificity: 1.0000, cm: [[271   0]
 [ 10   2]]
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK2/bind/NEK2_binding_mfp_ADASYN_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK2/bind/NEK2_binding_mfp_ADASYN_test_GP.csv



moe scaled




train X: torch.Size([1635, 306]), train y: torch.Size([1635]), test X: torch.Size([409, 306]), test y: torch.Size([409])
Iter 1/300 - Loss: 7.101   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.536   lengthscale: 1.258   noise: 0.809
Iter 21/300 - Loss: 4.581   lengthscale: 2.057   noise: 1.072
Iter 31/300 - Loss: 3.926   lengthscale: 3.125   noise: 1.320
Iter 41/300 - Loss: 3.560   lengthscale: 4.403   noise: 1.448
Iter 51/300 - Loss: 3.426   lengthscale: 5.619   noise: 1.451
Iter 61/300 - Loss: 3.373   lengthscale: 6.576   noise: 1.377
Iter 71/300 - Loss: 3.349   lengthscale: 7.284   noise: 1.263
Iter 81/300 - Loss: 3.325   lengthscale: 7.797   noise: 1.132
Iter 91/300 - Loss: 3.303   lengthscale: 8.191   noise: 0.995
Iter 101/300 - Loss: 3.285   lengthscale: 8.508   noise: 0.860
Iter 111/300 - Loss: 3.274   lengthscale: 8.770   noise: 0.738
Iter 121/300 - Loss: 3.260   lengthscale: 8.992   noise: 0.632
Iter 131/300 - Loss: 3.255   lengthscale: 9.185   noise: 0.544
Iter 141



accuracy: 0.9333, precision: 1.0000, recall: 0.0268, specificity: 1.0000, cm: [[1523    0]
 [ 109    3]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


accuracy: 0.9315, precision: 0.0000, recall: 0.0000, specificity: 1.0000, cm: [[381   0]
 [ 28   0]]
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK2/inhib/NEK2_inhibition_moe_scaled_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK2/inhib/NEK2_inhibition_moe_scaled_test_GP.csv

moe UNDER
train X: torch.Size([224, 306]), train y: torch.Size([224]), test X: torch.Size([409, 306]), test y: torch.Size([409])
Iter 1/300 - Loss: 7.107   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.930   lengthscale: 1.255   noise: 1.297




Iter 21/300 - Loss: 5.435   lengthscale: 2.072   noise: 1.981
Iter 31/300 - Loss: 5.222   lengthscale: 3.129   noise: 2.563
Iter 41/300 - Loss: 5.027   lengthscale: 4.443   noise: 2.911
Iter 51/300 - Loss: 4.891   lengthscale: 5.804   noise: 2.925
Iter 61/300 - Loss: 4.818   lengthscale: 6.903   noise: 2.657
Iter 71/300 - Loss: 4.773   lengthscale: 7.686   noise: 2.238
Iter 81/300 - Loss: 4.740   lengthscale: 8.221   noise: 1.778
Iter 91/300 - Loss: 4.714   lengthscale: 8.580   noise: 1.354
Iter 101/300 - Loss: 4.693   lengthscale: 8.814   noise: 1.009
Iter 111/300 - Loss: 4.677   lengthscale: 8.964   noise: 0.751
Iter 121/300 - Loss: 4.665   lengthscale: 9.059   noise: 0.566
Iter 131/300 - Loss: 4.656   lengthscale: 9.124   noise: 0.436
Iter 141/300 - Loss: 4.649   lengthscale: 9.179   noise: 0.343
Iter 151/300 - Loss: 4.643   lengthscale: 9.234   noise: 0.275
Iter 161/300 - Loss: 4.639   lengthscale: 9.293   noise: 0.225
Iter 171/300 - Loss: 4.636   lengthscale: 9.358   noise: 0.188




accuracy: 1.0000, precision: 1.0000, recall: 1.0000, specificity: 1.0000, cm: [[112   0]
 [  0 112]]
accuracy: 0.8655, precision: 0.3099, recall: 0.7857, specificity: 0.8714, cm: [[332  49]
 [  6  22]]
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK2/inhib/NEK2_inhibition_moe_UNDER_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK2/inhib/NEK2_inhibition_moe_UNDER_test_GP.csv

moe SMOTE
train X: torch.Size([3046, 306]), train y: torch.Size([3046]), test X: torch.Size([409, 306]), test y: torch.Size([409])
Iter 1/300 - Loss: 7.036   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.751   lengthscale: 1.320   noise: 1.287
Iter 21/300 - Loss: 5.004   lengthscale: 2.197   noise: 1.814
Iter 31/300 - Loss: 4.498   lengthscale: 3.271   noise: 1.834
Iter 41/300 - Loss: 4.148   lengthscale: 4.324   noise: 1.350
Iter 51/300 - Loss: 3.907   lengthscale: 5.168   noise: 0.742
Iter 61/300 - Loss: 3.757   lengthscale: 5.782   noise: 0.331
Iter 7



accuracy: 0.9977, precision: 0.9954, recall: 1.0000, specificity: 0.9954, cm: [[1516    7]
 [   0 1523]]
accuracy: 0.9487, precision: 0.6667, recall: 0.5000, specificity: 0.9816, cm: [[374   7]
 [ 14  14]]
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK2/inhib/NEK2_inhibition_moe_SMOTE_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK2/inhib/NEK2_inhibition_moe_SMOTE_test_GP.csv

moe ADASYN
train X: torch.Size([3037, 306]), train y: torch.Size([3037]), test X: torch.Size([409, 306]), test y: torch.Size([409])
Iter 1/300 - Loss: 7.041   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.760   lengthscale: 1.319   noise: 1.288
Iter 21/300 - Loss: 5.030   lengthscale: 2.197   noise: 1.824
Iter 31/300 - Loss: 4.540   lengthscale: 3.273   noise: 1.869
Iter 41/300 - Loss: 4.181   lengthscale: 4.338   noise: 1.400
Iter 51/300 - Loss: 3.946   lengthscale: 5.197   noise: 0.782
Iter 61/300 - Loss: 3.787   lengthscale: 5.816   noise: 0.353
I



accuracy: 0.9974, precision: 0.9947, recall: 1.0000, specificity: 0.9947, cm: [[1515    8]
 [   0 1514]]
accuracy: 0.9511, precision: 0.6818, recall: 0.5357, specificity: 0.9816, cm: [[374   7]
 [ 13  15]]




/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK2/inhib/NEK2_inhibition_moe_ADASYN_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK2/inhib/NEK2_inhibition_moe_ADASYN_test_GP.csv


mfp scaled
train X: torch.Size([1635, 2048]), train y: torch.Size([1635]), test X: torch.Size([409, 2048]), test y: torch.Size([409])
Iter 1/300 - Loss: 7.093   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.516   lengthscale: 1.160   noise: 0.809
Iter 21/300 - Loss: 4.300   lengthscale: 1.929   noise: 1.063
Iter 31/300 - Loss: 3.427   lengthscale: 2.999   noise: 1.227
Iter 41/300 - Loss: 3.300   lengthscale: 4.000   noise: 1.227
Iter 51/300 - Loss: 3.266   lengthscale: 4.685   noise: 1.139
Iter 61/300 - Loss: 3.244   lengthscale: 5.111   noise: 1.012
Iter 71/300 - Loss: 3.226   lengthscale: 5.385   noise: 0.870
Iter 81/300 - Loss: 3.205   lengthscale: 5.580   noise: 0.731
Iter 91/300 - Loss: 3.196   lengthscale: 5.734   noise: 0.605
Iter 101/300 - L



accuracy: 0.9547, precision: 1.0000, recall: 0.3393, specificity: 1.0000, cm: [[1523    0]
 [  74   38]]
accuracy: 0.9438, precision: 1.0000, recall: 0.1786, specificity: 1.0000, cm: [[381   0]
 [ 23   5]]
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK2/inhib/NEK2_inhibition_mfp_scaled_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK2/inhib/NEK2_inhibition_mfp_scaled_test_GP.csv

mfp UNDER
train X: torch.Size([224, 2048]), train y: torch.Size([224]), test X: torch.Size([409, 2048]), test y: torch.Size([409])
Iter 1/300 - Loss: 7.107   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.931   lengthscale: 1.262   noise: 1.297
Iter 21/300 - Loss: 5.439   lengthscale: 2.076   noise: 1.983
Iter 31/300 - Loss: 5.289   lengthscale: 2.943   noise: 2.599
Iter 41/300 - Loss: 5.236   lengthscale: 2.405   noise: 3.114
Iter 51/300 - Loss: 5.211   lengthscale: 1.601   noise: 3.468
Iter 61/300 - Loss: 5.204   lengthscale: 1.137   noise: 3.681




accuracy: 1.0000, precision: 1.0000, recall: 1.0000, specificity: 1.0000, cm: [[112   0]
 [  0 112]]
accuracy: 0.1051, precision: 0.0689, recall: 0.9643, specificity: 0.0420, cm: [[ 16 365]
 [  1  27]]
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK2/inhib/NEK2_inhibition_mfp_UNDER_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK2/inhib/NEK2_inhibition_mfp_UNDER_test_GP.csv

mfp SMOTE
train X: torch.Size([3046, 2048]), train y: torch.Size([3046]), test X: torch.Size([409, 2048]), test y: torch.Size([409])
Iter 1/300 - Loss: 6.287   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 4.932   lengthscale: 1.307   noise: 0.798
Iter 21/300 - Loss: 4.160   lengthscale: 2.123   noise: 0.886
Iter 31/300 - Loss: 3.644   lengthscale: 2.987   noise: 0.613
Iter 41/300 - Loss: 3.432   lengthscale: 3.642   noise: 0.287
Iter 51/300 - Loss: 3.323   lengthscale: 4.039   noise: 0.116
Iter 61/300 - Loss: 3.270   lengthscale: 4.272   noise: 0.054
Iter



accuracy: 0.9944, precision: 0.9941, recall: 0.9947, specificity: 0.9941, cm: [[1514    9]
 [   8 1515]]
accuracy: 0.9682, precision: 0.8947, recall: 0.6071, specificity: 0.9948, cm: [[379   2]
 [ 11  17]]
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK2/inhib/NEK2_inhibition_mfp_SMOTE_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK2/inhib/NEK2_inhibition_mfp_SMOTE_test_GP.csv

mfp ADASYN
train X: torch.Size([3027, 2048]), train y: torch.Size([3027]), test X: torch.Size([409, 2048]), test y: torch.Size([409])
Iter 1/300 - Loss: 6.165   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 4.887   lengthscale: 1.305   noise: 0.796
Iter 21/300 - Loss: 4.164   lengthscale: 2.118   noise: 0.883
Iter 31/300 - Loss: 3.650   lengthscale: 2.980   noise: 0.605
Iter 41/300 - Loss: 3.437   lengthscale: 3.632   noise: 0.281
Iter 51/300 - Loss: 3.329   lengthscale: 4.017   noise: 0.114
Iter 61/300 - Loss: 3.287   lengthscale: 4.228   noise: 0.054



accuracy: 0.9950, precision: 0.9947, recall: 0.9953, specificity: 0.9947, cm: [[1515    8]
 [   7 1497]]
accuracy: 0.9658, precision: 0.8889, recall: 0.5714, specificity: 0.9948, cm: [[379   2]
 [ 12  16]]




/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK2/inhib/NEK2_inhibition_mfp_ADASYN_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK2/inhib/NEK2_inhibition_mfp_ADASYN_test_GP.csv


NEK3

moe scaled
train X: torch.Size([1122, 306]), train y: torch.Size([1122]), test X: torch.Size([282, 306]), test y: torch.Size([282])
Iter 1/300 - Loss: 7.108   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.520   lengthscale: 0.974   noise: 0.809
Iter 21/300 - Loss: 4.553   lengthscale: 1.363   noise: 1.072
Iter 31/300 - Loss: 3.972   lengthscale: 1.889   noise: 1.329
Iter 41/300 - Loss: 3.570   lengthscale: 2.543   noise: 1.481
Iter 51/300 - Loss: 3.410   lengthscale: 3.153   noise: 1.514
Iter 61/300 - Loss: 3.358   lengthscale: 3.579   noise: 1.472
Iter 71/300 - Loss: 3.336   lengthscale: 3.857   noise: 1.394
Iter 81/300 - Loss: 3.319   lengthscale: 4.049   noise: 1.303
Iter 91/300 - Loss: 3.306   lengthscale: 4.192   noise: 1.210
Iter 101/300

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


accuracy: 0.9430, precision: 0.0000, recall: 0.0000, specificity: 1.0000, cm: [[1058    0]
 [  64    0]]
accuracy: 0.9397, precision: 0.0000, recall: 0.0000, specificity: 1.0000, cm: [[265   0]
 [ 17   0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK3/bind/NEK3_binding_moe_scaled_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK3/bind/NEK3_binding_moe_scaled_test_GP.csv

moe UNDER
train X: torch.Size([128, 306]), train y: torch.Size([128]), test X: torch.Size([282, 306]), test y: torch.Size([282])
Iter 1/300 - Loss: 7.107   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.931   lengthscale: 0.693   noise: 1.297
Iter 21/300 - Loss: 5.441   lengthscale: 0.693   noise: 1.982
Iter 31/300 - Loss: 5.272   lengthscale: 0.693   noise: 2.576
Iter 41/300 - Loss: 5.222   lengthscale: 0.693   noise: 3.023
Iter 51/300 - Loss: 5.208   lengthscale: 0.693   noise: 3.341
Iter 61/300 - Loss: 5.204   lengthscale: 0.693   noise: 3.562
Iter 71/300 - Loss: 5.203   lengthscale: 0.693   noise: 3.710
Iter 81/300 - Loss: 5.202   lengthscale: 0.693   noise: 3.804
Iter 91/300 - Loss: 5.202   lengthscale: 0.693   noise: 3.858
Iter 101/300 - Loss: 5.202   l



Iter 1/300 - Loss: 7.051   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.783   lengthscale: 1.316   noise: 1.289
Iter 21/300 - Loss: 5.066   lengthscale: 2.193   noise: 1.845
Iter 31/300 - Loss: 4.647   lengthscale: 3.270   noise: 1.955
Iter 41/300 - Loss: 4.337   lengthscale: 4.331   noise: 1.569
Iter 51/300 - Loss: 4.107   lengthscale: 5.240   noise: 0.949
Iter 61/300 - Loss: 3.942   lengthscale: 5.896   noise: 0.450
Iter 71/300 - Loss: 3.845   lengthscale: 6.317   noise: 0.202
Iter 81/300 - Loss: 3.804   lengthscale: 6.570   noise: 0.107
Iter 91/300 - Loss: 3.780   lengthscale: 6.714   noise: 0.068
Iter 101/300 - Loss: 3.766   lengthscale: 6.791   noise: 0.050
Iter 111/300 - Loss: 3.771   lengthscale: 6.824   noise: 0.040
Iter 121/300 - Loss: 3.750   lengthscale: 6.838   noise: 0.033
Iter 131/300 - Loss: 3.744   lengthscale: 6.842   noise: 0.028
Iter 141/300 - Loss: 3.745   lengthscale: 6.843   noise: 0.025
Iter 151/300 - Loss: 3.747   lengthscale: 6.842   noise: 0.022
Ite



accuracy: 1.0000, precision: 1.0000, recall: 1.0000, specificity: 1.0000, cm: [[1058    0]
 [   0 1058]]
accuracy: 0.8901, precision: 0.1111, recall: 0.1176, specificity: 0.9396, cm: [[249  16]
 [ 15   2]]
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK3/bind/NEK3_binding_moe_SMOTE_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK3/bind/NEK3_binding_moe_SMOTE_test_GP.csv

moe ADASYN
train X: torch.Size([2113, 306]), train y: torch.Size([2113]), test X: torch.Size([282, 306]), test y: torch.Size([282])
Iter 1/300 - Loss: 7.053   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.785   lengthscale: 1.321   noise: 1.289
Iter 21/300 - Loss: 5.070   lengthscale: 2.202   noise: 1.847
Iter 31/300 - Loss: 4.662   lengthscale: 3.277   noise: 1.964
Iter 41/300 - Loss: 4.354   lengthscale: 4.344   noise: 1.583
Iter 51/300 - Loss: 4.117   lengthscale: 5.261   noise: 0.962
Iter 61/300 - Loss: 3.955   lengthscale: 5.921   noise: 0.456
Iter 71/3



accuracy: 1.0000, precision: 1.0000, recall: 1.0000, specificity: 1.0000, cm: [[1058    0]
 [   0 1055]]
accuracy: 0.8865, precision: 0.1053, recall: 0.1176, specificity: 0.9358, cm: [[248  17]
 [ 15   2]]
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK3/bind/NEK3_binding_moe_ADASYN_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK3/bind/NEK3_binding_moe_ADASYN_test_GP.csv


mfp scaled
train X: torch.Size([1122, 2048]), train y: torch.Size([1122]), test X: torch.Size([282, 2048]), test y: torch.Size([282])
Iter 1/300 - Loss: 7.092   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.494   lengthscale: 1.033   noise: 0.809
Iter 21/300 - Loss: 4.407   lengthscale: 1.470   noise: 1.067
Iter 31/300 - Loss: 3.493   lengthscale: 2.082   noise: 1.269
Iter 41/300 - Loss: 3.346   lengthscale: 2.635   noise: 1.317
Iter 51/300 - Loss: 3.299   lengthscale: 3.018   noise: 1.277
Iter 61/300 - Loss: 3.285   lengthscale: 3.486   noise: 1.198
Iter

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


accuracy: 0.9430, precision: 0.0000, recall: 0.0000, specificity: 1.0000, cm: [[1058    0]
 [  64    0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


accuracy: 0.9397, precision: 0.0000, recall: 0.0000, specificity: 1.0000, cm: [[265   0]
 [ 17   0]]
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK3/bind/NEK3_binding_mfp_scaled_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK3/bind/NEK3_binding_mfp_scaled_test_GP.csv

mfp UNDER
train X: torch.Size([128, 2048]), train y: torch.Size([128]), test X: torch.Size([282, 2048]), test y: torch.Size([282])
Iter 1/300 - Loss: 7.107   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.930   lengthscale: 1.256   noise: 1.297
Iter 21/300 - Loss: 5.434   lengthscale: 2.074   noise: 1.981
Iter 31/300 - Loss: 5.255   lengthscale: 3.171   noise: 2.571
Iter 41/300 - Loss: 5.219   lengthscale: 3.761   noise: 3.032
Iter 51/300 - Loss: 5.203   lengthscale: 3.412   noise: 3.375
Iter 61/300 - Loss: 5.197   lengthscale: 2.959   noise: 3.594
Iter 71/300 - Loss: 5.196   lengthscale: 2.785   noise: 3.715
Iter 81/300 - Loss: 5.195   lengthscale: 2.833   no



train X: torch.Size([2116, 2048]), train y: torch.Size([2116]), test X: torch.Size([282, 2048]), test y: torch.Size([282])
Iter 1/300 - Loss: 6.325   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 4.992   lengthscale: 1.313   noise: 0.799
Iter 21/300 - Loss: 4.289   lengthscale: 2.152   noise: 0.909
Iter 31/300 - Loss: 3.808   lengthscale: 3.025   noise: 0.684
Iter 41/300 - Loss: 3.569   lengthscale: 3.725   noise: 0.337
Iter 51/300 - Loss: 3.442   lengthscale: 4.129   noise: 0.136
Iter 61/300 - Loss: 3.396   lengthscale: 4.323   noise: 0.061
Iter 71/300 - Loss: 3.356   lengthscale: 4.397   noise: 0.035
Iter 81/300 - Loss: 3.349   lengthscale: 4.405   noise: 0.024
Iter 91/300 - Loss: 3.332   lengthscale: 4.387   noise: 0.019
Iter 101/300 - Loss: 3.326   lengthscale: 4.376   noise: 0.015
Iter 111/300 - Loss: 3.330   lengthscale: 4.387   noise: 0.013
Iter 121/300 - Loss: 3.325   lengthscale: 4.415   noise: 0.011
Iter 131/300 - Loss: 3.313   lengthscale: 4.442   noise: 0.010
Iter 1



accuracy: 0.9957, precision: 1.0000, recall: 0.9915, specificity: 1.0000, cm: [[1058    0]
 [   9 1049]]
accuracy: 0.9539, precision: 0.7500, recall: 0.3529, specificity: 0.9925, cm: [[263   2]
 [ 11   6]]
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK3/bind/NEK3_binding_mfp_SMOTE_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK3/bind/NEK3_binding_mfp_SMOTE_test_GP.csv

mfp ADASYN
train X: torch.Size([2130, 2048]), train y: torch.Size([2130]), test X: torch.Size([282, 2048]), test y: torch.Size([282])
Iter 1/300 - Loss: 6.319   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 4.982   lengthscale: 1.313   noise: 0.798
Iter 21/300 - Loss: 4.283   lengthscale: 2.150   noise: 0.901
Iter 31/300 - Loss: 3.806   lengthscale: 3.011   noise: 0.665
Iter 41/300 - Loss: 3.573   lengthscale: 3.701   noise: 0.323
Iter 51/300 - Loss: 3.450   lengthscale: 4.094   noise: 0.129
Iter 61/300 - Loss: 3.396   lengthscale: 4.279   noise: 0.059
Iter 71



accuracy: 0.9967, precision: 1.0000, recall: 0.9935, specificity: 1.0000, cm: [[1058    0]
 [   7 1065]]
accuracy: 0.9433, precision: 0.5556, recall: 0.2941, specificity: 0.9849, cm: [[261   4]
 [ 12   5]]
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK3/bind/NEK3_binding_mfp_ADASYN_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK3/bind/NEK3_binding_mfp_ADASYN_test_GP.csv



NEK5

moe scaled
train X: torch.Size([989, 306]), train y: torch.Size([989]), test X: torch.Size([248, 306]), test y: torch.Size([248])
Iter 1/300 - Loss: 7.106   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.561   lengthscale: 1.039   noise: 0.809
Iter 21/300 - Loss: 4.630   lengthscale: 1.341   noise: 1.072
Iter 31/300 - Loss: 4.073   lengthscale: 1.823   noise: 1.329
Iter 41/300 - Loss: 3.693   lengthscale: 2.460   noise: 1.481
Iter 51/300 - Loss: 3.540   lengthscale: 3.063   noise: 1.515
Iter 61/300 - Loss: 3.483   lengthscale: 3.492   noise: 1.474
I

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


accuracy: 0.9221, precision: 0.0000, recall: 0.0000, specificity: 1.0000, cm: [[912   0]
 [ 77   0]]
accuracy: 0.9194, precision: 0.0000, recall: 0.0000, specificity: 1.0000, cm: [[228   0]
 [ 20   0]]
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK5/bind/NEK5_binding_moe_scaled_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK5/bind/NEK5_binding_moe_scaled_test_GP.csv

moe UNDER
train X: torch.Size([154, 306]), train y: torch.Size([154]), test X: torch.Size([248, 306]), test y: torch.Size([248])
Iter 1/300 - Loss: 7.107   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.931   lengthscale: 0.694   noise: 1.297
Iter 21/300 - Loss: 5.441   lengthscale: 0.694   noise: 1.982
Iter 31/300 - Loss: 5.272   lengthscale: 0.695   noise: 2.576
Iter 41/300 - Loss: 5.222   lengthscale: 0.696   noise: 3.023
Iter 51/300 - Loss: 5.208   lengthscale: 0.696   noise: 3.341
Iter 61/300 - Loss: 5.204   lengthscale: 0.697   noise: 3.562
Iter 71/300 - 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Iter 1/300 - Loss: 7.064   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.806   lengthscale: 1.316   noise: 1.289
Iter 21/300 - Loss: 5.116   lengthscale: 2.196   noise: 1.862
Iter 31/300 - Loss: 4.692   lengthscale: 3.287   noise: 2.025
Iter 41/300 - Loss: 4.387   lengthscale: 4.385   noise: 1.678
Iter 51/300 - Loss: 4.160   lengthscale: 5.313   noise: 1.060
Iter 61/300 - Loss: 4.003   lengthscale: 5.977   noise: 0.523
Iter 71/300 - Loss: 3.915   lengthscale: 6.401   noise: 0.237
Iter 81/300 - Loss: 3.877   lengthscale: 6.656   noise: 0.124
Iter 91/300 - Loss: 3.855   lengthscale: 6.802   noise: 0.078
Iter 101/300 - Loss: 3.850   lengthscale: 6.886   noise: 0.056
Iter 111/300 - Loss: 3.819   lengthscale: 6.931   noise: 0.044
Iter 121/300 - Loss: 3.830   lengthscale: 6.955   noise: 0.037
Iter 131/300 - Loss: 3.819   lengthscale: 6.965   noise: 0.031
Iter 141/300 - Loss: 3.822   lengthscale: 6.964   noise: 0.027
Iter 151/300 - Loss: 3.826   lengthscale: 6.962   noise: 0.024
Ite



accuracy: 0.9995, precision: 0.9989, recall: 1.0000, specificity: 0.9989, cm: [[911   1]
 [  0 912]]
accuracy: 0.9274, precision: 0.5500, recall: 0.5500, specificity: 0.9605, cm: [[219   9]
 [  9  11]]
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK5/bind/NEK5_binding_moe_SMOTE_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK5/bind/NEK5_binding_moe_SMOTE_test_GP.csv

moe ADASYN
train X: torch.Size([1831, 306]), train y: torch.Size([1831]), test X: torch.Size([248, 306]), test y: torch.Size([248])
Iter 1/300 - Loss: 7.069   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.816   lengthscale: 1.320   noise: 1.290
Iter 21/300 - Loss: 5.140   lengthscale: 2.204   noise: 1.878
Iter 31/300 - Loss: 4.723   lengthscale: 3.304   noise: 2.071
Iter 41/300 - Loss: 4.429   lengthscale: 4.410   noise: 1.749
Iter 51/300 - Loss: 4.210   lengthscale: 5.340   noise: 1.127
Iter 61/300 - Loss: 4.050   lengthscale: 6.002   noise: 0.565
Iter 71/300 -



accuracy: 0.9995, precision: 0.9989, recall: 1.0000, specificity: 0.9989, cm: [[911   1]
 [  0 919]]
accuracy: 0.9395, precision: 0.6190, recall: 0.6500, specificity: 0.9649, cm: [[220   8]
 [  7  13]]
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK5/bind/NEK5_binding_moe_ADASYN_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK5/bind/NEK5_binding_moe_ADASYN_test_GP.csv


mfp scaled
train X: torch.Size([989, 2048]), train y: torch.Size([989]), test X: torch.Size([248, 2048]), test y: torch.Size([248])
Iter 1/300 - Loss: 7.099   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.548   lengthscale: 1.273   noise: 0.809
Iter 21/300 - Loss: 4.472   lengthscale: 2.110   noise: 1.067
Iter 31/300 - Loss: 3.583   lengthscale: 3.200   noise: 1.262
Iter 41/300 - Loss: 3.434   lengthscale: 4.210   noise: 1.298
Iter 51/300 - Loss: 3.413   lengthscale: 4.886   noise: 1.242
Iter 61/300 - Loss: 3.381   lengthscale: 5.310   noise: 1.147
Iter 71/30



accuracy: 0.9262, precision: 1.0000, recall: 0.0519, specificity: 1.0000, cm: [[912   0]
 [ 73   4]]
accuracy: 0.9234, precision: 1.0000, recall: 0.0500, specificity: 1.0000, cm: [[228   0]
 [ 19   1]]
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK5/bind/NEK5_binding_mfp_scaled_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK5/bind/NEK5_binding_mfp_scaled_test_GP.csv

mfp UNDER
train X: torch.Size([154, 2048]), train y: torch.Size([154]), test X: torch.Size([248, 2048]), test y: torch.Size([248])
Iter 1/300 - Loss: 7.107   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.930   lengthscale: 1.221   noise: 1.297
Iter 21/300 - Loss: 5.427   lengthscale: 2.028   noise: 1.980
Iter 31/300 - Loss: 5.242   lengthscale: 3.128   noise: 2.560
Iter 41/300 - Loss: 5.207   lengthscale: 3.774   noise: 3.001
Iter 51/300 - Loss: 5.193   lengthscale: 3.604   noise: 3.320
Iter 61/300 - Loss: 5.186   lengthscale: 3.228   noise: 3.518
Iter 71/300 



accuracy: 1.0000, precision: 1.0000, recall: 1.0000, specificity: 1.0000, cm: [[77  0]
 [ 0 77]]
accuracy: 0.5484, precision: 0.0965, recall: 0.5500, specificity: 0.5482, cm: [[125 103]
 [  9  11]]
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK5/bind/NEK5_binding_mfp_UNDER_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK5/bind/NEK5_binding_mfp_UNDER_test_GP.csv

mfp SMOTE
train X: torch.Size([1824, 2048]), train y: torch.Size([1824]), test X: torch.Size([248, 2048]), test y: torch.Size([248])
Iter 1/300 - Loss: 6.380   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.035   lengthscale: 1.310   noise: 0.842
Iter 21/300 - Loss: 4.321   lengthscale: 2.145   noise: 0.939
Iter 31/300 - Loss: 3.835   lengthscale: 3.027   noise: 0.714
Iter 41/300 - Loss: 3.593   lengthscale: 3.736   noise: 0.360
Iter 51/300 - Loss: 3.477   lengthscale: 4.153   noise: 0.147
Iter 61/300 - Loss: 3.427   lengthscale: 4.370   noise: 0.066
Iter 71/300 - Lo



accuracy: 0.9951, precision: 0.9989, recall: 0.9912, specificity: 0.9989, cm: [[911   1]
 [  8 904]]
accuracy: 0.9556, precision: 0.8462, recall: 0.5500, specificity: 0.9912, cm: [[226   2]
 [  9  11]]
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK5/bind/NEK5_binding_mfp_SMOTE_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK5/bind/NEK5_binding_mfp_SMOTE_test_GP.csv

mfp ADASYN
train X: torch.Size([1805, 2048]), train y: torch.Size([1805]), test X: torch.Size([248, 2048]), test y: torch.Size([248])
Iter 1/300 - Loss: 6.427   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.048   lengthscale: 1.308   noise: 0.850
Iter 21/300 - Loss: 4.335   lengthscale: 2.134   noise: 0.952
Iter 31/300 - Loss: 3.851   lengthscale: 3.001   noise: 0.748
Iter 41/300 - Loss: 3.628   lengthscale: 3.698   noise: 0.391
Iter 51/300 - Loss: 3.505   lengthscale: 4.107   noise: 0.162
Iter 61/300 - Loss: 3.418   lengthscale: 4.316   noise: 0.072
Iter 71/300



accuracy: 0.9961, precision: 0.9989, recall: 0.9933, specificity: 0.9989, cm: [[911   1]
 [  6 887]]
accuracy: 0.9516, precision: 0.8333, recall: 0.5000, specificity: 0.9912, cm: [[226   2]
 [ 10  10]]
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK5/bind/NEK5_binding_mfp_ADASYN_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK5/bind/NEK5_binding_mfp_ADASYN_test_GP.csv



NEK9

moe scaled
train X: torch.Size([1126, 306]), train y: torch.Size([1126]), test X: torch.Size([283, 306]), test y: torch.Size([283])
Iter 1/300 - Loss: 7.109   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.493   lengthscale: 0.955   noise: 0.809
Iter 21/300 - Loss: 4.501   lengthscale: 1.361   noise: 1.073
Iter 31/300 - Loss: 3.896   lengthscale: 1.864   noise: 1.330
Iter 41/300 - Loss: 3.469   lengthscale: 2.510   noise: 1.478
Iter 51/300 - Loss: 3.305   lengthscale: 3.118   noise: 1.501
Iter 61/300 - Loss: 3.238   lengthscale: 3.545   noise: 1.442
Ite

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


accuracy: 0.9574, precision: 0.0000, recall: 0.0000, specificity: 1.0000, cm: [[1078    0]
 [  48    0]]
accuracy: 0.9541, precision: 0.0000, recall: 0.0000, specificity: 1.0000, cm: [[270   0]
 [ 13   0]]
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK9/bind/NEK9_binding_moe_scaled_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK9/bind/NEK9_binding_moe_scaled_test_GP.csv

moe UNDER


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


train X: torch.Size([96, 306]), train y: torch.Size([96]), test X: torch.Size([283, 306]), test y: torch.Size([283])
Iter 1/300 - Loss: 7.107   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.929   lengthscale: 1.276   noise: 1.297
Iter 21/300 - Loss: 5.434   lengthscale: 2.092   noise: 1.980
Iter 31/300 - Loss: 5.254   lengthscale: 3.147   noise: 2.565
Iter 41/300 - Loss: 5.178   lengthscale: 4.463   noise: 2.979
Iter 51/300 - Loss: 5.131   lengthscale: 5.887   noise: 3.219
Iter 61/300 - Loss: 5.104   lengthscale: 7.173   noise: 3.311
Iter 71/300 - Loss: 5.090   lengthscale: 8.153   noise: 3.311
Iter 81/300 - Loss: 5.083   lengthscale: 8.839   noise: 3.270
Iter 91/300 - Loss: 5.078   lengthscale: 9.309   noise: 3.215
Iter 101/300 - Loss: 5.074   lengthscale: 9.636   noise: 3.157
Iter 111/300 - Loss: 5.072   lengthscale: 9.870   noise: 3.098
Iter 121/300 - Loss: 5.069   lengthscale: 10.046   noise: 3.037
Iter 131/300 - Loss: 5.067   lengthscale: 10.183   noise: 2.973
Iter 141/3



accuracy: 1.0000, precision: 1.0000, recall: 1.0000, specificity: 1.0000, cm: [[48  0]
 [ 0 48]]
accuracy: 0.6290, precision: 0.0577, recall: 0.4615, specificity: 0.6370, cm: [[172  98]
 [  7   6]]
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK9/bind/NEK9_binding_moe_UNDER_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK9/bind/NEK9_binding_moe_UNDER_test_GP.csv

moe SMOTE
train X: torch.Size([2156, 306]), train y: torch.Size([2156]), test X: torch.Size([283, 306]), test y: torch.Size([283])
Iter 1/300 - Loss: 7.031   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.732   lengthscale: 1.323   noise: 1.285
Iter 21/300 - Loss: 4.992   lengthscale: 2.208   noise: 1.794
Iter 31/300 - Loss: 4.553   lengthscale: 3.272   noise: 1.810
Iter 41/300 - Loss: 4.214   lengthscale: 4.319   noise: 1.358
Iter 51/300 - Loss: 3.960   lengthscale: 5.210   noise: 0.758
Iter 61/300 - Loss: 3.788   lengthscale: 5.851   noise: 0.337
Iter 71/300 - Loss



accuracy: 0.9995, precision: 0.9991, recall: 1.0000, specificity: 0.9991, cm: [[1077    1]
 [   0 1078]]
accuracy: 0.9435, precision: 0.2000, recall: 0.0769, specificity: 0.9852, cm: [[266   4]
 [ 12   1]]
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK9/bind/NEK9_binding_moe_SMOTE_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK9/bind/NEK9_binding_moe_SMOTE_test_GP.csv

moe ADASYN
train X: torch.Size([2164, 306]), train y: torch.Size([2164]), test X: torch.Size([283, 306]), test y: torch.Size([283])
Iter 1/300 - Loss: 7.035   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.743   lengthscale: 1.321   noise: 1.286
Iter 21/300 - Loss: 5.005   lengthscale: 2.204   noise: 1.807
Iter 31/300 - Loss: 4.575   lengthscale: 3.268   noise: 1.839
Iter 41/300 - Loss: 4.229   lengthscale: 4.320   noise: 1.390
Iter 51/300 - Loss: 3.977   lengthscale: 5.219   noise: 0.779
Iter 61/300 - Loss: 3.803   lengthscale: 5.870   noise: 0.347
Iter 71/3



accuracy: 0.9995, precision: 0.9991, recall: 1.0000, specificity: 0.9991, cm: [[1077    1]
 [   0 1086]]
accuracy: 0.9435, precision: 0.2000, recall: 0.0769, specificity: 0.9852, cm: [[266   4]
 [ 12   1]]
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK9/bind/NEK9_binding_moe_ADASYN_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK9/bind/NEK9_binding_moe_ADASYN_test_GP.csv


mfp scaled
train X: torch.Size([1126, 2048]), train y: torch.Size([1126]), test X: torch.Size([283, 2048]), test y: torch.Size([283])
Iter 1/300 - Loss: 7.098   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.468   lengthscale: 1.266   noise: 0.809
Iter 21/300 - Loss: 4.339   lengthscale: 2.087   noise: 1.067
Iter 31/300 - Loss: 3.367   lengthscale: 3.172   noise: 1.263
Iter 41/300 - Loss: 3.206   lengthscale: 4.226   noise: 1.292
Iter 51/300 - Loss: 3.167   lengthscale: 5.026   noise: 1.229
Iter 61/300 - Loss: 3.150   lengthscale: 5.606   noise: 1.126
Iter

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


accuracy: 0.9574, precision: 0.0000, recall: 0.0000, specificity: 1.0000, cm: [[1078    0]
 [  48    0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


accuracy: 0.9541, precision: 0.0000, recall: 0.0000, specificity: 1.0000, cm: [[270   0]
 [ 13   0]]
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK9/bind/NEK9_binding_mfp_scaled_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK9/bind/NEK9_binding_mfp_scaled_test_GP.csv

mfp UNDER
train X: torch.Size([96, 2048]), train y: torch.Size([96]), test X: torch.Size([283, 2048]), test y: torch.Size([283])
Iter 1/300 - Loss: 7.107   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.931   lengthscale: 0.739   noise: 1.297
Iter 21/300 - Loss: 5.441   lengthscale: 0.509   noise: 1.982
Iter 31/300 - Loss: 5.272   lengthscale: 0.436   noise: 2.576
Iter 41/300 - Loss: 5.222   lengthscale: 0.410   noise: 3.023
Iter 51/300 - Loss: 5.208   lengthscale: 0.400   noise: 3.341
Iter 61/300 - Loss: 5.204   lengthscale: 0.396   noise: 3.562
Iter 71/300 - Loss: 5.203   lengthscale: 0.395   noise: 3.710
Iter 81/300 - Loss: 5.202   lengthscale: 0.394   nois

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


accuracy: 1.0000, precision: 1.0000, recall: 1.0000, specificity: 1.0000, cm: [[48  0]
 [ 0 48]]
accuracy: 0.9541, precision: 0.0000, recall: 0.0000, specificity: 1.0000, cm: [[270   0]
 [ 13   0]]
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK9/bind/NEK9_binding_mfp_UNDER_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK9/bind/NEK9_binding_mfp_UNDER_test_GP.csv

mfp SMOTE
train X: torch.Size([2156, 2048]), train y: torch.Size([2156]), test X: torch.Size([283, 2048]), test y: torch.Size([283])
Iter 1/300 - Loss: 6.135   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 4.889   lengthscale: 1.311   noise: 0.796
Iter 21/300 - Loss: 4.235   lengthscale: 2.145   noise: 0.887
Iter 31/300 - Loss: 3.715   lengthscale: 3.001   noise: 0.628
Iter 41/300 - Loss: 3.471   lengthscale: 3.682   noise: 0.292
Iter 51/300 - Loss: 3.361   lengthscale: 4.067   noise: 0.116
Iter 61/300 - Loss: 3.310   lengthscale: 4.250   noise: 0.054
Iter 71/300 - Lo



accuracy: 0.9944, precision: 0.9981, recall: 0.9907, specificity: 0.9981, cm: [[1076    2]
 [  10 1068]]
accuracy: 0.9505, precision: 0.3333, recall: 0.0769, specificity: 0.9926, cm: [[268   2]
 [ 12   1]]
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK9/bind/NEK9_binding_mfp_SMOTE_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK9/bind/NEK9_binding_mfp_SMOTE_test_GP.csv

mfp ADASYN
train X: torch.Size([2166, 2048]), train y: torch.Size([2166]), test X: torch.Size([283, 2048]), test y: torch.Size([283])
Iter 1/300 - Loss: 6.103   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 4.872   lengthscale: 1.308   noise: 0.795
Iter 21/300 - Loss: 4.220   lengthscale: 2.137   noise: 0.880
Iter 31/300 - Loss: 3.710   lengthscale: 2.989   noise: 0.613
Iter 41/300 - Loss: 3.453   lengthscale: 3.667   noise: 0.282
Iter 51/300 - Loss: 3.340   lengthscale: 4.048   noise: 0.112
Iter 61/300 - Loss: 3.290   lengthscale: 4.227   noise: 0.053
Iter 71



accuracy: 0.9940, precision: 0.9981, recall: 0.9899, specificity: 0.9981, cm: [[1076    2]
 [  11 1077]]
accuracy: 0.9505, precision: 0.3333, recall: 0.0769, specificity: 0.9926, cm: [[268   2]
 [ 12   1]]
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK9/bind/NEK9_binding_mfp_ADASYN_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK9/bind/NEK9_binding_mfp_ADASYN_test_GP.csv



moe scaled
train X: torch.Size([313, 306]), train y: torch.Size([313]), test X: torch.Size([80, 306]), test y: torch.Size([80])
Iter 1/300 - Loss: 7.107   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.617   lengthscale: 0.898   noise: 0.809
Iter 21/300 - Loss: 4.738   lengthscale: 1.241   noise: 1.073
Iter 31/300 - Loss: 4.267   lengthscale: 1.742   noise: 1.333
Iter 41/300 - Loss: 3.961   lengthscale: 2.382   noise: 1.508
Iter 51/300 - Loss: 3.754   lengthscale: 3.116   noise: 1.573
Iter 61/300 - Loss: 3.662   lengthscale: 3.758   noise: 1.543
Iter 71/3



Iter 41/300 - Loss: 5.222   lengthscale: 0.693   noise: 3.023
Iter 51/300 - Loss: 5.208   lengthscale: 0.693   noise: 3.341
Iter 61/300 - Loss: 5.204   lengthscale: 0.693   noise: 3.562
Iter 71/300 - Loss: 5.203   lengthscale: 0.693   noise: 3.710
Iter 81/300 - Loss: 5.202   lengthscale: 0.693   noise: 3.804
Iter 91/300 - Loss: 5.202   lengthscale: 0.693   noise: 3.858
Iter 101/300 - Loss: 5.202   lengthscale: 0.693   noise: 3.885
Iter 111/300 - Loss: 5.202   lengthscale: 0.693   noise: 3.897
Iter 121/300 - Loss: 5.202   lengthscale: 0.693   noise: 3.899
Iter 131/300 - Loss: 5.202   lengthscale: 0.693   noise: 3.898
Iter 141/300 - Loss: 5.202   lengthscale: 0.693   noise: 3.896
Iter 151/300 - Loss: 5.202   lengthscale: 0.693   noise: 3.895
Iter 161/300 - Loss: 5.202   lengthscale: 0.693   noise: 3.894
Iter 171/300 - Loss: 5.202   lengthscale: 0.693   noise: 3.893
Iter 181/300 - Loss: 5.202   lengthscale: 0.693   noise: 3.893
Iter 191/300 - Loss: 5.202   lengthscale: 0.693   noise: 3.89



train X: torch.Size([560, 306]), train y: torch.Size([560]), test X: torch.Size([80, 306]), test y: torch.Size([80])
Iter 1/300 - Loss: 7.072   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.831   lengthscale: 1.321   noise: 1.292
Iter 21/300 - Loss: 5.165   lengthscale: 2.202   noise: 1.892
Iter 31/300 - Loss: 4.714   lengthscale: 3.316   noise: 2.103
Iter 41/300 - Loss: 4.383   lengthscale: 4.463   noise: 1.785
Iter 51/300 - Loss: 4.107   lengthscale: 5.480   noise: 1.153
Iter 61/300 - Loss: 3.898   lengthscale: 6.283   noise: 0.570
Iter 71/300 - Loss: 3.766   lengthscale: 6.871   noise: 0.250
Iter 81/300 - Loss: 3.699   lengthscale: 7.290   noise: 0.124
Iter 91/300 - Loss: 3.666   lengthscale: 7.584   noise: 0.076
Iter 101/300 - Loss: 3.648   lengthscale: 7.789   noise: 0.054
Iter 111/300 - Loss: 3.638   lengthscale: 7.931   noise: 0.042
Iter 121/300 - Loss: 3.630   lengthscale: 8.030   noise: 0.035
Iter 131/300 - Loss: 3.625   lengthscale: 8.102   noise: 0.029
Iter 141/300



accuracy: 0.9946, precision: 0.9894, recall: 1.0000, specificity: 0.9893, cm: [[277   3]
 [  0 280]]
accuracy: 0.9500, precision: 0.8571, recall: 0.6667, specificity: 0.9859, cm: [[70  1]
 [ 3  6]]
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK9/inhib/NEK9_inhibition_moe_SMOTE_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK9/inhib/NEK9_inhibition_moe_SMOTE_test_GP.csv

moe ADASYN
train X: torch.Size([560, 306]), train y: torch.Size([560]), test X: torch.Size([80, 306]), test y: torch.Size([80])
Iter 1/300 - Loss: 7.067   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.808   lengthscale: 1.319   noise: 1.291
Iter 21/300 - Loss: 5.119   lengthscale: 2.198   noise: 1.868
Iter 31/300 - Loss: 4.698   lengthscale: 3.290   noise: 2.032
Iter 41/300 - Loss: 4.385   lengthscale: 4.395   noise: 1.693
Iter 51/300 - Loss: 4.113   lengthscale: 5.384   noise: 1.070
Iter 61/300 - Loss: 3.909   lengthscale: 6.167   noise: 0.518
Iter 71/300 -



accuracy: 0.9500, precision: 0.8571, recall: 0.6667, specificity: 0.9859, cm: [[70  1]
 [ 3  6]]
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK9/inhib/NEK9_inhibition_moe_ADASYN_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK9/inhib/NEK9_inhibition_moe_ADASYN_test_GP.csv


mfp scaled
train X: torch.Size([313, 2048]), train y: torch.Size([313]), test X: torch.Size([80, 2048]), test y: torch.Size([80])
Iter 1/300 - Loss: 7.095   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.598   lengthscale: 1.247   noise: 0.808
Iter 21/300 - Loss: 4.618   lengthscale: 2.062   noise: 1.068
Iter 31/300 - Loss: 3.798   lengthscale: 3.169   noise: 1.276
Iter 41/300 - Loss: 3.570   lengthscale: 4.257   noise: 1.311
Iter 51/300 - Loss: 3.520   lengthscale: 4.975   noise: 1.232
Iter 61/300 - Loss: 3.493   lengthscale: 5.395   noise: 1.100
Iter 71/300 - Loss: 3.472   lengthscale: 5.653   noise: 0.947
Iter 81/300 - Loss: 3.454   lengthscale: 5.831 



accuracy: 0.9681, precision: 0.9600, recall: 0.7273, specificity: 0.9964, cm: [[279   1]
 [  9  24]]
accuracy: 0.9375, precision: 0.8333, recall: 0.5556, specificity: 0.9859, cm: [[70  1]
 [ 4  5]]
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK9/inhib/NEK9_inhibition_mfp_scaled_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK9/inhib/NEK9_inhibition_mfp_scaled_test_GP.csv

mfp UNDER
train X: torch.Size([66, 2048]), train y: torch.Size([66]), test X: torch.Size([80, 2048]), test y: torch.Size([80])
Iter 1/300 - Loss: 7.107   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.943   lengthscale: 1.000   noise: 1.298
Iter 21/300 - Loss: 5.456   lengthscale: 1.269   noise: 1.995
Iter 31/300 - Loss: 5.292   lengthscale: 1.545   noise: 2.620
Iter 41/300 - Loss: 5.240   lengthscale: 1.154   noise: 3.116
Iter 51/300 - Loss: 5.225   lengthscale: 0.866   noise: 3.485
Iter 61/300 - Loss: 5.220   lengthscale: 0.711   noise: 3.755
Iter 71/300 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


accuracy: 0.9848, precision: 0.9706, recall: 1.0000, specificity: 0.9697, cm: [[32  1]
 [ 0 33]]
accuracy: 0.8875, precision: 0.0000, recall: 0.0000, specificity: 1.0000, cm: [[71  0]
 [ 9  0]]
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK9/inhib/NEK9_inhibition_mfp_UNDER_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK9/inhib/NEK9_inhibition_mfp_UNDER_test_GP.csv

mfp SMOTE
train X: torch.Size([560, 2048]), train y: torch.Size([560]), test X: torch.Size([80, 2048]), test y: torch.Size([80])
Iter 1/300 - Loss: 6.467   lengthscale: 0.693   noise: 0.693
Iter 11/300 - Loss: 5.084   lengthscale: 1.316   noise: 0.903
Iter 21/300 - Loss: 4.371   lengthscale: 2.143   noise: 0.974
Iter 31/300 - Loss: 3.829   lengthscale: 3.054   noise: 0.738
Iter 41/300 - Loss: 3.573   lengthscale: 3.824   noise: 0.374
Iter 51/300 - Loss: 3.443   lengthscale: 4.320   noise: 0.152
Iter 61/300 - Loss: 3.382   lengthscale: 4.631   noise: 0.068
Iter 71/300 - Lo



accuracy: 0.9911, precision: 0.9893, recall: 0.9929, specificity: 0.9893, cm: [[277   3]
 [  2 278]]
accuracy: 0.9500, precision: 0.8571, recall: 0.6667, specificity: 0.9859, cm: [[70  1]
 [ 3  6]]
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK9/inhib/NEK9_inhibition_mfp_SMOTE_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK9/inhib/NEK9_inhibition_mfp_SMOTE_test_GP.csv

mfp ADASYN
train X: torch.Size([555, 2048]), train y: torch.Size([555]), test X: torch.Size([80, 2048]), test y: torch.Size([80])
Iter 1/300 - Loss: 6.282   lengthscale: 0.693   noise: 0.693




Iter 11/300 - Loss: 4.992   lengthscale: 1.312   noise: 0.798
Iter 21/300 - Loss: 4.359   lengthscale: 2.129   noise: 0.917
Iter 31/300 - Loss: 3.855   lengthscale: 3.039   noise: 0.697
Iter 41/300 - Loss: 3.606   lengthscale: 3.806   noise: 0.346
Iter 51/300 - Loss: 3.485   lengthscale: 4.278   noise: 0.138
Iter 61/300 - Loss: 3.431   lengthscale: 4.542   noise: 0.062
Iter 71/300 - Loss: 3.409   lengthscale: 4.690   noise: 0.035
Iter 81/300 - Loss: 3.397   lengthscale: 4.775   noise: 0.025
Iter 91/300 - Loss: 3.391   lengthscale: 4.828   noise: 0.019
Iter 101/300 - Loss: 3.386   lengthscale: 4.868   noise: 0.016
Iter 111/300 - Loss: 3.383   lengthscale: 4.903   noise: 0.013
Iter 121/300 - Loss: 3.380   lengthscale: 4.938   noise: 0.012
Iter 131/300 - Loss: 3.378   lengthscale: 4.975   noise: 0.010
Iter 141/300 - Loss: 3.376   lengthscale: 5.013   noise: 0.009
Iter 151/300 - Loss: 3.374   lengthscale: 5.050   noise: 0.008
Iter 161/300 - Loss: 3.372   lengthscale: 5.085   noise: 0.008
I



accuracy: 0.9946, precision: 0.9892, recall: 1.0000, specificity: 0.9893, cm: [[277   3]
 [  0 275]]
accuracy: 0.9500, precision: 0.8571, recall: 0.6667, specificity: 0.9859, cm: [[70  1]
 [ 3  6]]
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK9/inhib/NEK9_inhibition_mfp_ADASYN_train_GP.csv
/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/NEK9/inhib/NEK9_inhibition_mfp_ADASYN_test_GP.csv




# Adding more metrics to csv results files 

In [45]:
from sklearn.metrics import precision_score, recall_score, roc_auc_score, matthews_corrcoef, balanced_accuracy_score, confusion_matrix
import sys
sys.path.append('/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/')
from RF_atomver import *

In [46]:
def add_cm(filepath, filename): 
    # print(filepath)
    # print(filename)
    df = pd.read_csv(filepath+filename)
    
    true_labels = df['y'] 
    predictions = df['y_pred']
    cm = confusion_matrix(true_labels, predictions )
    cm_flattened = cm.flatten().tolist()
    df['cm'] = [cm_flattened]* len(df)
    df['prediction_type'] = df.apply(lambda x: prediction_type(x['y'], x['y_pred']), axis=1)
    return df
        

In [None]:
data_dir = '/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/'
gp_result_path = '/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/GP/GP_results/'
# capstone/atom2024/atom2024/notebooks/NEK/GP/GP_results/NEK2_binding_mfp_ADASYN_test_GP.csv
nek_nums = [2,3,5,9]
samplings = ['scaled', 'UNDER', 'SMOTE', 'ADASYN'] 
features = ['moe', 'mfp']
NEK= 'NEK'
for i, n in enumerate(nek_nums):
    nek = str(n)
    print(f'NEK{nek}')
    nek_path= f'{data_dir}NEK{nek}/bind/'
    
    for k, feat in enumerate(features): 
        print()
        for j, samp in enumerate(samplings): 
        
            print(f'NEK{nek} {feat} {samp}')
            file_root = f'NEK{nek}_binding_{feat}_{samp}'
            train_file = f'{file_root}_train_GP.csv'
            test_file = f'{file_root}_test_GP.csv'

            train_df = add_cm(gp_result_path,train_file)
            test_df = add_cm(gp_result_path,test_file)
            train_df.to_csv(f'{gp_result_path}{file_root}_train_GP.csv',index=False) 
            test_df.to_csv(f'{gp_result_path}{file_root}_test_GP.csv',index=False) 
            
            print()
            
            
    print()
    
    if n == 2 or n == 9:

        nek_path= f'{data_dir}NEK{nek}/inhib/'
        for k, feat in enumerate(features): 
            print()
            for j, samp in enumerate(samplings): 
                file_root = f'NEK{nek}_inhibition_{feat}_{samp}'
                print(f'NEK{nek} {feat} {samp}')
                print(f'NEK{nek} {feat} {samp}')
                file_root = f'NEK{nek}_inhibition_{feat}_{samp}'
                train_file = f'{file_root}_train_GP.csv'
                test_file = f'{file_root}_test_GP.csv'
    
                train_df = add_cm(gp_result_path,train_file)
                test_df = add_cm(gp_result_path,test_file)
                train_df.to_csv(f'{gp_result_path}{file_root}_train_GP.csv',index=False) 
                test_df.to_csv(f'{gp_result_path}{file_root}_test_GP.csv',index=False) 
                print()
        
            
    print()

In [39]:

# source = '/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/GP/'
# dest = '/Users/jayceepang/msse/capstone/atom2024/atom2024/notebooks/NEK/GP/GP_results/'

# if not os.path.exists(source):
#     os.makedirs(dest)

# for root, dirs, files in os.walk(source):
#     for file in files:
#         if file.endswith('_GP.csv'):
#             source_file = os.path.join(root, file)
#             dest_file = os.path.join(dest, file)
#             shutil.move(source_file,dest_file)
#             print(f"Moved: {source_file} to {dest_file}")


