# Lending Tree Loan Status: SMOTE
## Classification - K-Nearest Neighbors HPO GPU

## Set Up Environment, Read Data, Split Train/Test Sets

In [None]:
# Mount Google Drive at /content/drive so later cells can read and write files there
from google.colab import drive 
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Work from the Drive folder that will hold the RAPIDS install helper repository
%cd /content/drive/MyDrive/RAPIDS/

/content/drive/MyDrive/RAPIDS


In [None]:
# Clone the RAPIDS-Colab install helpers into the current directory, then run
# the bundled env-check script to see if the attached GPU is compatible
!git clone https://github.com/rapidsai/rapidsai-csp-utils.git
!python rapidsai-csp-utils/colab/env-check.py

In [None]:
# Update the Colab environment and restart the kernel
!bash rapidsai-csp-utils/colab/update_gcc.sh
import os
# Terminate the interpreter process immediately; this is how the cell
# triggers the kernel restart mentioned above
os._exit(00)

In [None]:
# Install CondaColab; the installer restarts the kernel when it finishes
# (see the "Restarting kernel..." line in the cell output)
import condacolab
condacolab.install()

⏬ Downloading https://github.com/jaimergp/miniforge/releases/latest/download/Mambaforge-colab-Linux-x86_64.sh...
📦 Installing...
📌 Adjusting configuration...
🩹 Patching environment...
⏲ Done in 0:00:29
🔁 Restarting kernel...


In [None]:
# Re-import after the kernel restart and check that the environment is ready
# for the RAPIDS install
import condacolab
condacolab.check()

✨🍰✨ Everything looks OK!


In [None]:
# Install RAPIDS using the 'stable' release
!python rapidsai-csp-utils/colab/install_rapids.py stable
import os
# Point the process at the CUDA NVVM library, the libdevice directory and the
# conda prefix (presumably read by numba / RAPIDS at import time -- TODO confirm)
os.environ['NUMBAPRO_NVVM'] = '/usr/local/cuda/nvvm/lib64/libnvvm.so'
os.environ['NUMBAPRO_LIBDEVICE'] = '/usr/local/cuda/nvvm/libdevice/'
os.environ['CONDA_PREFIX'] = '/usr/local'

In [None]:
# Install/import dependencies
!pip install optuna
!pip install dask_optuna
import os
import warnings
import random
import numpy as np
import cupy as cp
from cupy import asnumpy
import dask
from dask.distributed import Client, wait
from dask.diagnostics import ProgressBar
from dask.utils import parse_bytes
from dask_cuda import LocalCUDACluster
import dask_cudf
import dask_optuna
import urllib.request
from contextlib import contextmanager
import time
from datetime import datetime, timedelta
from timeit import default_timer as timer
import cudf
import cumlt
from cuml.neighbors import KNeighborsClassifier
import optuna
from optuna import Trial
optuna.logging.set_verbosity(optuna.logging.WARNING) 
import joblib
import pickle
import pandas as pd
import sklearn
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score
from sklearn.metrics import recall_score, precision_score
from sklearn.metrics import classification_report, confusion_matrix
import plotly.offline as py
import matplotlib.pyplot as plt
import seaborn as sns
warnings.filterwarnings('ignore')
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
print('\n')
!/usr/local/cuda/bin/nvcc --version
!nvidia-smi

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting optuna
  Downloading optuna-2.10.1-py3-none-any.whl (308 kB)
[K     |████████████████████████████████| 308 kB 5.2 MB/s 
[?25hCollecting alembic
  Downloading alembic-1.8.0-py3-none-any.whl (209 kB)
[K     |████████████████████████████████| 209 kB 53.1 MB/s 
[?25hCollecting colorlog
  Downloading colorlog-6.6.0-py2.py3-none-any.whl (11 kB)
Collecting sqlalchemy>=1.1.0
  Downloading SQLAlchemy-1.4.39-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.6 MB)
[K     |████████████████████████████████| 1.6 MB 52.4 MB/s 
[?25hCollecting cmaes>=0.8.2
  Downloading cmaes-0.8.2-py3-none-any.whl (15 kB)
Collecting cliff
  Downloading cliff-3.10.1-py3-none-any.whl (81 kB)
[K     |████████████████████████████████| 81 kB 8.5 MB/s 
Collecting greenlet!=0.4.17
  Downloading greenlet-1.1.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_

In [None]:
# Define function to time code blocks
@contextmanager
def timed(name):
    """
    Context manager that prints the wall-clock duration of the wrapped block.

    Params
    ______

    name:  str.
           Label printed in front of the elapsed time (left-aligned and
           padded to 24 characters).

    Yields
    ______
    None. The elapsed time in seconds is printed when the block exits,
    including when the block exits through an exception.
    """
    # perf_counter is monotonic, so the interval is not affected by clock changes
    t0 = time.perf_counter()
    try:
        yield
    finally:
        # Report in `finally` so a failing block is still timed; the
        # exception itself propagates unchanged
        print('..%-24s:  %8.4f' % (name, time.perf_counter() - t0))

In [None]:
# Start a local Dask CUDA cluster and attach a client to it
cluster = LocalCUDACluster(
    threads_per_worker=1,
    ip='',
    dashboard_address='8081',
)
c = Client(cluster)

# Ask the client which workers are connected; the count is later used as
# the number of parallel Optuna jobs
workers = c.has_what().keys()
n_workers = len(workers)

# Last expression of the cell: display the client summary
c

0,1
Connection method: Cluster object,Cluster type: dask_cuda.LocalCUDACluster
Dashboard: http://172.28.0.2:8081/status,

0,1
Dashboard: http://172.28.0.2:8081/status,Workers: 1
Total threads: 1,Total memory: 25.46 GiB
Status: running,Using processes: True

0,1
Comm: tcp://172.28.0.2:46077,Workers: 1
Dashboard: http://172.28.0.2:8081/status,Total threads: 1
Started: Just now,Total memory: 25.46 GiB

0,1
Comm: tcp://172.28.0.2:35693,Total threads: 1
Dashboard: http://172.28.0.2:43883/status,Memory: 25.46 GiB
Nanny: tcp://172.28.0.2:46245,
Local directory: /content/drive/MyDrive/LoanStatus/ML/RAPIDS/dask-worker-space/worker-ixfhinnp,Local directory: /content/drive/MyDrive/LoanStatus/ML/RAPIDS/dask-worker-space/worker-ixfhinnp
GPU: Tesla P100-PCIE-16GB,GPU memory: 15.90 GiB


In [None]:
# Fix the seed of every random number generator used in the notebook
seed_value = 42
np.random.seed(seed_value)
cp.random.seed(seed_value)
random.seed(seed_value)
# NOTE(review): 'KNN_GPU' only stores the seed in the environment; nothing in
# this notebook reads it back -- confirm whether another variable was intended
os.environ['KNN_GPU'] = str(seed_value)

In [None]:
# Switch to the folder containing the train/test CSV files read in the next cell
%cd /content/drive/MyDrive/LoanStatus/Data/

/content/drive/MyDrive/LoanStatus/Data


In [None]:
# Load the SMOTE train and test sets from the current directory with cuDF
trainDF = cudf.read_csv('trainDF_SMOTE.csv', low_memory=False)
testDF = cudf.read_csv('testDF_SMOTE.csv', low_memory=False)

# Report the dimensions of both frames
print('Train set: Number of rows and columns:', trainDF.shape)
print('Test set: Number of rows and columns:', testDF.shape)

Train set: Number of rows and columns: (3022132, 51)
Test set: Number of rows and columns: (432473, 51)


In [None]:
# Separate the 'loan_status' label from the features for both splits;
# features are cast to float32 and labels to int32
X_train = trainDF.drop('loan_status', axis=1).astype('float32')
y_train = trainDF['loan_status'].astype('int32')

X_test = testDF.drop('loan_status', axis=1).astype('float32')
y_test = testDF['loan_status'].astype('int32')

## Weighted F1 

### 100 Trials

In [None]:
# Work from the folder where the study checkpoint and fitted model are pickled
%cd /content/drive/MyDrive/LoanStatus/Python/Models/ML/KNN/Optuna/Model_PKL/

# Define a name for the trials
study_name = 'dask_knn_optuna_SMOTE_100_weightedF1_tpe'

/content/drive/MyDrive/LoanStatus/Python/Models/ML/KNN/Optuna/Model_PKL


In [None]:
# Define function to train/evaluate the model
def train_and_eval(X_param, y_param, n_neighbors=10, 
                   metric='euclidean', verbose=False): 
    """
    Fit a KNN classifier on the supplied training data and score it on the
    held-out test set (module-level ``testDF``) with weighted F1.

    Params
    ______

    X_param:      DataFrame.
                  The features used for training.
    y_param:      Series.
                  The label for training.
    n_neighbors:  int.
                  Number of neighbors passed to KNeighborsClassifier.
    metric:       str.
                  Distance metric passed to KNeighborsClassifier.
    verbose:      Passed through to KNeighborsClassifier.

    Returns
    _______
    score: weighted F1 of the fitted model on the test set
    """

    # Evaluation data always comes from the module-level test frame
    X_eval = testDF.drop('loan_status', axis=1).astype('float32')
    y_eval = testDF['loan_status'].astype('int32')

    # Define model
    model = KNeighborsClassifier(n_neighbors=n_neighbors,
                                 metric=metric,
                                 verbose=verbose)

    # Start timer for each trial
    start = timer()

    # Fit on the data handed in by the caller; previously the arguments were
    # ignored and the training set was rebuilt from trainDF on every trial
    model.fit(X_param, y_param)

    # Predict and score on the host (sklearn metrics need NumPy arrays)
    y_pred = model.predict(X_eval)
    score = f1_score(y_eval.to_numpy(), y_pred.to_numpy(), average='weighted')
    run_time = timer() - start
    print('- Trial time:', run_time) 
    print('- Trial weighted F1:', score)
    print('######################################################')

    return score

In [None]:
# Baseline: one fit/score pass with the function's default hyperparameters
default_score = train_and_eval(X_train, y_train)
print('Score with default parameters : ', default_score)

- Trial time: 40.219344400999944
- Trial weighted F1: 0.8629177061332236
######################################################
Score with default parameters :  0.8629177061332236


In [None]:
# Define objective function with the parameters to be tested
def objective(trial, X_param, y_param):
    """
    Optuna objective: sample KNN hyperparameters and return the weighted F1.

    Params
    ______

    trial:    optuna Trial used to sample the hyperparameters.
    X_param:  DataFrame. Training features forwarded to train_and_eval.
    y_param:  Series. Training labels forwarded to train_and_eval.

    Returns
    _______
    score: value returned by train_and_eval for the sampled parameters
    """

    # Checkpoint the module-level `study` at the start of every trial so an
    # interrupted search can be resumed from the pickle
    joblib.dump(study, 'KNN_Optuna_SMOTE_100_GPU_weightedF1.pkl')

    # Search parameters
    n_neighbors = trial.suggest_int('n_neighbors', 3, 50)
    metric = trial.suggest_categorical('metric', ['euclidean', 'manhattan',
                                                  'chebyshev', 'minkowski'])

    # Forward the sampled metric as well; it used to be dropped, so every
    # trial was silently evaluated with the default 'euclidean' distance
    score = train_and_eval(X_param, y_param,
                           n_neighbors=n_neighbors,
                           metric=metric,
                           verbose=False)

    return score

In [None]:
with timed('dask_optuna'):
    # Begin HPO trials 
    # Start timer for experiment
    start_time = datetime.now()
    print('%-20s %s' % ('Start Time', start_time))

    # Resume from the checkpoint when one exists, otherwise start a new study
    checkpoint_path = 'KNN_Optuna_SMOTE_100_GPU_weightedF1.pkl'
    if os.path.isfile(checkpoint_path):
        study = joblib.load(checkpoint_path)
    else:
        # Seed the sampler so the sequence of suggestions is repeatable
        study = optuna.create_study(
            sampler=optuna.samplers.TPESampler(seed=seed_value),
            study_name=study_name,
            direction='maximize')

    # Optimize in parallel on Dask cluster
    # (joblib.parallel_backend: the bare name was never imported in this notebook)
    with joblib.parallel_backend('dask'):
        study.optimize(lambda trial: objective(trial, X_train, y_train),
                       n_trials=100,
                       n_jobs=n_workers)

    # The objective only checkpoints at the start of a trial, so persist the
    # finished study once more to capture the result of the last trial
    joblib.dump(study, checkpoint_path)

# End timer for experiment
end_time = datetime.now()
print('%-20s %s' % ('Start Time', start_time))
print('%-20s %s' % ('End Time', end_time))
# total_seconds() keeps whole days; `.seconds` alone wraps around after 24 h
print(str(timedelta(seconds=int((end_time - start_time).total_seconds()))))
print('\n')
print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.params)
print('Highest F1 Score', study.best_value)

Start Time           2022-06-15 03:40:57.848260
- Trial time: 37.80832760899989
- Trial weighted F1: 0.8359641577884183
######################################################
- Trial time: 38.034264845999814
- Trial weighted F1: 0.8344895377359629
######################################################
- Trial time: 37.14290788299991
- Trial weighted F1: 0.8387366839780154
######################################################
- Trial time: 36.59460053199996
- Trial weighted F1: 0.8441244386649177
######################################################
- Trial time: 36.316031345000056
- Trial weighted F1: 0.8480416698060846
######################################################
- Trial time: 36.92772758399997
- Trial weighted F1: 0.8408875880107282
######################################################
- Trial time: 36.28065224300008
- Trial weighted F1: 0.8754888483580192
######################################################
- Trial time: 36.287281455000084
- Trial weighted F1: 0.85314

In [None]:
# Switch to the folder where the per-trial results CSV is written
%cd /content/drive/MyDrive/LoanStatus/Python/Models/ML/KNN/Optuna/trialOptions/

/content/drive/MyDrive/LoanStatus/Python/Models/ML/KNN/Optuna/trialOptions


In [None]:
# Collect the HPO trials in a DataFrame, give the columns shorter names and
# rank the trials from best to worst weighted F1
trials_df = (
    study.trials_dataframe()
    .rename(columns={'number': 'iteration',
                     'value': 'f1_weighted',
                     'params_metric': 'metric',
                     'params_n_neighbors': 'n_neighbors'})
    .sort_values('f1_weighted', ascending=False)
)

# Write processed data to csv
trials_df.to_csv('KNN_Optuna_SMOTE_100_GPU_weightedF1.csv', index=False)
print(trials_df)

    iteration  f1_weighted             datetime_start  \
63         63     0.887127 2022-06-15 04:19:24.813360   
60         60     0.887127 2022-06-15 04:17:36.183038   
56         56     0.887127 2022-06-15 04:15:11.340191   
71         71     0.887127 2022-06-15 04:24:17.413753   
87         87     0.887127 2022-06-15 04:33:58.036005   
..        ...          ...                        ...   
29         29     0.836708 2022-06-15 03:58:44.106483   
0           0     0.835964 2022-06-15 03:40:57.853508   
1           1     0.834490 2022-06-15 03:41:35.790828   
51         51     0.832530 2022-06-15 04:12:07.662774   
99         99          NaN 2022-06-15 04:41:13.517968   

            datetime_complete               duration     metric  n_neighbors  \
63 2022-06-15 04:20:01.025956 0 days 00:00:36.212596  manhattan          4.0   
60 2022-06-15 04:18:12.431706 0 days 00:00:36.248668  manhattan          4.0   
56 2022-06-15 04:15:47.583367 0 days 00:00:36.243176  manhattan          4.

In [None]:
# Switch to the folder where the Optuna visualization HTML files are saved
%cd /content/drive/MyDrive/LoanStatus/Python/Models/ML/KNN/Optuna/Model_Explanations/

/content/drive/MyDrive/LoanStatus/Python/Models/ML/KNN/Optuna/Model_Explanations


In [None]:
# plot_optimization_history: shows the scores from all trials as well as the
# best score so far at each point. The figure is written to an HTML file in
# the current directory and also displayed inline.
fig = optuna.visualization.plot_optimization_history(study)
py.plot(fig, filename='optimizationHistory_KNN_Optuna_SMOTE_100_GPU_weightedF1.html')
fig.show()

In [None]:
# plot_parallel_coordinate: interactively visualizes the hyperparameters and
# scores. The figure is written to an HTML file in the current directory and
# also displayed inline.
fig = optuna.visualization.plot_parallel_coordinate(study)
py.plot(fig, filename='parallelCoordinate_KNN_Optuna_SMOTE_100_GPU_weightedF1.html')
fig.show()

In [None]:
# plot_slice: shows how the sampled hyperparameter values change over the
# search. The figure is written to an HTML file in the current directory and
# also displayed inline.
fig = optuna.visualization.plot_slice(study)
py.plot(fig, filename='slice_KNN_Optuna_SMOTE_100_GPU_weightedF1.html')
fig.show()

In [None]:
# Visualize the empirical distribution function of the trial scores. The
# figure is written to an HTML file in the current directory and also
# displayed inline.
fig = optuna.visualization.plot_edf(study)
py.plot(fig, filename='edf_KNN_Optuna_SMOTE_100_GPU_weightedF1.html')
fig.show()

In [None]:
# Best hyperparameters found by the study (dict of parameter name -> value),
# displayed as the cell output for reference
params = study.best_params   
params

{'metric': 'euclidean', 'n_neighbors': 4}

In [None]:
# Return to the folder where the fitted model is pickled
%cd /content/drive/MyDrive/LoanStatus/Python/Models/ML/KNN/Optuna/Model_PKL/

# Define a name for the trials (same value as set before the F1 search)
study_name = 'dask_knn_optuna_SMOTE_100_weightedF1_tpe'

/content/drive/MyDrive/LoanStatus/Python/Models/ML/KNN/Optuna/Model_PKL


In [None]:
# Re-create the best model and train on the training data.
# The hyperparameters are taken from the study itself rather than typed in
# by hand, so this cell cannot drift from the search result
best_model = KNeighborsClassifier(**study.best_params)

# Fit the model
best_model.fit(X_train, y_train)

# Save model
Pkl_Filename = 'KNN_Optuna_SMOTE_trials100_GPU_weightedF1.pkl'

with open(Pkl_Filename, 'wb') as file:
    pickle.dump(best_model, file)

# =============================================================================
# # To load saved model
# model = joblib.load('KNN_Optuna_SMOTE_trials100_GPU_weightedF1.pkl')
# print(model)
# =============================================================================

In [None]:
print('\nModel Metrics for KNN HPO SMOTE 100trials GPU')
# Predict on both the training and the test features
y_train_pred = best_model.predict(X_train)
y_test_pred = best_model.predict(X_test)

# Copy labels and predictions to NumPy once; every sklearn metric below
# works on these host arrays
y_test_np = y_test.to_numpy()
y_test_pred_np = y_test_pred.to_numpy()

print('\n')
print('Classification Report:')
clf_rpt = classification_report(y_test_np, y_test_pred_np)
print(clf_rpt)
print('\n')
print('Confusion matrix:')
print(confusion_matrix(y_test_np, y_test_pred_np))
print('\n')
print('Accuracy score : %.3f' % accuracy_score(y_test_np, y_test_pred_np))
print('Precision score : %.3f' % precision_score(y_test_np, y_test_pred_np))
print('Recall score : %.3f' % recall_score(y_test_np, y_test_pred_np))
print('F1 score : %.3f' % f1_score(y_test_np, y_test_pred_np))


Model Metrics for KNN HPO SMOTE 100trials GPU


Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.91      0.93    377848
           1       0.52      0.70      0.60     54625

    accuracy                           0.88    432473
   macro avg       0.74      0.80      0.76    432473
weighted avg       0.90      0.88      0.89    432473



Confusion matrix:
[[342198  35650]
 [ 16384  38241]]


Accuracy score : 0.880
Precision score : 0.518
Recall score : 0.700
F1 score : 0.595


In [None]:
# Evaluate on the testing data: ROC AUC computed from the predicted class
# labels of the test set
test_auc = roc_auc_score(y_test.to_numpy(), y_test_pred.to_numpy())
print('The best model from SMOTE 100 GPU trials optimization scores {:.5f} AUC ROC on the test set.'.format(test_auc))
print('This was achieved using these conditions:')
# First row of the score-sorted trials table
print(trials_df.iloc[0])

The best model from SMOTE 100 GPU trials optimization scores 0.80286 AUC ROC on the test set.
This was achieved using these conditions:
iteration                                    63
f1_weighted                            0.887127
datetime_start       2022-06-15 04:19:24.813360
datetime_complete    2022-06-15 04:20:01.025956
duration                 0 days 00:00:36.212596
metric                                manhattan
n_neighbors                                 4.0
state                                  COMPLETE
Name: 63, dtype: object


## Weighted ROC

### 100 Trials

In [None]:
# Work from the folder where the study checkpoint and fitted model are pickled
%cd /content/drive/MyDrive/LoanStatus/Python/Models/ML/KNN/Optuna/Model_PKL/

# Define a name for the trials
study_name = 'dask_knn_optuna_SMOTE_100_weightedROC_tpe'

/content/drive/MyDrive/LoanStatus/Python/Models/ML/KNN/Optuna/Model_PKL


In [None]:
# Define function to train/evaluate the model
def train_and_eval(X_param, y_param, n_neighbors=10, 
                   metric='euclidean', verbose=False): 
    """
    Fit a KNN classifier on the supplied training data and score it on the
    held-out test set (module-level ``testDF``) with weighted ROC AUC.

    Params
    ______

    X_param:      DataFrame.
                  The features used for training.
    y_param:      Series.
                  The label for training.
    n_neighbors:  int.
                  Number of neighbors passed to KNeighborsClassifier.
    metric:       str.
                  Distance metric passed to KNeighborsClassifier.
    verbose:      Passed through to KNeighborsClassifier.

    Returns
    _______
    score: weighted ROC AUC of the fitted model on the test set
    """

    # Evaluation data always comes from the module-level test frame
    X_eval = testDF.drop('loan_status', axis=1).astype('float32')
    y_eval = testDF['loan_status'].astype('int32')

    # Define model
    model = KNeighborsClassifier(n_neighbors=n_neighbors,
                                 metric=metric,
                                 verbose=verbose)

    # Start timer for each trial
    start = timer()

    # Fit on the data handed in by the caller; previously the arguments were
    # ignored and the training set was rebuilt from trainDF on every trial
    model.fit(X_param, y_param)

    # Predict and score on the host (sklearn metrics need NumPy arrays).
    # NOTE(review): the AUC is computed from predicted class labels, not
    # from class probabilities -- confirm this is the intended objective
    y_pred = model.predict(X_eval)
    score = roc_auc_score(y_eval.to_numpy(), y_pred.to_numpy(),
                          average='weighted')
    run_time = timer() - start
    print('- Trial time:', run_time) 
    print('- Trial weighted ROC:', score)
    print('######################################################')

    return score

In [None]:
# Baseline: one fit/score pass with the function's default hyperparameters
default_score = train_and_eval(X_train, y_train)
print('Score with default parameters : ', default_score)

- Trial time: 90.05875284800004
- Trial weighted ROC: 0.8128493671761852
######################################################
Score with default parameters :  0.8128493671761852


In [None]:
# Define objective function with the parameters to be tested
def objective(trial, X_param, y_param):
    """
    Optuna objective: sample KNN hyperparameters and return the weighted ROC AUC.

    Params
    ______

    trial:    optuna Trial used to sample the hyperparameters.
    X_param:  DataFrame. Training features forwarded to train_and_eval.
    y_param:  Series. Training labels forwarded to train_and_eval.

    Returns
    _______
    score: value returned by train_and_eval for the sampled parameters
    """

    # Checkpoint the module-level `study` at the start of every trial so an
    # interrupted search can be resumed from the pickle
    joblib.dump(study, 'KNN_Optuna_SMOTE_100_GPU_ROCweighted.pkl')

    # Search parameters
    n_neighbors = trial.suggest_int('n_neighbors', 3, 50)
    metric = trial.suggest_categorical('metric', ['euclidean', 'manhattan',
                                                  'chebyshev', 'minkowski'])

    # Forward the sampled metric as well; it used to be dropped, so every
    # trial was silently evaluated with the default 'euclidean' distance
    score = train_and_eval(X_param, y_param,
                           n_neighbors=n_neighbors,
                           metric=metric,
                           verbose=False)

    return score

In [None]:
with timed('dask_optuna'):
    # Begin HPO trials 
    # Start timer for experiment
    start_time = datetime.now()
    print('%-20s %s' % ('Start Time', start_time))

    # Resume from the checkpoint when one exists, otherwise start a new study
    checkpoint_path = 'KNN_Optuna_SMOTE_100_GPU_ROCweighted.pkl'
    if os.path.isfile(checkpoint_path):
        study = joblib.load(checkpoint_path)
    else:
        # Seed the sampler so the sequence of suggestions is repeatable
        study = optuna.create_study(
            sampler=optuna.samplers.TPESampler(seed=seed_value),
            study_name=study_name,
            direction='maximize')

    # Optimize in parallel on Dask cluster
    # (joblib.parallel_backend: the bare name was never imported in this notebook)
    with joblib.parallel_backend('dask'):
        study.optimize(lambda trial: objective(trial, X_train, y_train),
                       n_trials=100,
                       n_jobs=n_workers)

    # The objective only checkpoints at the start of a trial, so persist the
    # finished study once more to capture the result of the last trial
    joblib.dump(study, checkpoint_path)

# End timer for experiment
end_time = datetime.now()
print('%-20s %s' % ('Start Time', start_time))
print('%-20s %s' % ('End Time', end_time))
# total_seconds() keeps whole days; `.seconds` alone wraps around after 24 h
print(str(timedelta(seconds=int((end_time - start_time).total_seconds()))))
print('\n')
print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.params)
print('Highest ROCweighted score', study.best_value)

Start Time           2022-06-15 10:42:53.325264
- Trial time: 95.52003119699975
- Trial weighted ROC: 0.8214216063393962
######################################################
- Trial time: 88.91099538100025
- Trial weighted ROC: 0.8060404902687008
######################################################
- Trial time: 95.72916410700009
- Trial weighted ROC: 0.8190257901582789
######################################################
- Trial time: 89.08241719699981
- Trial weighted ROC: 0.8032500025314987
######################################################
- Trial time: 92.14610471200012
- Trial weighted ROC: 0.8189230795020938
######################################################
- Trial time: 94.36857157600025
- Trial weighted ROC: 0.818811075532316
######################################################
- Trial time: 92.1999644829998
- Trial weighted ROC: 0.8215751888800877
######################################################
- Trial time: 91.89845101900028
- Trial weighted ROC: 0.81

In [None]:
# Switch to the folder where the per-trial results CSV is written
%cd /content/drive/MyDrive/LoanStatus/Python/Models/ML/KNN/Optuna/trialOptions/

/content/drive/MyDrive/LoanStatus/Python/Models/ML/KNN/Optuna/trialOptions


In [None]:
# Collect the HPO trials in a DataFrame, give the columns shorter names and
# rank the trials from best to worst weighted ROC AUC.
# 'params_n_neighbors' is renamed too, matching the weighted-F1 section and
# the column name shown in this cell's output
trials_df = (
    study.trials_dataframe()
    .rename(columns={'number': 'iteration',
                     'value': 'roc_weighted',
                     'params_metric': 'metric',
                     'params_n_neighbors': 'n_neighbors'})
    .sort_values('roc_weighted', ascending=False)
)

# Write processed data to csv
trials_df.to_csv('KNN_Optuna_SMOTE_100_GPU_ROCweighted.csv', index=False)
print(trials_df)

    iteration  roc_weighted             datetime_start  \
31         31      0.821708 2022-06-15 11:31:17.719770   
82         82      0.821708 2022-06-15 12:51:43.905532   
64         64      0.821708 2022-06-15 12:23:23.186703   
57         57      0.821708 2022-06-15 12:12:22.620830   
55         55      0.821708 2022-06-15 12:09:15.684258   
..        ...           ...                        ...   
72         72      0.814490 2022-06-15 12:36:02.101745   
9           9      0.811299 2022-06-15 10:56:50.160364   
58         58      0.808114 2022-06-15 12:13:58.051609   
1           1      0.806040 2022-06-15 10:44:28.983375   
3           3      0.803250 2022-06-15 10:47:33.919397   

            datetime_complete               duration     metric  n_neighbors  \
31 2022-06-15 11:32:53.068772 0 days 00:01:35.349002  manhattan           42   
82 2022-06-15 12:53:18.468557 0 days 00:01:34.563025  chebyshev           42   
64 2022-06-15 12:24:58.185676 0 days 00:01:34.998973  chebyshev

In [None]:
# Switch to the folder where the Optuna visualization HTML files are saved
%cd /content/drive/MyDrive/LoanStatus/Python/Models/ML/KNN/Optuna/Model_Explanations/

/content/drive/MyDrive/LoanStatus/Python/Models/ML/KNN/Optuna/Model_Explanations


In [None]:
# plot_optimization_history: shows the scores from all trials as well as the
# best score so far at each point. The figure is written to an HTML file in
# the current directory and also displayed inline.
fig = optuna.visualization.plot_optimization_history(study)
py.plot(fig, filename='optimizationHistory_KNN_Optuna_SMOTE_100_GPU_ROCweighted.html')
fig.show()

In [None]:
# plot_parallel_coordinate: interactively visualizes the hyperparameters and
# scores. The figure is written to an HTML file in the current directory and
# also displayed inline.
fig = optuna.visualization.plot_parallel_coordinate(study)
py.plot(fig, filename='parallelCoordinate_KNN_Optuna_SMOTE_100_GPU_ROCweighted.html')
fig.show()

In [None]:
# plot_slice: shows how the sampled hyperparameter values change over the
# search. The figure is written to an HTML file in the current directory and
# also displayed inline.
fig = optuna.visualization.plot_slice(study)
py.plot(fig, filename='slice_KNN_Optuna_SMOTE_100_GPU_ROCweighted.html')
fig.show()

In [None]:
# Visualize the empirical distribution function of the trial scores. The
# figure is written to an HTML file in the current directory and also
# displayed inline.
fig = optuna.visualization.plot_edf(study)
py.plot(fig, filename='edf_KNN_Optuna_SMOTE_100_GPU_ROCweighted.html')
fig.show()

In [None]:
# Best hyperparameters found by the study (dict of parameter name -> value),
# displayed as the cell output for reference
params = study.best_params   
params

{'metric': 'manhattan', 'n_neighbors': 42}

In [None]:
# Return to the folder where the fitted model is pickled
%cd /content/drive/MyDrive/LoanStatus/Python/Models/ML/KNN/Optuna/Model_PKL/

/content/drive/MyDrive/LoanStatus/Python/Models/ML/KNN/Optuna/Model_PKL


In [None]:
# Re-create the best model and train on the training data.
# The hyperparameters are taken from the study itself rather than typed in
# by hand, so this cell cannot drift from the search result
best_model = KNeighborsClassifier(**study.best_params)

# Fit the model
best_model.fit(X_train, y_train)

# Save model
Pkl_Filename = 'KNN_Optuna_SMOTE_trials100_GPU_ROCweighted.pkl'

with open(Pkl_Filename, 'wb') as file:
    pickle.dump(best_model, file)

# =============================================================================
# # To load saved model
# model = joblib.load('KNN_Optuna_SMOTE_trials100_GPU_ROCweighted.pkl')
# print(model)
# =============================================================================

In [None]:
print('\nModel Metrics for KNN HPO SMOTE 100trials GPU ROCweighted')
# Predict on both the training and the test features
y_train_pred = best_model.predict(X_train)
y_test_pred = best_model.predict(X_test)

# Copy labels and predictions to NumPy once; every sklearn metric below
# works on these host arrays
y_test_np = y_test.to_numpy()
y_test_pred_np = y_test_pred.to_numpy()

print('\n')
print('Classification Report:')
clf_rpt = classification_report(y_test_np, y_test_pred_np)
print(clf_rpt)
print('\n')
print('Confusion matrix:')
print(confusion_matrix(y_test_np, y_test_pred_np))
print('\n')
print('Accuracy score : %.3f' % accuracy_score(y_test_np, y_test_pred_np))
print('Precision score : %.3f' % precision_score(y_test_np, y_test_pred_np))
print('Recall score : %.3f' % recall_score(y_test_np, y_test_pred_np))
print('F1 score : %.3f' % f1_score(y_test_np, y_test_pred_np))


Model Metrics for KNN HPO SMOTE 100trials GPU ROCweighted


Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.85      0.91    377848
           1       0.45      0.85      0.59     54625

    accuracy                           0.85    432473
   macro avg       0.71      0.85      0.75    432473
weighted avg       0.91      0.85      0.87    432473



Confusion matrix:
[[321684  56164]
 [  8011  46614]]


Accuracy score : 0.852
Precision score : 0.454
Recall score : 0.853
F1 score : 0.592


In [None]:
# Evaluate on the testing data: ROC AUC computed from the predicted class
# labels of the test set
test_auc = roc_auc_score(y_test.to_numpy(), y_test_pred.to_numpy())
print('The best model from SMOTE 100 ROCweighted GPU trials optimization scores {:.5f} AUC ROC on the test set.'.format(test_auc))
print('This was achieved using these conditions:')
# First row of the score-sorted trials table
print(trials_df.iloc[0])

The best model from SMOTE 100 ROCweighted GPU trials optimization scores 0.85235 AUC ROC on the test set.
This was achieved using these conditions:
iteration                                    31
roc_weighted                           0.821708
datetime_start       2022-06-15 11:31:17.719770
datetime_complete    2022-06-15 11:32:53.068772
duration                 0 days 00:01:35.349002
metric                                manhattan
n_neighbors                                  42
state                                  COMPLETE
Name: 0, dtype: object


## Recall

### 100 Trials

In [None]:
# Work from the folder where the study checkpoint is pickled
%cd /content/drive/MyDrive/LoanStatus/Python/Models/ML/KNN/Optuna/Model_PKL/

# Define a name for the trials
study_name = 'dask_knn_optuna_SMOTE_100_Recall_tpe'

/content/drive/MyDrive/LoanStatus/Python/Models/ML/KNN/Optuna/Model_PKL


In [None]:
# Define function to train/evaluate the model
def train_and_eval(X_param, y_param, n_neighbors=10, 
                   metric='euclidean', verbose=False): 
    """
    Fit a KNN classifier on the supplied training data and score it on the
    held-out test set (module-level ``testDF``) with recall.

    Params
    ______

    X_param:      DataFrame.
                  The features used for training.
    y_param:      Series.
                  The label for training.
    n_neighbors:  int.
                  Number of neighbors passed to KNeighborsClassifier.
    metric:       str.
                  Distance metric passed to KNeighborsClassifier.
    verbose:      Passed through to KNeighborsClassifier.

    Returns
    _______
    score: recall of the fitted model on the test set
    """

    # Evaluation data always comes from the module-level test frame
    X_eval = testDF.drop('loan_status', axis=1).astype('float32')
    y_eval = testDF['loan_status'].astype('int32')

    # Define model
    model = KNeighborsClassifier(n_neighbors=n_neighbors,
                                 metric=metric,
                                 verbose=verbose)

    # Start timer for each trial
    start = timer()

    # Fit on the data handed in by the caller; previously the arguments were
    # ignored and the training set was rebuilt from trainDF on every trial
    model.fit(X_param, y_param)

    # Predict and score on the host (sklearn metrics need NumPy arrays)
    y_pred = model.predict(X_eval)
    score = recall_score(y_eval.to_numpy(), y_pred.to_numpy())

    run_time = timer() - start
    print('- Trial time:', run_time) 
    print('- Trial recall score:', score)
    print('######################################################')

    return score

In [None]:
# Baseline: one fit/score pass with the function's default hyperparameters
default_score = train_and_eval(X_train, y_train)
print('Score with default parameters : ', default_score)

- Trial time: 90.0641979789998
- Trial recall score: 0.7680549199084669
######################################################
Score with default parameters :  0.7680549199084669


In [None]:
# Define objective function with the parameters to be tested
def objective(trial, X_param, y_param):
    """
    Optuna objective: sample KNN hyperparameters and return the recall.

    Params
    ______

    trial:    optuna Trial used to sample the hyperparameters.
    X_param:  DataFrame. Training features forwarded to train_and_eval.
    y_param:  Series. Training labels forwarded to train_and_eval.

    Returns
    _______
    score: value returned by train_and_eval for the sampled parameters
    """

    # Checkpoint the module-level `study` at the start of every trial so an
    # interrupted search can be resumed from the pickle
    joblib.dump(study, 'KNN_Optuna_SMOTE_100_GPU_Recall.pkl')

    # Search parameters
    n_neighbors = trial.suggest_int('n_neighbors', 3, 50)
    metric = trial.suggest_categorical('metric', ['euclidean', 'manhattan',
                                                  'chebyshev', 'minkowski'])

    # Forward the sampled metric as well; it used to be dropped, so every
    # trial was silently evaluated with the default 'euclidean' distance
    score = train_and_eval(X_param, y_param,
                           n_neighbors=n_neighbors,
                           metric=metric,
                           verbose=False)

    return score

In [None]:
# Run the recall HPO study. `timed` is a context manager defined earlier in
# the notebook (presumably it reports the wall time of the block - confirm).
with timed('dask_optuna'):
    # Begin HPO trials
    # Start timer for experiment
    start_time = datetime.now()
    print('%-20s %s' % ('Start Time', start_time))

    # Resume from the checkpoint written by objective() when it exists,
    # otherwise start a fresh TPE study that maximizes the score.
    if os.path.isfile('KNN_Optuna_SMOTE_100_GPU_Recall.pkl'):
        study = joblib.load('KNN_Optuna_SMOTE_100_GPU_Recall.pkl')
    else:
        study = optuna.create_study(sampler=optuna.samplers.TPESampler(),
                                    study_name=study_name,
                                    direction='maximize')

    # Optimize in parallel on Dask cluster
    with parallel_backend('dask'):
        study.optimize(lambda trial: objective(trial, X_train, y_train),
                       n_trials=100,
                       n_jobs=n_workers)

    # objective() only checkpoints at the start of each trial, so the result
    # of the last trial would otherwise never reach the pickle on disk.
    joblib.dump(study, 'KNN_Optuna_SMOTE_100_GPU_Recall.pkl')

# End timer for experiment
end_time = datetime.now()
print('%-20s %s' % ('Start Time', start_time))
print('%-20s %s' % ('End Time', end_time))
# total_seconds() keeps whole days; timedelta.seconds wraps around at 24 hours
print(str(timedelta(seconds=int((end_time - start_time).total_seconds()))))
print('\n')
print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.params)
print('Highest Recall score', study.best_value)

Start Time           2022-06-15 19:09:33.497171
- Trial time: 41.311747205000074
- Trial recall score: 0.732558352402746
######################################################
- Trial time: 38.07488493699998
- Trial recall score: 0.8370343249427917
######################################################
- Trial time: 37.903391219000014
- Trial recall score: 0.8261235697940503
######################################################
- Trial time: 37.71077408500014
- Trial recall score: 0.8287231121281464
######################################################
- Trial time: 38.09518563300003
- Trial recall score: 0.8299679633867277
######################################################
- Trial time: 36.149850195
- Trial recall score: 0.7234782608695652
######################################################
- Trial time: 36.71281062900016
- Trial recall score: 0.8123935926773456
######################################################
- Trial time: 38.086212108999916
- Trial recall score: 0.831

In [None]:
# Switch to the trialOptions directory; the next cell writes the trials CSV
# there using a relative file name.
%cd /content/drive/MyDrive/LoanStatus/Python/Models/ML/KNN/Optuna/trialOptions/

/content/drive/MyDrive/LoanStatus/Python/Models/ML/KNN/Optuna/trialOptions


In [None]:
# Collect the HPO trials into a table with friendlier column names,
# ordered from best to worst recall
trials_df = (
    study.trials_dataframe()
    .rename(columns={'number': 'iteration',
                     'value': 'recall',
                     'params_metric': 'metric',
                     'params_n_neighbors': 'n_neighbors'})
    .sort_values('recall', ascending=False)
)

# Persist the sorted table as CSV (relative to the current directory) and show it
trials_df.to_csv('KNN_Optuna_SMOTE_100_GPU_Recall.csv', index=False)
print(trials_df)

    iteration    recall             datetime_start          datetime_complete  \
73         73  0.837748 2022-06-15 19:23:33.410434 2022-06-15 19:24:11.559496   
92         92  0.837748 2022-06-15 19:35:34.889325 2022-06-15 19:36:13.025016   
83         83  0.837748 2022-06-15 19:29:51.958832 2022-06-15 19:30:30.101071   
77         77  0.837748 2022-06-15 19:26:05.998864 2022-06-15 19:26:44.136118   
74         74  0.837748 2022-06-15 19:24:11.559645 2022-06-15 19:24:49.813967   
..        ...       ...                        ...                        ...   
78         78  0.784659 2022-06-15 19:26:44.136260 2022-06-15 19:27:20.716535   
6           6  0.732558 2022-06-15 13:31:14.226533 2022-06-15 13:32:44.191983   
51         51  0.732558 2022-06-15 19:09:33.506073 2022-06-15 19:10:15.000489   
56         56  0.723478 2022-06-15 19:12:47.454995 2022-06-15 19:13:23.910785   
50         50       NaN 2022-06-15 14:40:46.556251                        NaT   

                 duration  

In [None]:
# Switch to the Model_Explanations directory; the plot cells below pass
# relative .html file names to py.plot.
%cd /content/drive/MyDrive/LoanStatus/Python/Models/ML/KNN/Optuna/Model_Explanations/

/content/drive/MyDrive/LoanStatus/Python/Models/ML/KNN/Optuna/Model_Explanations


In [None]:
# Plot_optimization_history: shows the scores from all trials as well as the best score so far at each point.
# py.plot is given an .html file name (presumably plotly.offline imported as
# `py` - confirm against the import cell); fig.show() renders the figure inline.
fig = optuna.visualization.plot_optimization_history(study)
py.plot(fig, filename='optimizationHistory_KNN_Optuna_SMOTE_100_GPU_Recall.html')
fig.show()

In [None]:
# plot_parallel_coordinate: interactively visualizes the hyperparameters and scores
# The figure is passed to py.plot with an .html file name and also shown inline.
fig = optuna.visualization.plot_parallel_coordinate(study)
py.plot(fig, filename='parallelCoordinate_KNN_Optuna_SMOTE_100_GPU_Recall.html')
fig.show()

In [None]:
# plot_slice: shows the change of the hyperparameter space over the search.
# The figure is passed to py.plot with an .html file name and also shown inline.
fig = optuna.visualization.plot_slice(study)
py.plot(fig, filename='slice_KNN_Optuna_SMOTE_100_GPU_Recall.html')
fig.show()

In [None]:
# Visualize parameter importances
# The figure is passed to py.plot with an .html file name and also shown inline.
fig = optuna.visualization.plot_param_importances(study)
py.plot(fig, filename='paramImportance_KNN_Optuna_SMOTE_100_GPU_Recall.html')
fig.show()

In [None]:
# Visualize empirical distribution function
# The figure is passed to py.plot with an .html file name and also shown inline.
fig = optuna.visualization.plot_edf(study)
py.plot(fig, filename='edf_KNN_Optuna_SMOTE_100_GPU_Recall.html')
fig.show()

In [None]:
# Arrange best parameters to fit model for model metrics
# study.best_params is kept in `params`; the bare expression on the last line
# displays it as the cell output.
params = study.best_params   
params

{'metric': 'chebyshev', 'n_neighbors': 49}

In [None]:
# Back to the Model_PKL directory; the next cell pickles the fitted model
# there using a relative file name.
%cd /content/drive/MyDrive/LoanStatus/Python/Models/ML/KNN/Optuna/Model_PKL/

/content/drive/MyDrive/LoanStatus/Python/Models/ML/KNN/Optuna/Model_PKL


In [None]:
# Re-create the best model and train on the training data.
# The hyperparameters come from `params` (study.best_params, set in the cell
# above) instead of being re-typed by hand, so the fitted model cannot drift
# from what the study actually selected.
best_model = KNeighborsClassifier(**params)

# Fit the model
best_model.fit(X_train, y_train)

# Save model (relative to the current working directory)
Pkl_Filename = 'KNN_Optuna_SMOTE_trials100_GPU_Recall.pkl'

with open(Pkl_Filename, 'wb') as file:
    pickle.dump(best_model, file)

# =============================================================================
# # To load saved model (same file name as written above)
# model = joblib.load(Pkl_Filename)
# print(model)
# =============================================================================

In [None]:
print('\nModel Metrics for KNN HPO SMOTE 100trials GPU Recall')
# Predict on the training and test sets
# NOTE(review): y_train_pred is not used by any cell visible here - confirm
# whether the training-set prediction is still needed.
y_train_pred = best_model.predict(X_train)
y_test_pred = best_model.predict(X_test)

# Convert labels and predictions to NumPy once and reuse them for every
# metric, instead of repeating .to_numpy() in each call (presumably a
# device-to-host copy for cuDF objects).
y_test_np = y_test.to_numpy()
y_test_pred_np = y_test_pred.to_numpy()

print('\n')
print('Classification Report:')
clf_rpt = classification_report(y_test_np, y_test_pred_np)
print(clf_rpt)
print('\n')
print('Confusion matrix:')
print(confusion_matrix(y_test_np, y_test_pred_np))
print('\n')
print('Accuracy score : %.3f' % accuracy_score(y_test_np, y_test_pred_np))
print('Precision score : %.3f' % precision_score(y_test_np, y_test_pred_np))
print('Recall score : %.3f' % recall_score(y_test_np, y_test_pred_np))
print('F1 score : %.3f' % f1_score(y_test_np, y_test_pred_np))


Model Metrics for KNN HPO SMOTE 100trials GPU Recall


Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.80      0.87    377848
           1       0.37      0.80      0.50     54625

    accuracy                           0.80    432473
   macro avg       0.67      0.80      0.69    432473
weighted avg       0.89      0.80      0.83    432473



Confusion matrix:
[[302351  75497]
 [ 10938  43687]]


Accuracy score : 0.800
Precision score : 0.367
Recall score : 0.800
F1 score : 0.503


In [None]:
# Report ROC AUC on the test set, followed by the top row of the sorted trials table.
# NOTE(review): roc_auc_score receives the class labels returned by
# best_model.predict, not probabilities/scores - confirm this is intended.
# NOTE(review): trials_df.iloc[0] is simply the first row after sorting; with
# tied scores its parameters may differ from the ones used for best_model.
print(
    'The best model from SMOTE 100 Recall GPU trials optimization scores {:.5f} AUC ROC on the test set.'.format(
        roc_auc_score(y_test.to_numpy(), y_test_pred.to_numpy())
    )
)
print('This was achieved using these conditions:')
print(trials_df.iloc[0])

The best model from SMOTE 100 Recall GPU trials optimization scores 0.79998 AUC ROC on the test set.
This was achieved using these conditions:
iteration                                    73
recall                                 0.837748
datetime_start       2022-06-15 19:23:33.410434
datetime_complete    2022-06-15 19:24:11.559496
duration                 0 days 00:00:38.149062
metric                                minkowski
n_neighbors                                49.0
state                                  COMPLETE
Name: 73, dtype: object


## Precision

### 100 Trials

In [None]:
# Work from the Model_PKL directory: the study checkpoint and model pickle
# written by later cells use relative file names.
%cd /content/drive/MyDrive/LoanStatus/Python/Models/ML/KNN/Optuna/Model_PKL/

# Define a name for the trials (passed as study_name to optuna.create_study below)
study_name = 'dask_knn_optuna_SMOTE_100_Precision_tpe'

/content/drive/MyDrive/LoanStatus/Python/Models/ML/KNN/Optuna/Model_PKL


In [None]:
# Train a KNN classifier and score it (precision) on the held-out test frame
def train_and_eval(X_param, y_param, n_neighbors=10,
                   metric='euclidean', verbose=False):
    """
    Fit a KNeighborsClassifier on the training frame and return its precision
    on the test frame.

    The data are taken from the module-level ``trainDF`` and ``testDF``
    frames (label column ``loan_status``); ``X_param`` and ``y_param`` are
    accepted for interface compatibility but are not read by the body.

    Params
    ______

    X_param:      DataFrame. Not used by the body.
    y_param:      Series. Not used by the body.
    n_neighbors:  int. Forwarded to KNeighborsClassifier.
    metric:       str. Forwarded to KNeighborsClassifier.
    verbose:      Forwarded to KNeighborsClassifier.

    Returns
    score: value of precision_score for the test-set predictions
    """
    label_col = 'loan_status'

    # Features are cast to float32 and labels to int32
    train_features = trainDF.drop(label_col, axis=1).astype('float32')
    train_labels = trainDF[label_col].astype('int32')
    test_features = testDF.drop(label_col, axis=1).astype('float32')
    test_labels = testDF[label_col].astype('int32')

    knn = KNeighborsClassifier(n_neighbors=n_neighbors,
                               metric=metric,
                               verbose=verbose)

    # The reported trial time covers fit, predict and scoring
    t0 = timer()
    knn.fit(train_features, train_labels)
    predictions = knn.predict(test_features)
    score = precision_score(test_labels.to_numpy(), predictions.to_numpy())
    elapsed = timer() - t0

    print('- Trial time:', elapsed)
    print('- Trial precision score:', score)
    print('######################################################')

    return score

In [None]:
# Baseline before the search: train_and_eval is called with its default
# hyperparameters (n_neighbors=10, metric='euclidean').
print('Score with default parameters : ', train_and_eval(X_train, y_train))

- Trial time: 36.392250201000024
- Trial precision score: 0.43819978275401067
######################################################
Score with default parameters :  0.43819978275401067


In [None]:
# Define objective function with the parameters to be tested
def objective(trial, X_param, y_param):
    """
    Optuna objective: sample KNN hyperparameters for one trial and return
    the score produced by train_and_eval.

    Params
    ______

    trial:    Optuna trial used to sample 'n_neighbors' and 'metric'.
    X_param:  forwarded unchanged to train_and_eval.
    y_param:  forwarded unchanged to train_and_eval.

    Returns
    score: value returned by train_and_eval for the sampled parameters
    """

    # Checkpoint the module-level `study` before the trial runs so the
    # HPO cell can reload it from this file.
    joblib.dump(study, 'KNN_Optuna_SMOTE_100_GPU_Precision.pkl')

    # Search parameters
    n_neighbors = trial.suggest_int('n_neighbors', 3, 50)
    metric = trial.suggest_categorical('metric', ['euclidean', 'manhattan',
                                                  'chebyshev', 'minkowski'])

    # The sampled metric must be forwarded: it used to be sampled but never
    # passed on, so every trial silently ran with the default 'euclidean'
    # and the recorded 'metric' parameter had no effect on the score.
    score = train_and_eval(X_param, y_param,
                           n_neighbors=n_neighbors,
                           metric=metric,
                           verbose=False)

    return score

In [None]:
# Run the precision HPO study. `timed` is a context manager defined earlier
# in the notebook (presumably it reports the wall time of the block - confirm).
with timed('dask_optuna'):
    # Begin HPO trials
    # Start timer for experiment
    start_time = datetime.now()
    print('%-20s %s' % ('Start Time', start_time))

    # Resume from the checkpoint written by objective() when it exists,
    # otherwise start a fresh TPE study that maximizes the score.
    if os.path.isfile('KNN_Optuna_SMOTE_100_GPU_Precision.pkl'):
        study = joblib.load('KNN_Optuna_SMOTE_100_GPU_Precision.pkl')
    else:
        study = optuna.create_study(sampler=optuna.samplers.TPESampler(),
                                    study_name=study_name,
                                    direction='maximize')

    # Optimize in parallel on Dask cluster
    with parallel_backend('dask'):
        study.optimize(lambda trial: objective(trial, X_train, y_train),
                       n_trials=100,
                       n_jobs=n_workers)

    # objective() only checkpoints at the start of each trial, so the result
    # of the last trial would otherwise never reach the pickle on disk.
    joblib.dump(study, 'KNN_Optuna_SMOTE_100_GPU_Precision.pkl')

# End timer for experiment
end_time = datetime.now()
print('%-20s %s' % ('Start Time', start_time))
print('%-20s %s' % ('End Time', end_time))
# total_seconds() keeps whole days; timedelta.seconds wraps around at 24 hours
print(str(timedelta(seconds=int((end_time - start_time).total_seconds()))))
print('\n')
print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.params)
print('Highest Precision score', study.best_value)

Start Time           2022-06-15 20:01:56.873616
- Trial time: 36.64731320700048
- Trial precision score: 0.39604714740285935
######################################################
- Trial time: 36.76108234300045
- Trial precision score: 0.41903847077469863
######################################################
- Trial time: 36.200918913999885
- Trial precision score: 0.4347036591288191
######################################################
- Trial time: 37.07216035500005
- Trial precision score: 0.38786451978760333
######################################################
- Trial time: 36.44326538800033
- Trial precision score: 0.43196542079837275
######################################################
- Trial time: 37.12226190000001
- Trial precision score: 0.40422102909087615
######################################################
- Trial time: 36.44924952400015
- Trial precision score: 0.43196542079837275
######################################################
- Trial time: 37.87999960400

In [None]:
# Switch to the trialOptions directory; the next cell writes the trials CSV
# there using a relative file name.
%cd /content/drive/MyDrive/LoanStatus/Python/Models/ML/KNN/Optuna/trialOptions/

/content/drive/MyDrive/LoanStatus/Python/Models/ML/KNN/Optuna/trialOptions


In [None]:
# Collect the HPO trials into a table with friendlier column names,
# ordered from best to worst precision
trials_df = (
    study.trials_dataframe()
    .rename(columns={'number': 'iteration',
                     'value': 'precision',
                     'params_metric': 'metric',
                     'params_n_neighbors': 'n_neighbors'})
    .sort_values('precision', ascending=False)
)

# Persist the sorted table as CSV (relative to the current directory) and show it
trials_df.to_csv('KNN_Optuna_SMOTE_100_GPU_Precision.csv', index=False)
print(trials_df)

    iteration  precision             datetime_start  \
50         50   0.517533 2022-06-15 20:32:34.661307   
67         67   0.517533 2022-06-15 20:42:55.831794   
81         81   0.517533 2022-06-15 20:51:25.973354   
87         87   0.517533 2022-06-15 20:55:05.171586   
61         61   0.517533 2022-06-15 20:39:17.744178   
..        ...        ...                        ...   
3           3   0.387865 2022-06-15 20:03:46.934622   
20         20   0.383127 2022-06-15 20:14:14.492004   
7           7   0.382073 2022-06-15 20:06:14.767635   
88         88   0.380738 2022-06-15 20:55:41.506605   
14         14   0.378919 2022-06-15 20:10:32.060064   

            datetime_complete               duration     metric  n_neighbors  \
50 2022-06-15 20:33:10.961472 0 days 00:00:36.300165  euclidean            4   
67 2022-06-15 20:43:32.192624 0 days 00:00:36.360830  chebyshev            4   
81 2022-06-15 20:52:02.341030 0 days 00:00:36.367676  chebyshev            4   
87 2022-06-15 20:55

In [None]:
# Switch to the Model_Explanations directory; the plot cells below pass
# relative .html file names to py.plot.
%cd /content/drive/MyDrive/LoanStatus/Python/Models/ML/KNN/Optuna/Model_Explanations/

/content/drive/MyDrive/LoanStatus/Python/Models/ML/KNN/Optuna/Model_Explanations


In [None]:
# Plot_optimization_history: shows the scores from all trials as well as the best score so far at each point.
# py.plot is given an .html file name (presumably plotly.offline imported as
# `py` - confirm against the import cell); fig.show() renders the figure inline.
fig = optuna.visualization.plot_optimization_history(study)
py.plot(fig, filename='optimizationHistory_KNN_Optuna_SMOTE_100_GPU_Precision.html')
fig.show()

In [None]:
# plot_parallel_coordinate: interactively visualizes the hyperparameters and scores
# The figure is passed to py.plot with an .html file name and also shown inline.
fig = optuna.visualization.plot_parallel_coordinate(study)
py.plot(fig, filename='parallelCoordinate_KNN_Optuna_SMOTE_100_GPU_Precision.html')
fig.show()

In [None]:
# plot_slice: shows the change of the hyperparameter space over the search.
# The figure is passed to py.plot with an .html file name and also shown inline.
fig = optuna.visualization.plot_slice(study)
py.plot(fig, filename='slice_KNN_Optuna_SMOTE_100_GPU_Precision.html')
fig.show()

In [None]:
# Visualize parameter importances
# The figure is passed to py.plot with an .html file name and also shown inline.
fig = optuna.visualization.plot_param_importances(study)
py.plot(fig, filename='paramImportance_KNN_Optuna_SMOTE_100_GPU_Precision.html')
fig.show()

In [None]:
# Visualize empirical distribution function
# The figure is passed to py.plot with an .html file name and also shown inline.
fig = optuna.visualization.plot_edf(study)
py.plot(fig, filename='edf_KNN_Optuna_SMOTE_100_GPU_Precision.html')
fig.show()

In [None]:
# Arrange best parameters to fit model for model metrics
# study.best_params is kept in `params`; the bare expression on the last line
# displays it as the cell output.
params = study.best_params   
params

{'metric': 'euclidean', 'n_neighbors': 4}

In [None]:
# Back to the Model_PKL directory; the next cell pickles the fitted model
# there using a relative file name.
%cd /content/drive/MyDrive/LoanStatus/Python/Models/ML/KNN/Optuna/Model_PKL/

/content/drive/MyDrive/LoanStatus/Python/Models/ML/KNN/Optuna/Model_PKL


In [None]:
# Re-create the best model and train on the training data.
# The hyperparameters come from `params` (study.best_params, set in the cell
# above) instead of being re-typed by hand, so the fitted model cannot drift
# from what the study actually selected.
best_model = KNeighborsClassifier(**params)

# Fit the model
best_model.fit(X_train, y_train)

# Save model (relative to the current working directory)
Pkl_Filename = 'KNN_Optuna_SMOTE_trials100_GPU_Precision.pkl'

with open(Pkl_Filename, 'wb') as file:
    pickle.dump(best_model, file)

# =============================================================================
# # To load saved model (same file name as written above)
# model = joblib.load(Pkl_Filename)
# print(model)
# =============================================================================

In [None]:
print('\nModel Metrics for KNN HPO SMOTE 100trials GPU Precision')
# Predict on the training and test sets
# NOTE(review): y_train_pred is not used by any cell visible here - confirm
# whether the training-set prediction is still needed.
y_train_pred = best_model.predict(X_train)
y_test_pred = best_model.predict(X_test)

# Convert labels and predictions to NumPy once and reuse them for every
# metric, instead of repeating .to_numpy() in each call (presumably a
# device-to-host copy for cuDF objects).
y_test_np = y_test.to_numpy()
y_test_pred_np = y_test_pred.to_numpy()

print('\n')
print('Classification Report:')
clf_rpt = classification_report(y_test_np, y_test_pred_np)
print(clf_rpt)
print('\n')
print('Confusion matrix:')
print(confusion_matrix(y_test_np, y_test_pred_np))
print('\n')
print('Accuracy score : %.3f' % accuracy_score(y_test_np, y_test_pred_np))
print('Precision score : %.3f' % precision_score(y_test_np, y_test_pred_np))
print('Recall score : %.3f' % recall_score(y_test_np, y_test_pred_np))
print('F1 score : %.3f' % f1_score(y_test_np, y_test_pred_np))


Model Metrics for KNN HPO SMOTE 100trials GPU Precision


Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.91      0.93    377848
           1       0.52      0.70      0.60     54625

    accuracy                           0.88    432473
   macro avg       0.74      0.80      0.76    432473
weighted avg       0.90      0.88      0.89    432473



Confusion matrix:
[[342198  35650]
 [ 16384  38241]]


Accuracy score : 0.880
Precision score : 0.518
Recall score : 0.700
F1 score : 0.595


In [None]:
# Report ROC AUC on the test set, followed by the top row of the sorted trials table.
# NOTE(review): roc_auc_score receives the class labels returned by
# best_model.predict, not probabilities/scores - confirm this is intended.
# NOTE(review): trials_df.iloc[0] is simply the first row after sorting; with
# tied scores its parameters may differ from the ones used for best_model.
print(
    'The best model from SMOTE 100 Precision GPU trials optimization scores {:.5f} AUC ROC on the test set.'.format(
        roc_auc_score(y_test.to_numpy(), y_test_pred.to_numpy())
    )
)
print('This was achieved using these conditions:')
print(trials_df.iloc[0])

The best model from SMOTE 100 Precision GPU trials optimization scores 0.80286 AUC ROC on the test set.
This was achieved using these conditions:
iteration                                    50
precision                              0.517533
datetime_start       2022-06-15 20:32:34.661307
datetime_complete    2022-06-15 20:33:10.961472
duration                 0 days 00:00:36.300165
metric                                euclidean
n_neighbors                                   4
state                                  COMPLETE
Name: 50, dtype: object
