In [1]:
import pandas as pd
import numpy as np
import copy

from sklearn.impute import KNNImputer
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler
from collections import Counter

from sklearn.neighbors import KNeighborsClassifier
from imblearn.pipeline import Pipeline

from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import roc_curve
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import auc
from sklearn.metrics import roc_auc_score

import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the training split, then discard the 'index' column and the eight
# feature columns that are excluded from this notebook.
df_1 = (
    pd.read_csv('../data/code_3_train.csv')
    .drop(columns=[
        'index',
        'pco2', 'ph', 'basophils', 'lactic_acid', 'bmi',
        'creatine_kinase', 'lymphocyte', 'neutrophils',
    ])
)

# Target is the 'outcome' column; every remaining column is a predictor.
y_train = df_1['outcome']
X_train = df_1.drop(columns='outcome')

In [3]:
# Load the test split and drop the same columns that are dropped from the
# training split, so both frames expose identical predictors.
df_2 = (
    pd.read_csv('../data/code_3_test.csv')
    .drop(columns=[
        'index',
        'pco2', 'ph', 'basophils', 'lactic_acid', 'bmi',
        'creatine_kinase', 'lymphocyte', 'neutrophils',
    ])
)

# Target is the 'outcome' column; every remaining column is a predictor.
y_test = df_2['outcome']
X_test = df_2.drop(columns='outcome')

# <font color=blue>Method 1: all steps in a single imblearn Pipeline</font>

In [4]:
# Steps run in this order during fit: impute -> oversample -> scale -> classify.
# Every hyper-parameter is given directly on the step that owns it.
pipe_line_kn = Pipeline(steps=[
    ('knn_impute', KNNImputer(n_neighbors=10)),
    ('oversample_SMOTE', SMOTE(sampling_strategy=0.75, random_state=42)),
    ('ss_scale', StandardScaler()),
    ('kn_class', KNeighborsClassifier(n_neighbors=1000,
                                      weights='uniform',
                                      algorithm='brute',
                                      p=2)),
])
pipe_line_kn.fit(X_train, y_train)

# Predict on the train and test features exactly as loaded (train first, then
# test). NOTE(review): imblearn's Pipeline is documented to apply samplers
# during fit only, so SMOTE should not touch these rows -- confirm for the
# installed version.
y_train_predicted1, y_test_predicted1 = (
    pipe_line_kn.predict(features) for features in (X_train, X_test)
)

In [5]:
# Report accuracy, recall and precision for Method 1, train before test for
# each metric. Each row is (label, scorer, true labels, predicted labels).
metric_rows = [
    ('Train accuracy :', accuracy_score, y_train, y_train_predicted1),
    ('Test accuracy :', accuracy_score, y_test, y_test_predicted1),
    ('Train recall :', recall_score, y_train, y_train_predicted1),
    ('Test recall :', recall_score, y_test, y_test_predicted1),
    ('Train precision :', precision_score, y_train, y_train_predicted1),
    ('Test precision :', precision_score, y_test, y_test_predicted1),
]

for label, scorer, y_true, y_pred in metric_rows:
    print(label, scorer(y_true, y_pred))

Train accuracy : 0.873015873015873
Test accuracy : 0.8673469387755102
Train recall : 0.15966386554621848
Test recall : 0.175
Train precision : 0.6129032258064516
Test precision : 0.5384615384615384


# <font color=blue>Method 2: the same steps applied manually, one at a time</font>

In [6]:
# Step 1 of the manual pipeline: fit a 10-neighbour KNN imputer on the training
# features and impute them in the same call. The fitted imputer is reused
# later with transform() only.
knn_impute = KNNImputer(n_neighbors=10)
X_train_impute = knn_impute.fit_transform(X_train)

In [7]:
# Step 2 of the manual pipeline: oversample the imputed training data with
# SMOTE, using the same random_state and sampling_strategy as the pipeline in
# Method 1 so the two methods stay comparable.
oversample_SMOTE = SMOTE(random_state=42, sampling_strategy=0.75)

# Series.ravel() is deprecated since pandas 2.2 and emits a FutureWarning;
# to_numpy() yields the same 1-D array of labels without the warning.
X_train_impute_over, y_train_over = oversample_SMOTE.fit_resample(
    X_train_impute, y_train.to_numpy()
)

In [8]:
# Step 3 of the manual pipeline: fit the scaler on the imputed + oversampled
# training data and scale it in the same call. The fitted scaler is reused
# later with transform() only.
ss_scale = StandardScaler()
X_train_impute_over_scale = ss_scale.fit_transform(X_train_impute_over)

In [9]:
# Step 4 of the manual pipeline: the classifier, configured with the same
# hyper-parameters that Method 1 sets on its 'kn_class' step.
kn_class = KNeighborsClassifier(
    n_neighbors=1000,
    weights='uniform',
    algorithm='brute',
    p=2,
)

# Train on the imputed, oversampled and scaled data. Kept as the last
# expression so the cell still displays the fitted estimator.
kn_class.fit(X=X_train_impute_over_scale, y=y_train_over)

KNeighborsClassifier(algorithm='brute', n_neighbors=1000)

In [10]:
# Replay the fitted preprocessing on the original training features.
# No resampling happens here: only the imputer and scaler are applied.
# This rebinds X_train_impute, which was first assigned by fit_transform().
X_train_impute = knn_impute.transform(X_train)

# transform() only -- the scaler keeps the statistics learned from the
# oversampled training data.
X_train_impute_scale = ss_scale.transform(X_train_impute)

# Method 2 predictions for the training rows.
y_train_predicted2 = kn_class.predict(X_train_impute_scale)

In [11]:
# Apply the already-fitted imputer to the test features (transform only, so
# nothing is re-fitted on test data).
X_test_impute = knn_impute.transform(X_test)

# Scale with the already-fitted scaler (transform only).
X_test_impute_scale = ss_scale.transform(X_test_impute)

# Method 2 predictions for the test rows.
y_test_predicted2 = kn_class.predict(X_test_impute_scale)

In [12]:
# Report accuracy, recall and precision for Method 2, train before test for
# each metric. Each row is (label, scorer, true labels, predicted labels).
metric_rows = [
    ('Train accuracy :', accuracy_score, y_train, y_train_predicted2),
    ('Test accuracy :', accuracy_score, y_test, y_test_predicted2),
    ('Train recall :', recall_score, y_train, y_train_predicted2),
    ('Test recall :', recall_score, y_test, y_test_predicted2),
    ('Train precision :', precision_score, y_train, y_train_predicted2),
    ('Test precision :', precision_score, y_test, y_test_predicted2),
]

for label, scorer, y_true, y_pred in metric_rows:
    print(label, scorer(y_true, y_pred))

Train accuracy : 0.873015873015873
Test accuracy : 0.8673469387755102
Train recall : 0.15966386554621848
Test recall : 0.175
Train precision : 0.6129032258064516
Test precision : 0.5384615384615384


3 Jan 2022