In [1]:
import numpy as np
import pandas as pd

# preprocessing the data
from sklearn import preprocessing

# train test split
from sklearn.model_selection import train_test_split
# confusion matrix
from sklearn.metrics import ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure

# import KNN
from sklearn.neighbors import KNeighborsClassifier
# feature selection
from mlxtend.feature_selection import SequentialFeatureSelector as SFS

# Remove warnings
import warnings
warnings.filterwarnings('ignore')

In [2]:
# reading the file
df=pd.read_csv('upsampledData.csv')
df.head()

Unnamed: 0,Dilation_Average,Erosion_Average,C_Kurtosis,C_ Max,C_Skewness,C_Variance,D_Kurtosis,D_ Max,D_Skewness,D_Variance,...,E_Skewness,E_Variance,O_Kurtosis,O_ Max,O_Skewness,O_Variance,Nc_D,Nc_E,Nmsw_D,Class
0,3.755598,-3.226451,2.283225,-0.56201,-0.991294,-0.322388,4.944863,2.542558,0.628872,2.007001,...,-1.830575,-1.276447,1.96102,3.277567,0.889032,3.164783,0.295673,1.950114,98.476944,Very_Light
1,2.029311,-2.656245,0.535858,-1.450874,-1.604588,-0.763448,-0.647204,0.819542,-0.531409,2.131177,...,-1.67417,-2.341096,3.800785,1.124597,-1.832675,1.956626,1.789017,-4.399744,100.909261,Very_Light
2,-0.980784,-1.576063,0.926878,-1.408121,-1.320788,4.833189,0.958903,1.684579,0.189962,0.112178,...,1.539216,-1.938536,4.236684,-0.695274,1.995591,-0.266569,-2.076487,-2.324928,99.02514,Very_Light
3,0.358878,-2.332299,2.695084,0.831103,0.675351,1.656316,0.970561,4.037049,2.957654,3.419815,...,0.551069,-0.038938,2.112835,-1.398246,2.876,0.272131,2.287493,-0.124482,99.931697,Very_Light
4,0.044279,-3.117623,2.798219,-1.768448,-3.078083,2.43262,3.700591,-2.238318,0.801603,3.07672,...,2.461441,1.370984,1.275057,0.052198,-0.562948,-1.394126,-0.646807,-0.488891,102.073919,Very_Light


In [3]:
dfY=df['Class'] #Response variable
dfX=df.drop(['Class'],axis=1) #Feature Vector

In [4]:
# a function to normalise the data
# this is veryimportant for a scale dependent ML model
def normaliseData(D):
    scaler = preprocessing.StandardScaler().fit(D)
    D_scaled=scaler.transform(D)
    return D_scaled

# a function to perform train test split
def dataSplit(X,Y):
    x_train, x_test, y_train, y_test = train_test_split(X,Y,test_size=0.33, random_state=42)
    return x_train,x_test,y_train,y_test

In [7]:
"""Make a forward feature selection method"""
def selectFeatureKNN(X,Y,model):
    sfs1 = SFS(model, k_features=7, forward=True, floating=False, verbose=2,scoring='accuracy',cv=10)
    sfs1=sfs1.fit(X,Y)
    return sfs1.k_feature_names_

# "Make a KNN Model and Find its accracy"
def KNN(feature_vector,response,k,confusion_matrix=False):
    #first we need to standardised the data, only the feature vector
    # X is feature vector
    # Y is response variable
    # k is k value for KNN
    normalise_data=normaliseData(feature_vector)
    # split the data in train test split
    x_train,x_test,y_train,y_test=dataSplit(feature_vector,response)
    
    # apply the mode
    neigh = KNeighborsClassifier(n_neighbors=k)
    features=selectFeatureKNN(x_train,y_train,neigh)
    print(features)
    x_train=x_train[list(features)];x_test=x_test[list(features)]
    # fit x_train and y_train
    neigh.fit(x_train, y_train)
    print("For Value of k=",k,end=' ')
    if confusion_matrix==True:
        # Make prediction with the made model on x_train data
        y_pred=neigh.predict(x_test)
        # instead of showing accuracy we will deal with confusion matrix
        # code ref: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.ConfusionMatrixDisplay.html#sklearn.metrics.ConfusionMatrixDisplay.from_predictions
        ConfusionMatrixDisplay.from_predictions(y_test, y_pred)
        plt.figure(figsize=(3,4))
        plt.show()
    else:
        print("Accuracy is:",neigh.score(x_test,y_test,sample_weight=None))

In [8]:
#testing
for k in range(1,10):
    KNN(dfX,dfY,k)
# score(X, y, sample_weight=None)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.8s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  21 out of  21 | elapsed:   16.7s finished

[2022-02-13 15:26:13] Features: 1/7 -- score: 0.8937089010914514[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.8s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  20 out of  20 | elapsed:   16.4s finished

[2022-02-13 15:26:29] Features: 2/7 -- score: 0.898851870003812[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.9s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  19 out of  19 | elapsed:   16.5s finished

[2022-02-13 15:26:46] Features: 3/7 -- score: 0.8977338018761717[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 o

('Dilation_Average', 'Erosion_Average', 'C_Skewness', 'C_Variance', 'E_ Max', 'O_ Max', 'Nmsw_D')
For Value of k= 1 Accuracy is: 0.8801634135270087


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.8s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  21 out of  21 | elapsed:   17.4s finished

[2022-02-13 15:28:13] Features: 1/7 -- score: 0.892813990938097[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.9s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  20 out of  20 | elapsed:   17.1s finished

[2022-02-13 15:28:31] Features: 2/7 -- score: 0.8944543354104362[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.9s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  19 out of  19 | elapsed:   17.1s finished

[2022-02-13 15:28:48] Features: 3/7 -- score: 0.8952745076466059[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 o

('C_Kurtosis', 'C_ Max', 'D_Kurtosis', 'E_Kurtosis', 'O_ Max', 'Nc_D', 'Nmsw_D')
For Value of k= 2 Accuracy is: 0.8821304282039643


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.8s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  21 out of  21 | elapsed:   17.2s finished

[2022-02-13 15:30:21] Features: 1/7 -- score: 0.9113007064816946[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.9s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  20 out of  20 | elapsed:   17.1s finished

[2022-02-13 15:30:38] Features: 2/7 -- score: 0.9146545218940423[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.9s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  19 out of  19 | elapsed:   17.1s finished

[2022-02-13 15:30:55] Features: 3/7 -- score: 0.9154000117802517[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 

('C_Kurtosis', 'C_ Max', 'C_Variance', 'O_Kurtosis', 'O_ Max', 'Nc_D', 'Nmsw_D')
For Value of k= 3 Accuracy is: 0.9024058102587381


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.8s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  21 out of  21 | elapsed:   17.2s finished

[2022-02-13 15:32:30] Features: 1/7 -- score: 0.9120462519351286[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.9s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  20 out of  20 | elapsed:   17.2s finished

[2022-02-13 15:32:48] Features: 2/7 -- score: 0.9156971852978014[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.9s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  19 out of  19 | elapsed:   17.3s finished

[2022-02-13 15:33:05] Features: 3/7 -- score: 0.9151015046493096[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 

('C_Kurtosis', 'C_Skewness', 'D_Kurtosis', 'D_Skewness', 'E_Kurtosis', 'O_ Max', 'Nmsw_D')
For Value of k= 4 Accuracy is: 0.8978665456196097


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.9s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  21 out of  21 | elapsed:   19.4s finished

[2022-02-13 15:34:49] Features: 1/7 -- score: 0.9174126566578982[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.9s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  20 out of  20 | elapsed:   17.6s finished

[2022-02-13 15:35:07] Features: 2/7 -- score: 0.920020704347913[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.9s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  19 out of  19 | elapsed:   17.8s finished

[2022-02-13 15:35:24] Features: 3/7 -- score: 0.9220332380911103[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 o

('D_Kurtosis', 'D_ Max', 'D_Skewness', 'O_ Max', 'O_Skewness', 'Nc_E', 'Nmsw_D')
For Value of k= 5 Accuracy is: 0.911484339536995


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.9s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  21 out of  21 | elapsed:   18.2s finished

[2022-02-13 15:37:06] Features: 1/7 -- score: 0.9177849014959808[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.9s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  20 out of  20 | elapsed:   19.0s finished

[2022-02-13 15:37:25] Features: 2/7 -- score: 0.9211388836100024[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.9s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  19 out of  19 | elapsed:   17.9s finished

[2022-02-13 15:37:43] Features: 3/7 -- score: 0.9221820471187838[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 

('C_Kurtosis', 'C_Variance', 'D_Kurtosis', 'D_Skewness', 'D_Variance', 'O_Variance', 'Nmsw_D')
For Value of k= 6 Accuracy is: 0.9051293690422152


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.8s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  21 out of  21 | elapsed:   17.5s finished

[2022-02-13 15:39:25] Features: 1/7 -- score: 0.9203935048582425[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.9s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  20 out of  20 | elapsed:   17.3s finished

[2022-02-13 15:39:42] Features: 2/7 -- score: 0.9228530213567071[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.9s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  19 out of  19 | elapsed:   17.9s finished

[2022-02-13 15:40:00] Features: 3/7 -- score: 0.9230772906754863[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 

('C_ Max', 'C_Skewness', 'C_Variance', 'D_ Max', 'D_Variance', 'O_ Max', 'Nmsw_D')
For Value of k= 7 Accuracy is: 0.910425177787865


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.9s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  21 out of  21 | elapsed:   17.7s finished

[2022-02-13 15:41:42] Features: 1/7 -- score: 0.9203187669410576[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.9s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  20 out of  20 | elapsed:   17.5s finished

[2022-02-13 15:41:59] Features: 2/7 -- score: 0.9234495910807936[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.9s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  19 out of  19 | elapsed:   17.8s finished

[2022-02-13 15:42:17] Features: 3/7 -- score: 0.9238220026205504[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 

('C_Kurtosis', 'C_ Max', 'C_Skewness', 'D_Variance', 'E_Kurtosis', 'O_ Max', 'Nmsw_D')
For Value of k= 8 Accuracy is: 0.9131487365713421


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.8s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  21 out of  21 | elapsed:   17.9s finished

[2022-02-13 15:43:59] Features: 1/7 -- score: 0.9228534658945045[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.9s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  20 out of  20 | elapsed:   17.6s finished

[2022-02-13 15:44:17] Features: 2/7 -- score: 0.9246416191844731[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  19 out of  19 | elapsed:   17.9s finished

[2022-02-13 15:44:35] Features: 3/7 -- score: 0.925759909581012[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 o

('C_Kurtosis', 'D_Kurtosis', 'D_ Max', 'E_Kurtosis', 'E_Variance', 'O_ Max', 'Nmsw_D')
For Value of k= 9 Accuracy is: 0.9128461189287336
