In [1]:
import numpy as np
import pandas as pd
import seaborn as sb
import os

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

In [2]:
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

names = [
    "Nearest Neighbors",
    "Decision Tree",
    "Random Forest",
    "Neural Network",
    "Naive Bayes"
]

classifiers = [
    KNeighborsClassifier(5),
    DecisionTreeClassifier(),
    RandomForestClassifier(),
    MLPClassifier(max_iter=5000),
    GaussianNB(),
]


In [3]:
def normalize_dataset(X, min, max):
    X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))
    X_scaled = X_std * (max - min) + min
    return X_scaled

In [4]:
datatrain = pd.read_csv('train_dataset_lib.csv')
# datatrain = datatrain.dropna
x_train = datatrain.iloc[:,1:-1]
y_train = datatrain.iloc[:,0]
print(y_train)
scaler = MinMaxScaler()
x_train_scaled =  scaler.fit_transform(x_train)
print(len(x_train_scaled), len(x_train), len(y_train))

0       0
1       0
2       0
3       0
4       0
       ..
1392    4
1393    4
1394    4
1395    4
1396    4
Name: label, Length: 1397, dtype: int64
1397 1397 1397


In [5]:
dataval = pd.read_csv('val_dataset_lib.csv')
x_val = dataval.iloc[:, 1:-1]
y_val = dataval.iloc[:,0]

scaler = MinMaxScaler()
x_val_scaled = scaler.fit_transform(x_val)
print(len(x_val_scaled), len(x_val), len(y_val))


438 438 438


In [6]:
y_train

0       0
1       0
2       0
3       0
4       0
       ..
1392    4
1393    4
1394    4
1395    4
1396    4
Name: label, Length: 1397, dtype: int64

In [7]:
y_val

0      0
1      0
2      0
3      0
4      0
      ..
433    4
434    4
435    4
436    4
437    4
Name: label, Length: 438, dtype: int64

In [9]:
for name, model in zip(names, classifiers):
  model.fit(x_train, y_train)
  y_pred_model = model.predict(x_val)
  print(f'## MODEL NAME : {name}')
  print(confusion_matrix(y_val, y_pred_model))
  print(classification_report(y_val, y_pred_model,zero_division=0))
  print('\n')

## MODEL NAME : Nearest Neighbors
[[86 11  9  3 15]
 [13 32 13  3  4]
 [16  7 68  0  6]
 [ 0  2  0 70  0]
 [29 17 17  3 14]]
              precision    recall  f1-score   support

           0       0.60      0.69      0.64       124
           1       0.46      0.49      0.48        65
           2       0.64      0.70      0.67        97
           3       0.89      0.97      0.93        72
           4       0.36      0.17      0.24        80

    accuracy                           0.62       438
   macro avg       0.59      0.61      0.59       438
weighted avg       0.59      0.62      0.60       438



## MODEL NAME : Decision Tree
[[60 27 22  2 13]
 [16 26  9  5  9]
 [22 14 39  2 20]
 [ 1  9  2 51  9]
 [21 20 10  2 27]]
              precision    recall  f1-score   support

           0       0.50      0.48      0.49       124
           1       0.27      0.40      0.32        65
           2       0.48      0.40      0.44        97
           3       0.82      0.71      0.76   