# Preprocessing Dataset

In [None]:
import pickle

file_path1, file_path2 = 'x_data.pickle', 'y_data.pickle'

# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only point these paths at files from a trusted source.

# Read the first pickle in binary mode and bind its contents to X
with open(file_path1, mode='rb') as file1:
    X = pickle.load(file1)

# Read the second pickle in binary mode and bind its contents to Y
with open(file_path2, mode='rb') as file2:
    Y = pickle.load(file2)

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

# Flatten each sample into one feature row (n_samples, n_features),
# which is the 2-D layout the estimators below are fitted on.
x_train = x_train.reshape(len(x_train), -1)
x_test = x_test.reshape(len(x_test), -1)

# Reduce each label row to the index of its largest entry so y is 1-D
# (presumably Y is one-hot encoded -- TODO confirm).
y_train = np.argmax(y_train, axis=1)
y_test = np.argmax(y_test, axis=1)

In [4]:
# Drop the names of the raw arrays; only the train/test splits are used from here on.
# Re-running the split cell after this one requires reloading the pickles first.
del X
del Y

In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import seaborn as sns
from tensorflow import keras
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

# LogisticRegression

In [None]:
from sklearn.linear_model import LogisticRegression

# Logistic-regression classifier with the solver's iteration cap raised to 1000
model = LogisticRegression(
    max_iter=1000,
)

# Fit on the flattened training split
model.fit(X=x_train, y=y_train)

In [41]:
# Hard class predictions of the fitted model on the held-out split
y_pred = model.predict(X=x_test)

In [44]:
# Score the logistic-regression predictions against the held-out labels.
# Precision, recall and F1 are macro-averaged (unweighted mean over classes).
# MSE / MAE are computed on the integer class ids themselves.
accuracy = accuracy_score(y_test, y_pred)
precision, recall, f1 = (
    scorer(y_test, y_pred, average='macro')
    for scorer in (precision_score, recall_score, f1_score)
)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

print("Accuracy LogisticRegression Classifier: {:.2f}".format(accuracy))
print("Precision LogisticRegression Classifier: {:.2f}".format(precision))
print("Recall LogisticRegression Classifier: {:.2f}".format(recall))
print("F1 score LogisticRegression Classifier: {:.2f}".format(f1))
print("Mean square error LogisticRegression Classifier:{:.2f}".format(mse))
print("Mean Absolute Error Logistic Regression Classifier: {:.2f}".format(mae))


Accuracy LogisticRegression Classifier: 0.14
Precision LogisticRegression Classifier: 0.12
Recall LogisticRegression Classifier: 0.11
F1 score LogisticRegression Classifier: 0.06
Mean square error LogisticRegression Classifier:11.30
Mean Absolute Error Logistic Regression Classifier: 2.72


In [None]:
# Plotting confusion matrix
# The class labels are derived from the data instead of hard-coding nine of
# them: pd.DataFrame raises if the label count does not match the matrix shape.
# np.union1d returns the sorted union of the labels present in y_test and
# y_pred; passing it as `labels` pins the row/column order of the matrix.
class_labels = np.union1d(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred, labels=class_labels)
tick_names = [str(label) for label in class_labels]
cm_df = pd.DataFrame(cm, index=tick_names, columns=tick_names)

plt.figure(figsize=(10, 10))
# Dividing by the column sums scales every predicted-class column to 100%;
# a class that is never predicted has a zero column sum and shows up as NaN.
sns.heatmap(cm_df/np.sum(cm_df,axis = 0), annot=True, fmt='.2%', cmap='Blues')
plt.title('Confusion Matrix: LogisticRegression')
plt.ylabel('Actual Values')
plt.xlabel('Predicted Values')
plt.show()

In [None]:
# Plotting roc curve (one-vs-rest, one curve per class)
from sklearn.metrics import roc_curve

# roc_curve needs a continuous score per class. y_pred holds 1-D hard labels,
# so indexing it as y_pred[:, i] fails; use the class probabilities instead.
pred_prob = model.predict_proba(x_test)
# Column j of pred_prob belongs to model.classes_[j]; iterating over classes_
# avoids assuming a fixed number of classes.
class_ids = model.classes_
n_class = len(class_ids)

fpr = {}
tpr = {}
thresh = {}
for col, cls in enumerate(class_ids):
    fpr[cls], tpr[cls], thresh[cls] = roc_curve(y_test, pred_prob[:, col], pos_label=cls)

colors = ['orange', 'green', 'blue', 'red', 'purple', 'brown','yellow','black','pink','grey','coral', 'silver']
for col, cls in enumerate(class_ids):
    # Wrap around the palette so more classes than colours cannot raise IndexError.
    plt.plot(fpr[cls], tpr[cls], linestyle='--', color=colors[col % len(colors)], label='Class {}'.format(cls))
plt.title('ROC Curve')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc='best')
plt.show()

# Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Fit a 100-tree random forest (unbounded depth, fixed seed, progress logging on)
rfc = RandomForestClassifier(
    n_estimators=100,
    max_depth=None,
    random_state=42,
    verbose=True,
).fit(x_train, y_train)

# Hard class predictions on the held-out split
y_pred = rfc.predict(X=x_test)

In [None]:
# Evaluate the performance of the model
# Precision, recall and F1 are macro-averaged; MSE / MAE are computed on the
# integer class ids themselves.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')
f1 = f1_score(y_test, y_pred, average='macro')
mse = mean_squared_error(y_test,y_pred)
mae = mean_absolute_error(y_test, y_pred)

# Unpacking confusion_matrix(...).ravel() into (tn, fp, fn, tp) only works for
# a 2x2 matrix and raises ValueError for more classes. Derive one-vs-rest
# counts per class instead and macro-average the specificity, matching the
# averaging used for the other metrics.
conf_mat = confusion_matrix(y_test, y_pred)
tp = np.diag(conf_mat)                 # correct predictions per class
fp = conf_mat.sum(axis=0) - tp         # predicted as the class, actually another
fn = conf_mat.sum(axis=1) - tp         # actually the class, predicted as another
tn = conf_mat.sum() - (tp + fp + fn)   # everything else
specificity = np.mean(tn / (tn + fp))

print(f"Accuracy Random Forest Classifier: {accuracy:.2f}")
print(f"Precision Random Forest Classifier: {precision:.2f}")
print(f"Recall Random Forest Classifier: {recall:.2f}")
print(f"F1 score Random Forest Classifier: {f1:.2f}")
print(f"Mean square error Random Forest Classifier:{mse:.2f}")
print(f"Mean Absolute Error Random Forest  Classifier: {mae:.2f}")
print(f"Specificity Random Forest  Classifier: {specificity:.2f}")

In [None]:
# Confusion matrix for the random-forest predictions.
# The class labels are derived from the data instead of hard-coding nine of
# them: pd.DataFrame raises if the label count does not match the matrix shape.
class_labels = np.union1d(y_test, y_pred)  # sorted union of the labels seen
cm = confusion_matrix(y_test, y_pred, labels=class_labels)
tick_names = [str(label) for label in class_labels]
cm_df = pd.DataFrame(cm, index=tick_names, columns=tick_names)

plt.figure(figsize=(10, 10))
# Dividing by the column sums scales every predicted-class column to 100%;
# a class that is never predicted has a zero column sum and shows up as NaN.
sns.heatmap(cm_df/np.sum(cm_df,axis = 0), annot=True, fmt='.2%', cmap='Blues')
plt.title('Confusion Matrix: RandomForestClassifier')
plt.ylabel('Actual Values')
plt.xlabel('Predicted Values')
plt.show()

In [None]:
# One-vs-rest ROC curves for the random forest.
from sklearn.metrics import roc_curve

# Score with the random forest itself (`rfc`). The previous code used `model`,
# which is a different estimator, so the plot did not describe this classifier.
pred_prob = rfc.predict_proba(x_test)
# Column j of pred_prob belongs to rfc.classes_[j]; iterating over classes_
# avoids assuming a fixed number of classes.
class_ids = rfc.classes_
n_class = len(class_ids)

fpr = {}
tpr = {}
thresh = {}
for col, cls in enumerate(class_ids):
    fpr[cls], tpr[cls], thresh[cls] = roc_curve(y_test, pred_prob[:, col], pos_label=cls)

colors = ['orange', 'green', 'blue', 'red', 'purple', 'brown','yellow','black','pink','grey','coral', 'silver']
for col, cls in enumerate(class_ids):
    # Wrap around the palette so more classes than colours cannot raise IndexError.
    plt.plot(fpr[cls], tpr[cls], linestyle='--', color=colors[col % len(colors)], label='Class {}'.format(cls))
plt.title('ROC Curve')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc='best')
plt.show()

# Naive Bayes Classifier

In [None]:
from sklearn.naive_bayes import GaussianNB

In [None]:
# Fit a Gaussian naive Bayes classifier on the training split
nb = GaussianNB().fit(x_train, y_train)

# Hard class predictions on the held-out split
y_pred = nb.predict(X=x_test)

In [None]:
# Evaluate the performance of the model
# Precision, recall and F1 are macro-averaged; MSE / MAE are computed on the
# integer class ids themselves.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')
f1 = f1_score(y_test, y_pred, average='macro')
mse = mean_squared_error(y_test,y_pred)
mae = mean_absolute_error(y_test, y_pred)

# Unpacking confusion_matrix(...).ravel() into (tn, fp, fn, tp) only works for
# a 2x2 matrix and raises ValueError for more classes. Derive one-vs-rest
# counts per class instead and macro-average the specificity, matching the
# averaging used for the other metrics.
conf_mat = confusion_matrix(y_test, y_pred)
tp = np.diag(conf_mat)                 # correct predictions per class
fp = conf_mat.sum(axis=0) - tp         # predicted as the class, actually another
fn = conf_mat.sum(axis=1) - tp         # actually the class, predicted as another
tn = conf_mat.sum() - (tp + fp + fn)   # everything else
specificity = np.mean(tn / (tn + fp))

# The accuracy label previously read "Accuracy cr"; it now names the model
# like the other lines do.
print(f"Accuracy Naive Bayes Classifier: {accuracy:.2f}")
print(f"Precision Naive Bayes Classifier: {precision:.2f}")
print(f"Recall Naive Bayes Classifier: {recall:.2f}")
print(f"F1 score Naive Bayes Classifier: {f1:.2f}")
print(f"Mean square error Naive Bayes Classifier:{mse:.2f}")
print(f"Mean Absolute Error  Naive Bayes  Classifier: {mae:.2f}")
print(f"Specificity  Naive Bayes Classifier: {specificity:.2f}")

In [None]:
# Confusion matrix for the naive Bayes predictions.
# The class labels are derived from the data instead of hard-coding nine of
# them: pd.DataFrame raises if the label count does not match the matrix shape.
class_labels = np.union1d(y_test, y_pred)  # sorted union of the labels seen
cm = confusion_matrix(y_test, y_pred, labels=class_labels)

tick_names = [str(label) for label in class_labels]
cm_df = pd.DataFrame(cm, index=tick_names, columns=tick_names)
# Plotting the confusion matrix
plt.figure(figsize=(10, 10))
# Dividing by the column sums scales every predicted-class column to 100%;
# a class that is never predicted has a zero column sum and shows up as NaN.
sns.heatmap(cm_df /np.sum(cm_df,axis = 0), annot=True, fmt='.2%', cmap='Blues')
plt.title('Confusion Matrix: Gaussian Naive Bayes')
plt.ylabel('Actual Values')
plt.xlabel('Predicted Values')
plt.show()

In [None]:
# One-vs-rest ROC curves for the Gaussian naive Bayes classifier.
from sklearn.metrics import roc_curve

# Score with the naive Bayes estimator itself (`nb`). The previous code used
# `model`, which is a different estimator, so the plot did not describe this
# classifier.
pred_prob = nb.predict_proba(x_test)
# Column j of pred_prob belongs to nb.classes_[j]; iterating over classes_
# avoids assuming a fixed number of classes.
class_ids = nb.classes_
n_class = len(class_ids)

fpr = {}
tpr = {}
thresh = {}
for col, cls in enumerate(class_ids):
    fpr[cls], tpr[cls], thresh[cls] = roc_curve(y_test, pred_prob[:, col], pos_label=cls)

colors = ['orange', 'green', 'blue', 'red', 'purple', 'brown','yellow','black','pink','grey','coral', 'silver']
for col, cls in enumerate(class_ids):
    # Wrap around the palette so more classes than colours cannot raise IndexError.
    plt.plot(fpr[cls], tpr[cls], linestyle='--', color=colors[col % len(colors)], label='Class {}'.format(cls))
plt.title('ROC Curve')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc='best')
plt.show()

# Support Vector Machine

In [None]:
from sklearn.svm import SVC

In [None]:
# Fit an RBF-kernel support vector classifier (C=1, gamma='scale', fixed seed,
# solver logging on) on the training split
svm = SVC(
    kernel='rbf',
    C=1,
    gamma='scale',
    random_state=42,
    verbose=True,
).fit(x_train, y_train)

# Hard class predictions on the held-out split
y_pred = svm.predict(X=x_test)

In [None]:
# Evaluate the performance of the model
# Precision, recall and F1 are macro-averaged; MSE / MAE are computed on the
# integer class ids themselves.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')
f1 = f1_score(y_test, y_pred, average='macro')
mse = mean_squared_error(y_test,y_pred)
mae = mean_absolute_error(y_test, y_pred)

# Unpacking confusion_matrix(...).ravel() into (tn, fp, fn, tp) only works for
# a 2x2 matrix and raises ValueError for more classes. Derive one-vs-rest
# counts per class instead and macro-average the specificity, matching the
# averaging used for the other metrics.
conf_mat = confusion_matrix(y_test, y_pred)
tp = np.diag(conf_mat)                 # correct predictions per class
fp = conf_mat.sum(axis=0) - tp         # predicted as the class, actually another
fn = conf_mat.sum(axis=1) - tp         # actually the class, predicted as another
tn = conf_mat.sum() - (tp + fp + fn)   # everything else
specificity = np.mean(tn / (tn + fp))

# The accuracy label previously read "svmr" (typo); it now matches the others.
print(f"Accuracy svm: {accuracy:.2f}")
print(f"Precision svm: {precision:.2f}")
print(f"Recall svm: {recall:.2f}")
print(f"F1 score svm: {f1:.2f}")
print(f"Mean square error svm:{mse:.2f}")
print(f"Mean Absolute Error svm: {mae:.2f}")
print(f"Specificity svm: {specificity:.2f}")

In [None]:
# Confusion matrix for the SVM predictions.
# The class labels are derived from the data instead of hard-coding nine of
# them: pd.DataFrame raises if the label count does not match the matrix shape.
class_labels = np.union1d(y_test, y_pred)  # sorted union of the labels seen
cm = confusion_matrix(y_test, y_pred, labels=class_labels)
tick_names = [str(label) for label in class_labels]
cm_df = pd.DataFrame(cm, index=tick_names, columns=tick_names)

plt.figure(figsize=(10, 10))
# Dividing by the column sums scales every predicted-class column to 100%;
# a class that is never predicted has a zero column sum and shows up as NaN.
sns.heatmap(cm_df/np.sum(cm_df,axis = 0), annot=True, fmt='.2%', cmap='Blues')
plt.title('Confusion Matrix: svm')
plt.ylabel('Actual Values')
plt.xlabel('Predicted Values')
plt.show()

In [None]:
# One-vs-rest ROC curves for the support vector classifier.
from sklearn.metrics import roc_curve

# Score with the SVC itself (`svm`). The previous code used `model`, which is a
# different estimator. The SVC above is built without probability=True, so it
# has no predict_proba; roc_curve only needs a ranking score, so the
# decision-function values are used instead.
svm_scores = svm.decision_function(x_test)
if svm_scores.ndim == 1:
    # Binary case: decision_function returns a single column of scores for
    # classes_[1]; mirror it so there is one score column per class.
    svm_scores = np.column_stack([-svm_scores, svm_scores])
# Column j of svm_scores belongs to svm.classes_[j] (default 'ovr' shape).
class_ids = svm.classes_
n_class = len(class_ids)

fpr = {}
tpr = {}
thresh = {}
for col, cls in enumerate(class_ids):
    fpr[cls], tpr[cls], thresh[cls] = roc_curve(y_test, svm_scores[:, col], pos_label=cls)

colors = ['orange', 'green', 'blue', 'red', 'purple', 'brown','yellow','black','pink','grey','coral', 'silver']
for col, cls in enumerate(class_ids):
    # Wrap around the palette so more classes than colours cannot raise IndexError.
    plt.plot(fpr[cls], tpr[cls], linestyle='--', color=colors[col % len(colors)], label='Class {}'.format(cls))
plt.title('ROC Curve')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc='best')
plt.show()

# Gradient Boosting

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

In [None]:
# Train a gradient-boosting classifier with default hyper-parameters.
# random_state is fixed (as for the random forest and SVC cells) so repeated
# runs build the same trees and report the same metrics.
# NOTE: this rebinds `model`, which previously referred to the logistic regression.
model = GradientBoostingClassifier(random_state=42)
model.fit(x_train, y_train)

# Hard class predictions on the held-out split
y_pred = model.predict(x_test)

In [None]:
# Evaluate the performance of the model
# Precision, recall and F1 are macro-averaged; MSE / MAE are computed on the
# integer class ids themselves.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')
f1 = f1_score(y_test, y_pred, average='macro')
mse = mean_squared_error(y_test,y_pred)
mae = mean_absolute_error(y_test, y_pred)

# Unpacking confusion_matrix(...).ravel() into (tn, fp, fn, tp) only works for
# a 2x2 matrix and raises ValueError for more classes. Derive one-vs-rest
# counts per class instead and macro-average the specificity, matching the
# averaging used for the other metrics.
conf_mat = confusion_matrix(y_test, y_pred)
tp = np.diag(conf_mat)                 # correct predictions per class
fp = conf_mat.sum(axis=0) - tp         # predicted as the class, actually another
fn = conf_mat.sum(axis=1) - tp         # actually the class, predicted as another
tn = conf_mat.sum() - (tp + fp + fn)   # everything else
specificity = np.mean(tn / (tn + fp))

print(f"Accuracy GradientBoostingClassifier: {accuracy:.2f}")
print(f"Precision   GradientBoostingClassifier: {precision:.2f}")
print(f"Recall GradientBoostingClassifier: {recall:.2f}")
print(f"F1 score GradientBoostingClassifier: {f1:.2f}")
print(f"Mean square error GradientBoostingClassifier:{mse:.2f}")
print(f"Mean Absolute Error GradientBoostingClassifier: {mae:.2f}")
print(f"Specificity GradientBoostingClassifier: {specificity:.2f}")

In [None]:
# Confusion matrix for the gradient-boosting predictions.
# The class labels are derived from the data instead of hard-coding nine of
# them: pd.DataFrame raises if the label count does not match the matrix shape.
class_labels = np.union1d(y_test, y_pred)  # sorted union of the labels seen
cm = confusion_matrix(y_test, y_pred, labels=class_labels)
tick_names = [str(label) for label in class_labels]
cm_df = pd.DataFrame(cm, index=tick_names, columns=tick_names)

plt.figure(figsize=(10, 10))
# Dividing by the column sums scales every predicted-class column to 100%;
# a class that is never predicted has a zero column sum and shows up as NaN.
sns.heatmap(cm_df/np.sum(cm_df,axis = 0), annot=True, fmt='.2%', cmap='Blues')
plt.title('Confusion Matrix:GradientBoostingClassifier')
plt.ylabel('Actual Values')
plt.xlabel('Predicted Values')
plt.show()

In [None]:
# One-vs-rest ROC curves for the estimator currently bound to `model`
# (the gradient-boosting classifier when the cells are run in order).
from sklearn.metrics import roc_curve

# Compute the class probabilities once (previously predict_proba ran twice).
pred_prob = model.predict_proba(x_test)
# Column j of pred_prob belongs to model.classes_[j]; iterating over classes_
# replaces the hard-coded n_class = 10, which raises IndexError when the model
# has fewer classes.
class_ids = model.classes_
n_class = len(class_ids)

fpr = {}
tpr = {}
thresh = {}
for col, cls in enumerate(class_ids):
    fpr[cls], tpr[cls], thresh[cls] = roc_curve(y_test, pred_prob[:, col], pos_label=cls)

colors = ['orange', 'green', 'blue', 'red', 'purple', 'brown','yellow','black','pink','grey','coral', 'silver']
for col, cls in enumerate(class_ids):
    # Wrap around the palette so more classes than colours cannot raise IndexError.
    plt.plot(fpr[cls], tpr[cls], linestyle='--', color=colors[col % len(colors)], label='Class {}'.format(cls))
plt.title('ROC Curve')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc='best')
plt.show()

# Decision Tree

In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Train a DecisionTreeClassifier on the training data.
# random_state is fixed (as for the random forest and SVC cells): the tree
# builder permutes features at each split, so without a seed equally good
# splits can be chosen differently from run to run.
# NOTE: this rebinds `model` to the decision tree.
model = DecisionTreeClassifier(random_state=42)
model.fit(x_train, y_train)

# Make predictions on the test data
y_pred = model.predict(x_test)

In [None]:
# Evaluate the performance of the model
# Precision, recall and F1 are macro-averaged; MSE / MAE are computed on the
# integer class ids themselves.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')
f1 = f1_score(y_test, y_pred, average='macro')
mse = mean_squared_error(y_test,y_pred)
mae = mean_absolute_error(y_test, y_pred)

# Unpacking confusion_matrix(...).ravel() into (tn, fp, fn, tp) only works for
# a 2x2 matrix and raises ValueError for more classes. Derive one-vs-rest
# counts per class instead and macro-average the specificity, matching the
# averaging used for the other metrics.
conf_mat = confusion_matrix(y_test, y_pred)
tp = np.diag(conf_mat)                 # correct predictions per class
fp = conf_mat.sum(axis=0) - tp         # predicted as the class, actually another
fn = conf_mat.sum(axis=1) - tp         # actually the class, predicted as another
tn = conf_mat.sum() - (tp + fp + fn)   # everything else
specificity = np.mean(tn / (tn + fp))

print(f"Accuracy Decision Tree Classifier: {accuracy:.2f}")
print(f"Precision Decision Tree Classifier: {precision:.2f}")
print(f"Recall Decision Tree Classifier: {recall:.2f}")
print(f"F1 score Decision Tree Classifier: {f1:.2f}")
print(f"Mean square error Decision Tree Classifier:{mse:.2f}")
print(f"Mean Absolute Error Decision Tree Classifier: {mae:.2f}")
print(f"Specificity Decision Tree Classifier: {specificity:.2f}")

In [None]:
# Confusion matrix for the decision-tree predictions.
# The class labels are derived from the data instead of hard-coding nine of
# them: pd.DataFrame raises if the label count does not match the matrix shape.
class_labels = np.union1d(y_test, y_pred)  # sorted union of the labels seen
cm = confusion_matrix(y_test, y_pred, labels=class_labels)
tick_names = [str(label) for label in class_labels]
cm_df = pd.DataFrame(cm, index=tick_names, columns=tick_names)

plt.figure(figsize=(10, 10))
# Dividing by the column sums scales every predicted-class column to 100%;
# a class that is never predicted has a zero column sum and shows up as NaN.
sns.heatmap(cm_df/np.sum(cm_df,axis = 0), annot=True, fmt='.2%', cmap='Blues')
plt.title('Confusion Matrix: DecisionTreeClassifier')
plt.ylabel('Actual Values')
plt.xlabel('Predicted Values')
plt.show()

In [None]:
# One-vs-rest ROC curves for the estimator currently bound to `model`
# (the decision tree when the cells are run in order).
from sklearn.metrics import roc_curve

# Compute the class probabilities once (previously predict_proba ran twice).
pred_prob = model.predict_proba(x_test)
# Column j of pred_prob belongs to model.classes_[j]; iterating over classes_
# replaces the hard-coded n_class = 10, which raises IndexError when the model
# has fewer classes.
class_ids = model.classes_
n_class = len(class_ids)

fpr = {}
tpr = {}
thresh = {}
for col, cls in enumerate(class_ids):
    fpr[cls], tpr[cls], thresh[cls] = roc_curve(y_test, pred_prob[:, col], pos_label=cls)

colors = ['orange', 'green', 'blue', 'red', 'purple', 'brown','yellow','black','pink','grey','coral', 'silver']
for col, cls in enumerate(class_ids):
    # Wrap around the palette so more classes than colours cannot raise IndexError.
    plt.plot(fpr[cls], tpr[cls], linestyle='--', color=colors[col % len(colors)], label='Class {}'.format(cls))
plt.title('ROC Curve')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc='best')
plt.show()

# KNN

In [6]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# Fit a 5-nearest-neighbours classifier on the training split
knn = KNeighborsClassifier(n_neighbors=5).fit(x_train, y_train)

# Hard class predictions on the held-out split
y_pred = knn.predict(X=x_test)

In [None]:
# NOTE(review): looks like a leftover debugging cell; it only prints a fixed
# marker string and nothing below reads from it.
print("test")

In [None]:
# Evaluate the kNN predictions (y_pred was produced by the kNN cell above).
# Precision, recall and F1 are macro-averaged; MSE / MAE are computed on the
# integer class ids themselves.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')
f1 = f1_score(y_test, y_pred, average='macro')
mse = mean_squared_error(y_test,y_pred)
# The mae assignment and the confusion-matrix unpacking used to share one line,
# which is a SyntaxError; they are separate statements now.
mae = mean_absolute_error(y_test, y_pred)

# Unpacking confusion_matrix(...).ravel() into (tn, fp, fn, tp) only works for
# a 2x2 matrix and raises ValueError for more classes. Derive one-vs-rest
# counts per class instead and macro-average the specificity, matching the
# averaging used for the other metrics.
conf_mat = confusion_matrix(y_test, y_pred)
tp = np.diag(conf_mat)                 # correct predictions per class
fp = conf_mat.sum(axis=0) - tp         # predicted as the class, actually another
fn = conf_mat.sum(axis=1) - tp         # actually the class, predicted as another
tn = conf_mat.sum() - (tp + fp + fn)   # everything else
specificity = np.mean(tn / (tn + fp))

print(f"Accuracy kNN classifier: {accuracy:.2f}")
print(f"Precision kNN classifier: {precision:.2f}")
print(f"Recall kNN classifier: {recall:.2f}")
print(f"F1 score kNN classifier: {f1:.2f}")
print(f"Mean square error kNN Classifier:{mse:.2f}")
print(f"Mean Absolute Error kNN Classifier: {mae:.2f}")
print(f"Specificity kNN Classifier: {specificity:.2f}")

In [None]:
# Confusion matrix for the kNN predictions.
# The class labels are derived from the data instead of hard-coding nine of
# them: pd.DataFrame raises if the label count does not match the matrix shape.
class_labels = np.union1d(y_test, y_pred)  # sorted union of the labels seen
cm = confusion_matrix(y_test, y_pred, labels=class_labels)
tick_names = [str(label) for label in class_labels]
cm_df = pd.DataFrame(cm, index=tick_names, columns=tick_names)

plt.figure(figsize=(10, 10))
# Dividing by the column sums scales every predicted-class column to 100%;
# a class that is never predicted has a zero column sum and shows up as NaN.
sns.heatmap(cm_df/np.sum(cm_df,axis = 0), annot=True, fmt='.2%', cmap='Blues')
plt.title('Confusion Matrix: knn')
plt.ylabel('Actual Values')
plt.xlabel('Predicted Values')
plt.show()

In [None]:
# One-vs-rest ROC curves for the k-nearest-neighbours classifier.
from sklearn.metrics import roc_curve

# Score with the kNN estimator itself (`knn`). The previous code used `model`,
# which is a different estimator, so the plot did not describe this classifier.
pred_prob = knn.predict_proba(x_test)
# Column j of pred_prob belongs to knn.classes_[j]; iterating over classes_
# avoids assuming a fixed number of classes.
class_ids = knn.classes_
n_class = len(class_ids)

fpr = {}
tpr = {}
thresh = {}
for col, cls in enumerate(class_ids):
    fpr[cls], tpr[cls], thresh[cls] = roc_curve(y_test, pred_prob[:, col], pos_label=cls)

colors = ['orange', 'green', 'blue', 'red', 'purple', 'brown','yellow','black','pink','grey','coral', 'silver']
for col, cls in enumerate(class_ids):
    # Wrap around the palette so more classes than colours cannot raise IndexError.
    plt.plot(fpr[cls], tpr[cls], linestyle='--', color=colors[col % len(colors)], label='Class {}'.format(cls))
plt.title('ROC Curve')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc='best')
plt.show()