In [None]:
import pandas as pd
import numpy as np
from sklearn import svm
from sklearn import metrics
import matplotlib.pyplot as plt
import seaborn as sns; sns.set(font_scale=1.2)
import tensorflow as tf

In [None]:
# Load MNIST through Keras; load_data() yields a (train, test) pair, each of
# which is itself an (images, labels) pair.
mnist_train, mnist_test = tf.keras.datasets.mnist.load_data()
X_train, y_train = mnist_train
X_test, y_test = mnist_test

# Preview the first training image in grayscale as a quick sanity check.
plt.imshow(X_train[0], cmap='gray')
plt.show()

In [None]:
# Dump the raw pixel values of the first training image to see the data
# behind the picture shown above.
first_image = X_train[0]
print(first_image)

In [None]:
# Report the dimensions of every split before any reshaping is done.
for caption, array in (
    ("X_train shape", X_train),
    ("y_train shape", y_train),
    ("X_test shape", X_test),
    ("y_test shape", y_test),
):
    print(caption, array.shape)

- We have a shape problem when we call `fit` or `predict` on a scikit-learn model
- These functions only accept 2-dimensional input
- X: Training data of shape (n_samples, n_features)
- X_train is 3-dimensional (n_samples, 28, 28), therefore it has to be reshaped
- We do this by flattening each 2D array of 28x28 pixels into a 1D array of 784 pixels

In [None]:

# Flatten every image into a single feature row so the arrays have the
# (n_samples, n_features) layout that scikit-learn estimators expect.
# The sample count is taken from the array itself and -1 lets NumPy infer the
# feature count, so the cell works for any subset size and is safe to re-run
# on data that has already been flattened.
X_train = X_train.reshape(len(X_train), -1)
X_test = X_test.reshape(len(X_test), -1)

- We have 60,000 training images; fitting an SVM on all of them takes a long time, so we need to reduce the amount of data

- To avoid the long training time, we are just going to use a portion of the dataset. So let's take the first 100 elements and use those instead.

In [None]:
# Version 1 of the experiment: keep only the first 100 rows of each split.
# Slicing the leading axis alone is equivalent to the explicit `[:100, :]`.
X_train_v1, y_train_v1 = X_train[:100], y_train[:100]
X_test, y_test = X_test[:100], y_test[:100]

In [None]:
# Confirm the reduced training subset and the trimmed test split have the
# expected dimensions.
for caption, array in (
    ("X_train shape", X_train_v1),
    ("y_train shape", y_train_v1),
    ("X_test shape", X_test),
    ("y_test shape", y_test),
):
    print(caption, array.shape)

- Now we can create our machine learning model and train it!

- Create the support vector classification model using svm.SVC().
- Then fit the model with the reduced training set (X_train_v1 and y_train_v1).

In [None]:
# Support vector classifier with scikit-learn's default settings, trained on
# the reduced (v1) training subset.
model = svm.SVC()
model.fit(X=X_train_v1, y=y_train_v1)

In [None]:
# Predict labels for the whole test subset with the first model.
y_pred_v1 = model.predict(X_test)

# Position of the single test sample to inspect visually.
indexToCompare = 6

true_label = y_test[indexToCompare]
predicted_label = y_pred_v1[indexToCompare]
title = f'True: {true_label}, Prediction: {predicted_label}'

# The feature row is flat, so restore the 28x28 layout before drawing it.
image = X_test[indexToCompare].reshape(28, 28)

plt.title(title)
plt.imshow(image, cmap='gray')
plt.grid(None)
plt.axis('off')
plt.show()

In [None]:
# Fraction of test samples whose predicted digit matches the true digit.
acc = metrics.accuracy_score(y_true=y_test, y_pred=y_pred_v1)
print('\nAccuracy: ', acc)

- The main reason for the lack of accuracy is the sample size

- Let's look at the distribution of images for each digit in our sample using
- Seaborn's count plot

In [None]:
# Count how many images of each digit are in the training sample the first
# model was fitted on. The labels come from the training subset (y_train_v1),
# so the title says "Training Sample" rather than "Test Set".
digits = pd.DataFrame({"digit": y_train_v1})

fig, ax = plt.subplots()
sns.countplot(x="digit", data=digits, ax=ax)

ax.set_title("Distribution of Digit Images in Training Sample")
ax.set(xlabel='Digit')
ax.set(ylabel='Count')

plt.show()

- Use a confusion matrix and a heatmap to see which digits the classification model predicts correctly and which ones it confuses

In [None]:
from sklearn.metrics import ConfusionMatrixDisplay

# Tabulate true vs. predicted digits for the first model, using the classes
# the model learned as both the matrix order and the tick labels.
class_labels = model.classes_
cm = metrics.confusion_matrix(y_true=y_test, y_pred=y_pred_v1, labels=class_labels)

disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_labels)
disp.plot()

plt.show()


# Heat Map

In [None]:
# plt.subplots returns a (figure, axes) pair; unpack both so the axes object
# can be passed to seaborn and labelled directly.
fig, ax = plt.subplots(figsize=(9, 6))

# The matrix holds integer counts, so annotate with an integer format instead
# of the default general float format.
sns.heatmap(cm, annot=True, fmt="d", ax=ax)

# confusion_matrix puts true labels on the rows and predictions on the columns.
ax.set_title("SVC Prediction Accuracy")
ax.set_xlabel("Predicted Digit")
ax.set_ylabel("True Digit")

plt.show()

- Now we raise the number of training samples to 1000

In [None]:
# Version 2 of the experiment: train on the first 1000 samples while keeping
# the same 100-sample test subset.
X_train_v2, y_train_v2 = X_train[:1000], y_train[:1000]
X_test, y_test = X_test[:100], y_test[:100]

model_v2 = svm.SVC()
model_v2.fit(X=X_train_v2, y=y_train_v2)

In [None]:
# Predict labels for the whole test subset with the second model.
y_pred_v2 = model_v2.predict(X_test)

# Show the first nine test images in a 3x3 grid, each titled with its true
# and predicted digit. Enumerating the axes starts at test sample 0, so the
# first image is no longer skipped.
fig, axes = plt.subplots(3, 3, figsize=(10, 10))

for sample_idx, ax in enumerate(axes.flat):
    title = f'True: {y_test[sample_idx]}, Prediction: {y_pred_v2[sample_idx]}'

    ax.set_title(title)
    # Feature rows are flat; restore the 28x28 layout for display.
    ax.imshow(X_test[sample_idx].reshape(28, 28), cmap='gray')
    ax.grid(False)
    ax.axis('off')

fig.tight_layout()  # Adjust spacing between subplots for a clean layout
plt.show()

 


In [None]:
# Fraction of test samples the second model classified correctly.
acc_v2 = metrics.accuracy_score(y_true=y_test, y_pred=y_pred_v2)
print('\nAccuracy: ', acc_v2)

In [None]:
# Confusion matrix for the second model. The label order must come from
# model_v2 (the estimator that produced y_pred_v2), not from the first model.
# ConfusionMatrixDisplay is reached through the already-imported `metrics`
# module, so no extra import is needed in this cell.
cm = metrics.confusion_matrix(y_test, y_pred_v2, labels=model_v2.classes_)
disp = metrics.ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=model_v2.classes_)
disp.plot()

plt.show()