In [1]:
import numpy as np
import pandas as pd
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.svm import SVC
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score 
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
import time


In [3]:
# Read both MNIST splits from CSV and convert them straight to NumPy arrays.
# Column 0 is used as the label; the remaining columns are used as features.
train_data = pd.read_csv(r"./mnist_train.csv").to_numpy()   # train_data shape: (60000, 785)
test_data = pd.read_csv(r"./mnist_test.csv").to_numpy()     # test_data shape: (10000, 785)
m, n = train_data.shape

# Labels: first column of each split.
y_train = train_data[:, 0]              # y_train shape: (60000,)
y_test = test_data[:, 0]                # y_test shape: (10000,)

# Features: every other column, normalised by dividing by 255.
X_train = train_data[:, 1:] / 255.0     # X_train shape: (60000, 784)
X_test = test_data[:, 1:] / 255.0       # X_test shape: (10000, 784)

In [None]:
# Multi-class SVM on all ten digits.
# scikit-learn's SVC always trains its multi-class model one-vs-one internally;
# decision_function_shape only selects the layout returned by decision_function().
# 'ovo' is requested so the setting agrees with the variable name and with the
# "One-vs-One" label used when the accuracy is reported. Class predictions from
# predict() are not affected by this choice.
svm_ovo = SVC(decision_function_shape='ovo')
svm_ovo.fit(X_train, y_train)

In [None]:
# Predict the test split with the multi-class SVM and report its accuracy.
y_pred_ovo = svm_ovo.predict(X_test)
ovo_accuracy = accuracy_score(y_test, y_pred_ovo)
print("One-vs-One Accuracy:", ovo_accuracy)

One-vs-One Accuracy: 0.9792


In [3]:
def classify(digit, y):
    """Build binary "is this `digit`?" labels from multi-class labels.

    Parameters
    ----------
    digit : scalar
        The label that should be treated as the positive class.
    y : array-like
        Labels to binarise.

    Returns
    -------
    numpy.ndarray
        Integer array shaped like ``y`` holding 1 where the label equals
        ``digit`` and 0 everywhere else.
    """
    # Vectorised comparison: avoids a Python-level loop over every label and
    # yields an integer dtype even when `y` is empty.
    return (np.asarray(y) == digit).astype(int)

In [32]:
def binary_digit_classifier(X_train, y_train, digit):
    """Fit an RBF-kernel SVC that separates `digit` from every other label.

    `y_train` is converted to 0/1 targets with `classify` before fitting.
    Returns the fitted SVC.
    """
    binary_targets = classify(digit=digit, y=y_train)
    model = SVC(kernel='rbf')
    model.fit(X_train, binary_targets)
    return model

def test_classifier(svm: SVC, X_test, y_test, digit):
    """Score a binary digit classifier on a test set.

    Parameters
    ----------
    svm : SVC
        Fitted binary classifier for `digit`.
    X_test : array-like
        Test features passed to ``svm.predict``.
    y_test : array-like
        Multi-class test labels; binarised against `digit` with `classify`.
    digit : scalar
        The label treated as the positive class.

    Returns
    -------
    tuple
        ``(precision, f1, recall)`` -- note the order: F1 is the middle element.
    """
    # Binarise the ground truth once instead of once per metric.
    y_true = classify(digit, y_test)
    y_pred = svm.predict(X_test)

    p = precision_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    r = recall_score(y_true, y_pred)
    return p, f1, r

In [36]:
# Binary "is it a 1?" classifier whose class probabilities are inspected afterwards.
# probability=True is required here: scikit-learn's SVC only provides
# predict_proba when it is constructed with that flag, so without it the
# predict_proba call that follows fails on a fresh kernel.
# random_state pins the internal shuffling used to fit the probability estimates.
svm = SVC(kernel='rbf', probability=True, random_state=0)
svm.fit(X_train, classify(digit=1, y=y_train))

In [14]:
# Per-sample class probabilities for the test set, shown as the cell's output
# (one column per class, ordered as in svm.classes_).
# NOTE(review): scikit-learn's SVC only provides predict_proba when it was
# constructed with probability=True -- confirm `svm` was built that way.
svm.predict_proba(X_test)

array([[9.99993916e-01, 6.08400604e-06],
       [9.99264125e-01, 7.35874890e-04],
       [2.94750646e-03, 9.97052494e-01],
       ...,
       [9.99997123e-01, 2.87724254e-06],
       [9.99923819e-01, 7.61807888e-05],
       [9.99870831e-01, 1.29168542e-04]], shape=(10000, 2))

In [None]:
# Train one binary classifier per digit and record how long each fit takes.
list_SVMS = []   # fitted classifiers; position i is the classifier for digit i
times = []       # training duration in seconds; position i belongs to digit i

for digit in range(10):
    # perf_counter() is monotonic and high resolution, so the measured interval
    # cannot be distorted by system clock adjustments the way time.time() can.
    start = time.perf_counter()
    # NOTE: this rebinds the notebook-level name `svm` on every iteration.
    svm = binary_digit_classifier(X_train, y_train, digit)
    end = time.perf_counter()
    elapsed = end - start
    print(f"Time taken to train for digit {digit}: {elapsed}")
    times.append(elapsed)
    list_SVMS.append(svm)
    

Time taken to train for digit 0: 45.31225085258484
Time taken to train for digit 1: 24.41713809967041
Time taken to train for digit 2: 100.49352979660034
Time taken to train for digit 3: 126.77765393257141
Time taken to train for digit 4: 75.64865446090698
Time taken to train for digit 5: 115.045480966568
Time taken to train for digit 6: 41.64907717704773
Time taken to train for digit 7: 48.88533139228821
Time taken to train for digit 8: 133.71622967720032
Time taken to train for digit 9: 115.80259919166565


In [33]:
# Per-digit test metrics; position i holds the score of the classifier for digit i.
lp, lr, lf1 = [], [], []

for digit in range(10):
    # test_classifier returns (precision, f1, recall) in that order.
    p, f1, r = test_classifier(list_SVMS[digit], X_test, y_test, digit)
    lf1.append(f1)
    lr.append(r)
    lp.append(p)

# Lowest value of each metric across the ten classifiers.
min_p, min_f1, min_r = min(lp), min(lf1), min(lr)













In [34]:
# Print precision, F1 and recall for each digit's classifier.
for i in range(10):
    print(f"Classifier metric for {i}:")
    # Plain string literals: these labels have no placeholders, so no f-prefix.
    print("Precision: ", lp[i])
    print("F1 score: ", lf1[i])
    print("Recall: ", lr[i])
    print()

Classifier metric for 0:
Precision:  0.9917780061664954
F1 score:  0.9882232462877624
Recall:  0.9846938775510204

Classifier metric for 1:
Precision:  0.9955595026642984
F1 score:  0.9915966386554622
Recall:  0.9876651982378855

Classifier metric for 2:
Precision:  0.9870646766169154
F1 score:  0.9739813451153657
Recall:  0.9612403100775194

Classifier metric for 3:
Precision:  0.9878419452887538
F1 score:  0.9764646970455684
Recall:  0.9653465346534653

Classifier metric for 4:
Precision:  0.988517745302714
F1 score:  0.9762886597938144
Recall:  0.9643584521384929

Classifier metric for 5:
Precision:  0.9918793503480279
F1 score:  0.9749144811858609
Recall:  0.9585201793721974

Classifier metric for 6:
Precision:  0.9914802981895634
F1 score:  0.981549815498155
Recall:  0.9718162839248434

Classifier metric for 7:
Precision:  0.9818731117824774
F1 score:  0.9648688767936665
Recall:  0.9484435797665369

Classifier metric for 8:
Precision:  0.9809926082365364
F1 score:  0.9672045809474

In [35]:
# Report which digit has the lowest value of each metric.
# list.index returns the first position holding the minimum, so ties resolve
# to the smallest digit.
print("Min precision at: ", lp.index(min_p))
print("Min F1 at: ", lf1.index(min_f1))
print("Min recall at: ", lr.index(min_r))
print()
# The "hardest" digit is picked by lowest precision only.
print(f"{lp.index(min_p)} is the hardest to classify\n")

Min preicision at:  9
Min F1 at:  9
Min recall at:  9

9 is the hardest to classify

