In [1]:
import pandas as pd
from sklearn import tree
import matplotlib.pyplot as mp
import seaborn as sb
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import numpy as np


In [2]:
# Download the MNIST digits dataset ("mnist_784", version 1) from OpenML.
# as_frame=True is passed explicitly so that the features come back as a pandas
# DataFrame and the labels as a pandas Series regardless of the installed
# scikit-learn's default; later cells call the pandas-only `.info()` on both.
mnist = fetch_openml(name="mnist_784", version=1, as_frame=True)
X, y = mnist.data, mnist.target

In [3]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [4]:
# Summarize the feature table: index range, number of columns, dtypes and memory usage
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 70000 entries, 0 to 69999
Columns: 784 entries, pixel1 to pixel784
dtypes: float64(784)
memory usage: 418.7 MB


In [None]:
# Display the (rows, columns) dimensions of the feature table
X.shape



In [5]:
# Summarize the label Series: non-null count, dtype and memory usage
y.info()

<class 'pandas.core.series.Series'>
RangeIndex: 70000 entries, 0 to 69999
Series name: class
Non-Null Count  Dtype   
--------------  -----   
70000 non-null  category
dtypes: category(1)
memory usage: 68.9 KB


In [6]:

# Cast the labels of both splits to plain integers in a single pass
y_train, y_test = (labels.astype(int) for labels in (y_train, y_test))

In [None]:
# Fit a linear-kernel SVM on the raw (unscaled) training features.
# NOTE(review): per the scikit-learn docs, SVC always trains one-vs-one
# internally; decision_function_shape='ovr' only changes the shape of the
# decision_function output. Confirm this matches the intended
# "one-versus-all" setup.
linear_svm_classifier = SVC(
    kernel='linear',
    decision_function_shape='ovr',
)
linear_svm_classifier.fit(X_train, y_train)

In [None]:
# Training accuracy of the unscaled linear SVM: share of training samples
# whose predicted label matches the true label
linear_svm_train_predictions = linear_svm_classifier.predict(X_train)
linear_svm_train_accuracy = accuracy_score(y_train, linear_svm_train_predictions)
print(f"Training accuracy of Linear SVM: {linear_svm_train_accuracy:.2%}")


In [None]:
# Learn the scaling statistics from the training features only,
# then apply them to produce the standardized training matrix
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)

In [None]:
# Fit the same linear-kernel SVM configuration, this time on the
# standardized training features
scaled_linear_svm_classifier = SVC(
    kernel='linear',
    decision_function_shape='ovr',
)
scaled_linear_svm_classifier.fit(X_train_scaled, y_train)

In [None]:
# Training accuracy of the linear SVM that was fitted on standardized features
scaled_linear_svm_train_predictions = scaled_linear_svm_classifier.predict(X_train_scaled)
scaled_linear_svm_train_accuracy = accuracy_score(y_train, scaled_linear_svm_train_predictions)
print(f"Training accuracy of Scaled Linear SVM: {scaled_linear_svm_train_accuracy:.2%}")


In [None]:
# Fit a non-linear SVM (RBF kernel) on the standardized training features
rbf_svm_classifier = SVC(
    kernel='rbf',
    decision_function_shape='ovr',
)
rbf_svm_classifier.fit(X_train_scaled, y_train)

In [None]:
# Training accuracy of the RBF-kernel SVM on the standardized training features
rbf_svm_train_predictions = rbf_svm_classifier.predict(X_train_scaled)
rbf_svm_train_accuracy = accuracy_score(y_train, rbf_svm_train_predictions)
print(f"Training accuracy of RBF Kernel SVM: {rbf_svm_train_accuracy:.2%}")


In [None]:
# Scale the test features with the scaler that was already fitted on the
# training data (transform only, no refit), so the test set does not
# influence the scaling statistics
X_test_scaled = scaler.transform(X_test)

In [None]:
# Predict test labels; each model receives the feature representation it was
# trained on (raw features for the unscaled model, standardized for the others)
linear_svm_predictions = linear_svm_classifier.predict(X_test)
scaled_linear_svm_predictions, rbf_svm_predictions = (
    model.predict(X_test_scaled)
    for model in (scaled_linear_svm_classifier, rbf_svm_classifier)
)


In [None]:
# Per-digit report on the test set. For every digit the labels and predictions
# are reduced to boolean "is this digit?" vectors, and the accuracy of that
# digit-vs-rest comparison is printed for each of the three models.
# NOTE(review): samples of other digits that are correctly predicted as
# "not this digit" count as hits here; if per-class recall was intended,
# restrict the comparison to the rows where y_test == digit.
for digit in range(10):
    is_digit = y_test == digit
    linear_svm_accuracy = accuracy_score(is_digit, linear_svm_predictions == digit)
    scaled_linear_svm_accuracy = accuracy_score(is_digit, scaled_linear_svm_predictions == digit)
    rbf_svm_accuracy = accuracy_score(is_digit, rbf_svm_predictions == digit)
    # One print call; the trailing empty string yields the blank separator line
    print(
        f"Accuracy for digit {digit}:",
        f"Linear SVM: {linear_svm_accuracy:.2%}",
        f"Scaled Linear SVM: {scaled_linear_svm_accuracy:.2%}",
        f"RBF Kernel SVM: {rbf_svm_accuracy:.2%}",
        "",
        sep="\n",
    )