In [1]:
import mnist
import numpy as np
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

# Build the MNIST loader; 'mnist' is presumably the directory holding the
# raw dataset files -- confirm against the local layout.
mndata = mnist.MNIST('mnist')

# Raw splits exactly as returned by the loader.
train_img, train_label = mndata.load_training()
test_img, test_label = mndata.load_testing()

# NumPy copies used by the following cells: images are cast to float64,
# labels keep whatever dtype NumPy infers for them.
train_images = np.array(train_img, dtype=np.float64)
train_labels = np.array(train_label)
test_images = np.array(test_img, dtype=np.float64)
test_labels = np.array(test_label)

In [31]:
def _first_n_per_class(images, labels, per_class, n_classes=10):
    """Pick the first `per_class` samples of every class, grouped by class.

    Parameters
    ----------
    images : np.ndarray
        Sample array indexed along its first axis.
    labels : np.ndarray
        Integer class label for each row of `images`.
    per_class : int
        Maximum number of samples kept per class; a class with fewer
        occurrences contributes all of them.
    n_classes : int, default 10
        Classes are taken to be the integers ``0 .. n_classes - 1``.

    Returns
    -------
    (list, list)
        Selected rows of `images` and the matching labels as Python ints,
        ordered class by class (all 0s first, then all 1s, ...).
    """
    picked_images = []
    picked_labels = []
    for digit in range(n_classes):
        # Positions of this class in original order, truncated to the quota.
        idx = np.where(labels == digit)[0][:per_class]
        picked_images.extend(images[j] for j in idx)
        picked_labels.extend([digit] * len(idx))
    return picked_images, picked_labels


# Balanced subsets: 1000 training and 100 test samples per digit.
train_img, train_label = _first_n_per_class(train_images, train_labels, per_class=1000)
test_img, test_label = _first_n_per_class(test_images, test_labels, per_class=100)

In [32]:
# Sanity check of the subset sizes: test subset first, then training subset.
print(np.shape(test_img))
print(np.shape(train_img))

(1000, 784)
(10000, 784)


In [33]:
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score
import time

n_components = 16

# Fit PCA on the training images only, so the test subset does not influence
# the projection. random_state is fixed so that reruns give the same
# components when scikit-learn selects a randomized solver.
pca = PCA(n_components=n_components, random_state=0)
train_img_pca = pca.fit_transform(train_img)

# Train SVM classifier (default SVC settings) on the reduced features.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed intervals; time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()
svm = SVC()
svm.fit(train_img_pca, train_label)
train_time = time.perf_counter() - start_time

# Project test images onto the PCA space learned from the training data
test_img_pca = pca.transform(test_img)

# Predict test labels using SVM classifier (only prediction is timed)
start_time = time.perf_counter()
pred_label = svm.predict(test_img_pca)
test_time = time.perf_counter() - start_time

# Calculate accuracy score (fraction of correct predictions, in [0, 1])
accuracy = accuracy_score(test_label, pred_label)

print(f'Training time: {train_time:.4f} seconds')
print(f'Testing time: {test_time:.4f} seconds')
print(f'Accuracy score: {accuracy:.4f}')

Training time: 0.5336 seconds
Testing time: 0.1513 seconds
Accuracy score: 0.9420


In [34]:
from sklearn.svm import SVC
import time

# Convert the sample lists to NumPy arrays of shape (n_samples, n_features).
# This is a type conversion only -- nothing is reshaped -- and it is safe to
# re-run because np.array() on an existing array just copies it.
train_img = np.array(train_img)
test_img = np.array(test_img)


print(train_img.shape)
# Train a linear-kernel SVM directly on the raw pixel features (no PCA).
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed intervals; time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()
svm = SVC(kernel='linear', C=1)
svm.fit(train_img, train_label)
train_time = time.perf_counter() - start_time

# Predict labels of test data (only prediction is timed)
start_time = time.perf_counter()
pred_label = svm.predict(test_img)
predict_time = time.perf_counter() - start_time

# Calculate classification accuracy as a percentage. test_label is a plain
# list, so it is converted explicitly to guarantee an elementwise comparison.
accuracy = np.mean(pred_label == np.asarray(test_label)) * 100

print(f"Training time: {train_time:.5f} seconds")
print(f"Prediction time: {predict_time:.5f} seconds")
print(f"Accuracy: {accuracy:.2f}%")

(10000, 784)
Training time: 2.65666 seconds
Prediction time: 0.60522 seconds
Accuracy: 90.60%
