In [9]:

import numpy as np
import gzip
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
import pandas as pd

# Helpers that read the gzip-compressed MNIST IDX image and label files
def load_mnist_images(filename, limit=None):
    """Read images from a gzip-compressed MNIST IDX3 file.

    Parameters
    ----------
    filename : str or path-like
        Path to the ``*-images-idx3-ubyte.gz`` file.
    limit : int or None, optional
        Maximum number of images to read from the start of the file.
        ``None`` or a negative value loads every image. Values larger than
        the count stored in the header are clamped to that count.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_images, rows * cols)`` with pixel values
        scaled from ``[0, 255]`` to ``[0.0, 1.0]``.

    Raises
    ------
    ValueError
        If the magic number is not that of an IDX3 unsigned-byte file, or if
        the file holds fewer pixel bytes than its header promises.
    """
    with gzip.open(filename, 'rb') as f:
        # Header: four big-endian 32-bit ints (magic, count, rows, cols).
        magic = int.from_bytes(f.read(4), 'big')
        if magic != 2051:
            raise ValueError(
                f"{filename!r} is not an IDX3 image file (magic number {magic}, expected 2051)"
            )
        num_images = int.from_bytes(f.read(4), 'big')
        rows = int.from_bytes(f.read(4), 'big')
        cols = int.from_bytes(f.read(4), 'big')

        # Never ask for more images than the header says exist; otherwise the
        # short read would only surface later as an opaque reshape error.
        if limit is None or limit < 0:
            limit = num_images
        else:
            limit = min(limit, num_images)

        size = limit * rows * cols
        buf = f.read(size)

    if len(buf) != size:
        raise ValueError(
            f"{filename!r} is truncated: expected {size} pixel bytes, got {len(buf)}"
        )

    data = np.frombuffer(buf, dtype=np.uint8).reshape(limit, rows * cols)
    return data / 255.0

def load_mnist_labels(filename, limit=None):
    """Read labels from a gzip-compressed MNIST IDX1 file.

    Parameters
    ----------
    filename : str or path-like
        Path to the ``*-labels-idx1-ubyte.gz`` file.
    limit : int or None, optional
        Maximum number of labels to read from the start of the file.
        ``None`` or a negative value loads every label. Values larger than
        the count stored in the header are clamped to that count.

    Returns
    -------
    numpy.ndarray
        1-D ``uint8`` array of labels, one byte per label.

    Raises
    ------
    ValueError
        If the magic number is not that of an IDX1 unsigned-byte file, or if
        the file holds fewer label bytes than its header promises.
    """
    with gzip.open(filename, 'rb') as f:
        # Header: two big-endian 32-bit ints (magic, count).
        magic = int.from_bytes(f.read(4), 'big')
        if magic != 2049:
            raise ValueError(
                f"{filename!r} is not an IDX1 label file (magic number {magic}, expected 2049)"
            )
        num_labels = int.from_bytes(f.read(4), 'big')

        if limit is None or limit < 0:
            limit = num_labels
        else:
            limit = min(limit, num_labels)

        buf = f.read(limit)

    # A short read would otherwise silently yield fewer labels than images.
    if len(buf) != limit:
        raise ValueError(
            f"{filename!r} is truncated: expected {limit} label bytes, got {len(buf)}"
        )

    return np.frombuffer(buf, dtype=np.uint8)

# Load subsets: only the first N_TRAIN training samples and N_TEST test samples
# are read from each file. The same constant is used for images and labels so
# the two can never drift apart.
N_TRAIN = 5000
N_TEST = 1000

X_train = load_mnist_images("train-images-idx3-ubyte.gz", N_TRAIN)
y_train = load_mnist_labels("train-labels-idx1-ubyte.gz", N_TRAIN)
X_test = load_mnist_images("t10k-images-idx3-ubyte.gz", N_TEST)
y_test = load_mnist_labels("t10k-labels-idx1-ubyte.gz", N_TEST)

# Every image row needs exactly one label; fail fast if a file came up short.
assert X_train.shape[0] == y_train.shape[0], "train images/labels are misaligned"
assert X_test.shape[0] == y_test.shape[0], "test images/labels are misaligned"




In [10]:
# Standardization: the scaler is fitted on the training split only and then
# applied unchanged to the test split, so no test statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# PCA: keep the first 50 principal components, again fitted on training data only.
# random_state is pinned because, for data of this size, scikit-learn's default
# 'auto' solver may choose randomized SVD, which would otherwise make the
# projected features (and every downstream score) vary between runs.
pca = PCA(n_components=50, random_state=42)
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)


In [11]:
# Fit one SVM per kernel on the PCA features and record its test-set metrics.
kernels = ['linear', 'poly', 'rbf']
results = {}

for kernel in kernels:
    # fit() returns the estimator itself, so construction and training chain.
    clf = SVC(kernel=kernel, gamma='scale').fit(X_train_pca, y_train)
    y_pred = clf.predict(X_test_pca)

    acc = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred, output_dict=True)
    results[kernel] = dict(accuracy=acc, report=report)

# Summary: one row per kernel, a single "accuracy" column.
summary_df = pd.DataFrame.from_dict(
    {name: {"accuracy": entry["accuracy"]} for name, entry in results.items()},
    orient="index",
)
summary_df


Unnamed: 0,accuracy
linear,0.888
poly,0.895
rbf,0.923


In [12]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Supervised projection of the standardized pixels onto 9 discriminant axes
# (fitted on the training split only).
lda = LinearDiscriminantAnalysis(n_components=9)
X_train_lda = lda.fit_transform(X_train_scaled, y_train)
X_test_lda  = lda.transform(X_test_scaled)

# Same three SVM kernels as on the PCA features, for a like-for-like comparison.
for kernel in ['linear', 'poly', 'rbf']:
    clf = SVC(kernel=kernel, gamma='scale')
    clf.fit(X_train_lda, y_train)
    y_pred = clf.predict(X_test_lda)
    print(f"SVM ({kernel}) on LDA accuracy: {accuracy_score(y_test, y_pred):.3f}")

# The deprecated `multi_class` argument is dropped: with the lbfgs solver and
# more than two classes, current scikit-learn fits a multinomial model by default.
# The model is named `log_reg_lda` rather than `lr` because `lr` is reused
# further down the notebook for a learning rate.
log_reg_lda = LogisticRegression(solver='lbfgs', max_iter=1000)
log_reg_lda.fit(X_train_lda, y_train)
y_pred_lr = log_reg_lda.predict(X_test_lda)
print(f"Logistic Regression on LDA accuracy: {accuracy_score(y_test, y_pred_lr):.3f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred_lr))


SVM (linear) on LDA accuracy: 0.830
SVM (poly) on LDA accuracy: 0.819
SVM (rbf) on LDA accuracy: 0.837




Logistic Regression on LDA accuracy: 0.829

Classification Report:
              precision    recall  f1-score   support

           0       0.93      0.93      0.93        85
           1       0.89      0.94      0.92       126
           2       0.88      0.76      0.81       116
           3       0.78      0.76      0.77       107
           4       0.78      0.87      0.82       110
           5       0.76      0.79      0.78        87
           6       0.92      0.82      0.87        87
           7       0.83      0.80      0.81        99
           8       0.75      0.80      0.77        89
           9       0.78      0.81      0.80        94

    accuracy                           0.83      1000
   macro avg       0.83      0.83      0.83      1000
weighted avg       0.83      0.83      0.83      1000



In [13]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report


# Logistic regression on the 50 PCA components.
# The deprecated `multi_class` argument is dropped: with the lbfgs solver and
# more than two classes, current scikit-learn fits a multinomial model by default.
clf = LogisticRegression(solver='lbfgs', max_iter=2000)
clf.fit(X_train_pca, y_train)

y_pred = clf.predict(X_test_pca)
acc = accuracy_score(y_test, y_pred)
print(f"Logistic Regression Accuracy: {acc:.3f}")

print(classification_report(y_test, y_pred))




Logistic Regression Accuracy: 0.870
              precision    recall  f1-score   support

           0       0.91      0.86      0.88        85
           1       0.97      0.98      0.98       126
           2       0.83      0.84      0.83       116
           3       0.86      0.83      0.84       107
           4       0.91      0.88      0.89       110
           5       0.83      0.85      0.84        87
           6       0.93      0.93      0.93        87
           7       0.83      0.86      0.84        99
           8       0.80      0.78      0.79        89
           9       0.82      0.86      0.84        94

    accuracy                           0.87      1000
   macro avg       0.87      0.87      0.87      1000
weighted avg       0.87      0.87      0.87      1000



In [14]:

import numpy as np
from tensorflow.keras.utils import to_categorical


# Restore the flattened pixel rows to 28x28 single-channel images for the CNN.
X_train_img, X_test_img = (
    flat.reshape(-1, 28, 28, 1) for flat in (X_train, X_test)
)

# One-hot encode the digit labels (10 classes) for categorical cross-entropy.
y_train_cat, y_test_cat = (
    to_categorical(labels, 10) for labels in (y_train, y_test)
)

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Imported here so this block stays self-contained; the other layers are
# already imported earlier in this cell.
from tensorflow.keras.layers import Input

# Small CNN: two conv/pool stages, then a dense classifier head with dropout.
# The input shape is declared with an explicit Input layer; passing
# `input_shape=` to the first Conv2D is what triggers the Keras warning
# captured in this cell's output.
model = Sequential([
    Input(shape=(28, 28, 1)),
    Conv2D(32, (3,3), activation='relu'),
    MaxPooling2D((2,2)),
    Conv2D(64, (3,3), activation='relu'),
    MaxPooling2D((2,2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax')
])

# categorical_crossentropy matches the one-hot targets built with to_categorical.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# 10% of the training data is set aside by Keras for per-epoch validation.
history = model.fit(
    X_train_img, y_train_cat,
    epochs=10,
    batch_size=128,
    validation_split=0.1
)

# Final score on the held-out test set.
test_loss, test_acc = model.evaluate(X_test_img, y_test_cat)
print(f"Test accuracy: {test_acc:.3f}")


Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 156ms/step - accuracy: 0.3944 - loss: 1.7970 - val_accuracy: 0.8520 - val_loss: 0.4858
Epoch 2/10
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 132ms/step - accuracy: 0.8353 - loss: 0.5483 - val_accuracy: 0.9260 - val_loss: 0.2673
Epoch 3/10
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 91ms/step - accuracy: 0.9007 - loss: 0.3423 - val_accuracy: 0.9500 - val_loss: 0.2054
Epoch 4/10
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 91ms/step - accuracy: 0.9302 - loss: 0.2436 - val_accuracy: 0.9580 - val_loss: 0.1749
Epoch 5/10
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 107ms/step - accuracy: 0.9371 - loss: 0.2222 - val_accuracy: 0.9620 - val_loss: 0.1597
Epoch 6/10
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 91ms/step - accuracy: 0.9522 - loss: 0.1638 - val_accuracy: 0.9640 - val_loss: 0.1378
Epoch 7/10
[1m36/36[0m [32m━━━━━━━━━━━━

In [15]:
import numpy as np

def one_hot(y, num_classes=10):
    """Return a one-hot encoding of the integer label array ``y``.

    The result is a float array of shape ``(y.size, num_classes)`` in which
    row ``i`` is all zeros except for a 1 in column ``y[i]``.
    """
    n_samples = y.size
    encoded = np.zeros((n_samples, num_classes))
    row_idx = np.arange(n_samples)
    # Fancy indexing pairs each row with its own label column.
    encoded[row_idx, y] = 1
    return encoded

# One-hot targets for the softmax cross-entropy loss.
Y_train = one_hot(y_train)
Y_test = one_hot(y_test)

# Network and optimisation hyper-parameters.
input_dim = X_train_pca.shape[1]
hidden_dim = 128
output_dim = 10
lr = 1e-2
epochs = 50

# Seeded generator: the original drew weights from NumPy's unseeded global RNG,
# so the printed loss curve and test accuracy changed on every run.
rng = np.random.default_rng(42)

# Weights are scaled by sqrt(2 / fan_in); biases start at zero.
W1 = rng.standard_normal((input_dim, hidden_dim)) * np.sqrt(2.0 / input_dim)
b1 = np.zeros((1, hidden_dim))
W2 = rng.standard_normal((hidden_dim, output_dim)) * np.sqrt(2.0 / hidden_dim)
b2 = np.zeros((1, output_dim))

# Full-batch gradient descent: one parameter update per epoch.
for epoch in range(epochs):

    # Forward pass: linear -> ReLU -> linear -> softmax.
    z1 = X_train_pca.dot(W1) + b1
    a1 = np.maximum(0, z1)
    z2 = a1.dot(W2) + b2
    # Subtracting the row-wise max keeps exp() from overflowing.
    exp_scores = np.exp(z2 - np.max(z2, axis=1, keepdims=True))
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

    # Mean cross-entropy; the small epsilon guards against log(0).
    loss = -np.mean(np.sum(Y_train * np.log(probs + 1e-8), axis=1))
    if epoch % 10 == 0:
        print(f"Epoch {epoch} – loss: {loss:.4f}")

    # Backward pass: gradient of the mean loss w.r.t. the output logits.
    delta3 = (probs - Y_train) / Y_train.shape[0]
    dW2 = a1.T.dot(delta3)
    db2 = np.sum(delta3, axis=0, keepdims=True)

    # Propagate through the ReLU: no gradient where the unit was inactive.
    delta2 = delta3.dot(W2.T)
    delta2[z1 <= 0] = 0
    dW1 = X_train_pca.T.dot(delta2)
    db1 = np.sum(delta2, axis=0, keepdims=True)

    # Gradient-descent update.
    W1 -= lr * dW1
    b1 -= lr * db1
    W2 -= lr * dW2
    b2 -= lr * db2

# Evaluation: softmax is monotonic, so argmax over the logits is sufficient.
z1 = X_test_pca.dot(W1) + b1
a1 = np.maximum(0, z1)
z2 = a1.dot(W2) + b2
y_pred = np.argmax(z2, axis=1)
accuracy = np.mean(y_pred == y_test)
print(f"Scratch NN Test Accuracy: {accuracy:.3f}")


Epoch 0 – loss: 5.5787
Epoch 10 – loss: 3.1708
Epoch 20 – loss: 2.2645
Epoch 30 – loss: 1.7724
Epoch 40 – loss: 1.4738
Scratch NN Test Accuracy: 0.584
