

```
YASH AJAY TAPADIYA
Predicting for 0 in the data
```



In [25]:
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.dummy import DummyClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

# Fetch the 'mnist_784' dataset from OpenML as plain arrays (as_frame=False)
# rather than a pandas DataFrame.
mnist = fetch_openml('mnist_784', as_frame=False)
X = mnist.data
# Cast the targets to integers so later code can treat labels as numeric
# class ids.
y = mnist.target.astype(int)

# Hold out 20% of the samples for testing. Stratifying on y keeps the class
# proportions the same in both splits; the fixed seed makes the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
    test_size=0.2,
)

In [26]:
def shift_image(image, direction):
    """
    Return a copy of a flattened 28x28 image moved one pixel in `direction`.

    Parameters
    ----------
    image : array that can be reshaped to (28, 28)
        The source image; it is only read, never written to.
    direction : str
        One of 'left', 'right', 'up', 'down'.

    Returns
    -------
    numpy.ndarray
        A new array of shape (784,) with the same dtype as `image`. The
        row or column vacated by the shift is filled with zeros, and the
        row or column pushed past the edge is discarded.

    Raises
    ------
    ValueError
        If `direction` is not one of the four supported names.
    """
    grid = image.reshape(28, 28)
    moved = np.zeros_like(grid)

    whole = slice(None)            # every index along an axis
    drop_last = slice(None, -1)    # all but the final index
    drop_first = slice(1, None)    # all but the first index

    # Each entry: (direction name, destination window, source window).
    # The destination window of `moved` receives the source window of `grid`.
    windows = (
        ("left", (whole, drop_last), (whole, drop_first)),
        ("right", (whole, drop_first), (whole, drop_last)),
        ("up", (drop_last, whole), (drop_first, whole)),
        ("down", (drop_first, whole), (drop_last, whole)),
    )

    for name, dest, src in windows:
        if direction == name:
            moved[dest] = grid[src]
            return moved.reshape(784)

    raise ValueError("Direction must be: left, right, up, or down")


In [27]:
def augment_training_set(X, y):
    """
    Build an augmented dataset from images and their labels.

    For every (image, label) pair the output contains the original image
    followed by its four one-pixel shifts (left, right, up, down, in that
    order), each carrying the same label. Pairs are taken with `zip`, so
    iteration stops at the shorter of `X` and `y`.

    Parameters
    ----------
    X : iterable of images accepted by `shift_image`
    y : iterable of labels, aligned with `X`

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The augmented images and the matching labels, five entries per
        input pair.
    """
    directions = ("left", "right", "up", "down")
    copies_per_sample = 1 + len(directions)

    images = []
    labels = []
    for sample, target in zip(X, y):
        # Original first, then its shifted variants in a fixed order.
        images.append(sample)
        images.extend(shift_image(sample, d) for d in directions)
        labels.extend([target] * copies_per_sample)

    return np.array(images), np.array(labels)


# Expand the training split: every image is kept and joined by its four
# one-pixel shifts, so the augmented set should be five times as large.
X_train_aug, y_train_aug = augment_training_set(X_train, y_train)

# Fail fast if the augmentation did not produce exactly five rows (and
# five labels) per original training image.
assert len(X_train_aug) == 5 * len(X_train) == len(y_train_aug)

# Report the sizes so the cell's output is reproducible on a fresh run.
print("Original training size:", len(X_train))
print("Augmented training size:", len(X_train_aug))



Original training size: 56000
Augmented training size: 280000


In [30]:
# Baseline model: an RBF-kernel SVM fitted on the original (un-augmented)
# training split. SVC, accuracy_score and confusion_matrix come from the
# notebook's top-level import cell.
svm = SVC(kernel='rbf', gamma='scale')
svm.fit(X_train, y_train)

# Evaluate on the held-out test split.
y_pred_svm = svm.predict(X_test)
acc_svm = accuracy_score(y_test, y_pred_svm)

# Label the figure as the original-data result so it cannot be confused
# with the accuracy reported for the augmented model.
print("SVM accuracy (original data):", acc_svm)


SVM accuracy (original data): 0.9790714285714286


In [None]:
# Same RBF-kernel SVM configuration as the baseline, but fitted on the
# shift-augmented training set. `fit` returns the estimator itself, so
# construction and training are chained.
# NOTE(review): kernel SVM fit time grows faster than linearly with the
# number of samples; with the five-fold augmented set this cell may take
# very long -- confirm it completes before relying on its output.
svm_aug = SVC(gamma='scale', kernel='rbf').fit(X_train_aug, y_train_aug)

# Score against the same untouched test split used for the baseline.
y_pred_svm_aug = svm_aug.predict(X_test)
acc_svm_aug = accuracy_score(y_true=y_test, y_pred=y_pred_svm_aug)

print("SVM accuracy (augmented):", acc_svm_aug)


In [None]:
# Confusion matrices for both models on the same test split
# (rows are true labels, columns are predicted labels).
cm_svm_aug = confusion_matrix(y_test, y_pred_svm_aug)
cm_svm = confusion_matrix(y_test, y_pred_svm)

# Show row 8 of each matrix: how test samples whose true label is 8 were
# distributed across the predicted classes. sep="\n" puts the heading and
# the row on separate lines.
print("Confusion row for digit 8 (original):", cm_svm[8], sep="\n")

print("Confusion row for digit 8 (augmented):", cm_svm_aug[8], sep="\n")