In [None]:
from sklearn.datasets import fetch_openml
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import cross_val_score
import numpy as np
import pandas as pd

# Download the MNIST digits from OpenML as pandas objects
mnist = fetch_openml("mnist_784", as_frame=True)
X = mnist["data"]
y = mnist["target"]

# The first 60000 rows form the training set; the remainder is the test set
X_train, X_test = X.iloc[:60000], X.iloc[60000:]
y_train, y_test = y.iloc[:60000], y.iloc[60000:]

# Build the classifier (cross_val_score fits its own clones on each fold)
sgd_clf = SGDClassifier(random_state=42)

# Baseline: 3-fold cross-validated accuracy on the un-augmented training set
score = cross_val_score(
    estimator=sgd_clf,
    X=X_train,
    y=y_train,
    cv=3,
    scoring="accuracy",
)
print("before score", score)

# Augment data
def augment(value):
    """Return four one-pixel-shifted copies of a flattened 28x28 image.

    Args:
        value: Array-like object exposing ``reshape`` (e.g. a NumPy array)
            that holds the 784 pixel values of a 28x28 image in row-major
            order.

    Returns:
        A list ``[up, down, left, right]`` of flattened (784-element) arrays
        in which the image content has been moved by one pixel in the named
        direction.
    """
    image = value.reshape(28, 28)

    # np.roll is circular: the row/column pushed off one edge re-enters on
    # the opposite edge rather than being replaced with zeros.
    # Axis 0 indexes rows (vertical), axis 1 indexes columns (horizontal);
    # a negative shift moves content towards index 0 (up / left).
    up = np.roll(image, shift=-1, axis=0).flatten()
    down = np.roll(image, shift=1, axis=0).flatten()
    left = np.roll(image, shift=-1, axis=1).flatten()
    right = np.roll(image, shift=1, axis=1).flatten()

    return [up, down, left, right]

augment_X = []
augment_Y = []

# Convert the frame to a NumPy array once and walk its rows together with the
# labels; this avoids a per-row ``.iloc`` lookup for every training image.
for value, target in zip(X_train.to_numpy(), y_train):
    augmented_images = augment(value)
    augment_X.extend(augmented_images)
    # Every shifted copy keeps the label of the image it was derived from
    augment_Y.extend([target] * len(augmented_images))

# Convert augmented data to data frames
df_augmented_X = pd.DataFrame(augment_X, columns=X_train.columns)
df_augmented_Y = pd.Series(augment_Y)

# Add augmented data to the training sets
X_train_augmented = pd.concat([X_train, df_augmented_X], ignore_index=True)
y_train_augmented = pd.concat([y_train, df_augmented_Y], ignore_index=True)

# Perform cross validation
# NOTE(review): the folds are drawn from the combined original + augmented
# rows, so shifted copies of a validation image can sit in the training folds;
# the resulting score may be optimistic compared with the baseline - confirm
# whether augmentation should instead be applied inside each training fold.
score = cross_val_score(sgd_clf, X_train_augmented, y_train_augmented, cv=3, scoring="accuracy")
print("after score", score)

before score [0.87365 0.85835 0.8689 ]
