**Answer for chapter 3, question 2**

Import the data and necessary packages to perform this analysis:

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler
from scipy.ndimage.interpolation import shift
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_openml

# Download MNIST (OpenML dataset 'mnist_784', version 1) and list the fields
# available on the returned object.
mnist = fetch_openml(name='mnist_784', version=1)
mnist.keys()

In [None]:
# Get our features and our target values as plain NumPy arrays.
# Recent scikit-learn versions can return mnist["data"] as a pandas DataFrame;
# iterating a DataFrame yields column labels rather than image rows, which
# breaks the per-image loops used for augmentation later in this notebook.
# np.asarray leaves an existing ndarray untouched, so this is safe either way.
X, y = np.asarray(mnist["data"]), np.asarray(mnist["target"])

# Show the data's size and dimensions
print("X's dimensions:", X.shape,"\ny's dimensions:", y.shape)

In [None]:
# The first 60,000 rows are used for training; the remainder is the test set.
X_train, y_train = X[:60000], y[:60000]
X_test, y_test = X[60000:], y[60000:]

In [None]:
# Read the saved predictions from CSV and force them into a (10000, 1) column.
# NOTE(review): presumably these are the first-draft random forest's test-set
# predictions from an earlier run -- confirm against the notebook that wrote them.
rf_1st_draft_test_preds = np.array(
    pd.read_csv("datasets/rf_preds.csv")
).reshape(10000, 1)

# Read back the saved description of the best model and display it.
best_rf_model = np.loadtxt("datasets/best_rf_model.txt")
best_rf_model

In [None]:
def shift_image(image, lr, du):
    """Translate a flattened 28x28 image and return it flattened again.

    Parameters
    ----------
    image : array-like
        The 784 pixel values of one image. Any array-like that NumPy can
        convert (ndarray, list, pandas Series) is accepted.
    lr : int or float
        Offset along axis 1 (columns). Positive values move the content
        toward higher column indices.
    du : int or float
        Offset along axis 0 (rows). Positive values move the content
        toward higher row indices.

    Returns
    -------
    numpy.ndarray
        1-D array with 784 entries. Pixels shifted in from outside the
        frame are filled with 0.
    """
    # np.asarray lets callers pass lists/Series; ndarrays pass through as-is.
    image = np.asarray(image).reshape((28, 28))
    # scipy expects one offset per axis, in axis order: rows first, then columns.
    shifted_image = shift(image, [du, lr], cval=0, mode="constant")
    return shifted_image.reshape([-1])

In [None]:
# Start from the untouched training set, then append one shifted copy of every
# image for each one-pixel offset. A shifted image keeps its original label.
X_train_aug = list(X_train)
y_train_aug = list(y_train)

for lr, du in ((1, 0), (0, 1), (-1, 0), (0, -1)):
    X_train_aug.extend(shift_image(img, lr, du) for img in X_train)
    y_train_aug.extend(y_train)

# Turn the accumulated Python lists back into arrays for indexing and fitting.
X_train_aug = np.array(X_train_aug)
y_train_aug = np.array(y_train_aug)

# Reorder images and labels with one shared random permutation so the shifted
# copies are no longer grouped by offset.
shuffled_index = np.random.permutation(X_train_aug.shape[0])
X_train_aug, y_train_aug = X_train_aug[shuffled_index], y_train_aug[shuffled_index]

# Standardize the features; `scaler` keeps the fitted statistics.
scaler = StandardScaler()
X_train_aug_scaled = scaler.fit_transform(X_train_aug.astype(np.float64))

# Train a random forest with the tuned hyper-parameters on the augmented set.
# NOTE(review): `grid_search` is not defined in any cell visible in this
# notebook, so this line raises NameError on a fresh kernel. Presumably it comes
# from the earlier tuning session -- confirm and define or load it here.
best_forest_clf = RandomForestClassifier(**grid_search.best_params_)
best_forest_clf.fit(X_train_aug_scaled, y_train_aug)

In [None]:
# Predictions
# The forest was fit on standardized features, so its split thresholds live in
# standardized space. The test set must go through the same fitted scaler
# (transform only, never re-fit) before it is handed to the model.
X_test_scaled = scaler.transform(np.asarray(X_test, dtype=np.float64))
y_aug_pred = best_forest_clf.predict(X_test_scaled)
accuracy_score(y_test, y_aug_pred)