In [None]:
# Utility imports
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook as tqdm

Here, we implement MNIST in actual neural net libraries instead of pure numpy

## Keras
Good for high-level experimentation
Main source: https://keras.io/examples/vision/mnist_convnet/

In [None]:
import keras
import keras.layers
import keras.datasets

## Get MNIST data

(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
# Scale images to the [0, 1] range
x_train = x_train.astype("float32") / 255
x_test = x_test.astype("float32") / 255


x_train = x_train.reshape((60000, 784))
x_test = x_test.reshape((10000, 784))
# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)

In [None]:
model = keras.Sequential(
    [
        keras.Input(shape=(784,)),
        keras.layers.Dense(128, activation="sigmoid"),
        keras.layers.Dense(10, activation="sigmoid"),
    ]
)
model.summary()

In [None]:
batch_size = 128
epochs = 15
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.1)


Epoch 1/15
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - accuracy: 0.7585 - loss: 1.0213 - val_accuracy: 0.9302 - val_loss: 0.2752
Epoch 2/15
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9153 - loss: 0.3018 - val_accuracy: 0.9430 - val_loss: 0.2087
Epoch 3/15
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9307 - loss: 0.2415 - val_accuracy: 0.9540 - val_loss: 0.1750
Epoch 4/15
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9423 - loss: 0.2020 - val_accuracy: 0.9595 - val_loss: 0.1534
Epoch 5/15
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9514 - loss: 0.1680 - val_accuracy: 0.9642 - val_loss: 0.1383
Epoch 6/15
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9571 - loss: 0.1503 - val_accuracy: 0.9648 - val_loss: 0.1248
Epoch 7/15
[1m422/422[0m 

<keras.src.callbacks.history.History at 0x7ec23559d9d0>

In [None]:
score = model.evaluate(x_test, y_test, verbose=1)
print("Test loss:", score[0])
print("Test accuracy:", score[1])

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9697 - loss: 0.1003
Test loss: 0.08943313360214233
Test accuracy: 0.9728999733924866


## Pytorch
Lower level than Keras, sketching what I did [here](https://nbviewer.org/github/epistemologist/AI_Notebooks/blob/main/MNIST_in_Pytorch.ipynb) but less detailed

In [None]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

torch.manual_seed(0)

<torch._C.Generator at 0x7ec23e3a76f0>

In [None]:
# Load the datasets
training_data = datasets.MNIST(
    root="data",
    train = True,
    download = True,
    transform = ToTensor(),
)
test_data = datasets.MNIST(
    root="data",
    train = False,
    download = True,
    transform = ToTensor(),
)
# Put dataset into DataLoader
BATCH_SIZE = 64
training_data_loader = DataLoader(
    training_data,
    batch_size = BATCH_SIZE,
)
test_data_loader = DataLoader(
    test_data,
    batch_size = BATCH_SIZE,
)

In [None]:
# Define model
device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.network_stack = nn.Sequential(
            nn.Linear(28*28, 128),
            nn.Sigmoid(),
            nn.Linear(128, 10),
            nn.Sigmoid(),
        )
    def forward(self, x):
        x = self.flatten(x)
        y = self.network_stack(x)
        return y

model = NeuralNetwork().to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (network_stack): Sequential(
    (0): Linear(in_features=784, out_features=128, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=128, out_features=10, bias=True)
    (3): Sigmoid()
  )
)


In [None]:
# Train model
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

model.train()
for epoch in range(10):
    for batch, (X, y) in tqdm( enumerate(training_data_loader) ):
        X, y = X.to(device), y.to(device)
        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)
        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for batch, (X, y) in tqdm( enumerate(training_data_loader) ):


0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

In [None]:
# Test neural network
accuracies = []
with torch.no_grad():
    model.eval()
    for batch, (X, y) in enumerate(test_data_loader):
        X, y = X.to(device), y.to(device)
        pred = model(X)
        curr_accuracies = (pred.argmax(1) == y).cpu().numpy()
        accuracies.append( (sum(curr_accuracies), len(curr_accuracies)) )

print(f"Accuracy: {sum(i[0] for i in accuracies) / sum(i[1] for i in accuracies)}")

Accuracy: 0.9595


## sklearn
This is a thing that I didn't know existed - could also be useful for quick prototyping

Source for data loading: https://scikit-learn.org/stable/auto_examples/linear_model/plot_sparse_logistic_regression_mnist.html

In [54]:
from sklearn.datasets import fetch_openml
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils import check_random_state

print("[+] Loading data...")

X, y = fetch_openml("mnist_784", version=1, return_X_y=True, as_frame=False)

random_state = check_random_state(0)
permutation = random_state.permutation(X.shape[0])
X = X[permutation]
y = y[permutation]
X = X.reshape((X.shape[0], -1))

X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=60000, test_size=10000
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

y_train = np.eye(10)[y_train.astype(int)]
y_test = np.eye(10)[y_test.astype(int)]

[+] Loading data...


In [55]:
clf = MLPClassifier(
    hidden_layer_sizes=(128,),
    activation="logistic",
    solver='adam',
    batch_size=128,
    max_iter=15,
    verbose=10,
)
clf.fit(X_train, y_train)

Iteration 1, loss = 1.26412339
Iteration 2, loss = 0.55865991
Iteration 3, loss = 0.43213987
Iteration 4, loss = 0.36611182
Iteration 5, loss = 0.32078020
Iteration 6, loss = 0.28645413
Iteration 7, loss = 0.25818187
Iteration 8, loss = 0.23342046
Iteration 9, loss = 0.21206629
Iteration 10, loss = 0.19323411
Iteration 11, loss = 0.17628557
Iteration 12, loss = 0.16119896
Iteration 13, loss = 0.14743603
Iteration 14, loss = 0.13524250
Iteration 15, loss = 0.12386052




In [56]:
clf.score(X_test, y_test)

0.939