In [None]:
import sys
sys.path.append("..") # for sibling import

import random
import compyute as cp

In [None]:
# Run on the GPU when compyute reports one is available, otherwise fall back to the CPU.
if cp.engine.gpu_available():
    device = "cuda"
else:
    device = "cpu"

# Echo the chosen device as the cell output.
device

# Example 4

### Convolutional Neural Network: MNIST

The goal of this model is to classify images of hand-written digits.

### Step 1: Prepare data
You will need to download the dataset from https://www.kaggle.com/competitions/digit-recognizer/data and place `train.csv` into the *data/mnist* directory. Only the official training data is used here — it is split into training, validation and test sets — since the goal is just to showcase the framework.

In [None]:
# ! pip install pandas

In [None]:
import pandas as pd

# Load the downloaded training CSV from the local data directory (path is relative to this notebook).
data = pd.read_csv('../data/mnist/train.csv')
# Show the first rows as a quick sanity check of what was loaded.
data.head()

In [None]:
# Convert the DataFrame to a NumPy array and wrap it in a compyute tensor.
tensor = cp.tensor(data.to_numpy())
# Split the rows into three subsets, requesting 10% for validation and 10% for testing.
# NOTE(review): whether the helper shuffles the rows before splitting is not visible here — confirm.
train, val, test = cp.preprocessing.split_train_val_test(tensor, ratio_val=0.1, ratio_test=0.1)

In [None]:
# In every split, column 0 is used as the (integer) label and all remaining
# columns are used as the input features.
X_train = train[:, 1:]
y_train = train[:, 0].int()

X_val = val[:, 1:]
y_val = val[:, 0].int()

X_test = test[:, 1:]
y_test = test[:, 0].int()

In [None]:
def _to_images(flat):
    """Reshape flat rows to (samples, 1, 28, inferred) and cast the result to float."""
    n_samples = flat.shape[0]
    return flat.reshape((n_samples, 1, 28, -1)).float()


# Give every split the 4D layout (samples, channels, height, width) used by the model below.
X_train = _to_images(X_train)
X_val = _to_images(X_val)
X_test = _to_images(X_test)

# Report the resulting shapes of all features and labels.
print(f'{X_train.shape=}')
print(f'{y_train.shape=}')
print(f'{X_val.shape=}')
print(f'{y_val.shape=}')
print(f'{X_test.shape=}')
print(f'{y_test.shape=}')

Scaling: divide the pixel values by 255 so that they lie in the range [0, 1].

In [None]:
# Divide every split by 255.0.
# Note: each name is rebound to its scaled version, so re-running this cell
# without re-running the previous ones divides by 255 a second time.
X_train, X_val, X_test = (
    X_train / 255.0,
    X_val / 255.0,
    X_test / 255.0,
)

### Step 2: Build the neural network structure

In [None]:
import compyute.nn as nn

# Small CNN: convolution block -> max pooling -> flatten -> dropout -> linear classifier.
model = nn.Sequential(
    # NOTE(review): presumably 1 input channel -> 32 output channels with a 3x3 kernel — confirm argument order.
    nn.Convolution2dBlock(1, 32, kernel_size=3, activation="relu"),
    nn.MaxPooling2d(kernel_size=2),
    nn.Flatten(),
    nn.Dropout(0.5),
    # 13*13*32 input features: presumably 28 -> 26 after the unpadded 3x3 convolution,
    # -> 13 after 2x2 pooling, times 32 channels (TODO confirm padding default).
    # 10 outputs, one per digit class.
    nn.Linear(13*13*32, 10)
)
# Move the model to the device selected earlier in the notebook.
model.to_device(device)

In [None]:
# Summarize the model for a single input of shape (1, 28, 28), matching the reshaped images.
model.summary(input_shape=(1, 28, 28))

### Step 3: Train the model

In [None]:
from compyute.nn.trainer import losses, metrics, optimizers, Trainer
from compyute.nn.trainer.callbacks import EarlyStopping, History, ProgressBar

# Keep a handle on the History callback so the recorded traces can be read after training.
history = History()

# Collect the trainer configuration first, then build the trainer from it.
trainer_kwargs = dict(
    model=model,
    optimizer=optimizers.Adam(),
    loss=losses.CrossEntropy(),
    metric=metrics.Accuracy(),
    callbacks=[history, ProgressBar()],
)
trainer = Trainer(**trainer_kwargs)

In [None]:
# Training hyperparameters; batch_size is reused by the evaluation and prediction cells below.
epochs = 5
batch_size = 128

# NOTE(review): presumably makes the layers keep their intermediate values so they can be
# inspected after training (e.g. `conv.y` further down) — confirm.
model.set_retain_values(True)
# Fit on the training split and pass the validation split as val_data.
trainer.train(X_train, y_train, epochs=epochs, batch_size=batch_size, val_data=(X_val, y_val))

In [None]:
# ! pip install matplotlib

In [None]:
import matplotlib.pyplot as plt

def plot_history(t1, t2):
    """Plot two recorded training traces on a shared axis, with a legend.

    Both traces are read from the notebook-level ``history`` object and drawn
    against a 1-based index. Each line is labelled with its key so the two
    curves can be told apart.

    Parameters
    ----------
    t1, t2 : str
        Keys used to look up the traces in ``history``.
    """
    # Look up both traces before creating the figure, so a bad key does not leave an empty figure behind.
    traces = [(key, history[key]) for key in (t1, t2)]

    plt.figure(figsize=(10, 3))
    for key, trace in traces:
        plt.plot(cp.arange(start=1, stop=len(trace) + 1), trace, linewidth=1, label=key)
    plt.legend()

# Plot the traces recorded under the "loss" and "accuracy_score" keys during training.
plot_history("loss", "accuracy_score")

### Step 4: Evaluate the model

In [None]:
# Evaluate on the held-out test split, reusing the batch size from training.
loss, accuracy = trainer.evaluate_model(X_test, y_test, batch_size)
print(f'loss {loss:.4f}')
# The accuracy is multiplied by 100 for display.
print(f'accuracy {accuracy*100:.2f}')

In [None]:
# ! pip install scikit-learn

In [None]:
from sklearn.metrics import confusion_matrix
import numpy
from compyute.nn.dataloaders import batched

# Wrap the model for batch-wise inference on the model's device (shuffling
# disabled), then run it over the whole test split.
predict_batched = batched(model, batch_size=batch_size, device=model.device, shuffle_data=False)
y_pred = predict_batched(X_test)
probs, _ = nn.functional.softmax(y_pred)

# The predicted class is the index of the highest probability in each row.
cm = confusion_matrix(y_test, probs.argmax(-1), labels=y_test.unique())

# Tick positions and labels 0..9, one per digit class.
r = cp.arange(10).to_numpy()

plt.imshow(cm, cmap="Blues")
plt.xlabel("prediction")
plt.ylabel("target")
plt.xticks(ticks=r, labels=r)
plt.yticks(ticks=r, labels=r)

# Write each count into its cell; matrix rows map to y and columns to x.
for (row, col), count in numpy.ndenumerate(cm):
    plt.text(col, row, str(int(count)), ha="center", va="center")

### Step 5: Explore the inner workings
Pick a random image from the testing dataset.

In [None]:
# Draw a random sample index; random.randint includes both endpoints, hence the -1.
i = random.randint(0, len(X_test) - 1)
image = X_test[i].moveaxis(0, -1)  # matplotlib needs the color channel to be the last dim

plt.figure(figsize=(3, 3))
plt.imshow(image.data, cmap='gray')
# Hide the axis ticks and tick labels so only the image is shown.
plt.tick_params(left=False, bottom=False, labelleft=False, labelbottom=False)

Use it to predict a number and show the probability distribution of the outcome.

In [None]:
# `i` is the random sample index chosen in the previous cell.
print(f"correct label: {y_test[i].item()}")

# Indexing with None adds a leading axis of size 1, so the single image is passed as a batch of one.
image_tensor = X_test[None, i]
# NOTE(review): the return value is discarded, so this relies on to_device moving the
# tensor in place — confirm against the compyute version in use.
image_tensor.to_device(device)
logits = model(image_tensor)
# softmax appears to return a pair (the confusion-matrix cell unpacks two values); element 0 is used as the probabilities.
probs = cp.nn.functional.softmax(logits)[0].squeeze()
pred = probs.argmax(-1).squeeze().item()

print(f"predicted label: {pred}")

# Bar chart of the class probabilities; `r` (the class indices 0..9) comes from the confusion-matrix cell.
plt.figure(figsize=(5, 3))
plt.xticks(ticks=r)
plt.bar(r, probs.to_numpy())
plt.xlabel("class")
plt.ylabel("probability");

Every layer of the model can be accessed to explore its output. Here we iterate over all the kernels of the convolutional layer to explore what they have learned to focus on in the images.

In [None]:
# First sub-module of the model's first entry (the Convolution2dBlock) — presumably the convolution layer itself.
conv = model.modules[0].modules[0]

def plot_channels(array, label):
    """Plot every 2D slice along the first axis of ``array`` in a grid of subplots.

    Parameters
    ----------
    array
        Object indexable as ``array[i, :, :]`` whose ``shape[0]`` gives the
        number of slices. Each slice must provide ``min()`` and ``max()``
        returning objects with an ``item()`` method.
    label : str
        Caption prefix. Each subplot is captioned ``"<label> <n>"`` with ``n``
        starting at 1.
    """
    n_images = array.shape[0]
    n_cols = 8
    # Keep the 10-row layout for up to 80 slices, and add rows beyond that
    # instead of failing on an out-of-range subplot index.
    n_rows = max(10, -(-n_images // n_cols))

    plt.figure(figsize=(20, 20))
    for i in range(n_images):
        plt.subplot(n_rows, n_cols, i + 1)
        image = array[i, :, :]
        # Scale the gray colormap to each slice's own value range.
        plt.imshow(image, vmin=image.min().item(), vmax=image.max().item(), cmap="gray")
        # Use the caller's label rather than a hard-coded "channel" caption.
        plt.xlabel(f"{label} {i + 1}")
        plt.tick_params(left=False, bottom=False, labelleft=False, labelbottom=False)
    plt.show()

In [None]:
# Take the first entry of the convolution layer's retained output and move it to the CPU.
out = conv.y[0].cpu()

# Min-max normalise using the extrema taken along axis 0.
# NOTE(review): axis 0 of `out` is presumably the channel axis, so each pixel is
# normalised across channels; positions where max == min would divide by zero — confirm intent.
out_min, out_max = out.min(axis=0), out.max(axis=0)
value_range = out_max - out_min
out = (out - out_min) / value_range

plot_channels(out, "channel")

Learned filters

In [None]:
# Drop the size-1 axes of the convolution weights and move them to the CPU.
weights = conv.w.squeeze().cpu()

# Min-max normalise using the extrema taken along axis 0.
# NOTE(review): axis 0 is presumably the filter axis, so each kernel position is
# normalised across filters; positions where max == min would divide by zero — confirm intent.
weights_min, weights_max = weights.min(axis=0), weights.max(axis=0)
weights_range = weights_max - weights_min
weights = (weights - weights_min) / weights_range

plot_channels(weights, "filter")