In [1]:
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import walnut

# Example 4

### Convolutional neural network

The goal of this model is to classify images of hand-written digits.

### Step 1: Prepare data
You will need to download the dataset from https://www.kaggle.com/competitions/digit-recognizer/data and place `train.csv` into the *data/mnist* directory. Only the official training data is used for training, validation and testing, since this example is just meant to showcase the framework.

In [2]:
# Load the Kaggle digit-recognizer training set: one `label` column plus
# pixel0 ... pixel783 per row (see the preview below).
data = pd.read_csv(filepath_or_buffer='data/mnist/train.csv')
data.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Since the labels are represented as integers, they should be one-hot encoded.

In [3]:
# One-hot encode the integer `label` column; the preview below shows it
# replaced by the boolean columns label_0 ... label_9.
data_enc = walnut.preprocessing.encoding.pd_one_hot_encode(
    data,
    columns=['label'],
)
data_enc.head()

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,label_0,label_1,label_2,label_3,label_4,label_5,label_6,label_7,label_8,label_9
0,0,0,0,0,0,0,0,0,0,0,...,False,True,False,False,False,False,False,False,False,False
1,0,0,0,0,0,0,0,0,0,0,...,True,False,False,False,False,False,False,False,False,False
2,0,0,0,0,0,0,0,0,0,0,...,False,True,False,False,False,False,False,False,False,False
3,0,0,0,0,0,0,0,0,0,0,...,False,False,False,False,True,False,False,False,False,False
4,0,0,0,0,0,0,0,0,0,0,...,True,False,False,False,False,False,False,False,False,False


In [4]:
# Convert the encoded frame into a walnut tensor, then carve out the
# validation (ratio 0.01) and test (ratio 0.1) subsets; the rest is training data.
tensor = walnut.pd_to_tensor(data_enc)
train, val, test = walnut.preprocessing.split_train_val_test(
    tensor,
    ratio_val=0.01,
    ratio_test=0.1,
)

In [5]:
# Separate features from labels for every subset in one pass.
# 784 is passed as the split point -- presumably the number of pixel columns
# (28 * 28); the remaining 10 columns are the one-hot labels.
(x_train, y_train), (x_val, y_val), (x_test, y_test) = (
    walnut.preprocessing.split_features_labels(subset, 784)
    for subset in (train, val, test)
)

In [6]:
# Reshape the flat pixel rows into (samples, 1, 28, width) image tensors;
# the trailing -1 lets reshape infer the last dimension.
x_train, x_val, x_test = (
    flat.reshape((flat.shape[0], 1, 28, -1))
    for flat in (x_train, x_val, x_test)
)

# Report the resulting shapes, one per line.
print(
    f'{x_train.shape=}',
    f'{y_train.shape=}',
    f'{x_val.shape=}',
    f'{y_val.shape=}',
    f'{x_test.shape=}',
    f'{y_test.shape=}',
    sep='\n',
)

x_train.shape=(37380, 1, 28, 28)
y_train.shape=(37380, 10)
x_val.shape=(420, 1, 28, 28)
y_val.shape=(420, 10)
x_test.shape=(4200, 1, 28, 28)
y_test.shape=(4200, 10)


Normalize the pixel values by dividing them by 255.

In [7]:
# Scale every input tensor by 1/255.
# NOTE: the names are rebound in place, so re-running this cell divides the
# data again -- run it exactly once per kernel session.
x_train, x_val, x_test = (
    pixels / 255 for pixels in (x_train, x_val, x_test)
)

### Step 2: Build the neural network structure

In [8]:
import walnut.nn as nn

# Small CNN: one convolution block, pooling, flatten, then two dense layers.
# NOTE(review): the last Linear requests act="softmax" and norm="layer", but the
# rendered model summary further down lists no Layernorm/Softmax rows after it --
# confirm whether the code or the saved output is stale.
model = nn.Sequential(
    layers=[
        nn.modules.Convolution2d(
            8,
            input_shape=(1, 28, 28),
            kernel_size=(3, 3),
            act="relu",
            norm="layer",
            pad="same",
        ),
        nn.modules.MaxPooling(p_window=(2, 2)),
        nn.modules.Reshape(),
        nn.modules.Linear(64, act="relu", norm="layer"),
        nn.modules.Linear(10, act="softmax", norm="layer"),
    ]
)

The network is compiled to internally connect its layers and initialize the model.

In [9]:
# Attach the Adam optimizer, cross-entropy loss and accuracy metric to the model.
# `l_r` is presumably the learning rate -- confirm against the walnut API.
model.compile(optimizer=nn.optimizers.Adam(l_r=1e-3),
              loss_fn=nn.losses.Crossentropy(),
              metric=nn.metrics.Accuracy())

In [10]:
# Display the model; its repr renders as the per-layer summary table shown below
# (layer type, input/weight/bias/output shapes and parameter counts).
model

layer_type      | input_shape     | weight_shape    | bias_shape      | output_shape    | parameters     

Convolution2d   | (1, 28, 28)     | (8, 1, 3, 3)    | (8,)            | (8, 28, 28)     | 80             
Layernorm       | (8, 28, 28)     | (8, 28, 28)     | (8, 28, 28)     | (8, 28, 28)     | 12544          
Relu            | (8, 28, 28)     | (,)             | (,)             | (8, 28, 28)     | 0
MaxPooling      | (8, 28, 28)     | (,)             | (,)             | (8, 14, 14)     | 0
Reshape         | (8, 14, 14)     | (,)             | (,)             | (1568,)         | 0
Linear          | (1568,)         | (1568, 64)      | (64,)           | (64,)           | 100416         
Layernorm       | (64,)           | (64,)           | (64,)           | (64,)           | 128            
Relu            | (64,)           | (,)             | (,)             | (64,)           | 0
Linear          | (64,)           | (64, 10)        | (10,)           | (10,)           | 650        

### Step 3: Train the model

In [None]:
# Fit the model for 50 epochs with batches of 256 samples; the call returns the
# training and validation loss histories that are plotted in the next cell.
train_loss_hist, val_loss_hist = model.train(
    x_train,
    y_train,
    epochs=50,
    batch_size=256,
    val_data=(x_val, y_val),
)

In [None]:
# Plot the training and validation loss histories as labelled curves.
traces = dict(train_loss=train_loss_hist, val_loss=val_loss_hist)
nn.analysis.plot_curve(
    traces=traces,
    figsize=(15, 3),
    title="loss history",
    x_label="epoch",
    y_label="loss",
)

### Step 4: Evaluate the model

In [None]:
# Score the trained model on the held-out test split and report both numbers.
loss, accuracy = model.evaluate(x_test, y_test)
print(f'loss {loss:.4f}', f'accuracy {accuracy:.4f}', sep='\n')

In [None]:
# Run the whole test set through the model and compare the predictions with the
# one-hot test labels in a confusion matrix.
predictions = model(x_test)
nn.analysis.plot_confusion_matrix(
    predictions,
    y_test,
    figsize=(5, 5),
)

### Step 5: Explore the inner workings
Pick a random image from the testing dataset.

In [None]:
# Draw a random, valid index into the test set.
# random.randint(0, n) includes n itself and could therefore index one past the
# end; random.randrange(n) excludes the upper bound.
i = random.randrange(x_test.len)

# Move the channel axis to the end so matplotlib receives (height, width, channels).
image = np.moveaxis(x_test[i].data, 0, -1)
plot = plt.imshow(image, cmap='gray')

# Hide axis ticks and tick labels; they carry no information for an image preview.
plt.tick_params(left=False, bottom=False, labelleft=False, labelbottom=False)

Use it to predict a number and show the probability distribution of the outcome.

In [None]:
# Add a leading axis to the selected image (the model was trained on 4-d batches),
# run it through the model and plot the predicted class probabilities.
image_tensor = walnut.expand_dims(x_test[i], 0)
predictions = model(image_tensor)
nn.analysis.plot_probabilities(
    predictions,
    figsize=(6, 3),
)

Every layer of the model can be accessed to explore its output. Here we iterate over all the kernels of the convolutional layer to see which image features they have learned to focus on.

In [None]:
# Collect a copy of the first sample's output from every Convolution2d layer,
# keyed by "<1-based layer position> <layer class name>", and plot the result.
channels = {
    f"{position + 1} {type(layer).__name__}": layer.y.data[0].copy()
    for position, layer in enumerate(model.layers)
    if type(layer).__name__ == "Convolution2d"
}
nn.analysis.plot_images(channels, figsize=(40, 40), cmap="gray")