In [None]:
import numpy as np
from sklearn import datasets
import torch
from torch import nn, optim, utils
from torchvision import transforms

# Load Example Images

In [None]:
NUM_CHANNELS = 3

# Sample images shipped with scikit-learn; "images" holds one array per image.
_dataset = datasets.load_sample_images()
_images = _dataset["images"]

# Stack the per-image tensors into one batch, then move the last axis to
# position 1 so the batch is laid out as (batch, channels, height, width).
_stacked = torch.stack(list(map(torch.from_numpy, _images)))
images = _stacked.permute(0, 3, 1, 2)

In [None]:
# batch_size, num_channels, height, width
images.shape

## Create a preprocessing function

In [None]:
# Target (height, width) handed to CenterCrop in pre_processing_fn below; the
# convolution output-size calculations later in the notebook reuse these values.
CROPPED_HEIGHT, CROPPED_WIDTH = 70, 120


class MinMaxScaler(nn.Module):
    """Linearly rescale values from the range ``[min, max]`` onto ``[0, 1]``.

    Args:
        min: Value of the input range that should map to 0.
        max: Value of the input range that should map to 1.

    Raises:
        ValueError: If ``min`` and ``max`` are equal (the range would be empty
            and the rescaling would divide by zero).
    """

    def __init__(self, min=0, max=255) -> None:
        super().__init__()
        if max == min:
            raise ValueError(
                f"max ({max}) must differ from min ({min}) to define a range"
            )
        self._min = min
        self._max = max

    def forward(self, X: torch.Tensor) -> torch.Tensor:
        """Return ``(X - min) / (max - min)``; true division yields floats."""
        # Divide by the width of the range, not by max alone: the two only
        # coincide when min == 0 (which is why the defaults behaved correctly).
        return (X - self._min) / (self._max - self._min)


# Preprocessing pipeline for the sample images: centre-crop every image to
# (CROPPED_HEIGHT, CROPPED_WIDTH), then rescale with MinMaxScaler's defaults.
_preprocessing_steps = (
    transforms.CenterCrop((CROPPED_HEIGHT, CROPPED_WIDTH)),
    MinMaxScaler(),
)
pre_processing_fn = nn.Sequential(*_preprocessing_steps)


In [None]:
processed_images = pre_processing_fn(images)

In [None]:
# batch_size, num_channels, cropped_height, cropped_width
processed_images.shape

### Exercise:

Use the function below to load the [UCI Handwritten Digits Dataset](https://archive.ics.uci.edu/ml/datasets/Optical+Recognition+of+Handwritten+Digits). Create a PyTorch Dataset for this data. Create a pre-processing function for this dataset using three different random transformations. Run a batch of data through your preprocessing function to check that it works.

In [None]:
# return_X_y=True lets the result be unpacked into (features, target);
# as_frame=True is requested so both support the .head() / .to_numpy()
# calls used in the cells below.
uci_features, uci_target = datasets.load_digits(
    return_X_y=True,
    as_frame=True
)

In [None]:
uci_features.head()

In [None]:
uci_target.head()

### Solution:

In [None]:
input_channels = 1
input_height, input_width = 8, 8

# Each row of the feature table is a flattened image; reshape the rows into
# (batch, channels, height, width) float32 tensors.
_image_feature_arrs = (uci_features.to_numpy()
                           .astype(np.float32)
                           .reshape(-1, input_channels, input_height, input_width))
_image_feature_tensors = torch.from_numpy(_image_feature_arrs)

# Targets are kept as a column vector, one label per image.
_image_target_arr = (uci_target.to_numpy()
                               .reshape(-1, 1))
_image_target_tensor = torch.from_numpy(_image_target_arr)

uci_dataset = utils.data.TensorDataset(
    _image_feature_tensors,
    _image_target_tensor
)

# Three different random transformations, as the exercise asks, followed by
# rescaling. All three keep the (height, width) of the input unchanged.
# The digits dataset stores pixel intensities in the range 0..16 (see the
# scikit-learn load_digits documentation), so the scaler is given max=16
# instead of its image-oriented default of 255.
pre_processing_fn = nn.Sequential(
    transforms.RandomRotation(degrees=30),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
    transforms.RandomErasing(p=0.5),
    MinMaxScaler(min=0, max=16),
)

# Run one batch through the pipeline to check that it works.
batch_size = 32
X, _ = uci_dataset[:batch_size]
Z = pre_processing_fn(X)
print(Z.shape)

# Creating a Convolutional Layer

In [None]:
nn.Conv2d?

## Strides and Padding

### Valid Padding

In [None]:
out_channels = 32
kernel_height, kernel_width = 7, 7
# nn.Conv2d takes 2-tuples in (height, width) order, so keep the stride in the
# same order as kernel_size (and as the "Larger Strides" cell further down).
_stride_height, _stride_width = 1, 1

conv2d_layer = nn.Conv2d(
    in_channels=NUM_CHANNELS,
    out_channels=out_channels,
    kernel_size=(kernel_height, kernel_width),
    stride=(_stride_height, _stride_width),
    padding=0,  # equivalent to padding="valid"
)

# Expected output size without padding: floor((in - kernel) / stride) + 1.
# Integer floor division avoids the float round-trip of int(a / b).
out_kernel_height = (CROPPED_HEIGHT - kernel_height) // _stride_height + 1
out_kernel_width = (CROPPED_WIDTH - kernel_width) // _stride_width + 1

In [None]:
output = conv2d_layer(processed_images)

In [None]:
# batch_size, out_channels, out_kernel_height, out_kernel_width
output.shape

In [None]:
out_kernel_height, out_kernel_width

### Same Padding

In [None]:
# Same kernel and channel counts as the valid-padding layer, but with
# padding="same"; no stride is passed, so nn.Conv2d's default is used.
# The output.shape cell below shows the resulting spatial size.
conv2d_layer = nn.Conv2d(
    in_channels=NUM_CHANNELS,
    out_channels=out_channels,
    kernel_size=(kernel_height, kernel_width),
    padding="same",
)

In [None]:
output = conv2d_layer(processed_images)

In [None]:
output.shape

### Larger Strides

In [None]:
# Strides in (height, width) order, matching kernel_size.
_stride_height, _stride_width = 2, 3

conv2d_layer = nn.Conv2d(
    NUM_CHANNELS,
    out_channels,
    (kernel_height, kernel_width),
    (_stride_height, _stride_width),
)

# Expected output size with no padding: floor((in - kernel) / stride) + 1
out_kernel_height = (CROPPED_HEIGHT - kernel_height) // _stride_height + 1
out_kernel_width = (CROPPED_WIDTH - kernel_width) // _stride_width + 1


In [None]:
output = conv2d_layer(processed_images)

In [None]:
# with stride (2, 3) the output height is roughly halved and the width is
# roughly cut to a third
output.shape

In [None]:
out_kernel_height, out_kernel_width

## Layer Weight and Bias


In [None]:
# out_channels, in_channels, kernel_height, kernel_width
conv2d_layer.weight.shape

In [None]:
# out_channels
conv2d_layer.bias.shape

Note that the height and width of the input images do not appear in the kernel’s shape! *All the neurons in the output feature maps share the same weights.* 

This means that you can feed images of any size to this layer as long as 

1. they are at least as large as the kernels,
2. they have the right number of `in_channels`

## Use Custom Functions to Simplify Layer Creation 

Convolutional layers have many hyperparameters to set. It is often useful to create wrapper functions that fix the values of the hyperparameters that are common across layers.

In [None]:
def initialize_conv2d_layer_(layer):
    """Initialize a convolutional layer's parameters in place.

    The weights are filled using Xavier normal initialization and the bias,
    when the layer has one, is set to zero.

    Args:
        layer: Module exposing ``weight`` and ``bias`` attributes, such as an
            ``nn.Conv2d`` instance. ``bias`` may be ``None``.
    """
    nn.init.xavier_normal_(layer.weight)
    # Layers created with bias=False expose bias as None; skip them instead of
    # letting zeros_ fail on a missing tensor.
    if layer.bias is not None:
        nn.init.zeros_(layer.bias)


def create_conv2d_layer(in_channels,
                        out_channels,
                        kernel_height,
                        kernel_width):
    """Build an initialized ``nn.Conv2d`` with unit strides and valid padding.

    Args:
        in_channels: Number of channels in the input.
        out_channels: Number of feature maps the layer produces.
        kernel_height: Height of the convolution kernel.
        kernel_width: Width of the convolution kernel.

    Returns:
        The new layer, after ``initialize_conv2d_layer_`` has been applied.
    """
    layer = nn.Conv2d(
        in_channels=in_channels,
        out_channels=out_channels,
        kernel_size=(kernel_height, kernel_width),
        stride=(1, 1),
        padding="valid",
    )
    initialize_conv2d_layer_(layer)
    return layer


def create_conv2d_block(in_channels,
                        out_channels,
                        kernel_height,
                        kernel_width):
    """Build a convolution followed by a ReLU activation.

    Args:
        in_channels: Number of channels in the input.
        out_channels: Number of feature maps the block produces.
        kernel_height: Height of the convolution kernel.
        kernel_width: Width of the convolution kernel.

    Returns:
        An ``nn.Sequential`` holding the layer from ``create_conv2d_layer``
        and an ``nn.ReLU``.
    """
    return nn.Sequential(
        create_conv2d_layer(in_channels, out_channels, kernel_height, kernel_width),
        nn.ReLU(),
    )



### Exercise:

Use the functions above to create a Conv2d block using a kernel size appropriate for your UCI Handwritten Digits images. The block should output 16 feature maps. Calculate the expected size of the output feature maps of your Conv2d block. Confirm that your calculations are correct by passing a batch of processed images through your block.

### Solution:

In [None]:
in_channels = 1
out_channels = 16

# Convolutional backbone: one Conv2d + ReLU block with a 2x2 kernel.
kernel_height, kernel_width = 2, 2
conv2d_backbone = create_conv2d_block(in_channels, out_channels, kernel_height, kernel_width)

# Expected feature-map size. The block uses unit strides and no padding, so
# each dimension shrinks by (kernel size - 1).
out_kernel_height = input_height - kernel_height + 1
out_kernel_width = input_width - kernel_width + 1

print(out_kernel_height, out_kernel_width)

# Compare against the actual shape produced for the preprocessed batch.
output = conv2d_backbone(Z)
print(output.shape)

### Exercise:

The functions in the cell below will help you to create and properly initialize a classifier for your UCI Handwritten Digits dataset. Using these functions, create a classifier that takes the output of your Conv2d block and computes the probabilities that each image is in each class.

In [None]:
def initialize_bias_(layer, class_probs):
    """Set a layer's bias so that a following softmax starts at ``class_probs``.

    The bias is set to the log of the class probabilities: with all-zero
    weighted inputs, ``softmax(bias)`` then reproduces ``class_probs``. Storing
    the raw probabilities would not, because softmax exponentiates its input.

    Args:
        layer: Module with a ``weight`` tensor whose first dimension is the
            number of classes (e.g. ``nn.Linear``). Its ``bias`` is replaced.
        class_probs: One probability per class, as a tensor or a sequence.

    Raises:
        ValueError: If ``class_probs`` does not hold one value per output.
    """
    probs = torch.as_tensor(
        class_probs,
        dtype=layer.weight.dtype,
        device=layer.weight.device,
    )
    expected_shape = (layer.weight.shape[0],)
    if tuple(probs.shape) != expected_shape:
        raise ValueError(
            f"class_probs must have shape {expected_shape}, got {tuple(probs.shape)}"
        )
    # Clamp before the log so a class with zero frequency gets a large negative
    # (but finite) bias rather than -inf.
    smallest = torch.finfo(probs.dtype).tiny
    # torch.log returns a new tensor, so the parameter does not share storage
    # with the caller's class_probs and training cannot modify it.
    layer.bias = nn.Parameter(torch.log(probs.clamp_min(smallest)))


def create_classifier(in_channels,
                      num_classes,
                      class_frequencies,
                      kernel_height,
                      kernel_width):
    """Build a linear layer followed by a softmax over the class dimension.

    Args:
        in_channels: Number of channels in the (flattened) input.
        num_classes: Number of output classes.
        class_frequencies: Passed to ``initialize_bias_`` to set the bias.
        kernel_height: Height used to size the flattened input.
        kernel_width: Width used to size the flattened input.

    Returns:
        An ``nn.Sequential`` of the linear layer and ``nn.Softmax(dim=1)``.
    """
    # The linear layer consumes in_channels * kernel_height * kernel_width
    # features per example.
    flattened_size = in_channels * kernel_height * kernel_width
    output_layer = nn.Linear(flattened_size, num_classes)
    initialize_bias_(output_layer, class_frequencies)
    return nn.Sequential(output_layer, nn.Softmax(dim=1))


### Solution:

In [None]:
num_classes = 10
# Uniform class frequencies: every class gets 1 / num_classes.
class_frequencies = torch.ones(num_classes).div(num_classes)

# The classifier consumes the flattened Conv2d feature maps, so its input size
# comes from the backbone's channel count and output feature-map dimensions.
classifier_fn = create_classifier(
    in_channels=out_channels,
    num_classes=num_classes,
    class_frequencies=class_frequencies,
    kernel_height=out_kernel_height,
    kernel_width=out_kernel_width,
)

### Exercise:

Combine your preprocessing function, your Conv2d backbone, and your classifier function to create a single sequential network that takes raw image tensors and outputs class probabilities.

### Solution:

In [None]:
# combine everything to get your model
model_fn = nn.Sequential(
    pre_processing_fn,
    conv2d_backbone,
    nn.Flatten(),
    classifier_fn,
)

# should accept raw image tensors and return probs
predicted_probas = model_fn(X)

# should have shape batch_size, num_classes
predicted_probas.shape