In [1]:
#export
import pickle, gzip, math, os, time, shutil, torch, matplotlib as mpl, numpy as np
import pandas as pd, matplotlib.pyplot as plt
from pathlib import Path
from torch import tensor
from torch import nn

from torch.utils.data import DataLoader, default_collate
from typing import Mapping

try:
    # Relative imports: these resolve when this code runs as part of the package.
    from .training import *
    from .datasets import *
except ImportError:
    # Relative imports raise ImportError when there is no parent package
    # (e.g. when the cell is executed directly in a notebook), so fall back to
    # the absolute package path. Only ImportError is caught so that genuine
    # errors raised while importing those modules are not silently hidden.
    from src.miniai.training import *
    from src.miniai.datasets import *

In [2]:
from src.miniai.export import nb_export

In [3]:
# Use the 'gray' colormap as matplotlib's default for images in this notebook
mpl.rcParams['image.cmap'] = 'gray'

In [6]:
# Relative path, so it is resolved against the current working directory
mnist_path = Path('./data/mnist/')
# List the directory contents to check which data files are available
os.listdir(mnist_path)

['test.csv', 'train.csv']

In [17]:
def xy_split(df, label_col='label'):
    """Split a DataFrame into its feature values and its label values.

    Args:
        df: DataFrame containing the label column alongside the feature columns.
        label_col: Name of the column holding the targets. Defaults to
            ``'label'``, so existing ``xy_split(df)`` calls are unaffected.

    Returns:
        A tuple ``(X, y)``: ``X`` is ``.values`` of every column except
        ``label_col`` and ``y`` is ``.values`` of the ``label_col`` column.

    Raises:
        KeyError: If ``label_col`` is not a column of ``df``.
    """
    features = df.drop(columns=label_col)
    return features.values, df[label_col].values

def train_valid_split(X, y, y_n=10000):
    """Hold out the last ``y_n`` samples of ``X`` and ``y`` as a validation set.

    No shuffling is performed: the split is purely positional.

    Args:
        X: Sliceable sequence of features (supports ``len`` and ``[a:b]``).
        y: Sliceable sequence of labels.
        y_n: Number of trailing samples to place in the validation set. If it
            exceeds the length of an input, the whole input becomes validation
            data and the training part is empty.

    Returns:
        ``(X_train, X_valid, y_train, y_valid)``.

    Raises:
        ValueError: If ``y_n`` is negative.
    """
    if y_n < 0:
        raise ValueError(f"y_n must be non-negative, got {y_n}")

    def _cut(seq):
        # Slice on an explicit index instead of `seq[:-y_n]` / `seq[-y_n:]`:
        # with y_n == 0 those evaluate to `seq[:0]` (empty) and `seq[0:]`
        # (everything), which would put all samples in the validation set.
        at = max(len(seq) - y_n, 0)
        return seq[:at], seq[at:]

    X_train, X_valid = _cut(X)
    y_train, y_valid = _cut(y)
    return X_train, X_valid, y_train, y_valid

def to_tensor(X, y):
    """Convert features and labels to tensors ready for training.

    Args:
        X: Array-like of features; converted to a float tensor and divided by
            255 (presumably 0-255 pixel intensities, giving values in [0, 1]).
        y: Array-like of labels; converted to a long (int64) tensor.

    Returns:
        Tuple ``(features, targets)`` of tensors.
    """
    features = tensor(X).float() / 255.
    targets = tensor(y).long()
    return features, targets

In [18]:
# Prepping our data: load the CSV, separate features from labels, hold out
# the trailing rows for validation, then convert both splits to tensors
df = pd.read_csv(mnist_path / 'train.csv')
X, y = xy_split(df)
X_train, X_valid, y_train, y_valid = train_valid_split(X, y)
# Both conversions are evaluated (train first, then valid) before any name is rebound
(X_train, y_train), (X_valid, y_valid) = (
    to_tensor(X_train, y_train),
    to_tensor(X_valid, y_valid),
)

## Convolutions in PyTorch

In [21]:
import torch.nn.functional as F
import torch

In [22]:
# n: number of training samples, m: number of features per sample
n, m = X_train.shape
# Largest label + 1 (assumes labels run 0..c-1 — TODO confirm).
# NOTE: y_train is a tensor, so c is a 0-dim tensor rather than a Python int.
c = y_train.max()+1
# Width of the hidden layer
nh = 50

In [23]:
# Creating a simple fully connected baseline: m inputs -> nh hidden units (ReLU) -> 10 outputs
# NOTE(review): the output width is the literal 10 rather than `c` — presumably one unit per class; confirm
model = nn.Sequential(
    nn.Linear(m, nh),
    nn.ReLU(),
    nn.Linear(nh, 10)
)

In [24]:
# Broken CNN: both convs keep the default stride of 1 with padding=1, so the
# 28x28 spatial size is never reduced. A [16, 1, 28, 28] batch comes out as
# [16, 10, 28, 28] instead of the [16, 10] shape needed for classification.
broken_cnn = nn.Sequential(
    nn.Conv2d(1, 30, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.Conv2d(30, 10, kernel_size=3, padding=1)
)

In [34]:
# Reshape each flat row back into a 28x28 image; -1 lets view() infer the
# number of images from the total element count
x_imgs = X_train.view(-1, 28, 28)

In [38]:
# Grab the first 16 images as a batch; indexing with None inserts a channel
# axis of size 1, giving the [batch, channel, height, width] layout
xb = x_imgs[:16,None]
xb.shape

torch.Size([16, 1, 28, 28])

In [39]:
# feed through our broken cnn to see the output
broken_cnn(xb).shape

torch.Size([16, 10, 28, 28])

In [41]:
#export
# Creating conv function to return a customized conv layer
def conv(ni, nf, ks=3, stride=2, act=True):
    """Build a 2D convolution layer, optionally followed by a ReLU.

    Args:
        ni: Number of input channels.
        nf: Number of output channels (filters).
        ks: Kernel size; the padding is set to ``ks // 2``.
        stride: Convolution stride.
        act: When true, wrap the convolution and an ``nn.ReLU`` in an
            ``nn.Sequential``; otherwise return the bare ``nn.Conv2d``.

    Returns:
        The ``nn.Conv2d`` itself, or an ``nn.Sequential(conv, relu)``.
    """
    layer = nn.Conv2d(ni, nf, kernel_size=ks, stride=stride, padding=ks//2)
    if not act:
        return layer
    return nn.Sequential(layer, nn.ReLU())

In [42]:
# Let's now create a simple cnn: four stride-2 conv+ReLU stages, then a final
# conv without activation down to 10 channels. The five stride-2 convs shrink
# a 28x28 input to 1x1 (28 -> 14 -> 7 -> 4 -> 2 -> 1).
simple_cnn = nn.Sequential(
    *(conv(ni, nf) for ni, nf in [(1, 4), (4, 8), (8, 16), (16, 16)]),
    conv(16, 10, act=False),
    nn.Flatten(),  # flatten our conv output to get one row of 10 values per image
)

In [43]:
simple_cnn

Sequential(
  (0): Sequential(
    (0): Conv2d(1, 4, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): ReLU()
  )
  (1): Sequential(
    (0): Conv2d(4, 8, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): ReLU()
  )
  (2): Sequential(
    (0): Conv2d(8, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): ReLU()
  )
  (3): Sequential(
    (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): ReLU()
  )
  (4): Conv2d(16, 10, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (5): Flatten(start_dim=1, end_dim=-1)
)

In [45]:
simple_cnn(xb).shape # this is what we want to perform classification

torch.Size([16, 10])