In [1]:
#export
import pickle, gzip, math, os, time, shutil, torch, matplotlib as mpl, numpy as np
import pandas as pd, matplotlib.pyplot as plt
from pathlib import Path
from torch import tensor
from torch import nn

from torch.utils.data import DataLoader, default_collate
from typing import Mapping

# Relative imports resolve when this code runs from the exported package module.
# In the notebook there is no parent package, so the relative import raises
# ImportError and we fall back to the absolute package path instead.
# Only ImportError is caught so that genuine errors inside those modules
# are not silently masked by the fallback.
try:
    from .training import *
    from .datasets import *
except ImportError:
    from src.miniai.training import *
    from src.miniai.datasets import *

In [2]:
from src.miniai.export import nb_export

In [3]:
# Use a grayscale colormap as matplotlib's default for images
mpl.rcParams['image.cmap'] = 'gray'

In [4]:
# Directory holding the MNIST CSV files (relative to the notebook's working directory);
# listing it confirms which files are present before loading anything
mnist_path = Path('./data/mnist/')
os.listdir(mnist_path)

['test.csv', 'train.csv']

In [5]:
def xy_split(df):
    """Separate a labelled DataFrame into features and labels.

    Args:
        df: DataFrame containing a 'label' column plus any number of feature columns.

    Returns:
        A pair (features, labels): the values of every column except 'label',
        and the values of the 'label' column.  `df` itself is not modified.
    """
    labels = df['label'].values
    features = df.drop('label', axis=1).values
    return features, labels

def train_valid_split(X, y, y_n=10000):
    """Split X and y into a leading training part and a trailing validation part.

    The last `y_n` items of each input form the validation set and everything
    before them forms the training set.  Order is preserved; nothing is shuffled.

    Args:
        X: samples; any object supporting len() and slicing (list, array, tensor).
        y: labels, split the same way as X.
        y_n: number of trailing items to hold out for validation.  0 holds out
            nothing; a value larger than the input holds out everything.

    Returns:
        Tuple (X_train, X_valid, y_train, y_valid).

    Raises:
        ValueError: if `y_n` is negative.
    """
    if y_n < 0:
        raise ValueError(f"y_n must be non-negative, got {y_n}")

    def _cut(seq):
        # Use an explicit split index rather than negative slicing: `seq[:-0]`
        # is empty and `seq[-0:]` is everything, which would invert the split
        # when no validation items are requested.
        split = max(len(seq) - y_n, 0)
        return seq[:split], seq[split:]

    X_train, X_valid = _cut(X)
    y_train, y_valid = _cut(y)
    return X_train, X_valid, y_train, y_valid

def to_tensor(X, y):
    """Convert raw features and labels into tensors.

    Args:
        X: feature values; converted to a float tensor and divided by 255.
        y: label values; converted to a long (int64) tensor.

    Returns:
        A pair (scaled_features, labels).
    """
    features = tensor(X).float()
    labels = tensor(y).long()
    return features / 255., labels

In [6]:
# Prepping our data
# 1. read the labelled training CSV
df = pd.read_csv(mnist_path/'train.csv')
# 2. separate the feature columns from the 'label' column
X, y = xy_split(df)
# 3. hold out the trailing rows for validation (train_valid_split defaults to the last 10,000)
X_train, X_valid, y_train, y_valid = train_valid_split(X, y)
# 4. convert both splits to tensors; features become floats divided by 255, labels become longs
X_train, y_train = to_tensor(X_train, y_train)
X_valid, y_valid = to_tensor(X_valid, y_valid)

## Convolutions in PyTorch

In [7]:
import torch.nn.functional as F
import torch

In [8]:
# n = number of training rows, m = number of feature columns per row
n, m = X_train.shape
# number of classes, taken as the largest label + 1
# (y_train is a tensor, so this is a 0-d tensor rather than a Python int)
c = y_train.max()+1
# width of the hidden layer in the fully connected model below
nh = 50

In [9]:
# A small fully connected baseline: Linear -> ReLU -> Linear
# NOTE(review): the output width is hard-coded to 10 rather than derived from `c` above
model = nn.Sequential(
    nn.Linear(m, nh),
    nn.ReLU(),
    nn.Linear(nh, 10)
)

In [10]:
# "Broken" CNN: two 3x3 convolutions with padding=1 and no stride argument.
# The spatial size is never reduced, so a (16, 1, 28, 28) batch comes out as
# (16, 10, 28, 28) -- a 28x28 map per class instead of one score per class.
broken_cnn = nn.Sequential(
    nn.Conv2d(1, 30, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.Conv2d(30, 10, kernel_size=3, padding=1)
)

In [11]:
# Reshape each flat row of X_train into a 28x28 image (no channel axis yet);
# -1 lets view() infer the number of images
x_imgs = X_train.view(-1, 28, 28)

In [12]:
# Take the first 16 images as a sample batch; indexing with None inserts the
# channel axis that Conv2d expects, giving (16, 1, 28, 28)
xb = x_imgs[:16,None]
xb.shape

torch.Size([16, 1, 28, 28])

In [13]:
# Feed the sample batch through the broken CNN; the output keeps its 28x28
# spatial dimensions, which is why it cannot be used directly for classification
broken_cnn(xb).shape

torch.Size([16, 10, 28, 28])

In [14]:
#export
# Creating conv function to return a customized conv layer
def conv(ni, nf, ks=3, stride=2, act=True):
    """Build a 2-d convolution layer, optionally followed by a ReLU.

    Args:
        ni: number of input channels.
        nf: number of output channels (filters).
        ks: kernel size; padding is set to ks//2.
        stride: convolution stride.
        act: when True, wrap the convolution and a ReLU in an nn.Sequential.

    Returns:
        The bare nn.Conv2d when `act` is False, otherwise
        nn.Sequential(conv, nn.ReLU()).
    """
    layer = nn.Conv2d(ni, nf, kernel_size=ks, stride=stride, padding=ks//2)
    if not act:
        return layer
    return nn.Sequential(layer, nn.ReLU())

In [15]:
# A simple CNN built from stride-2 convolutions. Each layer roughly halves the
# spatial size (28 -> 14 -> 7 -> 4 -> 2 -> 1), so the last conv emits
# (batch, 10, 1, 1), which Flatten turns into (batch, 10) class scores.
# The final conv has no ReLU so the outputs can act as unconstrained logits.
simple_cnn = nn.Sequential(
    conv(1, 4),
    conv(4, 8),
    conv(8, 16),
    conv(16, 16),
    conv(16, 10, act=False),
    nn.Flatten() # flatten our conv output to feed into a linear layer
)

In [16]:
# Display the layer structure of the untrained model
simple_cnn

Sequential(
  (0): Sequential(
    (0): Conv2d(1, 4, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): ReLU()
  )
  (1): Sequential(
    (0): Conv2d(4, 8, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): ReLU()
  )
  (2): Sequential(
    (0): Conv2d(8, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): ReLU()
  )
  (3): Sequential(
    (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): ReLU()
  )
  (4): Conv2d(16, 10, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (5): Flatten(start_dim=1, end_dim=-1)
)

In [17]:
# One row of 10 class scores per image in the batch
simple_cnn(xb).shape # this is what we want to perform classification

torch.Size([16, 10])

In [18]:
# Check whether PyTorch's MPS backend (the GPU on Apple-silicon Macs) is available
torch.backends.mps.is_available()

True

In [19]:
# Reshape the flat image rows to (N, 1, 28, 28) -- one channel, 28x28 pixels --
# and pair them with their labels for the model to consume.
# NOTE(review): `Dataset` comes from the star-imported project modules; presumably a
# simple (x, y) indexable wrapper -- confirm against src/miniai/datasets.
x_images = X_train.view(-1, 1, 28, 28)
x_valid_images = X_valid.view(-1, 1, 28, 28)
train_ds, valid_ds = Dataset(x_images, y_train), Dataset(x_valid_images, y_valid)

In [20]:
#export
# check for device, and add a tensor object or dictionary of tensor objects to that device
def_device = "mps" if torch.backends.mps.is_available() else 'cuda' if torch.cuda.is_available() else 'cpu'
# def_device = "cpu"

def to_device(x, device=def_device):
    """Move a tensor, or a (possibly nested) container of tensors, to `device`.

    Args:
        x: a tensor, a Mapping whose values are movable, a list/tuple
            (including namedtuples) of movable items, or any object exposing
            a `.to(device)` method.  Containers may be nested.
        device: target device; defaults to the module-level `def_device`.

    Returns:
        For a tensor, the result of `x.to(device)`.  For a Mapping, a plain
        dict with the same keys and moved values.  For a list/tuple, a new
        container of the same type with moved items.  Objects that are neither
        containers nor movable (ints, strings, ...) are returned unchanged.
    """
    # A lone tensor must be moved directly: iterating over it would yield its
    # rows, and torch.Tensor cannot be rebuilt from a generator.
    if isinstance(x, torch.Tensor): return x.to(device)
    # Map dictionary values to the device if this format is present
    if isinstance(x, Mapping): return {k: to_device(v, device) for k, v in x.items()}
    if isinstance(x, (list, tuple)):
        moved = [to_device(o, device) for o in x]
        # namedtuples take their fields positionally, not as a single iterable
        if hasattr(x, '_fields'): return type(x)(*moved)
        return type(x)(moved)
    # Anything else that knows how to move itself is delegated to its own .to()
    if hasattr(x, 'to'): return x.to(device)
    # Non-tensor leaves are passed through untouched
    return x

# Collate function for a DataLoader: assemble the batch, then move it to the default device
def collate_device(b):
    """Collate the samples in `b` with `default_collate` and pass the result to `to_device`."""
    batch = default_collate(b)
    return to_device(batch)

In [21]:
# now we can train our simple_cnn
# NOTE(review): this import sits mid-notebook; consider moving it to the import cell at the top
from torch import optim

# batch size and initial learning rate for plain SGD
bs = 256
lr = 0.4
# collate_device moves every batch to def_device as it is assembled
# NOTE(review): `get_dls` comes from the star-imported project modules -- presumably
# returns (train, valid) DataLoaders; confirm against its definition
train_dl, valid_dl = get_dls(train_ds, valid_ds, bs, collate_fn=collate_device)
opt = optim.SGD(simple_cnn.parameters(), lr=lr)

In [22]:
# First training run: 5 epochs at the initial learning rate, with the model moved to def_device.
# NOTE(review): `fit` is a project helper; the printed rows look like
# (epoch, loss, accuracy) -- confirm against its definition.
loss, acc = fit(
    model=simple_cnn.to(def_device),
    opt=opt,
    loss_fn=F.cross_entropy,
    train_dl=train_dl,
    valid_dl=valid_dl,
    epochs=5
)

0 0.6641474186897278 0.7910000008583069
1 0.5120913469791413 0.8378999996185302
2 0.16024165568351745 0.9474000001907349
3 0.13110165576934815 0.9573000002861023
4 0.11562569268941879 0.9612000002861023


In [23]:
# Continue training the same model for 5 more epochs with a fresh SGD optimizer
# at a quarter of the initial learning rate (lr/4)
opt = optim.SGD(simple_cnn.parameters(), lr=lr/4)
loss, acc = fit(
    model=simple_cnn.to(def_device),
    opt=opt,
    loss_fn=F.cross_entropy,
    train_dl=train_dl,
    valid_dl=valid_dl,
    epochs=5
)

0 0.10252068943977356 0.9676000005722046
1 0.09800325930118561 0.9685000005722046
2 0.09126972550153732 0.9692000004768372
3 0.09272865797281266 0.9697000005722046
4 0.0909618160367012 0.9708000007629395


In [24]:
# Final fine-tuning pass: 5 more epochs with a fresh SGD optimizer at a tenth
# of the initial learning rate (lr/10)
opt = optim.SGD(simple_cnn.parameters(), lr=lr/10)
loss, acc = fit(
    model=simple_cnn.to(def_device),
    opt=opt,
    loss_fn=F.cross_entropy,
    train_dl=train_dl,
    valid_dl=valid_dl,
    epochs=5
)

0 0.08805583106279373 0.9701000006675721
1 0.08864711799621582 0.9705000005722045
2 0.08702481895685196 0.9718000008583069
3 0.08783251739740372 0.970700000667572
4 0.08711385513544083 0.9715000008583069


In [25]:
# Display the model again after training; the layer structure is unchanged
simple_cnn

Sequential(
  (0): Sequential(
    (0): Conv2d(1, 4, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): ReLU()
  )
  (1): Sequential(
    (0): Conv2d(4, 8, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): ReLU()
  )
  (2): Sequential(
    (0): Conv2d(8, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): ReLU()
  )
  (3): Sequential(
    (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): ReLU()
  )
  (4): Conv2d(16, 10, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (5): Flatten(start_dim=1, end_dim=-1)
)

In [26]:
# Pull one batch from the training loader. This rebinds `xb` (previously the
# 16-image sample) to a full batch that collate_device has already moved to def_device.
xb, yb = next(iter(train_dl))

In [27]:
# Run every layer except the final Flatten to inspect the raw convolutional output
out = simple_cnn[:-1](xb)

In [28]:
# Before flattening, each image has 10 channels of size 1x1
out.shape

torch.Size([256, 10, 1, 1])

In [29]:
# IPython introspection: `??` prints the source of nb_export.
# NOTE(review): development-time aid only; safe to remove from the finished notebook.
nb_export??

[0;31mSignature:[0m [0mnb_export[0m[0;34m([0m[0mnotebook_file[0m[0;34m,[0m [0moutput_file[0m[0;34m,[0m [0mexport_token[0m[0;34m=[0m[0;34m'#export'[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m <no docstring>
[0;31mSource:[0m   
[0;32mdef[0m [0mnb_export[0m[0;34m([0m[0mnotebook_file[0m[0;34m,[0m [0moutput_file[0m[0;34m,[0m [0mexport_token[0m[0;34m=[0m[0;34m"#export"[0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m    [0mexported_cells[0m [0;34m=[0m [0mformat_cells[0m[0;34m([0m[0mnotebook_file[0m[0;34m,[0m [0mexport_token[0m[0;34m)[0m[0;34m[0m
[0;34m[0m    [0mcode[0m [0;34m=[0m [0mcells_2_txt[0m[0;34m([0m[0mnotebook_file[0m[0;34m,[0m [0mexported_cells[0m[0;34m)[0m[0;34m[0m
[0;34m[0m    [0mwrite_code[0m[0;34m([0m[0moutput_file[0m[0;34m,[0m [0mcode[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mFile:[0m      ~/Documents/miniai/src/miniai/export.py
[0;31mType:[0m      function


In [30]:
# Export this notebook to the package module src/miniai/conv.py; nb_export's default
# export_token is '#export', so presumably only cells tagged that way are written -- confirm
nb_export('06_convolutions.ipynb', './src/miniai/conv.py')