In [1]:
%reload_ext autoreload
%autoreload 2

In [31]:
import gzip,pickle
import mytorch
import myai
import numpy as np
import matplotlib.pyplot as plt
from time import time

In [3]:
# Load the pickled MNIST splits; the file unpacks into three objects (train, valid, test).
# NOTE(security): pickle.load can execute arbitrary code -- only load this archive
# from a trusted source.
# The context manager closes the gzip handle, which a bare gzip.open(...) call leaves open.
with gzip.open('data/mnist.pkl.gz') as f:
    tr,va,te=pickle.load(f,encoding='latin1')

In [4]:
def normalize(x,mean,std):
    """Standardize `x`: subtract `mean`, then divide the result by `std`."""
    centered=x-mean
    return centered/std
# Mean and standard deviation of the training inputs (tr[0]); reused for every split.
tr_mean=tr[0].mean()
tr_std=tr[0].std()

def denorm(x):
    """Invert `normalize` using the training statistics: x*tr_std+tr_mean."""
    return x*tr_std+tr_mean

# Display the raw value range next to the statistics just computed.
tr[0].min(),tr[0].max(),tr_mean,tr_std

(0.0, 0.99609375, 0.13044983, 0.3072898)

In [5]:
# Standardize the inputs of every split with the *training* mean/std; labels pass through.
# NOTE: this rebinds tr/va/te, so re-running the cell would normalize the data a second time.
tr,va,te=[(normalize(split[0],tr_mean,tr_std),split[1]) for split in (tr,va,te)]
# Display mean/std of each split's inputs as a check on the normalization.
tuple(stat for split in (tr,va,te) for stat in (split[0].mean(),split[0].std()))

(-3.1638146e-07, 0.99999934, -0.005850922, 0.99243325, 0.005034822, 1.0064359)

In [6]:
# Sanity check: min/max of the de-normalized training inputs next to the
# min/max of the normalized inputs themselves.
denorm(tr[0]).min(),denorm(tr[0]).max(),tr[0].min(),tr[0].max()

(0.0, 0.99609375, -0.42451727, 2.8170278)

In [7]:
# Input width: size of the second axis of the test inputs.
n_in=te[0].shape[1]
# Number of outputs: largest test label plus one (assumes labels are 0-based
# integers and every class appears in the test split -- TODO confirm).
n_out=te[1].max()+1
n_in,n_out

(784, 10)

In [34]:
class Resize(mytorch.Module):
    """Reshape the incoming batch to (-1,1,28,28), i.e. one 28x28 channel per sample."""
    def forward(self,x):
        target_shape=(-1,1,28,28)
        return x.reshape(target_shape)
class Flatten(mytorch.Module):
    """Collapse everything after the first axis: output shape is (len(x),-1)."""
    def forward(self,x):
        n_rows=len(x)
        return x.reshape((n_rows,-1))

def getm():
    """Build the mytorch CNN: three stride-2 conv+ReLU stages followed by a linear head.

    Reads the notebook-level `n_out` for the size of the output layer.
    """
    layers=[
        Resize(),                                                     # 784 -> 1x28x28
        mytorch.Conv2d(1,8,5,padding=2,stride=2),mytorch.ReLU(),      # 14
        mytorch.Conv2d(8,16,3,padding=1,stride=2),mytorch.ReLU(),     # 7
        mytorch.Conv2d(16,32,3,padding=1,stride=2),mytorch.ReLU(),    # 4
        Flatten(),
        mytorch.Linear(32*4*4,n_out),
    ]
    return mytorch.Seq(*layers)

In [35]:
class TimeCallback:
    """Callback that prints the wall-clock time between `before_fit` and `after_fit`."""
    def before_fit(self):
        # Remember when fitting started.
        self.t0=time()
    def after_fit(self):
        elapsed=time()-self.t0
        print('total time ',elapsed,'s',sep='')
        
class StatsCallback:
    """Accumulate per-epoch loss/accuracy sums and print their per-sample means.

    Reads `self.learn` (`training`, `loss`, `xb`, `yb`, `preds`). That attribute is
    never assigned in this class, so it is presumably attached by the learner that
    runs the callback -- TODO confirm.

    The means are taken over the samples actually seen during the epoch rather than
    over the global `tr`/`va` arrays, so the numbers stay correct when the loaders
    cover only part of the data (e.g. built with `first_n`) or different data.
    """
    def before_epoch(self):
        # Reset running sums and the matching sample counters.
        self.tr_loss,self.val_loss,self.acc=0,0,0
        self.n_tr,self.n_val=0,0
    def after_loss(self):
        n=len(self.learn.xb)
        # The loss is weighted by batch size so the epoch mean is per sample.
        batch_loss=self.learn.loss.item()*n
        if self.learn.training:
            self.tr_loss+=batch_loss
            self.n_tr+=n
        else:
            # Count predictions whose arg-max over axis 1 equals the target.
            self.acc+=(self.learn.preds.argmax(axis=1)==self.learn.yb).sum().item()
            self.val_loss+=batch_loss
            self.n_val+=n
    def after_epoch(self):
        # max(...,1) avoids a ZeroDivisionError when a phase saw no samples;
        # the corresponding sum is 0 in that case, so 0.0 is printed.
        n_tr,n_val=max(self.n_tr,1),max(self.n_val,1)
        print('train loss',self.tr_loss/n_tr)
        print('valid loss',self.val_loss/n_val)
        print('accuracy',self.acc/n_val)

In [36]:
# Use the GPU when mytorch reports CUDA as available, otherwise stay on the CPU.
dev='cuda' if mytorch.cuda_is_available() else 'cpu'
dev

'cuda'

In [37]:
# Hyperparameters shared by both training runs in this notebook.
batch_size=100  # samples per mini-batch handed to the data loaders
lr=.1           # learning rate passed to SGD
n_ep=3          # number of training epochs

def getdl(x,y,shuffle=False,first_n=None):
    """Wrap arrays `x`,`y` in a mytorch DataLoader on device `dev`.

    When `first_n` is truthy only the first `first_n` samples are kept.
    Reads the notebook-level `dev` and `batch_size`.
    """
    if first_n:
        x=x[:first_n]
        y=y[:first_n]
    xt=mytorch.tensor(x,device=dev)
    yt=mytorch.tensor(y,device=dev)
    dataset=mytorch.TensorDataset(xt,yt)
    return mytorch.DataLoader(dataset,bs=batch_size,shuffle=shuffle)

# Data loaders: only the training loader is shuffled.
trdl=getdl(*tr,shuffle=True)
valdl=getdl(*va)
# Fresh model, cast to float32 and moved to the selected device.
m=getm().to(dtype=mytorch.float32,device=dev)
# Plain SGD over the model parameters.
# NOTE(review): set_to_none presumably controls how gradients are cleared -- confirm in mytorch.
opt=mytorch.SGD(m.params(),lr,set_to_none=True)
# Learner wiring: loaders, model, optimizer, loss function and the two reporting callbacks.
lrn=myai.Learner(trdl,m,opt,mytorch.cross_entropy,valdl=valdl,
            cbs=[TimeCallback(),StatsCallback()])

In [38]:
# Train the mytorch model for n_ep epochs; the statistics printed below are
# presumably emitted by the callbacks registered on the learner.
lrn.fit(n_ep)

train loss 0.2792095726951957
valid loss 0.11150240968447178
accuracy 0.9685
train loss 0.10175056593865156
valid loss 0.08823755176505074
accuracy 0.9757
train loss 0.07689579922147095
valid loss 0.08139400115702301
accuracy 0.9757
total time 7.828434705734253s


In [39]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader,TensorDataset

In [41]:
class Resize(nn.Module):
    """Reshape the incoming batch to (-1,1,28,28), i.e. one 28x28 channel per sample."""
    def forward(self,x):
        return x.reshape(-1,1,28,28)
class Flatten(nn.Module):
    """Collapse everything after the first axis: output shape is (len(x),-1)."""
    def forward(self,x):
        n_rows=len(x)
        return x.reshape(n_rows,-1)
def getm():
    """Build the PyTorch CNN: three stride-2 conv+ReLU stages followed by a linear head.

    Reads the notebook-level `n_out` for the size of the output layer.
    """
    layers=[
        Resize(),                                           # 784 -> 1x28x28
        nn.Conv2d(1,8,5,padding=2,stride=2),nn.ReLU(),      # 14
        nn.Conv2d(8,16,3,padding=1,stride=2),nn.ReLU(),     # 7
        nn.Conv2d(16,32,3,padding=1,stride=2),nn.ReLU(),    # 4
        Flatten(),
        nn.Linear(32*4*4,n_out),
    ]
    return nn.Sequential(*layers)

def getdl(x,y,shuffle=False,first_n=None):
    """Wrap arrays `x`,`y` in a PyTorch DataLoader on device `dev`.

    When `first_n` is truthy only the first `first_n` samples are kept.
    Reads the notebook-level `dev` and `batch_size`.
    """
    if first_n:
        x=x[:first_n]
        y=y[:first_n]
    xt=torch.tensor(x,device=dev)
    yt=torch.tensor(y,device=dev)
    dataset=TensorDataset(xt,yt)
    return DataLoader(dataset,batch_size=batch_size,shuffle=shuffle)

# Reference run in plain PyTorch with the same architecture and hyperparameters.
# These assignments rebind trdl, valdl, m and opt.
trdl=getdl(*tr,shuffle=True)
valdl=getdl(*va)
m=getm().to(device=dev)
opt=optim.SGD(m.parameters(),lr)
t0=time()  # start of the timed section (training plus validation)
for ep in range(n_ep):
    # Sum of batch losses weighted by batch size; divided by the sample count below.
    totloss=0.
    for xb,yb in trdl:
        loss=F.cross_entropy(m(xb),yb)
        totloss+=loss.item()*len(xb)
        loss.backward()
        opt.step()
        # Clear gradients so they do not accumulate into the next batch.
        opt.zero_grad()
    print('train loss',totloss/len(tr[0]))

    # Validation pass: no gradient tracking needed.
    with torch.no_grad():
        # `loss` is reused here as a float accumulator for the validation loss.
        loss=0.
        acc=0.
        for xb,yb in valdl:
            yhat=m(xb)
            # Number of samples whose arg-max prediction equals the label.
            acc+=(yhat.argmax(axis=1)==yb).sum().item()
            loss+=F.cross_entropy(yhat,yb).item()*len(xb)
        # Prints validation accuracy first, then mean validation loss.
        print(acc/len(va[0]),loss/len(va[0]))
print(time()-t0,'s')

train loss 0.5143283933699131
0.9621 0.1388060618750751
train loss 0.12304508703574538
0.9679 0.11211160973645747
train loss 0.0891373685207218
0.978 0.07642323927721009
9.099228382110596 s
