In [3]:
!pip install python-mnist
%load_ext autoreload
%autoreload 2

%matplotlib inline

Collecting python-mnist
  Downloading python_mnist-0.7-py2.py3-none-any.whl (9.6 kB)
Installing collected packages: python-mnist
Successfully installed python-mnist-0.7
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
#export
from exp.nb_05b import *
torch.set_num_threads(2)

In [5]:
# Load the four MNIST arrays via get_data(); on the recorded run this fetched the .gz files over the network.
x_train,y_train,x_valid,y_valid = get_data()

fetching http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
fetching http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
fetching http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
fetching http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz


In [6]:
#export
def normalize_to(train, valid):
    """Normalize `train` and `valid` with the mean and std of `train` only.

    Both outputs come from `normalize`, called with the same two statistics, so
    `valid` is scaled by the training set's values rather than its own.

    Returns the pair `(normalized_train, normalized_valid)`.
    """
    mean, std = train.mean(), train.std()
    train_normed = normalize(train, mean, std)
    valid_normed = normalize(valid, mean, std)
    return train_normed, valid_normed

In [7]:
# Normalize both splits with normalize_to, then pair each split with its labels in a Dataset.
# NOTE: x_train/x_valid are overwritten with their normalized versions, so re-running
# this cell without reloading the data normalizes already-normalized tensors.
x_train,x_valid = normalize_to(x_train,x_valid)
train_ds,valid_ds = Dataset(x_train, y_train),Dataset(x_valid, y_valid)

In [8]:

# Sanity check on the normalized training data: the recorded output is mean ~0 and std ~1.
x_train.mean(),x_train.std()

(tensor(0.0001), tensor(1.0000))

In [9]:
# bs is passed to get_dls below; nh is not used by any cell visible in this section.
nh,bs = 50,512
# Number of classes, taken as the largest label value plus one.
c = y_train.max().item()+1
loss_func = F.cross_entropy

# Unpack whatever get_dls returns for the two datasets into a DataBunch, along with the class count.
data = DataBunch(*get_dls(train_ds, valid_ds, bs), c)

In [10]:
#export
class Lambda(nn.Module):
    """Module wrapper that lets a plain callable be used as a layer.

    `func` is stored on the module; `forward` returns `func(x)` unchanged.
    """

    def __init__(self, func):
        super().__init__()
        self.func = func

    def forward(self, x):
        # Delegate straight to the wrapped callable.
        return self.func(x)

def flatten(x):
    """Collapse every dimension after the first into one: result is `(x.shape[0], -1)`.

    Uses `reshape` rather than `view` so that non-contiguous inputs (for example
    the result of `permute` or `transpose`) are accepted too. `view` raises a
    RuntimeError when the requested shape is incompatible with the tensor's
    strides; `reshape` returns a view when it can and copies otherwise, so
    contiguous inputs behave exactly as before.
    """
    return x.reshape(x.shape[0], -1)

In [11]:
def mnist_resize(x):
    """View `x` as a batch of single-channel 28x28 images; the batch size is inferred."""
    image_shape = (1, 28, 28)
    return x.view(-1, *image_shape)

In [12]:
def get_cnn_model(data):
    """Build a small CNN: four stride-2 convs, adaptive average pool to 1x1, linear head.

    The input is first viewed as `(-1, 1, 28, 28)` by `mnist_resize`, and the
    final linear layer has `data.c` outputs. The number after each conv line is
    the spatial size of that layer's output for a 28x28 input.
    """
    layers = [
        Lambda(mnist_resize),
        nn.Conv2d(1, 8, 5, padding=2, stride=2), nn.ReLU(),    # 14
        nn.Conv2d(8, 16, 3, padding=1, stride=2), nn.ReLU(),   # 7
        nn.Conv2d(16, 32, 3, padding=1, stride=2), nn.ReLU(),  # 4
        nn.Conv2d(32, 32, 3, padding=1, stride=2), nn.ReLU(),  # 2
        nn.AdaptiveAvgPool2d(1),
        Lambda(flatten),
        nn.Linear(32, data.c),
    ]
    return nn.Sequential(*layers)

In [13]:
# Instantiate the CNN; its output size comes from data.c.
model = get_cnn_model(data)

In [30]:
# Callback factories handed to Runner: the Recorder class, and AvgStatsCallback pre-bound to the accuracy metric.
cbfs = [Recorder, partial(AvgStatsCallback,accuracy)]

In [15]:
# Plain SGD over the model's parameters, bundled with the loss and data into a Learner.
opt = optim.SGD(model.parameters(), lr=0.4)
learn = Learner(model, opt, loss_func, data)
# The Runner receives the callback factories collected in cbfs.
run = Runner(cb_funcs=cbfs)

In [16]:
# Fit for 5 epochs; the recorded output shows one train/valid stats line pair per epoch.
run.fit(5,learn)

train: [1.8464143229166667, tensor(0.3574)]
valid: [0.592774365234375, tensor(0.8052)]
train: [0.34084807942708334, tensor(0.8931)]
valid: [0.3825819580078125, tensor(0.8747)]
train: [0.17606775716145834, tensor(0.9462)]
valid: [0.214144287109375, tensor(0.9328)]
train: [0.1287925537109375, tensor(0.9614)]
valid: [0.20327281494140625, tensor(0.9330)]
train: [0.1091248046875, tensor(0.9664)]
valid: [0.11075758056640625, tensor(0.9663)]


In [17]:
# Somewhat more flexible way: hold an explicit device object (CUDA, index 0) that can be passed around
device = torch.device('cuda',0)

In [18]:

class CudaCallback(Callback):
    """Callback that moves the model and each batch onto a caller-chosen device."""

    def __init__(self, device):
        self.device = device

    def begin_fit(self):
        # Move the model to the stored device before training starts.
        self.model.to(self.device)

    def begin_batch(self):
        # Move both halves of the batch, then store them back on the runner.
        xb = self.xb.to(self.device)
        yb = self.yb.to(self.device)
        self.run.xb, self.run.yb = xb, yb

In [19]:
# Somewhat less flexible, but quite convenient: make `device` the current CUDA device
# once, so later code can call `.cuda()` without naming a device.
torch.cuda.set_device(device)

In [20]:
#export
class CudaCallback(Callback):
    """Callback that calls `.cuda()` on the model at fit start and on every batch."""

    def begin_fit(self):
        self.model.cuda()

    def begin_batch(self):
        # Move inputs and targets first, then store both back on the runner.
        xb, yb = self.xb.cuda(), self.yb.cuda()
        self.run.xb, self.run.yb = xb, yb

In [35]:
# Add the CUDA callback to the shared list.
# NOTE: this mutates cbfs in place, so re-running the cell appends CudaCallback again.
cbfs.append(CudaCallback)

In [36]:
# Build a new model instance for the run that uses the CUDA callback.
model = get_cnn_model(data)

In [37]:
# Same setup as the earlier run: SGD at lr=0.4, a Learner, and a Runner built from cbfs
# (which now also contains CudaCallback).
opt = optim.SGD(model.parameters(), lr=0.4)
learn = Learner(model, opt, loss_func, data)
run = Runner(cb_funcs=cbfs)

In [38]:
%time run.fit(3, learn)

train: [1.5729352864583332, tensor(0.4583, device='cuda:0')]
valid: [0.39863408203125, tensor(0.8848, device='cuda:0')]
train: [0.257634521484375, tensor(0.9238, device='cuda:0')]
valid: [0.2824466064453125, tensor(0.9107, device='cuda:0')]
train: [0.14573048502604166, tensor(0.9563, device='cuda:0')]
valid: [0.157650634765625, tensor(0.9542, device='cuda:0')]
CPU times: user 2.91 s, sys: 40 ms, total: 2.95 s
Wall time: 2.92 s


In [39]:
def conv2d(ni, nf, ks=3, stride=2):
    """Return an `nn.Sequential` holding a `Conv2d` followed by a `ReLU`.

    ni: input channels. nf: output channels. ks: kernel size. stride: conv stride.
    Padding is `ks//2`.
    """
    conv = nn.Conv2d(ni, nf, ks, padding=ks // 2, stride=stride)
    return nn.Sequential(conv, nn.ReLU())

In [40]:
#export
class BatchTransformXCallback(Callback):
    """Callback that replaces each batch's `xb` with `tfm(xb)`."""

    # NOTE(review): presumably used to order callbacks relative to each other - confirm against Runner.
    _order = 2

    def __init__(self, tfm):
        self.tfm = tfm

    def begin_batch(self):
        transformed = self.tfm(self.xb)
        self.run.xb = transformed

def view_tfm(*size):
    """Return a function that views its input as `(-1, *size)`."""
    target = (-1,) + size

    def _inner(x):
        return x.view(*target)

    return _inner

In [46]:
# Transform that views a batch as (-1, 1, 28, 28), applied to xb by BatchTransformXCallback.
# NOTE: the append mutates cbfs in place, so re-running this cell adds the callback again.
mnist_view = view_tfm(1,28,28)
cbfs.append(partial(BatchTransformXCallback, mnist_view))

In [47]:
# Output channel count for each successive conv layer (consumed by get_cnn_layers).
nfs = [8,16,32,32]

In [51]:
def get_cnn_layers(data, nfs):
    """Return the list of layers for a CNN whose conv widths are given by `nfs`.

    A single input channel is prepended to `nfs`; each consecutive pair of
    channel counts becomes one `conv2d` block (kernel size 5 for the first,
    3 for the rest). The convs are followed by adaptive average pooling to
    1x1, a flatten step, and a linear layer with `data.c` outputs.
    """
    channels = [1] + nfs
    convs = []
    for i, (n_in, n_out) in enumerate(zip(channels[:-1], channels[1:])):
        ks = 5 if i == 0 else 3
        convs.append(conv2d(n_in, n_out, ks))
    head = [nn.AdaptiveAvgPool2d(1), Lambda(flatten), nn.Linear(channels[-1], data.c)]
    return convs + head

def get_cnn_model(data, nfs):
    """Assemble the layers from `get_cnn_layers(data, nfs)` into an `nn.Sequential`."""
    layers = get_cnn_layers(data, nfs)
    return nn.Sequential(*layers)

In [52]:
#export
def get_runner(model, data, lr=0.6, cbs=None, opt_func=None, loss_func = F.cross_entropy):
    """Create a `(Learner, Runner)` pair for `model` and `data`.

    model: module whose parameters are given to the optimizer.
    data: passed through to `Learner`.
    lr: learning rate forwarded to the optimizer constructor.
    cbs: callback factories; run through `listify` before reaching `Runner`.
    opt_func: optimizer constructor called as `opt_func(params, lr=lr)`;
        `optim.SGD` is used when this is None.
    loss_func: loss callable handed to `Learner`.
    """
    make_opt = optim.SGD if opt_func is None else opt_func
    optimizer = make_opt(model.parameters(), lr=lr)
    learner = Learner(model, optimizer, loss_func, data)
    runner = Runner(cb_funcs=listify(cbs))
    return learner, runner

In [53]:
# Build the nfs-driven CNN, then let get_runner create its Learner and Runner with the current cbfs.
model = get_cnn_model(data, nfs)
learn,run = get_runner(model, data, lr=0.4, cbs=cbfs)


In [54]:
# Fit for 3 epochs; the recorded output shows one train/valid stats line pair per epoch.
run.fit(3,learn)

train: [1.9876596354166667, tensor(0.3001, device='cuda:0')]
valid: [0.8780345703125, tensor(0.7177, device='cuda:0')]
train: [0.37515709635416666, tensor(0.8835, device='cuda:0')]
valid: [0.2710060302734375, tensor(0.9138, device='cuda:0')]
train: [0.16561806640625, tensor(0.9501, device='cuda:0')]
valid: [0.14968392333984376, tensor(0.9536, device='cuda:0')]
