In [1]:
import numpy as np
import torch


## Model selection strategy: 
Use 10-fold cross validation for hyperparameter selection on the training set.

In [10]:
def load_data(train_path="data/train.npy", test_path="data/test.npy"):
    """Load the training and test arrays from ``.npy`` files.

    Args:
        train_path: path of the training-set file (defaults to the
            location this notebook has always used).
        test_path: path of the test-set file.

    Returns:
        ``(trset, testset)``: the two arrays exactly as returned by
        ``np.load``.
    """
    trset = np.load(train_path)
    testset = np.load(test_path)
    return trset, testset

In [80]:
# Scratch look at the K-fold splitter on the raw training array.
# KFold is otherwise only imported inside get_tr_and_val, and `tr` has to exist
# before it can be split, so both are made available here to keep this cell
# runnable on a fresh kernel (Restart & Run All).
from sklearn.model_selection import KFold

tr, test = load_data()
kf = KFold(n_splits=10)
splits = kf.split(tr)

In [84]:
def get_tr_and_val(trset, n_split=10):
    """Cut ``trset`` into K-fold (train, validation) pairs.

    Args:
        trset: indexable array; it is indexed with the index arrays
            produced by ``KFold.split``.
        n_split: number of folds handed to ``KFold``.

    Returns:
        A list with one ``(train_part, validation_part)`` tuple per fold.
    """
    from sklearn.model_selection import KFold

    folds = []
    for train_idx, val_idx in KFold(n_split).split(trset):
        folds.append((trset[train_idx], trset[val_idx]))
    return folds

In [85]:
# Read the training and test arrays from disk via load_data().
tr, test = load_data()

In [86]:
# List of (train, validation) array pairs, one per fold (10 folds by default).
tv = get_tr_and_val(tr)

In [102]:
def get_dataloader(trset, batch_size=64, shuffle=True, split_first=False):
    """Wrap an array in a ``DataLoader``.

    Args:
        trset: data accepted by ``torch.tensor``; it is indexed as a 2-D
            array when ``split_first`` is set.
        batch_size: batch size passed to the ``DataLoader``.
        shuffle: whether the ``DataLoader`` shuffles.
        split_first: when true, column 0 is split off as ``y`` and the
            remaining columns become ``x``, so batches are ``(x, y)`` pairs.
            Otherwise each batch is a 1-tuple holding the rows unchanged.

    Returns:
        A ``torch.utils.data.DataLoader`` over a ``TensorDataset``.
    """
    from torch.utils.data import DataLoader, TensorDataset

    data = torch.tensor(trset)
    # Either (features, first column) or the single combined tensor.
    tensors = (data[:, 1:], data[:, 0]) if split_first else (data,)
    dataset = TensorDataset(*tensors)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

In [103]:
from torch.utils.data import TensorDataset

In [104]:
# Training loader for the first fold, yielding (x, y) batches.
# The batch size is passed by keyword: a bare positional `batch_size` after a
# keyword argument is a SyntaxError, and no such variable is defined anyway.
dl = get_dataloader(tv[0][0], batch_size=64, split_first=True)

In [107]:
def do_cross_val(trset, fit_function, model, hyperparams,
                         val_bs = 128,n_split=10, n_train=10):
    """Average cost of training a model with one set of hyperparams over
    several train/validation splits.

    Args:
        trset: numpy array holding mnist training data.
        fit_function: callable invoked as
            ``fit_function(model, train_dl, val_dl, hyperparams)``; its
            return value is taken as the cost for that fold.
        model: the model to train. Each fold trains its own deep copy, so
            every fold starts from the same initial weights and ``model``
            itself is left untouched.
        hyperparams: mapping; ``hyperparams['batch_size']`` sets the training
            batch size, and the whole mapping is forwarded to ``fit_function``.
        val_bs: batch size of the validation loader.
        n_split: number of folds to use to split up the dataset.
        n_train: how many of these folds to train on and average over.

    Returns:
        ``np.mean`` of the per-fold costs.

    Raises:
        ValueError: if ``n_train`` is not between 1 and ``n_split``.
    """
    import copy

    # Fail before any training starts instead of hitting an IndexError
    # part-way through the folds.
    if not 1 <= n_train <= n_split:
        raise ValueError(
            "n_train must be between 1 and n_split (got n_train=%r, n_split=%r)"
            % (n_train, n_split))

    tr_and_val_splits = get_tr_and_val(trset, n_split=n_split)
    costs = []
    for t, v in tr_and_val_splits[:n_train]:
        train_dl = get_dataloader(t, batch_size=hyperparams['batch_size'],
                                  shuffle=True, split_first=True)
        # Validate on the held-out part `v`, not on the training part.
        # NOTE(review): this loader is built without split_first, so its
        # batches are 1-tuples of unsplit rows; confirm fit_function expects that.
        val_dl = get_dataloader(v, batch_size=val_bs)

        # Fresh copy per fold: otherwise weights trained on earlier folds
        # (which saw this fold's validation rows) would carry over.
        fold_model = copy.deepcopy(model)
        cost = fit_function(fold_model, train_dl, val_dl, hyperparams)
        costs.append(cost)
    return np.mean(costs)
    

In [5]:
from torch import nn

In [8]:
class Lambda(nn.Module):
    """Layer that applies a user-supplied callable to its input."""

    def __init__(self, func):
        """Keep ``func`` so that ``forward`` can call it."""
        super().__init__()
        self.func = func

    def forward(self, x):
        """Return ``func(x)``."""
        result = self.func(x)
        return result

    

In [42]:
def preprocess(x):
    """Reshape flat mnist rows into the layout conv layers take.

    The input is viewed as ``(-1, 1, 28, 28)``, i.e. 28x28 images with a
    single (dummy) channel and the batch size inferred, then cast to
    ``torch.float`` (float32, the default dtype of conv layers).
    """
    images = x.view(-1, 1, 28, 28)
    return images.to(torch.float)
    

## Conv model for digit recognition
Inputs are (N, 784) real-valued tensors.
First, they are reshaped to 2D images with one channel. Then come conv+ReLU, max pooling, conv+ReLU, global average pooling, and a final linear layer that produces the logits.

In [58]:
# Conv net for digit recognition; the shape comments assume (N, 784) inputs.
convmodel = nn.Sequential(
    Lambda(preprocess),                # (N, 784) -> (N, 1, 28, 28), float32
    nn.Conv2d(1, 32, 3, padding=1),    # padding=1 keeps the 28x28 size
    nn.ReLU(),
    nn.MaxPool2d(2),                   # 28x28 -> 14x14
    nn.Conv2d(32, 32, 3, padding=1),
    nn.ReLU(),
    nn.AvgPool2d(14),                  # average over the whole 14x14 map -> (N, 32, 1, 1)
    # Flatten everything except the batch dimension. A bare torch.squeeze
    # would also drop the batch dimension when N == 1, giving (10,) logits
    # instead of (1, 10).
    Lambda(lambda t: t.reshape(t.size(0), -1)),
    nn.Linear(32, 10),                 # 10 class logits
)
                            

In [65]:
def loss_fn(logits, labels):
    """ The cross-entropy loss between model's logits and integer
    labels 0....9.
        logits = (N, 10) tensor of logit values. The model probs for each class are defined as the softmax
        of the logits.
        labels = (N,) tensor of integer labels.

        returns: scalar loss tensor"""
    # The functional API lives in `nn.functional`; `nn.function` does not
    # exist and raised AttributeError.
    return nn.functional.cross_entropy(logits, labels)