In [1]:
import pickle, gzip, math, os, time, shutil
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path

# torch
import torch
from torch import tensor, nn
import torch.nn.functional as F

# huggingface datasets
import datasets
from datasets import Dataset
from torch.utils.data import DataLoader

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load MNIST via the Hugging Face `datasets` library.
mnist = datasets.load_dataset('mnist')

Found cached dataset mnist (/Users/diegomedina-bernal/.cache/huggingface/datasets/mnist/mnist/1.0.0/9d494b7f466d6931c64fb39d58bb1249a4d85c9eb9865d9bc20960b999e2a332)
100%|██████████| 2/2 [00:00<00:00, 681.17it/s]


In [3]:
# Ask the dataset to hand back torch tensors when columns are read.
mnist.set_format('torch')

# Pull the full image and label columns out of each split.
train, test = mnist['train'], mnist['test']
x_train, y_train = train['image'], train['label']
x_test, y_test = test['image'], test['label']

In [4]:
class DS:
    """Minimal map-style dataset over in-memory image and label tensors.

    Args:
        x: tensor of pixel values whose elements can be regrouped into rows of
            28*28 values (e.g. shape (N, 28, 28)). Stored as float, divided by 255.
        y: tensor of labels. Stored flattened to 1-D and cast to int64.

    Indexing returns an ``(x[i], y[i])`` pair; ``len`` is the number of rows in ``x``.
    """
    def __init__(self, x, y):
        # reshape (rather than view) also accepts non-contiguous tensors,
        # e.g. the result of a transpose/permute, where view raises.
        self.x = x.reshape(-1, 28*28).float()/255.
        self.y = y.long().reshape(-1)
    def __len__(self):
        return len(self.x)
    def __getitem__(self, i):
        return self.x[i], self.y[i]
    
# Wrap the raw tensors in the minimal DS dataset class.
train_ds, test_ds = DS(x_train, y_train), DS(x_test, y_test)

# One batch size shared by both loaders; only the training loader is shuffled.
BS = 256
train_dl = DataLoader(train_ds, batch_size=BS, shuffle=True)
test_dl = DataLoader(test_ds, batch_size=BS, shuffle=False)

In [5]:
# 2 layer nn
class Model(nn.Module):
    """Two-layer fully connected network: Linear -> activation -> Linear.

    Args:
        nin: number of input features.
        nh: number of hidden units.
        nout: number of outputs.
        act_fn: an ``nn.Module`` instance applied between the two linear layers.
    """
    def __init__(self, nin, nh, nout, act_fn=nn.ReLU()):
        super().__init__()
        assert isinstance(act_fn, nn.Module)
        # nn.ModuleList (unlike a plain Python list) registers each layer as a
        # submodule, so parameters(), state_dict(), .to() and train()/eval()
        # actually reach them. With a plain list the optimizer gets no parameters.
        self.layers = nn.ModuleList([
            nn.Linear(nin, nh),
            act_fn,
            nn.Linear(nh, nout)])

    def forward(self, x):
        """Apply the layers to ``x`` in order and return the result."""
        for l in self.layers: x = l(x)
        return x

In [6]:
# Grab a single batch from the training loader and check its shapes.
xb, yb = next(iter(train_dl))
xb.shape, yb.shape

(torch.Size([256, 784]), torch.Size([256]))

In [7]:
# simple model
model = Model(
    nin=28*28,           # 28x28 image, already flattened to 784 values in DS.__init__
    nh=50,               # hidden layer size
    nout=10,             # output size
    act_fn=nn.ReLU()     # activation function (passed as an instance)
    )

In [8]:
# Smoke test: run one batch through the model and look at the output shape.
out = model(xb)
out.shape

torch.Size([256, 10])

In [9]:
# Model output shape next to the target shape for the same batch.
out.shape, yb.shape

(torch.Size([256, 10]), torch.Size([256]))

In [10]:
# Reference values from PyTorch's built-in log_softmax over the class dimension.
F.log_softmax(out, dim=1)

tensor([[-2.4215, -2.1928, -2.3615,  ..., -2.3137, -2.3516, -2.2684],
        [-2.3692, -2.2034, -2.3421,  ..., -2.3573, -2.3131, -2.4510],
        [-2.3751, -2.1939, -2.3081,  ..., -2.3212, -2.3565, -2.4970],
        ...,
        [-2.2646, -2.2131, -2.3599,  ..., -2.3244, -2.3184, -2.3789],
        [-2.4540, -2.1610, -2.3690,  ..., -2.3229, -2.2838, -2.3374],
        [-2.3782, -2.2248, -2.3896,  ..., -2.3489, -2.3051, -2.2977]],
       grad_fn=<LogSoftmaxBackward0>)

In [11]:
def log_softmax(x):
    return (x.exp()/(x.exp().sum(-1, keepdim=True))).log()

In [12]:
# accuracy function
def accuracy(out, yb):
    """Fraction of rows in ``out`` whose argmax along dim 1 equals ``yb``."""
    predicted = out.argmax(dim=1)
    hits = (predicted == yb).float()
    return hits.mean()
# function to print
def report(loss, preds, yb):
    """Print the loss and the accuracy of ``preds`` against ``yb``, two decimals each."""
    print(f"{loss:.2f}, {accuracy(preds, yb):.2f}")

In [13]:
# Same architecture as Model, but the layers live in an nn.Sequential and
# the activation is passed as a class (instantiated here) rather than an instance.
class SimpleModel(nn.Module):
    """Linear -> activation -> Linear, wrapped in a single ``nn.Sequential``."""

    def __init__(self, nin, nh, nout=10, act_fn=nn.ReLU):
        super().__init__()
        hidden = nn.Linear(nin, nh)
        activation = act_fn()
        head = nn.Linear(nh, nout)
        self.layers = nn.Sequential(hidden, activation, head)

    def forward(self, x):
        """Run ``x`` through the sequential stack."""
        out = self.layers(x)
        return out

In [14]:
# Rebinds `model`: 784 inputs, 50 hidden units, nout left at its default of 10.
# act_fn is passed as a class here; SimpleModel instantiates it.
model = SimpleModel(28*28, 50, act_fn=nn.ReLU)

In [15]:
# Train `model` with Adam on minibatches from train_dl, using cross-entropy loss.
# Every 10th epoch a "loss, accuracy" line is printed.
lr = 1e-2
opt = torch.optim.Adam(model.parameters(), lr=lr)
epochs = 100
for epoch in range(epochs):
    for xb, yb in train_dl:
        preds = model(xb)
        loss = F.cross_entropy(preds, yb)
        loss.backward()
        opt.step()
        # clear gradients so they do not accumulate into the next batch
        opt.zero_grad()
    # NOTE: loss/preds/yb here are from the LAST training batch of the epoch,
    # so these numbers are training-batch metrics, not an evaluation on test_dl.
    if epoch % 10 == 0: report(loss, preds, yb)

0.19, 0.93
0.03, 0.98
0.00, 1.00
0.04, 0.97
0.01, 0.99
0.00, 1.00
0.02, 0.99
0.00, 1.00
0.00, 1.00
0.00, 1.00


In [16]:
from datasets import load_dataset, load_dataset_builder

In [17]:
# Create the dataset builder for Fashion-MNIST and print its description text.
name = "fashion_mnist"
ds_builder = load_dataset_builder(name)
print(ds_builder.info.description)

Fashion-MNIST is a dataset of Zalando's article images—consisting of a training set of
60,000 examples and a test set of 10,000 examples. Each example is a 28x28 grayscale image,
associated with a label from 10 classes. We intend Fashion-MNIST to serve as a direct drop-in
replacement for the original MNIST dataset for benchmarking machine learning algorithms.
It shares the same image size and structure of training and testing splits.



In [18]:
# Load the dataset identified by `name`.
dsd = load_dataset(name)

Found cached dataset fashion_mnist (/Users/diegomedina-bernal/.cache/huggingface/datasets/fashion_mnist/fashion_mnist/1.0.0/0a671f063342996f19779d38c0ab4abef9c64f757b35af8134b331c294d7ba48)
100%|██████████| 2/2 [00:00<00:00, 658.76it/s]


In [19]:
# Display the loaded object (splits, features and row counts).
dsd

DatasetDict({
    train: Dataset({
        features: ['image', 'label'],
        num_rows: 60000
    })
    test: Dataset({
        features: ['image', 'label'],
        num_rows: 10000
    })
})

In [20]:
# Column names for inputs and targets. `transforms` reads `x` as a module-level global.
x, y = 'image', 'label'

In [21]:
# NOTE: this rebinds `train` and `test`, which earlier in the notebook held the MNIST splits.
train, test = dsd['train'], dsd['test']

In [22]:
import torchvision.transforms.functional as TF
from torch.utils.data import DataLoader

In [23]:
#export
def transforms(b):
    """Replace the batch's `x` column with `TF.to_tensor` of each entry, in place.

    `b` is indexed by the module-level name `x`; the same (mutated) `b` is returned.
    NOTE(review): `x` and `TF` are defined in cells that are not marked `#export` --
    confirm the exported module still has them in scope.
    """
    converted = []
    for item in b[x]:
        converted.append(TF.to_tensor(item))
    b[x] = converted
    return b

In [24]:
# Attach `transforms` to the training split via with_transform, then batch it
# with a shuffled DataLoader (default collation).
tds = train.with_transform(transforms)
dl = DataLoader(tds, batch_size=256, shuffle=True)

In [25]:
# Take one batch. NOTE: this rebinds `xb`; with the default collate the batch is a
# dict keyed by column name (see the output of the next cell), not a tensor.
xb = next(iter(dl))

In [26]:
# Display the whole batch dict (large output).
xb

{'image': tensor([[[[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           ...,
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]]],
 
 
         [[[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           ...,
           [0.0000, 0.0000, 0.0000,  ..., 0.2588, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.4588, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]]],
 
 
         [[[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         

In [34]:
#export
from torch.utils.data import default_collate
from operator import itemgetter

In [35]:
#export
def collate_dict(ds):
    """Build a collate function that returns the collated batch's columns positionally.

    The keys of `ds.features` are captured once. The returned function runs
    `default_collate` on a batch and then looks up those keys in the result,
    in the order `ds.features` yields them.
    """
    pick_columns = itemgetter(*ds.features)

    def _collate(batch):
        collated = default_collate(batch)
        return pick_columns(collated)

    return _collate

def inplace(f):
    """Wrap `f` so the wrapper calls `f(b)`, discards its result, and returns `b` itself."""
    def _call_then_return_arg(b):
        f(b)
        return b
    return _call_then_return_arg

In [36]:
# Same dataset, but collate_dict makes each batch come back as positional columns
# (unpacked as `xb, yb` below) instead of a dict.
dlf = DataLoader(tds, batch_size=4, collate_fn=collate_dict(tds))

In [37]:
# Unpack one batch from the custom-collate loader into inputs and targets.
xb, yb = next(iter(dlf))

In [38]:
# Check the shapes of the unpacked inputs and targets.
xb.shape, yb.shape

(torch.Size([4, 1, 28, 28]), torch.Size([4]))

In [1]:
from export import nb_export

In [2]:
# Call the project's nb_export helper with a notebook path and a target .py path.
# NOTE(review): the notebook filename is hard-coded -- confirm it matches this notebook.
nb_export("01_backprop_torch.ipynb", "./miniai/dataset5.py")