# **Framework Improvements - Adding The Learner Class**

In [1]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import pickle, gzip, math, os, time, shutil
import fastcore.all as fc
from operator import attrgetter, itemgetter
from functools import partial
from collections.abc import Mapping
from contextlib import contextmanager
from pathlib import Path
from copy import copy

import torch
from torch import nn, tensor, optim
from torch.utils.data import default_collate, DataLoader
import torch.nn.functional as F
import torchvision.transforms.functional as TF
from datasets import load_dataset, load_dataset_builder

from miniai.training import * # Modules have already been developed in previous NBs
from miniai.datasets import * # and, reside in separate Github repo.
from miniai.conv import *
from fastprogress import progress_bar, master_bar

In [2]:
from fastcore.test import test_close

torch.set_printoptions(precision=2, linewidth=14, sci_mode=False)
torch.manual_seed(1)
mpl.rcParams['image.cmap'] = 'gray'

import logging 
logging.disable(logging.WARNING)

## **The Learner**

The objective of this new class will be to:
1. Enable rapid testing of new approaches wrt the modeling process, especially on the training and inference fronts.
2. Constantly build on-top of the existing learner's functionality.
3. Allow improved inspection, optimized CUDA implementations etc.

So, let's get the HuggingFace `Fashion_MNIST` dataset again.

In [3]:
x, y = 'image', 'label'
name  = "fashion_mnist"
dsd = load_dataset(name)

In [4]:
dsd

DatasetDict({
    train: Dataset({
        features: ['image', 'label'],
        num_rows: 60000
    })
    test: Dataset({
        features: ['image', 'label'],
        num_rows: 10000
    })
})

In [5]:
@inplace
def transformi(b): b[x] = [torch.flatten(TF.to_tensor(o)) for o in b[x]]

In [6]:
bs = 1024
tds = dsd.with_transform(transformi)

In [7]:
dls = DataLoaders.from_dd(tds, bs, num_workers=4)
dt = dls.train
xb, yb = next(iter(dt))
xb.shape, yb[:10]

(torch.Size([1024, 784]),
 tensor([5, 4,
         9, 4,
         3, 0,
         6, 5,
         7, 6]))

Prose

In [8]:
class Learner:
    def __init__(self, model, dls, loss_func, lr, opt_func=optim.SGD): 
        # fastcore store_attr() reduces the amount of boilerplate that usually goes
        # into an __init__()
        fc.store_attr()

    def one_batch(self):
        self.xb, self.yb = to_device(self.batch)
        self.preds = self.model(self.xb)
        self.loss = self.loss_func(self.preds, self.yb)
        if self.model.training:
            self.loss.backward()
            self.opt.step()
            self.opt.zero_grad()
        with torch.no_grad(): self.calc_stats()

    def calc_stats(self):
        acc = (self.preds.argmax(dim=1)==self.yb).float().sum()
        self.accs.append(acc)
        n = len(self.xb)
        self.losses.append(self.loss*n)
        self.ns.append(n)

    def one_epoch(self, train):
        self.model.training = train
        dl = self.dls.train if train else self.dls.valid
        for self.num,self.batch in enumerate(dl): self.one_batch()
        n = sum(self.ns)
        print(self.epoch, self.model.training, sum(self.losses).item()/n, sum(self.accs).item()/n)
    
    def fit(self, n_epochs):
        self.accs, self.losses,self.ns = [],[],[]
        self.model.to(def_device)
        self.opt = self.opt_func(self.model.parameters(), self.lr)
        self.n_epochs = n_epochs
        for self.epoch in range(n_epochs):
            self.one_epoch(True)
            with torch.no_grad(): self.one_epoch(False)

We can test the new learner on the simple MLP. Also, note that we can now add `num_workers` to the DataLoader to improve the performance of collate functions.

In [9]:
m, nh = 28*28, 50
model = nn.Sequential(nn.Linear(m, nh), nn.ReLU(), nn.Linear(nh, 10))

In [10]:
learn = Learner(model, dls, F.cross_entropy, lr=0.2)
learn.fit(1)

0 True 1.1753046875 0.5986833333333333
0 False 1.1203290178571428 0.6135285714285714


## **Basic Callbacks Learner**

In [11]:
class CancelFitException(Exception): pass
class CancelBatchException(Exception): pass
class CancelEpochException(Exception): pass

In [12]:
class Callback(): order = 0

In [13]:
def run_cbs(cbs, method_nm, learn=None):
    for cb in sorted(cbs, key=attrgetter('order')):
        method = getattr(cb, method_nm, None)
        if method is not None: method(learn)

In [14]:
class CompletionCB(Callback):
    def before_fit(self, learn): self.count = 0
    def after_batch(self, learn): self.count += 1
    def after_fit(self, learn): print(f'Completed {self.count} batches')

In [15]:
cbs = [CompletionCB()]
run_cbs(cbs, 'before_fit')
run_cbs(cbs, 'after_batch')
run_cbs(cbs, 'after_fit')

Completed 1 batches


Introducing additional callbacks for demonstrative purposes. The Learner's code will be changed accordingly.

In [16]:
class Learner():
    def __init__(self, model, dls, loss_func, lr, cbs, opt_func=optim.SGD): fc.store_attr()

    def one_batch(self):
        self.preds = self.model(self.batch[0])
        self.loss = self.loss_func(self.preds, self.batch[1])
        if self.model.training:
            self.loss.backward()
            self.opt.step()
            self.opt.zero_grad()

    def one_epoch(self, train):
        self.model.train(train)
        self.dl = self.dls.train if train else self.dls.valid
        try:
            self.callback('before_epoch')
            for self.iter,self.batch in enumerate(self.dl):
                try:
                    self.callback('before_batch')
                    self.one_batch()
                    self.callback('after_batch')
                except CancelBatchException: pass
            self.callback('after_epoch')
        except CancelEpochException: pass
    
    def fit(self, n_epochs):
        self.n_epochs = n_epochs
        self.epochs = range(n_epochs)
        self.opt = self.opt_func(self.model.parameters(), self.lr)
        try:
            self.callback('before_fit')
            for self.epoch in self.epochs:
                self.one_epoch(True)
                self.one_epoch(False)
            self.callback('after_fit')
        except CancelFitException: pass

    def callback(self, method_nm): run_cbs(self.cbs, method_nm, self)

In [17]:
m, nh = 28*28, 50
def get_model(): return nn.Sequential(nn.Linear(m, nh), nn.ReLU(), nn.Linear(nh, 10))

In [19]:
model = get_model()
learn = Learner(model, dls, F.cross_entropy, lr=0.2, cbs=[CompletionCB()])
learn.fit(1)

Completed 64 batches


In [20]:
class SingleBatchCB(Callback):
    order = 1
    def after_batch(self, learn): raise CancelFitException()

In [21]:
learn = Learner(get_model(), dls, F.cross_entropy, lr=0.2, cbs=[SingleBatchCB(), CompletionCB()])
learn.fit(1)

## **Metrics**

As the framework begins to take shape, we won't be working exclusively with `Cross Entropy` loss. This is because the framework is supposed to be flexible for a wide range of deep learning problems, be they generative or otherwise.

With this in mind, let's create a `Metrics` class. Here, we will also introduce the concept of [Sub-classing](https://llego.dev/posts/in-depth-guide-superclasses-subclasses-python/), which in short is:

> Superclasses and subclasses are key concepts in object-oriented programming that allow code reuse through inheritance. A superclass, also called a base class or parent class, contains attributes and methods that are common to a set of related classes. A subclass, also known as a derived class or child class, inherits from the superclass and specializes or extends its capabilities.

In [28]:
class Metric:
    def __init__(self): self.reset()

    def reset(self): self.vals, self.ns = [], []

    def add(self, inp, targ=None, n=1): # n = number of items in the mini-batch
        self.last = self.calc(inp, targ)
        self.vals.append(self.last)
        self.ns.append(n)

    @property
    def value(self):
        ns = tensor(self.ns)
        return (tensor(self.vals)*ns).sum() / ns.sum()

    def calc(self, inps, targ): return inps

Here, `Accuracy` is the subclass of `Metric`. Whats happening under the hood is that `calc()`, when it is called in `Accuracy`, basically uses the code from the `Metric` class and runs its own version of the function instead of the parent / super-class's version.

In [26]:
class Accuracy(Metric):
    # The sub-class runs this version of calc() instead of the parent's version.
    def calc(self, inps, targs): return  (inps==targs).float().mean()

In [27]:
# For demonstration purposes, creating some random inputs and predictions
acc = Accuracy()
# Each call to add() stores the  inputs and predictions
acc.add(tensor([0, 1, 2, 0, 1, 2]), tensor([0, 1, 1, 2, 1, 0]))
acc.add(tensor([1, 1, 2, 0, 1]), tensor([0, 1, 1, 2, 1]))
# Here value is a property of Metric
acc.value

tensor(0.45)

In [25]:
# In contrast to Accuracy(), Metric() will calculate whatever is passed.
loss = Metric()
loss.add(0.6, n=32)
loss.add(0.9, n=2)

loss.value, round((0.6*32 + 0.9*2) / (32+2), 2)

(tensor(0.62), 0.62)