In [1]:
# https://course.fast.ai/videos/?lesson=9 @1:23:34
# Callbacks
# keep track of metrics: progress bar animation
# hyper params scheduling 
# regularization techniques
# tensorboards
# mixed precision training


In [2]:
%load_ext autoreload
%autoreload 2

%matplotlib inline

In [3]:
from __future__ import absolute_import, division, print_function, unicode_literals
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from pdb import set_trace

In [4]:
mnist = tf.keras.datasets.mnist

# Load the train/test splits; the images arrive as (n_samples, height, width).
(x_train, y_train), (x_test, y_test) = mnist.load_data()
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)

# Flatten every image into one feature vector. Passing -1 lets reshape infer
# height * width, which stays correct even when the images are not square.
x_train = x_train.reshape((x_train.shape[0], -1))
x_test = x_test.reshape((x_test.shape[0], -1))
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape, y_train.min(), y_train.max())

# n: number of training samples, m: features per sample,
# c: number of classes (labels run from 0 to y_train.max()).
n, m = x_train.shape
c = y_train.max() + 1
print(n, m, c)

(60000, 28, 28) (60000,) (10000, 28, 28) (10000,)
(60000, 784) (60000,) (10000, 784) (10000,) 0 9
60000 784 10


In [5]:
def normalize(x, m, s):
    """Standardize `x`: subtract the mean `m`, then divide by the standard deviation `s`."""
    centered = x - m
    return centered / s

In [6]:
# Mean and standard deviation taken over all values of the training array.
train_mean, train_std = x_train.mean(), x_train.std()
train_mean, train_std

(33.318421449829934, 78.56748998339798)

In [7]:
# Standardize both splits with the *training* statistics.
# NOTE(review): x_train / x_test are overwritten in place, so running this cell
# a second time normalizes data that is already normalized.
x_train = normalize(x_train, train_mean, train_std)
x_test  = normalize(x_test,  train_mean, train_std)

In [8]:
# Sanity check: after normalization the training mean should be ~0 and the std ~1.
# NOTE(review): this rebinds train_mean / train_std to the post-normalization
# values, so the statistics of the raw data are no longer available afterwards.
train_mean, train_std = x_train.mean(), x_train.std()
train_mean, train_std

(-3.064638490070051e-17, 0.9999999999999998)

In [9]:
# Hyper-parameters: hidden-layer width (nh) and SGD learning rate (lr).
nh = 50
lr = 0.5
class Model(tf.keras.Model):
    """Small fully-connected network: Dense(nh) -> ReLU -> Dense(n_out).

    No softmax is applied, so the output is one raw score (logit) per output unit.

    Args:
        nh: number of units in the hidden layer.
        n_out: number of output units.
    """

    def __init__(self, nh, n_out):
        super().__init__()
        self.lrs = [
            tf.keras.layers.Dense(nh),
            tf.keras.layers.ReLU(),
            tf.keras.layers.Dense(n_out),
        ]

    def call(self, x, training=None):
        """Run `x` through the layers in order and return the result.

        `training` is declared so the model can be invoked as
        `model(x, training=...)`. None of the layers used here changes its
        behaviour between training and inference, so the flag is not consulted.
        """
        for layer in self.lrs:
            x = layer(x)
        return x
    
# cross entropy loss
def cross_entropy(targets, predictions):
    """Mean softmax cross-entropy over the batch.

    `targets` are used as the labels and `predictions` as the logits of
    `tf.nn.softmax_cross_entropy_with_logits`.
    """
    per_example = tf.nn.softmax_cross_entropy_with_logits(labels=targets, logits=predictions)
    return tf.math.reduce_mean(per_example)

# here the target MUST NOT BE one hot encoded
def accuracy(targets, predictions):
    """Fraction of rows whose arg-max prediction equals the target class index.

    `targets` holds class indices (not one-hot vectors); `predictions` holds one
    score per class along axis 1.
    """
    predicted_classes = tf.cast(tf.argmax(predictions, axis=1), dtype=tf.float32)
    true_classes = tf.cast(targets, dtype=tf.float32)
    hits = tf.math.equal(predicted_classes, true_classes)
    return tf.math.reduce_mean(tf.cast(hits, dtype=tf.float32))

# class cross_entropy():
#     def __init__(self):
#         pass
    
#     def call(self, targets, predictions):
#         return tf.math.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(targets, predictions))

    
# Instantiate the network with 10 output units, pick the loss and a plain SGD optimizer.
model = Model(nh, 10)
loss_function = cross_entropy
optimizer = tf.keras.optimizers.SGD(learning_rate=lr)

In [10]:
# Call the model once on the full training array and keep the output in `pred`.
# NOTE(review): presumably this first call is what creates the layer weights — confirm.
pred = model(x_train)

W0119 14:52:00.020627 140153097033472 base_layer.py:1814] Layer model is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2.  The layer has dtype float32 because it's dtype defaults to floatx.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



## Packing Your Model into a Learner

In [11]:
# The idea is to pack your model into something that take
# 1. the model 
# 2. the optimization method 
# 3. the loss function and 4. the data
class Learner():
    """Plain container bundling a model, its optimizer, the loss function and the data."""

    def __init__(self, model, optimizer, loss_function, data):
        self.model = model
        self.optimizer = optimizer
        self.loss_function = loss_function
        self.data = data

In [12]:
# The data as well can be packed into a DataBunch containing 
# 1. the training data
# 2. the validation data
# 3. the classes from 0 to 9
# in a future version it can be interesting to get a subsample of the full data
# for training purposes
class DataBunch():
    """Bundle of a training loader, a validation loader and optional class info.

    Args:
        train_dl: re-iterable yielding the training items.
        valid_dl: re-iterable yielding the validation items.
        classes: optional description of the classes; stored as given.

    Attributes:
        train_samples / valid_samples: how many items one pass over the
            respective loader yields. When the loaders yield batches, these
            are batch counts rather than individual sample counts.
    """

    def __init__(self, train_dl, valid_dl, classes=None):
        self.train_dl, self.valid_dl, self.classes = train_dl, valid_dl, classes
        self.train_samples = self._count(self.train_dl)
        self.valid_samples = self._count(self.valid_dl)

    @staticmethod
    def _count(dl):
        """Count the items of `dl` without holding them all in memory at once."""
        return sum(1 for _ in dl)

In [13]:
def one_hot(x, y, classes):
    """Return `x` unchanged together with `y` one-hot encoded with depth `classes`."""
    encoded = tf.one_hot(y, classes)
    return x, encoded

In [14]:
# Setting the dataset in tensorflow

BATCH_SIZE = 64
SHUFFLE_BUFFER_SIZE = 100

# Training pipeline: shuffle, batch, then one-hot encode the labels of each batch.
train_ds = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(SHUFFLE_BUFFER_SIZE)
    .batch(BATCH_SIZE)
    .map(lambda x, y: one_hot(x, y, classes=c))
)

# Validation pipeline: same steps without shuffling.
valid_ds = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .batch(BATCH_SIZE)
    .map(lambda x, y: one_hot(x, y, classes=c))
)
train_ds, valid_ds

(<MapDataset shapes: ((None, 784), (None, 10)), types: (tf.float64, tf.float32)>,
 <MapDataset shapes: ((None, 784), (None, 10)), types: (tf.float64, tf.float32)>)

In [15]:
# Bundle both datasets; constructing the DataBunch iterates each one once to count its batches.
data = DataBunch(train_ds, valid_ds, classes=c)

In [16]:
# Pack the model, optimizer, loss function and data into a single Learner.
learn = Learner(model=model, optimizer=optimizer, loss_function=loss_function, data=data)

In [17]:
# fitting the model
def fit(epochs, learn):
    """Train `learn.model` for `epochs` epochs, printing validation metrics per epoch.

    Args:
        epochs: number of passes over `learn.data.train_dl`.
        learn: object exposing `model`, `optimizer`, `loss_function` and `data`
            (with `train_dl`, `valid_dl` and `valid_samples`).

    Returns:
        `(loss, accuracy)` of the last epoch. Both are sums of per-batch values
        divided by `learn.data.valid_samples`; accuracy is in percent.
        `(nan, nan)` when `epochs` is 0.
    """
    n_valid = learn.data.valid_samples
    avg_loss, avg_acc = float('nan'), float('nan')
    for epoch in range(epochs):
        for xb, yb in learn.data.train_dl:
            # Only the forward pass has to be recorded; computing the gradients
            # and applying them happens after the tape context is closed.
            with tf.GradientTape() as tape:
                loss = learn.loss_function(yb, learn.model(xb))
            variables = learn.model.trainable_variables
            gradients = tape.gradient(loss, variables)
            learn.optimizer.apply_gradients(zip(gradients, variables))

        tot_loss, tot_acc = 0., 0.
        for xb, yb in learn.data.valid_dl:
            # One forward pass shared by the loss and the accuracy.
            pred = learn.model(xb)
            tot_loss += learn.loss_function(yb, pred).numpy()
            # yb is one-hot encoded, accuracy() expects class indices.
            tot_acc += accuracy(tf.argmax(yb, axis=-1), pred).numpy() * 100

        avg_loss, avg_acc = tot_loss / n_valid, tot_acc / n_valid
        print(epoch, avg_loss, avg_acc)
    return avg_loss, avg_acc

In [18]:
loss, acc = fit(1, learn)

0 0.42360708575434747 88.93312101910828


In [19]:
# Now how to fix this and insert callbacks: metrics and stuff

In [20]:
#export
import re

_camel_re1 = re.compile('(.)([A-Z][a-z]+)')
_camel_re2 = re.compile('([a-z0-9])([A-Z])')


def camel2snake(name):
    """Convert a CamelCase identifier to snake_case."""
    # First pass: put "_" before each capitalized word that follows another character.
    partly_split = _camel_re1.sub(r'\1_\2', name)
    # Second pass: put "_" between a lowercase letter or digit and a following capital.
    fully_split = _camel_re2.sub(r'\1_\2', partly_split)
    return fully_split.lower()

class Callback():
    """Base class for callbacks driven by a runner.

    After `set_runner`, any attribute that is not found on the callback itself
    is looked up on the runner, so callbacks can read runner state directly.
    """
    _order=0

    def set_runner(self, run):
        """Attach the runner whose attributes this callback can read."""
        self.run = run

    def __getattr__(self, k):
        # __getattr__ only runs when normal lookup fails. If `run` itself is
        # missing (set_runner not called yet), delegating would look up
        # `self.run` again and recurse without end, so fail cleanly instead.
        if k == 'run':
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute 'run'; call set_runner() first")
        return getattr(self.run, k)

    @property
    def name(self):
        """snake_case class name with a trailing 'Callback' removed ('callback' if nothing is left)."""
        name = re.sub(r'Callback$', '', self.__class__.__name__)
        return camel2snake(name or 'callback')

In [21]:
#export
class TrainEvalCallback(Callback):
    """Keeps the runner's progress counters and its train/validation flag up to date.

    It writes `n_epochs` (fractional epoch count), `n_iter` (training batches
    seen) and `in_train` on the runner.
    """

    def begin_fit(self):
        """Reset the progress counters at the start of a fit."""
        self.run.n_epochs = 0.
        self.run.n_iter = 0

    def after_batch(self):
        """Advance the counters after a training batch; validation batches are ignored."""
        if not self.in_train: return
        self.run.n_epochs += 1. / self.iters
        self.run.n_iter += 1

    def begin_epoch(self):
        """Enter training mode at the start of an epoch.

        The runner dispatches the event named 'begin_epoch' before the training
        pass, so the hook has to carry that name to be invoked.
        """
        self.run.n_epochs = self.epoch
        # Keras has no model.train(); the mode is passed per call via `training=`.
        self.run.in_train = True

    def begin_training(self):
        """Former name of `begin_epoch`, kept for backward compatibility."""
        return self.begin_epoch()

    def begin_validate(self):
        """Enter validation mode before the validation pass."""
        # Keras has no model.eval(); the mode is passed per call via `training=`.
        self.run.in_train = False

In [22]:
# Quick check of camel2snake on a callback class name.
cbname = 'TrainEvalCallback'
camel2snake(cbname)

'train_eval_callback'

In [23]:
# The `name` property drops the trailing "Callback" before snake-casing the class name.
TrainEvalCallback().name

'train_eval'

In [24]:
#export
from typing import *

def listify(o):
    """Coerce `o` to a list.

    None becomes an empty list, a list is returned as-is (same object), a string
    is wrapped as a single item, any other iterable is expanded, and everything
    else is wrapped in a one-element list.
    """
    if o is None:
        return []
    if isinstance(o, list):
        return o
    # Strings are iterable but must stay a single item.
    if not isinstance(o, str) and isinstance(o, Iterable):
        return list(o)
    return [o]

In [46]:
#export
class Runner():
    """Runs the training loop and dispatches named events to its callbacks.

    Events are dispatched by calling the runner with the event name
    (`self('begin_batch')`); a callback method returning a truthy value asks
    the runner to skip the step that would follow.

    Args:
        cbs: a callback instance or a collection of them.
        cb_funcs: a factory or a collection of factories; each is called with
            no arguments and the resulting callback is also stored on the
            runner under `cb.name`.
    """

    def __init__(self, cbs=None, cb_funcs=None):
        # Copy so that the appends below never mutate a list owned by the caller.
        cbs = list(listify(cbs))
        for cbf in listify(cb_funcs):
            cb = cbf()
            setattr(self, cb.name, cb)
            cbs.append(cb)
        self.stop, self.cbs = False, [TrainEvalCallback()] + cbs
        # Defined up front so one_batch never hits a missing attribute.
        self.in_train = False

    @property
    def optimizer(self):       return self.learn.optimizer
    @property
    def model(self):           return self.learn.model
    @property
    def loss_function(self):   return self.learn.loss_function
    @property
    def data(self):            return self.learn.data

    def one_batch(self, xb, yb):
        """Process one batch: forward pass, loss and, when training, the optimizer step."""
        # With watch_accessed_variables=True the tape tracks the variables it
        # touches so gradients can be computed; with False nothing is tracked,
        # which is all validation needs.
        with tf.GradientTape(watch_accessed_variables=self.in_train) as tape:
            self.xb, self.yb = xb, yb
            if self('begin_batch'): return
            # `training` switches layers such as batchnorm/dropout between
            # their training and inference behaviour.
            self.pred = self.model(self.xb, training=self.in_train)
            if self('after_pred'): return
            self.loss = self.loss_function(self.yb, self.pred)
            # after_loss stays inside the tape so that operations a callback
            # applies to self.loss are still recorded.
            if self('after_loss') or not self.in_train: return
        # The backward pass and the update do not need to be recorded.
        variables = self.model.trainable_variables
        # Stored on the runner so after_gradients callbacks can read or replace them.
        self.gradients = tape.gradient(self.loss, variables)
        if self('after_gradients'): return
        # Use the learner's optimizer, not a module-level global.
        self.optimizer.apply_gradients(zip(self.gradients, variables))
        self('after_step')

    def all_batches(self, dl):
        """Run one_batch over every item of `dl`, stopping early if `self.stop` is set."""
        # Count lazily rather than materializing every batch in a list.
        self.iters = sum(1 for _ in dl)
        for xb, yb in dl:
            if self.stop: break
            self.one_batch(xb, yb)
            self('after_batch')
        self.stop = False

    def fit(self, epochs, learn):
        """Train and validate `learn` for `epochs` epochs, firing callback events."""
        self.epochs, self.learn = epochs, learn
        try:
            # Give every callback access to this runner.
            for cb in self.cbs:
                cb.set_runner(self)

            if self('begin_fit'):
                return
            for epoch in range(epochs):
                self.epoch = epoch

                # Set the mode here so it is defined even when no callback
                # sets it; callbacks may still override it in their hooks.
                self.in_train = True
                if not self('begin_epoch'):
                    self.all_batches(self.data.train_dl)

                self.in_train = False
                if not self('begin_validate'):
                    self.all_batches(self.data.valid_dl)

                if self('after_epoch'): break

        finally:
            self('after_fit')
            self.learn = None

    def __call__(self, cb_name):
        """Dispatch `cb_name` to the callbacks in `_order`; True as soon as one returns truthy."""
        for cb in sorted(self.cbs, key=lambda x: x._order):
            f = getattr(cb, cb_name, None)
            if f and f(): return True
        return False

In [50]:
class LossAcc():
    """Running totals of the loss and of a metric for one phase (training or validation)."""

    def __init__(self, metrics, in_train):
        self.metrics = metrics
        self.in_train = in_train
        self.reset()

    def reset(self):
        """Set both running totals back to zero."""
        self.tot_loss = 0.
        self.tot_acc = 0.

    def calculate(self, run):
        """Add the loss and the metric (times 100) of the runner's current batch to the totals."""
        self.tot_loss += run.loss.numpy()
        # run.yb is reduced to class indices with argmax before it reaches the metric.
        true_classes = tf.argmax(run.yb, axis=-1)
        batch_metric = self.metrics(true_classes, run.pred)
        self.tot_acc += batch_metric.numpy() * 100
    
#     def print_loss_acc(self, run):
#         print(self.tot_loss / run.data.valid_samples, self.tot_acc / run.data.valid_samples)
        
class LossAccCallback(Callback):
    """Accumulates loss and metric per phase and prints their averages after each epoch.

    Args:
        metrics: callable passed to the two LossAcc accumulators.
    """

    def __init__(self, metrics):
        # Own epoch counter, used only for the printed reports.
        self.epoch = 0
        self.valid_lossacc = LossAcc(metrics, in_train=False)
        self.train_lossacc = LossAcc(metrics, in_train=True)

    def begin_epoch(self):
        """Clear the training totals and put the runner in training mode."""
        self.train_lossacc.reset()
        self.run.in_train = True

    def begin_validate(self):
        """Clear the validation totals and put the runner in validation mode."""
        self.valid_lossacc.reset()
        self.run.in_train = False

    def after_loss(self):
        """Feed the current batch to the accumulator of the active phase."""
        tracker = self.train_lossacc if self.run.in_train else self.valid_lossacc
        tracker.calculate(self.run)

    def after_epoch(self):
        """Print both reports, then advance the epoch counter."""
        self.print_train(self.run)
        self.print_valid(self.run)
        self.epoch += 1

    def print_valid(self, run):
        """Print the validation totals divided by `run.data.valid_samples`."""
        totals = self.valid_lossacc
        count = run.data.valid_samples
        print('Valid: ', self.epoch, totals.tot_loss / count, totals.tot_acc / count)

    def print_train(self, run):
        """Print the training totals divided by `run.data.train_samples`."""
        totals = self.train_lossacc
        count = run.data.train_samples
        print('Train: ', self.epoch, totals.tot_loss / count, totals.tot_acc / count)

In [51]:
# Report loss and accuracy through a LossAccCallback registered on a new Runner.
acc = LossAccCallback(accuracy)
run = Runner(cbs=acc)

In [52]:
# NOTE(review): the saved output of this cell is a pdb session triggered by a
# `set_trace()` that is no longer in LossAccCallback.begin_epoch; re-run to refresh it.
run.fit(1, learn)

> <ipython-input-50-26b1802b9fd0>(25)begin_epoch()
-> self.train_lossacc.reset()
(Pdb) l
 20  	        self.valid_lossacc = LossAcc(metrics, in_train=False)
 21  	        self.train_lossacc = LossAcc(metrics, in_train=True)
 22  	
 23  	    def begin_epoch(self):
 24  	        set_trace()
 25  ->	        self.train_lossacc.reset()
 26  	        self.run.in_train=True
 27  	
 28  	    def after_loss(self):
 29  	        if self.run.in_train:
 30  	            self.train_lossacc.calculate(self.run)
(Pdb) self.train
*** AttributeError: 'Runner' object has no attribute 'train'
(Pdb) self.train_lossacc
<__main__.LossAcc object at 0x7f771fb03860>
(Pdb) self.run
<__main__.Runner object at 0x7f771fb03d30>
(Pdb) self.run.in_train
*** AttributeError: 'Runner' object has no attribute 'in_train'
(Pdb) q


BdbQuit: 