In [1]:
%load_ext autoreload
%autoreload 2

%matplotlib inline
from google.colab import drive
drive.mount("/content/gdrive")
%cd gdrive/MyDrive/Ml/nbs/dl2/

from six.moves import urllib
opener = urllib.request.build_opener()
opener.addheaders = [('User-agent', 'Mozilla/5.0')]
urllib.request.install_opener(opener)

## get the data from dataset file
def get_data():
    """Download MNIST into ``../data`` and return a flattened train/valid split.

    The torchvision training set is split along its first dimension into
    50000 training and 10000 validation samples.

    Returns:
        A 4-tuple ``(x_train, y_train, x_valid, y_valid)``. Each ``x`` is
        reshaped to one row per sample and divided by 256.0; each ``y`` is the
        matching targets tensor cast to float.
    """
    import os
    import torchvision.datasets as datasets

    root = '../data'
    n_train, n_valid = 50000, 10000

    # exist_ok avoids the check-then-create race and also creates missing parents.
    os.makedirs(root, exist_ok=True)

    train_set = datasets.MNIST(root=root, train=True, download=True)
    # The test split is not used below; the call is kept only so its
    # download side effect is unchanged.
    datasets.MNIST(root=root, train=False, download=True)

    x_train, x_valid = train_set.data.split([n_train, n_valid])
    y_train, y_valid = train_set.targets.split([n_train, n_valid])
    return (
        x_train.view(n_train, -1) / 256.0,
        y_train.float(),
        x_valid.view(n_valid, -1) / 256.0,
        y_valid.float(),
    )

x_train,y_train,x_valid,y_valid = get_data()

from exp.nb_03 import *
import torchvision

# Wrap the image/label tensors in Dataset instances
train_ds,valid_ds = Dataset(x_train, y_train),Dataset(x_valid, y_valid)
nh,bs = 50,64 # hidden layer width is 50, mini-batch size is 64
# get_data() returns the labels as float, so .max().item() is a Python float;
# cast it so the class count is an int (10 for the MNIST digits).
c = int(y_train.max().item()) + 1
# Use cross-entropy loss
loss_func = torch.nn.CrossEntropyLoss()

class DataBunch():
    """Groups the training and validation data loaders with the class count.

    Attributes are plain references to what was passed in:
      train_dl -- training data loader
      valid_dl -- validation data loader
      c        -- number of classes (optional, defaults to None)
    """

    def __init__(self, train_dl, valid_dl, c=None):
        self.train_dl = train_dl
        self.valid_dl = valid_dl
        self.c = c

    @property
    def train_ds(self):
        """The ``dataset`` attribute of the training loader."""
        return self.train_dl.dataset

    @property
    def valid_ds(self):
        """The ``dataset`` attribute of the validation loader."""
        return self.valid_dl.dataset
# bunch the train/valid dataset into one instance

# get_dls comes from exp.nb_03; its result is unpacked into DataBunch's first two
# parameters (train_dl, valid_dl), so it is expected to return exactly those two loaders.
data = DataBunch(*get_dls(train_ds, valid_ds, bs), c)

def get_model(data, lr=0.5, nh=50):
    """Build a one-hidden-layer network and an SGD optimizer over its parameters.

    Args:
        data: object exposing ``train_ds.x`` (input width is read from
            ``shape[1]``) and ``c`` (number of output classes).
        lr: learning rate handed to ``optim.SGD``.
        nh: width of the hidden layer.

    Returns:
        ``(model, optimizer)``.
    """
    n_in = data.train_ds.x.shape[1]
    # data.c may be a float (e.g. when derived from float labels); nn.Linear needs an int.
    n_out = int(data.c)

    layers = [nn.Linear(n_in, nh), nn.ReLU(), nn.Linear(nh, n_out)]
    net = nn.Sequential(*layers)
    optimizer = optim.SGD(net.parameters(), lr=lr)
    return net, optimizer

class Learner():
    """Plain container for the four objects a training run needs.

    It stores the model, optimizer, loss function and data unchanged and
    adds no behaviour of its own.
    """

    def __init__(self, model, opt, loss_func, data):
        self.model = model
        self.opt = opt
        self.loss_func = loss_func
        self.data = data

learn = Learner(*get_model(data), loss_func, data)
# Learner contains no training logic; it only packages the model, optimizer,
# loss function and data that the training loop needs.


# basic callback class
# it will be the parent of all callbacks
class Callback():
    """Base class for all callbacks: one hook per stage of the training loop.

    Every hook returns True. Hooks that receive arguments also remember them
    on the instance so later hooks can read them.
    """

    # ---- whole-fit hooks ----
    def begin_fit(self, learn):
        """Called before fitting starts; keeps a reference to the learner."""
        self.learn = learn
        return True

    def after_fit(self):
        """Called once fitting has finished."""
        return True

    # ---- per-epoch hooks ----
    def begin_epoch(self, epoch):
        """Called at the start of an epoch; records the epoch index."""
        self.epoch = epoch
        return True

    def begin_validate(self):
        """Called right before the validation pass."""
        return True

    def after_epoch(self):
        """Called at the end of an epoch."""
        return True

    # ---- per-batch hooks ----
    def begin_batch(self, xb, yb):
        """Called at the start of a batch; records the batch inputs and targets."""
        self.xb = xb
        self.yb = yb
        return True

    def after_loss(self, loss):
        """Called after the loss is computed; records the loss."""
        self.loss = loss
        return True

    def after_backward(self):
        """Called after the backward pass."""
        return True

    def after_step(self):
        """Called after the optimizer step stage of a batch."""
        return True



class CallbackHandler():
    """Forwards each training-loop event to a list of callbacks.

    For every event the callbacks are invoked in order and their results are
    chained with ``and``: once the running result is falsy, the remaining
    callbacks are not called for that event and the falsy value is returned.
    The handler also switches the model between train and eval mode and
    tracks which phase is active in ``in_train``.
    """

    def __init__(self,cbs=None):
        # A missing or empty argument becomes a fresh empty list.
        self.cbs = cbs or []

    def _call_each(self, event, *args, start=True):
        """Invoke hook ``event`` on each callback, and-chaining from ``start``."""
        outcome = start
        for cb in self.cbs:
            outcome = outcome and getattr(cb, event)(*args)
        return outcome

    # ---- whole-fit events ----
    def begin_fit(self, learn):
        """Remember the learner, mark the training phase and clear its stop flag."""
        self.learn = learn
        self.in_train = True
        learn.stop = False  # a callback sets this to True to request a stop
        return self._call_each('begin_fit', learn)

    def after_fit(self):
        """Dispatch ``after_fit``; the chain starts from ``not in_train``."""
        return self._call_each('after_fit', start=not self.in_train)

    # ---- per-epoch events ----
    def begin_epoch(self, epoch):
        """Put the model in train mode, mark the training phase, then dispatch."""
        self.learn.model.train()
        self.in_train = True
        return self._call_each('begin_epoch', epoch)

    def begin_validate(self):
        """Put the model in eval mode, mark the validation phase, then dispatch."""
        self.learn.model.eval()
        self.in_train = False
        return self._call_each('begin_validate')

    def after_epoch(self):
        return self._call_each('after_epoch')

    # ---- per-batch events ----
    def begin_batch(self, xb, yb):
        return self._call_each('begin_batch', xb, yb)

    def after_loss(self, loss):
        """Dispatch ``after_loss``; the chain starts from ``in_train``, so it is
        falsy (and no callback runs) while validating."""
        return self._call_each('after_loss', loss, start=self.in_train)

    def after_backward(self):
        return self._call_each('after_backward')

    def after_step(self):
        return self._call_each('after_step')

    def do_stop(self):
        """Return the learner's stop flag and reset it to False.

        The reset happens in ``finally`` so the flag is one-shot: a requested
        stop is reported once and then cleared.
        """
        try:
            return self.learn.stop
        finally:
            self.learn.stop = False


def one_batch(xb, yb, cb):
    """Run one batch through the model, consulting the callback handler at each stage.

    Stages: begin_batch -> forward + loss -> after_loss -> backward ->
    after_backward (gates the optimizer step) -> after_step (gates zero_grad).
    A falsy begin_batch or after_loss abandons the rest of the batch.

    Args:
        xb: batch inputs, passed to the model.
        yb: batch targets; converted with ``.long()`` before the loss call.
        cb: handler exposing the hook methods and the ``learn`` object.
    """
    if not cb.begin_batch(xb, yb):
        return

    predictions = cb.learn.model(xb)
    loss = cb.learn.loss_func(predictions, yb.long())

    if cb.after_loss(loss):
        loss.backward()
        # The two gates are independent: after_step is still consulted
        # when after_backward vetoed the optimizer step.
        if cb.after_backward():
            cb.learn.opt.step()
        if cb.after_step():
            cb.learn.opt.zero_grad()

def all_batches(dl, cb):
    """Feed every (inputs, targets) pair from ``dl`` through ``one_batch``.

    After each batch the handler's ``do_stop()`` is consulted; a truthy
    answer ends the iteration early.
    """
    for inputs, targets in dl:
        one_batch(inputs, targets, cb)
        if cb.do_stop():
            break


Mounted at /content/gdrive
/content/gdrive/MyDrive/Ml/nbs/dl2


In [12]:

def fit(epochs, learn, cb):
    """Train ``learn`` for up to ``epochs`` epochs, driven by callback handler ``cb``.

    Per epoch: a training pass over ``learn.data.train_dl``, then (if
    ``begin_validate`` allows it) a validation pass over
    ``learn.data.valid_dl`` with gradients disabled.

    Handler decisions:
      * falsy ``begin_fit``   -> nothing runs, not even ``after_fit``
      * falsy ``begin_epoch`` -> that epoch is skipped entirely
      * truthy ``do_stop`` or falsy ``after_epoch`` -> the epoch loop ends
    ``after_fit`` is called once the epoch loop is over.
    """
    if cb.begin_fit(learn):
        for epoch in range(epochs):
            if not cb.begin_epoch(epoch):
                continue

            all_batches(learn.data.train_dl, cb)

            if cb.begin_validate():
                with torch.no_grad():
                    all_batches(learn.data.valid_dl, cb)

            # Short-circuit: after_epoch is not consulted when a stop was requested.
            finished = cb.do_stop() or not cb.after_epoch()
            if finished:
                break

        cb.after_fit()

class TestCallback(Callback):
    """Prints a table of per-batch training loss/error plus a per-epoch summary.

    After every optimizer step the current batch is run through the model
    again purely for reporting; the resulting loss and error (1 - accuracy)
    are printed and added to per-epoch running totals. Before validation the
    per-epoch means are printed.
    """

    def begin_fit(self, learn):
        """Store the learner and print the column headers."""
        super().begin_fit(learn)
        print(f'{"epoch":<20}{"batch":<20}{"train_loss":<40}{"train_error":<40}')
        return True

    def begin_epoch(self,epoch):
        """Record the epoch, reset the batch counter and the epoch totals."""
        super().begin_epoch(epoch)
        self.n_iters = 0
        # No newline: the first batch row of this epoch continues on the same line.
        print('{:<20}'.format(self.epoch+1),end='')
        # Running sums of the per-batch loss and accuracy, kept as Python floats.
        self.accumulated_tot_loss = 0.0
        self.accumulated_tot_acc = 0.0
        return True

    def begin_batch(self, xb, yb):
        """Record the batch and reset the per-batch loss and accuracy."""
        super().begin_batch(xb,yb)
        self.tot_loss, self.tot_acc = 0,0
        return True

    def after_step(self):
        """Measure loss and error on the current batch and print one table row."""
        self.n_iters += 1

        # This forward pass is only for reporting, so run it without autograd
        # and compute each metric once.
        with torch.no_grad():
            pred = self.learn.model(self.xb)
            self.tot_loss = self.learn.loss_func(pred, self.yb.long())
            self.tot_acc = accuracy(pred,self.yb)

        batch_loss = self.tot_loss.item()
        batch_acc = self.tot_acc.item()
        # Accumulate plain floats so no tensors are kept alive across the epoch.
        self.accumulated_tot_loss += batch_loss
        self.accumulated_tot_acc += batch_acc

        # The first row of an epoch follows the epoch number printed by begin_epoch;
        # later rows need a blank 20-character column in its place.
        lead = '' if self.n_iters == 1 else '{:<20}'.format('')
        print(lead + '{:<20}{:<40}{:<40}'.format(self.n_iters, batch_loss, 1 - batch_acc))
        return True

    def begin_validate(self):
        """Print the mean training loss and error of the epoch just trained."""
        n = self.n_iters
        if n:
            # Both figures are averaged over the batches seen this epoch.
            epoch_loss = self.accumulated_tot_loss / n
            epoch_err = 1 - self.accumulated_tot_acc / n
        else:
            # No training batch ran this epoch: report NaN instead of dividing by zero.
            epoch_loss = epoch_err = float('nan')
        print(f'epoch_train_loss:{epoch_loss}, epoch_train_error:{epoch_err}')
        return True

In [13]:
# Build a new Learner (new model and optimizer from get_model) for this run.
learn = Learner(*get_model(data), loss_func, data) # construct learner with model, optimizer, loss function, and data
# Train for 2 epochs; TestCallback prints the per-batch loss/error table shown below.
fit(2, learn, cb=CallbackHandler([TestCallback()]))

epoch               batch               train_loss                              train_error                             
1                   1                   2.2030587196350098                      0.484375                                
                    2                   2.0828912258148193                      0.546875                                
                    3                   2.051906108856201                       0.625                                   
                    4                   1.978106141090393                       0.46875                                 
                    5                   1.7654951810836792                      0.1875                                  
                    6                   1.7099754810333252                      0.421875                                
                    7                   1.6431807279586792                      0.453125                                
                    8           