In [1]:
#export
from dataclasses import dataclass
from functools import reduce
import torch
from torch import optim
from nbs.dl2.exp.nb_02 import getMnistData, assertNearZero
from nbs.dl2.exp.nb_03 import Dataset, createDataLoaders, accuracy
from nbs.dl2.exp.nb_04 import DataBunch
from nbs.dl2.exp.nb_05 import aggregateSchedulers, createCosineSchedulers, cosineScheduler
from nbs.dl2.exp.nb_06 import normalizeVectors, createBetterConvolutionModel
from nbs.dl2.exp.nb_07D import *

In [2]:
%matplotlib inline


In [3]:
# Load the four MNIST tensors, then normalise the training and validation images together.
xTraining, yTraining, xValidation, yValidation = getMnistData()
(xTrainingNormalized,
 xValidationNormalized) = normalizeVectors(xTraining, xValidation)

In [4]:
# Sanity-check the normalisation: both image sets should have mean ~0 ...
assertNearZero(xTrainingNormalized.mean())
assertNearZero(xValidationNormalized.mean())
# ... and standard deviation ~1 (checked as 1 - std being near zero).
assertNearZero(1 - xTrainingNormalized.std())
assertNearZero(1 - xValidationNormalized.std())


In [5]:
# Passed to createBetterConvolutionModel together with numberOfClasses.
layerSizes = [8, 16, 32, 64, 64]
# Number of target classes; also handed to DataBunch.
numberOfClasses = 10
# NOTE(review): not referenced anywhere else in the visible notebook — confirm it is still needed.
hiddenLayerSize = 75
# Batch size given to createDataLoaders.
batchSize = 64

In [6]:
# Keep only the first 10000 rows of each split.
trainingDataSet = Dataset(xTrainingNormalized[:10000], yTraining[:10000])
validationDataSet = Dataset(xValidationNormalized[:10000], yValidation[:10000])

# Wrap both datasets in loaders and bundle them with the class count.
trainingDataLoader, validationDataLoader = createDataLoaders(
    trainingDataSet,
    validationDataSet,
    batchSize,
)
imageDataBunch = DataBunch(trainingDataLoader, validationDataLoader, numberOfClasses)

In [7]:
# Phase list handed to aggregateSchedulers with each scheduler set
# (presumably the fraction of training spent in each phase — confirm against nb_05).
phases = [0.3, 0.7]
# NOTE(review): neither scheduler below is referenced again in the visible notebook;
# TrainingSubscriber() is later constructed with its own default schedulers.
weightsScheduler = aggregateSchedulers(phases, createCosineSchedulers(0.3, 0.6, 0.2)) 
biasScheduler = aggregateSchedulers(phases, createCosineSchedulers(0.9, 1.8, 0.6))



In [8]:
class ProcessCancellationException(Exception):
    """Signal that the current pass over a data loader should stop early.

    ``TeacherOptimized._processData`` catches this exception, abandons the
    remaining batches and still notifies the subscriber that the epoch ended.
    """

In [9]:
def composeFunctions(funInput, functions):
    """Thread ``funInput`` through ``functions`` from left to right.

    Each function receives the previous function's return value; the value
    produced by the last function is returned. With no functions the input is
    returned unchanged.
    """
    result = funInput
    for nextFunction in functions:
        result = nextFunction(result)
    return result

In [10]:
@dataclass
class HyperParameters:
    """Container for the hyper-parameters handed to ``Optimizer``."""
    # Learning rate; ``Optimizer`` falls back to 0.5 when none is supplied.
    learningRate: float

In [11]:
def flatMap(function, items):
    """Map ``function`` over ``items`` and concatenate the resulting sequences.

    :param function: callable returning a sequence that supports ``+``
        (e.g. a list) for each element of ``items``.
    :param items: iterable of inputs.
    :return: the concatenation, in order, of every sequence produced by
        ``function``; an empty list when ``items`` is empty.
    """
    mappedItems = list(map(function, items))
    # reduce() without an initial value raises TypeError on an empty sequence,
    # so answer the empty case directly instead of crashing.
    if not mappedItems:
        return []
    return reduce(lambda accum, manyItems: accum + manyItems, mappedItems)

In [12]:
# Demo: each n in 0..2 expands to (n + 1) copies of n + 1, and flatMap joins the lists.
flatMap(lambda n: list(map(lambda _: n+1, range(n+1))), 
        list(range(3)))

[1, 2, 2, 3, 3, 3]

In [76]:
class Optimizer:
    """Holds a model's parameters together with the hyper-parameters and
    optimisation functions that are meant to be applied to them."""

    def __init__(self, modelParameters,
                 optimizationFunctions,
                 hyperParameters: HyperParameters=None):
        """
        :param modelParameters: iterable of model parameters; materialised into
            a list so a generator such as ``model.parameters()`` can be re-read.
        :param optimizationFunctions: stored as given; not invoked by this class yet.
        :param hyperParameters: hyper-parameters associated with every
            parameter. Defaults to a fresh ``HyperParameters(0.5)``.
        """
        super().__init__()
        # Build the default per instance: a default evaluated in the signature
        # would be one mutable object shared by every Optimizer ever created.
        if hyperParameters is None:
            hyperParameters = HyperParameters(0.5)
        self._modelParameters = list(modelParameters)
        # One entry per parameter.
        # NOTE(review): every entry aliases the same object — confirm whether
        # per-parameter copies are intended.
        self._hyperParameters = [hyperParameters for _ in self._modelParameters]
        self._optimizationFunctions = optimizationFunctions

    def getLayersWithGradients(self):
        """Return the stored parameters whose ``.grad`` is not ``None``."""
        return [parameter
                for parameter in self._modelParameters
                if parameter.grad is not None]

In [82]:
# Fresh model used only to inspect gradients through Optimizer below.
convolutionalModelSR2 = createBetterConvolutionModel(numberOfClasses, layerSizes)

In [83]:
# Wrap the model's parameters; no optimisation functions are supplied yet.
optimus = Optimizer(convolutionalModelSR2.parameters(), [])
# No backward pass has run at this point, so no parameter has a populated .grad.
# NOTE(review): the recorded output ([None, ...]) does not match the current
# filter-based getLayersWithGradients, which would return [] here — re-run to refresh.
optimus.getLayersWithGradients()

[None, None, None, None, None, None, None, None, None, None, None, None]

In [84]:
# Forward the whole normalised training set in a single call and compute the loss.
loss = torch.nn.functional.cross_entropy(convolutionalModelSR2(xTrainingNormalized), yTraining)
# Back-propagate so that .grad is populated on the parameters.
loss.backward()
# NOTE(review): the recorded output shows raw gradient tensors, whereas the current
# getLayersWithGradients returns the parameters themselves — re-run to refresh.
optimus.getLayersWithGradients()

[tensor([[[[ 1.6537e-04,  1.5276e-04,  1.2270e-04,  9.5444e-05,  2.9221e-05],
           [ 2.0994e-04,  1.8922e-04,  1.5616e-04,  1.4520e-04,  1.1184e-04],
           [ 1.8543e-04,  1.6632e-04,  1.3245e-04,  1.4436e-04,  1.5908e-04],
           [ 1.3511e-04,  1.3104e-04,  1.1326e-04,  1.4035e-04,  1.7617e-04],
           [ 1.2411e-04,  1.5057e-04,  1.6889e-04,  2.0645e-04,  2.4725e-04]]],
 
 
         [[[-3.6407e-04, -3.1179e-04, -2.2377e-04, -1.4259e-04, -1.0727e-04],
           [-3.9629e-04, -3.3432e-04, -2.4948e-04, -2.0095e-04, -1.8291e-04],
           [-3.9928e-04, -3.0347e-04, -2.3504e-04, -1.9623e-04, -1.7031e-04],
           [-3.2954e-04, -2.1940e-04, -1.6649e-04, -1.2502e-04, -9.3775e-05],
           [-1.8629e-04, -9.1762e-05, -7.7924e-05, -4.9799e-05, -2.7928e-05]]],
 
 
         [[[-1.0126e-04, -1.2653e-04, -8.3046e-05,  5.5435e-05,  1.9817e-04],
           [-1.2738e-04, -1.2087e-04, -4.6480e-05,  1.0877e-04,  2.3885e-04],
           [-1.0219e-04, -6.0637e-05,  3.7349e-05,  

**Important Note**: PyTorch does NOT populate `.grad` on the parameters until `backward()` has been called on the loss computed from the model's output.

In [16]:
class TrainingSubscriber(StatisticsSubscriber, HookedSubscriber):
    """Subscriber for the training pass.

    After every batch it computes the loss, back-propagates it and steps an
    SGD optimizer; before every batch it re-computes the learning rate of each
    optimizer parameter group from the scheduling functions.
    """

    def __init__(self,
                 lossFunction=torch.nn.functional.cross_entropy,
                 schedulingFunctions=None,
                 optimizationFunctions=None
                 ):
        """
        :param lossFunction: callable invoked as ``lossFunction(predictions, targets)``;
            its result must support ``.backward()``.
        :param schedulingFunctions: sequence of callables mapping training
            progress to a learning rate, paired positionally with the optimizer's
            parameter groups. Defaults to two ``cosineScheduler(1e-1, 1e-6)``.
        :param optimizationFunctions: stored as given; not used by this class yet.
            Defaults to an empty list.
        """
        super().__init__(name="Training")
        # Defaults are built per instance: list defaults written in the signature
        # are created once and shared (and mutated) across every subscriber.
        if schedulingFunctions is None:
            schedulingFunctions = [cosineScheduler(1e-1, 1e-6), cosineScheduler(1e-1, 1e-6)]
        if optimizationFunctions is None:
            optimizationFunctions = []
        self._optimizer = None
        self._optimizationFunctions = optimizationFunctions
        self._schedulingFunctions = schedulingFunctions
        self._lossFunction = lossFunction

    def preModelTeach(self, model, epochs):
        """Create the SGD optimizer for ``model`` and remember the epoch count."""
        super().preModelTeach(model, epochs)
        # The initial learning rate is the first scheduler evaluated at progress 0.
        self._optimizer = optim.SGD(model.parameters(), self._schedulingFunctions[0](0))
        self._totalEpochs = epochs

    def postBatchEvaluation(self, predictions, valdationData):
        """Compute the batch loss, update the model, then publish the loss.

        ``valdationData`` (sic) holds the targets passed to the loss function.
        """
        super().postBatchEvaluation(predictions, valdationData)
        calculatedLoss = self._lossFunction(predictions, valdationData)
        self._teachModel(calculatedLoss)
        self.postBatchLossConsumption(calculatedLoss)

    def _teachModel(self, loss):
        """Back-propagate ``loss``, take one optimizer step and clear the gradients."""
        loss.backward()
        self._optimizer.step()
        self._optimizer.zero_grad()

    def preBatchEvaluation(self):
        """Refresh the learning rate before each batch is evaluated."""
        super().preBatchEvaluation()
        self._annealLearningRate()

    def _annealLearningRate(self):
        """Set each parameter group's ``lr`` from its paired scheduling function."""
        # zip() stops at the shorter sequence, so surplus schedulers are ignored.
        # self._currentEpoch is not assigned in this class; presumably a base
        # class maintains it — TODO confirm.
        for parameterGroup, schedulingFunction in zip(self._optimizer.param_groups, self._schedulingFunctions):
            parameterGroup['lr'] = schedulingFunction(self._currentEpoch / self._totalEpochs)

In [17]:
class TeacherOptimized:
    """Drives the epoch loop: one training pass followed by one gradient-free
    validation pass per epoch, reporting each pass to its own subscriber."""

    def __init__(self,
                 dataBunch,
                 trainingSubscriber: TrainingSubscriber,
                 validationSubscriber: ValidationSubscriber):
        self._dataBunch = dataBunch
        self._trainingSubscriber = trainingSubscriber
        self._validationSubscriber = validationSubscriber

    def _subscribers(self):
        """Both subscribers in notification order (training first)."""
        return (self._trainingSubscriber, self._validationSubscriber)

    def teachModel(self, model, numberOfEpochs):
        """Train and validate ``model`` for ``numberOfEpochs`` epochs."""
        self._notifiyPreTeach(model, numberOfEpochs)
        for epoch in range(numberOfEpochs):
            self._trainModel(model, epoch)
            self._validateModel(model, epoch)
        self._notifiyPostTaught()

    def _notifiyPreTeach(self, model, epochs):
        """Announce the start of teaching to both subscribers."""
        for subscriber in self._subscribers():
            subscriber.preModelTeach(model, epochs)

    def _notifiyPostTaught(self):
        """Announce the end of teaching to both subscribers."""
        for subscriber in self._subscribers():
            subscriber.postModelTeach()

    def _trainModel(self, model, epoch):
        """Run one pass over the training data, reported to the training subscriber."""
        trainingData = self._dataBunch.trainingDataSet
        self._processData(model, trainingData, epoch, self._trainingSubscriber)

    def _validateModel(self, model, epoch):
        """Run one pass over the validation data with autograd disabled."""
        validationData = self._dataBunch.validationDataSet
        with torch.no_grad():
            self._processData(model, validationData, epoch, self._validationSubscriber)

    def _processData(self,
                     model,
                     dataLoader,
                     epoch,
                     processingSubscriber: Subscriber):
        """Feed every batch of ``dataLoader`` through ``model``, notifying
        ``processingSubscriber`` before and after each batch and around the epoch."""
        processingSubscriber.preEpoch(epoch, dataLoader)
        try:
            for batchInputs, batchTargets in dataLoader:
                processingSubscriber.preBatchEvaluation()
                batchPredictions = model(batchInputs)
                processingSubscriber.postBatchEvaluation(batchPredictions, batchTargets)
        except ProcessCancellationException:
            # Cancellation only ends this pass early; it is not an error.
            pass
        finally:
            # The epoch is always closed, whether it finished, was cancelled or failed.
            processingSubscriber.postEpoch(epoch)


In [18]:
# Subscriber that receives the validation-pass notifications.
validationSubscriber = ValidationSubscriber()

In [19]:
# Training subscriber with its defaults: cross-entropy loss and the two cosine schedulers.
trainingSubscriber = TrainingSubscriber()

In [20]:
# Use the TeacherOptimized class defined in this notebook. The cell previously
# instantiated the star-imported TeacherEnhanced, so the class defined above
# was never exercised.
teacher = TeacherOptimized(imageDataBunch,
                           trainingSubscriber,
                           validationSubscriber
                           )


In [21]:
# Fresh, untrained model for the training run below.
convolutionalModelSR1 = createBetterConvolutionModel(numberOfClasses, layerSizes)


In [22]:
# Baseline: accuracy of the untrained model on the validation subset.
accuracy(convolutionalModelSR1(validationDataSet.xVector), validationDataSet.yVector)

tensor(0.0990)

In [23]:
# Train (and validate after each epoch) for 3 epochs.
teacher.teachModel(convolutionalModelSR1, 3)

Epoch #0 Training: Loss 2.301060914993286 Accuracy 0.1177348718047142
Epoch #0 Validation: Loss 0.0 Accuracy 0.1890822798013687
Epoch #1 Training: Loss 1.7400457859039307 Accuracy 0.41401273012161255
Epoch #1 Validation: Loss 0.0 Accuracy 0.7901503443717957
Epoch #2 Training: Loss 0.5095152258872986 Accuracy 0.8572850227355957
Epoch #2 Validation: Loss 0.0 Accuracy 0.8803402185440063


In [24]:
# Same validation-subset accuracy, now measured after training.
accuracy(convolutionalModelSR1(validationDataSet.xVector), validationDataSet.yVector)

tensor(0.8797)