In [1]:
%matplotlib inline

In [2]:
#export
from exp.nb_06 import *
from torch import nn

In [3]:
# xTraining, yTraining, xValidation, yValidation = list(map(lambda x: x.cuda(),getMnistData()))
xTraining, yTraining, xValidation, yValidation = list(map(lambda x: x,getMnistData()))
xTrainingNormalized, xValidationNormalized = normalizeVectors(xTraining, xValidation)
(xTrainingNormalized.mean(), xTrainingNormalized.std, xValidationNormalized.mean(), xValidationNormalized.std())

(tensor(-7.6999e-06),
 <function Tensor.std>,
 tensor(-7.0751e-08),
 tensor(1.0000))

In [4]:
numberOfClasses = 10
hiddenLayerOutput = 50
batchSize = 64
lossFuction = Functional.cross_entropy

In [5]:
# trainingDataSet, validationDataSet = Dataset(xTrainingNormalized, yTraining), Dataset(xValidationNormalized, yValidation)

In [24]:
trainingDataSet, validationDataSet = Dataset(xTrainingNormalized[:batchSize*3], yTraining[:batchSize*3]), Dataset(xValidationNormalized[:batchSize*3], yValidation[:batchSize*3])

In [25]:
trainingDataLoader, validationDataLoader = createDataLoaders(trainingDataSet, validationDataSet, batchSize)

In [26]:
imageDataBunch = DataBunch(trainingDataLoader, validationDataLoader, numberOfClasses)

In [27]:
layerSizes = [8, 16, 32, 64, 64]

In [28]:
phases = [0.3, 0.7]
weightsScheduler = aggregateSchedulers(phases, createCosineSchedulers(0.3, 0.6, 0.2)) 
biasScheduler = aggregateSchedulers(phases, createCosineSchedulers(0.9, 1.8, 0.6))

In [29]:
class Subscriber:
    def __init__(self):
        self._totalEpochs = 0
        self._currentEpoch = 0

    def postEpoch(self, epochNumber):
        pass

    def postBatchEvaluation(self, predictions, validationData):
        pass

    def preBatchEvaluation(self):
        pass

    def preEpoch(self, epoch, dataLoader):
        self._currentEpoch = epoch
        pass

    def preModelTeach(self, model, epochs):
        self._totalEpochs = epochs
        pass

    def postModelTeach(self):
        pass


In [30]:
class StatisticsSubscriber(Subscriber):

    def __init__(self,
                 accuracyFunction=accuracy,
                 name="Steve"):
        super().__init__()
        self._epochAccuracy = 0.
        self._epochLoss = 0.
        self._numberOfBatches = 0
        self._accuracyFunction = accuracyFunction
        self._name = name

    def preEpoch(self, epoch, dataLoader):
        super().preEpoch(epoch, dataLoader)
        self._numberOfBatches = len(dataLoader)

    def postBatchEvaluation(self, predictions, validationData):
        super().postBatchEvaluation(predictions, validationData)
        self._epochAccuracy += self._accuracyFunction(predictions, validationData)

    def postBatchLossConsumption(self, loss):
        self._epochLoss += loss

    def postEpoch(self, epochNumber):
        super().postEpoch(epochNumber)
        print("Epoch #{} {}: Loss {} Accuracy {}".format(epochNumber,
                                                         self._name,
                                                         self._epochLoss / self._numberOfBatches,
                                                         self._epochAccuracy / self._numberOfBatches))


In [31]:
class ValidationSubscriber(StatisticsSubscriber):

    def __init__(self):
        super().__init__(name="Validation")


In [32]:
class TrainingSubscriber(StatisticsSubscriber):

    def __init__(self,
                 lossFunction=Functional.cross_entropy,
                 schedulingFunctions=[cosineScheduler(1e-1, 1e-6), cosineScheduler(1e-1, 1e-6)], ):
        super().__init__(name="Training")
        self._optimizer = None
        self._schedulingFunctions = schedulingFunctions
        self._lossFunction = lossFunction

    def preModelTeach(self, model, epochs):
        super().preModelTeach(model, epochs)
        self._optimizer = optim.SGD(model.parameters(), self._schedulingFunctions[0](0))
        self._totalEpochs = epochs

    def postBatchEvaluation(self, predictions, valdationData):
        super().postBatchEvaluation(predictions, valdationData)
        calculatedLoss = self._lossFunction(predictions, valdationData)
        self._teachModel(calculatedLoss)
        self.postBatchLossConsumption(calculatedLoss)

    def _teachModel(self, loss):
        loss.backward()
        self._optimizer.step()
        self._optimizer.zero_grad()

    def preBatchEvaluation(self):
        super().preBatchEvaluation()
        self._annealLearningRate()

    def _annealLearningRate(self):
        for parameterGroup, schedulingFunction in zip(self._optimizer.param_groups, self._schedulingFunctions):
            parameterGroup['lr'] = schedulingFunction(self._currentEpoch / self._totalEpochs)


In [33]:
class TeacherEnhanced:
    def __init__(self,
                 dataBunch,
                 trainingSubscriber: TrainingSubscriber,
                 validationSubscriber: ValidationSubscriber):
        self._dataBunch = dataBunch
        self._trainingSubscriber = trainingSubscriber
        self._validationSubscriber = validationSubscriber

    def teachModel(self, model, numberOfEpochs):
        self._notifiyPreTeach(model, numberOfEpochs)
        for epoch in range(numberOfEpochs):
            self._trainModel(model,
                             epoch)
            self._validateModel(model,
                                epoch)
        self._notifiyPostTaught()

    def _notifiyPreTeach(self, model, epochs):
        self._trainingSubscriber.preModelTeach(model, epochs)
        self._validationSubscriber.preModelTeach(model, epochs)

    def _notifiyPostTaught(self):
        self._trainingSubscriber.postModelTeach()
        self._validationSubscriber.postModelTeach()

    def _trainModel(self, model, epoch):
        self._processData(model,
                          self._dataBunch.trainingDataSet,
                          epoch,
                          self._trainingSubscriber)

    def _validateModel(self, model, epoch):
        with torch.no_grad():
            self._processData(model,
                              self._dataBunch.validationDataSet,
                              epoch,
                              self._validationSubscriber)

    def _processData(self,
                     model,
                     dataLoader,
                     epoch,
                     dataProcessingSubscriber: Subscriber):
        dataProcessingSubscriber.preEpoch(epoch, dataLoader)
        for _xDataBatch, _yDataBatch in dataLoader:
            dataProcessingSubscriber.preBatchEvaluation()
            _predictions = model(_xDataBatch)
            dataProcessingSubscriber.postBatchEvaluation(_predictions, _yDataBatch)
        dataProcessingSubscriber.postEpoch(epoch)


In [34]:
validationSubscriber = ValidationSubscriber()

In [35]:
trainingSubscriber = TrainingSubscriber()

In [37]:
teacher = TeacherEnhanced(imageDataBunch, 
                          trainingSubscriber,
                          validationSubscriber
                         )

In [39]:
# convolutionalModelSR1 = createBetterConvolutionModel(numberOfClasses, layerSizes).cuda()
convolutionalModelSR1 = createBetterConvolutionModel(numberOfClasses, layerSizes)

In [40]:
accuracy(convolutionalModelSR1(xValidationNormalized), yValidation)

tensor(0.0983)

In [41]:
teacher.teachModel(convolutionalModelSR1, 2)

Epoch #0 Training: Loss 2.326660633087158 Accuracy 0.1041666641831398
Epoch #0 Validation: Loss 0.0 Accuracy 0.125
Epoch #1 Training: Loss 4.635397434234619 Accuracy 0.2083333283662796
Epoch #1 Validation: Loss 0.0 Accuracy 0.25


In [42]:
accuracy(convolutionalModelSR1(xValidationNormalized), yValidation)

tensor(0.0983)

Todo: Statistics Subscriber, Hooks