In [None]:
%install '.package(path: "$cwd/FastaiNotebooks")' FastaiNotebooks

In [None]:
%include "EnableIPythonDisplay.swift"
IPythonDisplay.shell.enable_matplotlib("inline")

In [None]:
import FastaiNotebooks
import Python
import Path
import TensorFlow

let np = Python.import("numpy")
let plt = Python.import("matplotlib.pyplot")

## Early stopping

### Better callback cancellation

In [None]:
// Load MNIST with `flat: true` and read the sizes needed to build the model.
let (xTrain, yTrain, xValid, yValid) = loadMNIST(path: mnistPath, flat: true)

// n and m are the first and second dimensions of the training inputs; m is used as the model's `nIn`.
let n = Int(xTrain.shape[0])
let m = Int(xTrain.shape[1])

// Largest label value plus one; used as the model's `nOut`.
let c = Int((yTrain.max() + 1).scalar!)
let nHid = 50

let data = mnistDataBunch(flat: true)
var model = BasicModel(nIn: m, nHid: nHid, nOut: c)

// RiemannSGD (the generic optimizer, no momentum) is used here because the
// regular SGD does not allow the learning rate to be changed.
let opt = RiemannSGD<BasicModel, Float>(learningRate: 1e-2)

/// Builds a fresh `BasicModel` with the dimensions computed above.
func modelInit() -> BasicModel {
    return BasicModel(nIn: m, nHid: nHid, nOut: c)
}

// TODO: When TF-421 is fixed, switch back to the normal `softmaxCrossEntropy`.
/// Mean of the `loss` output of `Raw.softmaxCrossEntropyWithLogits`.
///
/// The derivative is not synthesized; it is provided by `_vjpSoftmaxCrossEntropy`.
///
/// - Parameters:
///   - features: Passed to the raw op as `features`.
///   - labels: Passed to the raw op as `labels`.
/// - Returns: The mean of the raw op's `loss` tensor.
@differentiable(vjp: _vjpSoftmaxCrossEntropy)
func softmaxCrossEntropy1<Scalar: TensorFlowFloatingPoint>(
    _ features: Tensor<Scalar>, _ labels: Tensor<Scalar>
) -> Tensor<Scalar> {
    let rawLoss = Raw.softmaxCrossEntropyWithLogits(features: features, labels: labels).loss
    return rawLoss.mean()
}

/// Hand-written VJP used by `softmaxCrossEntropy1`.
///
/// - Returns: The mean loss, plus a pullback. The pullback divides the incoming
///   value by the size of the first dimension of `features`, multiplies it by the
///   gradient tensor returned by the raw op, and returns a constant zero for `labels`.
@usableFromInline
func _vjpSoftmaxCrossEntropy<Scalar: TensorFlowFloatingPoint>(
    features: Tensor<Scalar>, labels: Tensor<Scalar>
) -> (Tensor<Scalar>, (Tensor<Scalar>) -> (Tensor<Scalar>, Tensor<Scalar>)) {
    let (rawLoss, featuresGrad) = Raw.softmaxCrossEntropyWithLogits(features: features, labels: labels)
    // Size of the first dimension of `features`, as a tensor, to undo the `mean()`.
    let firstDimSize = Tensor<Scalar>(features.shapeTensor[0])
    let pullback: (Tensor<Scalar>) -> (Tensor<Scalar>, Tensor<Scalar>) = { upstream in
        let scaled = (upstream / firstDimSize) * featuresGrad
        return (scaled, Tensor<Scalar>(0))
    }
    return (rawLoss.mean(), pullback)
}

In [None]:
// Build the learner from the data bunch, the custom loss and the optimizer above;
// `modelInit` is passed as the `initializingWith` argument.
let learner = Learner(data: data, lossFunction: softmaxCrossEntropy1, optimizer: opt, initializingWith: modelInit)

In [None]:
// Declared in an extension of Learner because the delegate must take the learner as a type parameter.
extension Learner {
    /// Demo delegate: prints the iteration counter after each batch and throws
    /// `LearnerAction.stop` once the counter reaches 10.
    public class TestCallback : Delegate {
        // Hooked on batchDidFinish ("after_batch") because no "after_step" event existed.
        public override func batchDidFinish(learner: Learner) throws {
            let iteration = learner.currentIter
            print(iteration)
            guard iteration < 10 else {
                throw LearnerAction.stop
            }
        }
    }
}

In [None]:
// Install the delegates: `TrainEvalDelegate` (defined outside this notebook) and the `TestCallback` above.
learner.delegates = [Learner.TrainEvalDelegate(), Learner.TestCallback()]

In [None]:
// Run training (the argument is presumably the epoch count — confirm against `Learner.fit`).
// `TestCallback` throws `LearnerAction.stop` once `currentIter` reaches 10.
learner.fit(3)

### Other callbacks

In [None]:
// export
/// Plots `arr2` against `arr1` with matplotlib in a new 6x4 figure and shows it.
///
/// - Parameters:
///   - arr1: Values for the x axis.
///   - arr2: Values for the y axis.
///   - logScale: When `true`, the x axis is switched to a log scale.
///   - xLabel: Label for the x axis; skipped when empty.
///   - yLabel: Label for the y axis; skipped when empty.
public func plot<S1, S2>(
    _ arr1: [S1], _ arr2: [S2], logScale: Bool = false, xLabel: String = "", yLabel: String = ""
) where S1: PythonConvertible, S2: PythonConvertible {
    plt.figure(figsize: [6,4])
    // Convert both Swift arrays to numpy arrays before handing them to pyplot.
    let xs = np.array(arr1)
    let ys = np.array(arr2)
    if logScale {
        plt.xscale("log")
    }
    if !xLabel.isEmpty {
        plt.xlabel(xLabel)
    }
    if !yLabel.isEmpty {
        plt.ylabel(yLabel)
    }
    let figure = plt.plot(xs, ys)
    plt.show(figure)
}

// export
extension Learner where O.Scalar: PythonConvertible {
    /// Creates a new, empty `Recorder`.
    public func makeRecorder() -> Recorder {
        let recorder = Recorder()
        return recorder
    }

    /// Delegate that stores the loss and the optimizer's learning rate after
    /// every batch for which `learner.inTrain` is true, and can plot them.
    public class Recorder: Delegate {
        /// Losses recorded so far, one per recorded batch.
        public var losses: [Loss] = []
        /// Learning rates recorded so far, one per recorded batch.
        public var lrs: [O.Scalar] = []

        public override func batchDidFinish(learner: Learner) {
            // Only record while `inTrain` is set.
            guard learner.inTrain else { return }
            losses.append(learner.currentLoss)
            lrs.append(learner.optimizer.learningRate)
        }

        /// Plots the recorded losses against their index.
        public func plotLosses() {
            let iterations = Array(losses.indices)
            let lossValues = losses.map { $0.scalar }
            plot(iterations, lossValues, xLabel:"iteration", yLabel:"loss")
        }

        /// Plots the recorded learning rates against their index.
        public func plotLRs() {
            let iterations = Array(lrs.indices)
            plot(iterations, lrs, xLabel:"iteration", yLabel:"lr")
        }

        /// Plots the recorded losses against the recorded learning rates, with a log-scaled x axis.
        public func plotLRFinder() {
            let lossValues = losses.map { $0.scalar }
            plot(lrs, lossValues, logScale: true, xLabel:"lr", yLabel:"loss")
        }
    }
}

extension Learner where O.Scalar: PythonConvertible {
    /// The first delegate in `delegates` that is a `Recorder`, or `nil` when there is none.
    public var recorder: Recorder? {
        for case let found as Recorder in delegates {
            return found
        }
        return nil
    }
}

/// Hack: instance-level factory for `TrainEvalDelegate`, callable as `learner.makeTrainEvalDelegate()`.
extension Learner {
    func makeTrainEvalDelegate() -> TrainEvalDelegate {
        let delegate = TrainEvalDelegate()
        return delegate
    }
}

/// A learning rate scheduler that is not generalized: it only applies when the optimizer's scalar is `Float`.
extension Learner where O.Scalar == Float {

    /// Delegate that sets the optimizer's learning rate before every batch
    /// from a user-supplied schedule function.
    public class ParamScheduler: Delegate {
        public typealias ScheduleFunc = (Float) -> Float

        /// Maps `pctEpochs / epochCount` to a learning rate.
        public var scheduler: ScheduleFunc

        /// - Parameter scheduler: Function called before each batch to compute the learning rate.
        public init(scheduler: @escaping (Float) -> Float) {
            self.scheduler = scheduler
        }

        override public func batchWillStart(learner: Learner) {
            // Presumably the fraction of total training completed so far — confirm against `Learner`.
            let position = learner.pctEpochs / Float(learner.epochCount)
            learner.optimizer.learningRate = scheduler(position)
        }
    }
}

### LR Finder

NB: You may want to also add something that saves the model before running this, and loads it back after running - otherwise you'll lose your weights!

In [None]:
/// Exponential interpolation between `start` and `end`.
///
/// - Parameters:
///   - start: Value returned when `pct` is 0.
///   - end: Value approached when `pct` is 1.
///   - pct: Position along the schedule.
/// - Returns: `start * (end / start) ^ pct`.
func expSchedule(start: Float, end: Float, pct: Float) -> Float {
    let ratio = end / start
    let growth = pow(ratio, pct)
    return start * growth
}

/// Binds `start` and `end` into `schedule`, leaving a function of the position only.
///
/// - Parameters:
///   - start: First argument forwarded to `schedule`.
///   - end: Second argument forwarded to `schedule`.
///   - schedule: Three-argument schedule function taking `(start, end, pct)`.
/// - Returns: A closure that calls `schedule(start, end, pct)` for a given `pct`.
func makeAnnealer(
    start: Float, end: Float, schedule: @escaping (Float, Float, Float) -> Float
) -> (Float) -> Float {
    let annealer: (Float) -> Float = { position in
        schedule(start, end, position)
    }
    return annealer
}

/// LR Finder test
// The constraint must be a plain conformance requirement; the previous
// `O.Scalar == O.Scalar: BinaryFloatingPoint` is not a valid `where` clause.
extension Learner where O.Scalar: BinaryFloatingPoint {

    /// Delegate that sets the learning rate from an exponential schedule before
    /// each batch and stops training when the loss exceeds four times the
    /// smallest loss seen, or when `numIter` iterations have run.
    public class LRFinder2: Delegate {
        public typealias ScheduleFunc = (Float) -> Float

        /// Maps `currentIter / numIter` to a learning rate.
        public var scheduler: ScheduleFunc
        /// Iteration count at which training is stopped.
        public var numIter: Int
        /// Smallest loss observed so far; `nil` until the first batch has finished.
        public var minLoss: Float? = nil

        /// - Parameters:
        ///   - start: Learning rate the schedule yields at position 0.
        ///   - end: Learning rate the schedule yields at position 1.
        ///   - numIter: Iteration count used to normalise the position and to stop.
        public init(start: Float = 1e-5, end: Float = 10, numIter: Int = 100) {
            scheduler = makeAnnealer(start: start, end: end, schedule: expSchedule)
            self.numIter = numIter
        }

        override public func batchWillStart(learner: Learner) {
            let position = Float(learner.currentIter) / Float(numIter)
            learner.optimizer.learningRate = O.Scalar(scheduler(position))
        }

        override public func batchDidFinish(learner: Learner) throws {
            // First batch: only remember its loss, nothing to compare against yet.
            guard var best = minLoss else {
                minLoss = learner.currentLoss.scalar
                return
            }

            let loss = learner.currentLoss.scalarized()
            if loss < best {
                best = loss
                minLoss = loss
            }
            // Stop when the loss has blown up relative to the best one, or the iteration budget is spent.
            if loss > 4 * best || learner.currentIter >= numIter {
                throw LearnerAction.stop
            }
        }
    }

    /// Instance-level factory for `LRFinder2`, forwarding all arguments to its initializer.
    func makeLRFinder2(start: Float = 1e-5, end: Float = 10, numIter: Int = 100)-> LRFinder2 {
        return LRFinder2(start: start, end: end, numIter: numIter)
    }
}

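NB: In fastai we also use exponential smoothing on the loss. For that reason fastai checks against `best_loss*3` instead of `best_loss*10`. The `LRFinder2` above works on the raw (unsmoothed) loss and stops once it exceeds `4 * minLoss`.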

In [None]:
// Replace the delegate list with a train/eval delegate, a recorder (losses and learning rates) and the LR finder.
learner.delegates = [learner.makeTrainEvalDelegate(), learner.makeRecorder(), learner.makeLRFinder2()]


In [None]:
// Run the fit (argument presumably the epoch count — confirm). `LRFinder2` throws `LearnerAction.stop`
// once `currentIter` reaches `numIter` (100 by default) or the loss exceeds 4x the smallest loss seen.
learner.fit(2)

In [None]:
// `recorder` is optional (nil when no Recorder delegate is installed), so it must be unwrapped before use.
learner.recorder?.plotLRFinder()

## Export

In [None]:
// Convert this notebook (looked up in the current working directory) to a script.
// NOTE(review): presumably only cells marked `// export` are written out — confirm against `notebookToScript`.
notebookToScript(fname: (Path.cwd / "05b_early_stopping.ipynb").string)