In [None]:
%install '.package(path: "$cwd/FastaiNotebooks")' FastaiNotebooks

Installing packages:
	.package(path: "/usr/local/google/home/jekbradbury/fastai_docs/dev_swift/FastaiNotebooks")
		FastaiNotebooks
With SwiftPM flags: []
Working in: /tmp/tmpp933qfeq
Fetching https://github.com/mxcl/Path.swift
Fetching https://github.com/JustHTTP/Just
Completed resolution in 2.38s
Cloning https://github.com/mxcl/Path.swift
Resolving https://github.com/mxcl/Path.swift at 0.16.2
Cloning https://github.com/JustHTTP/Just
Resolving https://github.com/JustHTTP/Just at 0.7.1
Compile Swift Module 'Just' (1 sources)
Compile Swift Module 'Path' (9 sources)
Compile Swift Module 'FastaiNotebooks' (6 sources)
Compile Swift Module 'jupyterInstalledPackages' (1 sources)
Linking ./.build/x86_64-unknown-linux/debug/libjupyterInstalledPackages.so
Initializing Swift...
Loading library...
Installation complete!


In [None]:
import FastaiNotebooks

In [None]:
// export
import Foundation
import TensorFlow
import Path

In [None]:
//export
/// Base class for delegates that maintain per-parameter statistics
/// (the `state` dictionaries held by `StatefulOptimizer`).
///
/// Every member has a no-op default, so subclasses override only what they
/// need.
open class StatDelegate<Scalar: TensorFlowFloatingPoint> {
    /// Hyperparameter defaults contributed by this delegate. Empty by default.
    var defaultConfig: [String: Double] {
        return [String: Double]()
    }

    /// Returns the state entries this delegate wants created for `param`.
    ///
    /// - Parameter param: The parameter tensor the state belongs to.
    /// - Returns: State entries keyed by name; empty by default.
    func initialState(for param: Tensor<Scalar>) -> [String: Tensor<Scalar>] {
        return [String: Tensor<Scalar>]()
    }

    /// Updates this delegate's statistics for a single parameter.
    ///
    /// - Parameters:
    ///   - state: State dictionary of the parameter, modified in place.
    ///   - param: Current value of the parameter.
    ///   - direction: Update direction (gradient) for the parameter.
    ///   - config: Shared hyperparameters; delegates may write to it.
    func update(
        state: inout [String: Tensor<Scalar>],
        for param: Tensor<Scalar>,
        along direction: Tensor<Scalar>,
        config: inout [String: Double]
    ) {
        // Intentionally empty: the base delegate tracks nothing.
    }
}

//export
/// Base class for delegates that apply one stage of a parameter update.
///
/// Every member has a no-op default, so subclasses override only what they
/// need.
open class StepDelegate<Scalar: TensorFlowFloatingPoint> {
    /// Hyperparameter defaults contributed by this delegate. Empty by default.
    var defaultConfig: [String: Double] {
        return [String: Double]()
    }

    /// Applies this delegate's stage of the update to a single parameter.
    ///
    /// - Parameters:
    ///   - param: The parameter tensor, modifiable in place.
    ///   - direction: The update direction, modifiable in place so that
    ///     later delegates see the adjusted value.
    ///   - state: Read-only per-parameter statistics.
    ///   - config: Shared hyperparameters; delegates may write to it.
    func update(
        param: inout Tensor<Scalar>,
        along direction: inout Tensor<Scalar>,
        state: [String: Tensor<Scalar>],
        config: inout [String: Double]
    ) {
        // Intentionally empty: the base delegate changes nothing.
    }
}

In [None]:
//export
/// An optimizer assembled from pluggable delegates.
///
/// `StatDelegate`s maintain per-parameter statistics and `StepDelegate`s use
/// those statistics to modify the parameters. All delegates share a single
/// `config` dictionary of hyperparameters.
class StatefulOptimizer<Model: Differentiable & KeyPathIterable,
                        Scalar: TensorFlowFloatingPoint>
    where Model.AllDifferentiableVariables == Model.CotangentVector,
          Model.AllDifferentiableVariables: KeyPathIterable {
    /// Hyperparameters: delegate defaults overridden by the user's `config`.
    var config: [String: Double]
    /// One state dictionary per `Tensor<Scalar>` key path of the model, in
    /// key-path enumeration order.
    var states: [[String: Tensor<Scalar>]]
    /// Delegates run first for each parameter, to refresh its statistics.
    var statDelegates: [StatDelegate<Scalar>]
    /// Delegates run second for each parameter, to apply the update.
    var stepDelegates: [StepDelegate<Scalar>]

    /// Creates an optimizer for `model`.
    ///
    /// - Parameters:
    ///   - model: Model whose `Tensor<Scalar>` key paths get a state entry.
    ///   - stepDelegates: Delegates applied, in order, to update parameters.
    ///   - statDelegates: Delegates applied, in order, to update statistics.
    ///   - config: Hyperparameters; these win over any delegate default.
    init(
        for model: Model,
        stepDelegates: [StepDelegate<Scalar>],
        statDelegates: [StatDelegate<Scalar>],
        config: [String: Double]
    ) {
        // Enumerate the parameter key paths once; the result does not depend
        // on the delegate, so there is no need to recompute it per delegate.
        let keyPaths = model.recursivelyAllWritableKeyPaths(
            to: Tensor<Scalar>.self)
        self.config = [:]
        self.states = keyPaths.map { _ in [:] }
        for stepDelegate in stepDelegates {
            self.config.merge(stepDelegate.defaultConfig) { (_, new) in new }
        }
        for statDelegate in statDelegates {
            self.config.merge(statDelegate.defaultConfig) { (_, new) in new }
            for (i, kp) in keyPaths.enumerated() {
                self.states[i].merge(statDelegate.initialState(
                    for: model[keyPath: kp])) { (_, new) in new }
            }
        }
        // Merged last so user-supplied values override delegate defaults.
        self.config.merge(config) { (_, new) in new }
        self.stepDelegates = stepDelegates
        self.statDelegates = statDelegates
    }

    /// Updates every `Tensor<Scalar>` parameter of `model` in place.
    ///
    /// For each parameter, all stat delegates run before any step delegate.
    /// Stat delegates therefore see the incoming gradient, while step
    /// delegates may rewrite it for the delegates that follow them.
    ///
    /// - Parameters:
    ///   - model: The differentiable variables to update.
    ///   - direction: The gradient, indexed by the same key paths as `model`.
    func update(
        _ model: inout Model.AllDifferentiableVariables,
        along direction: Model.CotangentVector
    ) {
        // NOTE(review): `states` is indexed by enumeration position, which
        // assumes these key paths line up with those enumerated in `init`.
        for (i, kp) in model.recursivelyAllWritableKeyPaths(
            to: Tensor<Scalar>.self).enumerated() {
            var grad = direction[keyPath: kp]
            for statDelegate in statDelegates {
                statDelegate.update(
                    state: &states[i],
                    for: model[keyPath: kp],
                    along: grad,
                    config: &config
                )
            }
            for stepDelegate in stepDelegates {
                stepDelegate.update(
                    param: &model[keyPath: kp],
                    along: &grad,
                    state: states[i],
                    config: &config
                )
            }
        }
    }
}

In [None]:
//export
/// Plain gradient-descent step: moves the parameter against `direction`,
/// scaled by `config["learningRate"]`.
class SGDStep<Scalar: TensorFlowFloatingPoint>: StepDelegate<Scalar> {
    /// Subtracts `learningRate * direction` from `param`.
    ///
    /// Traps if `config` has no `"learningRate"` entry.
    override func update(
        param: inout Tensor<Scalar>,
        along direction: inout Tensor<Scalar>,
        state: [String: Tensor<Scalar>],
        config: inout [String: Double]
    ) {
        let learningRate = Scalar(config["learningRate"]!)
        param -= learningRate * direction
    }
}

In [None]:
//export
/// Decoupled weight decay: shrinks the parameter directly instead of
/// touching the gradient.
class WeightDecay<Scalar: TensorFlowFloatingPoint>: StepDelegate<Scalar> {
    /// Weight decay is off (`0.0`) unless the caller overrides it.
    override var defaultConfig: [String: Double] {
        return ["weightDecay": 0.0]
    }

    /// Multiplies `param` by `1 - learningRate * weightDecay`.
    ///
    /// Traps if `config` lacks `"learningRate"` or `"weightDecay"`.
    override func update(
        param: inout Tensor<Scalar>,
        along direction: inout Tensor<Scalar>,
        state: [String: Tensor<Scalar>],
        config: inout [String: Double]
    ) {
        let learningRate = config["learningRate"]!
        let weightDecay = config["weightDecay"]!
        // The factor is computed in Double and only then narrowed to Scalar.
        let shrinkFactor = 1 - learningRate * weightDecay
        param *= Scalar(shrinkFactor)
    }
}

In [None]:
//export
/// L2 regularization: adds a multiple of the parameter to the gradient so
/// that later step delegates see the penalized direction.
class L2Regularization<Scalar: TensorFlowFloatingPoint>: StepDelegate<Scalar> {
    /// Regularization is off (`0.0`) unless the caller overrides it.
    override var defaultConfig: [String: Double] {
        return ["weightDecay": 0.0]
    }

    /// Adds `weightDecay * param` to `direction`; `param` is left unchanged.
    ///
    /// Traps if `config` has no `"weightDecay"` entry.
    override func update(
        param: inout Tensor<Scalar>,
        along direction: inout Tensor<Scalar>,
        state: [String: Tensor<Scalar>],
        config: inout [String: Double]
    ) {
        let weightDecay = Scalar(config["weightDecay"]!)
        direction += weightDecay * param
    }
}

In [None]:
//export
/// Tracks a running (momentum) average of the gradient in
/// `state["averageGrad"]`.
///
/// The weight given to each new gradient is also published as
/// `config["momentumDampening"]` so that step delegates can debias the
/// average consistently.
class AverageGrad<Scalar: TensorFlowFloatingPoint>: StatDelegate<Scalar> {
    override var defaultConfig: [String: Double] { return ["momentum": 0.9] }
    /// When `true`, new gradients are weighted by `1 - momentum`;
    /// when `false`, they are added with weight `1`.
    let dampened: Bool
    init(dampened: Bool = false) { self.dampened = dampened }

    /// Starts the average at zero, with the same shape as `param`.
    override func initialState(
        for param: Tensor<Scalar>
    ) -> [String: Tensor<Scalar>] {
        return ["averageGrad": Tensor<Scalar>(zeros: param.shape)]
    }

    /// Folds `direction` into the running average.
    ///
    /// Computes `averageGrad = momentum * averageGrad + dampening * direction`
    /// and stores `dampening` in `config["momentumDampening"]`.
    /// Traps if `config` has no `"momentum"` entry or the state was not
    /// initialized by `initialState(for:)`.
    override func update(
        state: inout [String: Tensor<Scalar>],
        for param: Tensor<Scalar>,
        along direction: Tensor<Scalar>,
        config: inout [String: Double]
    ) {
        let momentum = config["momentum"]!
        let dampening = 1.0 - (dampened ? momentum : 0.0)
        config["momentumDampening"] = dampening
        state["averageGrad"]! *= Scalar(momentum)
        // Use the same dampening that is published to `config`; weighting by
        // `1 - momentum` unconditionally would ignore `dampened` and disagree
        // with the factor that debiasing divides by.
        state["averageGrad"]! += Scalar(dampening) * direction
    }
}

In [None]:
//export
/// Tracks a running (momentum) average of the element-wise squared gradient
/// in `state["averageSquaredGrad"]`.
///
/// The weight given to each new squared gradient is also published as
/// `config["squareMomentumDampening"]` so that step delegates can debias the
/// average consistently.
class AverageSquaredGrad<Scalar: TensorFlowFloatingPoint>: StatDelegate<Scalar> {
    override var defaultConfig: [String: Double] { return ["squareMomentum": 0.99] }
    /// When `true`, new squared gradients are weighted by
    /// `1 - squareMomentum`; when `false`, they are added with weight `1`.
    let dampened: Bool
    init(dampened: Bool = false) { self.dampened = dampened }

    /// Starts the average at zero, with the same shape as `param`.
    override func initialState(
        for param: Tensor<Scalar>
    ) -> [String: Tensor<Scalar>] {
        return ["averageSquaredGrad": Tensor<Scalar>(zeros: param.shape)]
    }

    /// Folds `direction * direction` into the running average.
    ///
    /// Computes `averageSquaredGrad = squareMomentum * averageSquaredGrad
    /// + dampening * direction * direction` and stores `dampening` in
    /// `config["squareMomentumDampening"]`.
    /// Traps if `config` has no `"squareMomentum"` entry or the state was not
    /// initialized by `initialState(for:)`.
    override func update(
        state: inout [String: Tensor<Scalar>],
        for param: Tensor<Scalar>,
        along direction: Tensor<Scalar>,
        config: inout [String: Double]
    ) {
        let squareMomentum = config["squareMomentum"]!
        let dampening = 1.0 - (dampened ? squareMomentum : 0.0)
        config["squareMomentumDampening"] = dampening
        state["averageSquaredGrad"]! *= Scalar(squareMomentum)
        // Use the same dampening that is published to `config`; weighting by
        // `1 - squareMomentum` unconditionally would ignore `dampened` and
        // disagree with the factor that debiasing divides by.
        state["averageSquaredGrad"]! += Scalar(
            dampening) * direction * direction
    }
}

In [None]:
//export
/// Counts, per parameter, how many updates have been applied. The count is
/// kept as a scalar tensor in `state["step"]`.
class StepCount<Scalar: TensorFlowFloatingPoint>: StatDelegate<Scalar> {
    /// Starts the counter at zero.
    override func initialState(
        for param: Tensor<Scalar>
    ) -> [String: Tensor<Scalar>] {
        let zero = Tensor<Scalar>(0)
        return ["step": zero]
    }

    /// Increments the counter by one.
    ///
    /// Traps if the state has no `"step"` entry.
    override func update(
        state: inout [String: Tensor<Scalar>],
        for param: Tensor<Scalar>,
        along direction: Tensor<Scalar>,
        config: inout [String: Double]
    ) {
        let increment = Scalar(1)
        state["step"]! += increment
    }
}

In [None]:
//export
/// Returns the factor by which a momentum average is biased after `step`
/// updates, i.e. `dampening * (1 + momentum + ... + momentum^(step - 1))`.
///
/// Dividing a running average by this value removes the bias introduced by
/// starting the average at zero.
///
/// - Parameters:
///   - momentum: Decay applied to the running average at every update.
///   - dampening: Weight given to each new value entering the average.
///   - step: Number of updates applied so far.
/// - Returns: `dampening * (1 - momentum^step) / (1 - momentum)`, or its
///   limit `dampening * step` when `momentum` is exactly `1`.
func debias(
    momentum: Double,
    dampening: Double,
    step: Double
) -> Double {
    // At momentum == 1 the closed form is 0 / 0 (NaN); the geometric series
    // it stands for sums to `step`, so return that limit instead.
    guard momentum != 1 else { return dampening * step }
    return dampening * (1 - pow(momentum, step)) / (1 - momentum)
}

In [None]:
//export
/// Adam parameter update, built on the statistics kept under
/// `state["averageGrad"]`, `state["averageSquaredGrad"]` and `state["step"]`.
///
/// Traps if any of the config or state entries it reads is missing.
class AdamStep<Scalar: TensorFlowFloatingPoint>: StepDelegate<Scalar> {
    /// Default `epsilon`, added to the RMS gradient before dividing by it.
    override var defaultConfig: [String: Double] {
        return ["epsilon": 1e-5]
    }

    /// Subtracts `debiasedLearningRate * averageGrad / debiasedRMSGrad`
    /// from `param`. `direction` is not read; the averaged gradient in
    /// `state` is used instead.
    override func update(
        param: inout Tensor<Scalar>,
        along direction: inout Tensor<Scalar>,
        state: [String: Tensor<Scalar>],
        config: inout [String: Double]
    ) {
        let stepCount = Double(state["step"]!.scalarized())

        // Debiasing the first moment is folded into the learning rate.
        let gradCorrection = debias(
            momentum: config["momentum"]!,
            dampening: config["momentumDampening"]!,
            step: stepCount
        )
        let debiasedLearningRate = Scalar(
            config["learningRate"]! / gradCorrection)

        // Debiased root-mean-square gradient; epsilon is added after sqrt.
        let squaredCorrection = debias(
            momentum: config["squareMomentum"]!,
            dampening: config["squareMomentumDampening"]!,
            step: stepCount
        )
        let debiasedMeanSquare =
            state["averageSquaredGrad"]! / Scalar(squaredCorrection)
        let debiasedRMSGrad =
            sqrt(debiasedMeanSquare) + Scalar(config["epsilon"]!)

        let scaledAverageGrad = debiasedLearningRate * state["averageGrad"]!
        param -= scaledAverageGrad / debiasedRMSGrad
    }
}

In [None]:
/// LAMB parameter update: an Adam-like step with weight decay, rescaled per
/// parameter by the ratio of the parameter's RMS to the step's RMS.
///
/// Traps if any of the config or state entries it reads is missing.
class LambStep<Scalar: TensorFlowFloatingPoint>: StepDelegate<Scalar> {
    /// Defaults: a small `epsilon` and no weight decay.
    override var defaultConfig: [String: Double] {
        return ["epsilon": 1e-6, "weightDecay": 0.0]
    }

    /// Subtracts `learningRate * factor * step` from `param`, where `factor`
    /// is the trust ratio capped at 10. `direction` is not read; the averaged
    /// gradient in `state` is used instead.
    override func update(
        param: inout Tensor<Scalar>,
        along direction: inout Tensor<Scalar>,
        state: [String: Tensor<Scalar>],
        config: inout [String: Double]
    ) {
        let stepCount = Double(state["step"]!.scalarized())

        let gradCorrection = debias(
            momentum: config["momentum"]!,
            dampening: config["momentumDampening"]!,
            step: stepCount
        )
        let debiasedAverageGrad = state["averageGrad"]! / Scalar(gradCorrection)

        // Unlike AdamStep, epsilon is added inside the square root here.
        let squaredCorrection = debias(
            momentum: config["squareMomentum"]!,
            dampening: config["squareMomentumDampening"]!,
            step: stepCount
        )
        let debiasedMeanSquare =
            state["averageSquaredGrad"]! / Scalar(squaredCorrection)
        let debiasedRMSGrad = sqrt(
            debiasedMeanSquare + Scalar(config["epsilon"]!))

        // Adam-style direction plus the weight-decay term.
        let rawStep = debiasedAverageGrad / debiasedRMSGrad + Scalar(
            config["weightDecay"]!) * param

        // Trust ratio: RMS of the parameter over RMS of the step, capped.
        let paramRMS = sqrt((param * param).mean())
        let stepRMS = sqrt((rawStep * rawStep).mean())
        let factor = min(paramRMS / stepRMS, Scalar(10.0))

        param -= Scalar(config["learningRate"]!) * factor * rawStep
    }
}

## Export

In [None]:
// Run the notebook-to-script export on this notebook, located in the current
// working directory. NOTE(review): presumably this writes out the cells
// tagged `//export`; `notebookToScript` is defined outside this file, so
// confirm against its implementation.
notebookToScript(fname: (Path.cwd / "09_optimizer.ipynb").string)