In [2]:
import TensorFlow
import Foundation

/// A minimal layer whose only state is a single tensor of parameters.
///
/// It exists purely so that a training loop has something large to
/// differentiate with respect to.
struct DummyModel: Layer {
  /// The tensor that `applied(to:in:)` multiplies its input by.
  public var parameters: Tensor<Double>

  /// Returns the product of `input` and `parameters`.
  ///
  /// - Parameters:
  ///   - input: The tensor to multiply by `parameters`.
  ///   - context: Not read by this layer.
  /// - Returns: `input * parameters`.
  @differentiable(wrt: (self, input))
  func applied(to input: Tensor<Double>, in context: Context) -> Tensor<Double> {
    let product = input * parameters
    return product
  }
}

// Size the parameters to occupy `1/memoryFraction` of physical memory so that
// a training loop that leaks one gradient per iteration will run out of memory
// after about `memoryFraction` iterations.
let memoryFraction = UInt64(20)
let systemMemoryBytes = ProcessInfo.processInfo.physicalMemory

// Keep the sizing arithmetic in 64 bits. The raw element count exceeds
// `Int32.max` on hosts with more than roughly 320 GiB of RAM
// (2^31 elements * 8 bytes * memoryFraction), and a plain `Int32(...)`
// conversion of such a value traps at runtime.
let parameterCount64 = systemMemoryBytes / UInt64(MemoryLayout<Double>.size) / memoryFraction

// Saturating 32-bit view of the element count. Equal to the exact count
// whenever it fits in an `Int32`; pinned to `Int32.max` otherwise.
let parameterCount = Int32(clamping: parameterCount64)

// A tensor of shape [parameterCount] makes TF throw weird errors. Maybe it's
// too big along one dimension. So we use a tensor of shape
// [1000, parameterCount / 1000] instead. The column count is derived from the
// 64-bit total so that it stays exact even when `parameterCount` saturates.
let parameterColumnCount = Int32(clamping: parameterCount64 / 1000)
var model = DummyModel(parameters: Tensor<Double>(ones: [1000, parameterColumnCount]))

extension DummyModel {
  /// Computes a scalar loss for the model: the squared mean of the layer's
  /// output for a constant input of `1`.
  ///
  /// - Parameter context: Forwarded unchanged to `applied(to:in:)`.
  /// - Returns: `applied(to: Tensor(1), in: context).mean().squared()`.
  @differentiable(wrt: (self))
  func loss(in context: Context) -> Tensor<Double> {
    let output = applied(to: Tensor(1), in: context)
    return output.mean().squared()
  }
}

let optimizer = SGD<DummyModel, Double>(learningRate: 10, momentum: 1)

let context = Context(learningPhase: .training)

// Run 5x `memoryFraction` iterations so that a loop which leaks memory on
// every iteration definitely crashes before it finishes.
let epochCount = 5 * memoryFraction
for epoch in 0..<epochCount {
  // Evaluate the loss and its gradient with respect to the model in one pass.
  let (value, gradients) = model.valueWithGradient { candidate in
    candidate.loss(in: context)
  }
  print("Epoch \(epoch) loss: \(value)")
  // Apply the gradient step in place to the model's differentiable variables.
  optimizer.update(&model.allDifferentiableVariables, along: gradients)
}

print("Done training!")