## eDSL A: Correct-by-construction PeriodSeries (with provenance)
#### Lef Ioannidis (@elefthei) : Investment Engineer, Bridgewater Associates

### First some minimal dependencies.

In [1]:
%classpath add mvn joda-time joda-time 2.10.5
%classpath add mvn org.typelevel cats-core_2.12 2.1.0
import org.joda.time.DateTime
import org.joda.time.Duration
import org.joda.time.format.DateTimeFormat
import java.io.File
import org.joda.time.format.DateTimeFormatter
import scala.io.Source
import scala.collection.mutable.ArrayBuffer
import scala.math.Ordered._
import cats.data.State

// Dates are ordered: two DateTime values compare by their millisecond instant (`getMillis`).
implicit val DateTimeOrdered = new Ordering[DateTime] {
    override def compare(x: DateTime, y: DateTime): Int =
        java.lang.Long.compare(x.getMillis, y.getMillis)
}

$line25.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$anon$1@18efc879

### Problem: Too many runtime errors when combining different frequencies
### Solution: Index the Series on Frequency type

In [None]:
  /** Type-level tag for a sampling frequency; it indexes series types and has no members. */
  sealed trait Frequency

  /** Value-level counterpart of a [[Frequency]] tag `F`. */
  sealed abstract class FrequencyValue[F <: Frequency] {
    /** Nominal length of one period, expressed in days. */
    def days: Int
  }

  object Frequency {
    // Type-level tags, one per supported frequency.
    sealed trait Daily extends Frequency
    sealed trait Monthly extends Frequency
    sealed trait Quarterly extends Frequency
    sealed trait Yearly extends Frequency

    // Value-level witnesses; each shares its name with the tag it carries.
    case object Daily extends FrequencyValue[Daily] {
      override val days: Int = 1
    }
    case object Monthly extends FrequencyValue[Monthly] {
      // A month is modelled as a fixed 30 days.
      override val days: Int = 30
    }
    case object Quarterly extends FrequencyValue[Quarterly] {
      // Three 30-day months.
      override val days: Int = 3 * 30
    }
    case object Yearly extends FrequencyValue[Yearly] {
      override val days: Int = 365
    }
  }

In [None]:
/**
 * A regularly spaced numeric series whose frequency is tracked in the type parameter `F`.
 *
 * @param data      the observations, in order
 * @param frequency value-level frequency carrying the same tag `F`
 * @param base      date of the first observation; the plotting code places observation `i`
 *                  at `base.plusDays(frequency.days * i)`
 *
 * NOTE: `data` is an `Array`, so the generated `equals`/`hashCode`/`toString` use the array's
 * reference identity rather than its contents.
 */
case class PeriodSeries[F <: Frequency](data: Array[Double], frequency: FrequencyValue[F], base: DateTime)

### We promoted Frequency from an Enum value to a Sum type.
#### Surely we can now copy+paste the rest of our code...

In [None]:
  // Extension on String that attempts to turn a frequency name into a FrequencyValue.
  // NOTE(review): this is not expected to type-check as written. The caller chooses F, yet every
  // branch returns a FrequencyValue for one fixed tag (e.g. FrequencyValue[Yearly]), and
  // FrequencyValue is invariant in F. The surrounding notebook text appears to present this
  // cell as a deliberate failing example, so the code is left untouched.
  implicit class StringFreqValueParser(s: String) {
    def fromString[F <: Frequency]: FrequencyValue[F] =
      // NOTE(review): there is no default case; any other string would raise a MatchError.
      s match {
        case "Yearly" => Frequency.Yearly
        case "Monthly" => Frequency.Monthly
        case "Quarterly" => Frequency.Quarterly
        case "Daily" => Frequency.Daily
      }
  }

### There are no dependent types in Scala. One day in the future we'll be able to do this.
#### But today, we need to improvise!

In [None]:
/**
 * Loaders for the notebook's simple text format:
 *   line 1: the frequency name ("Monthly", "Quarterly" or "Yearly")
 *   line 2: the base year, parsed with the pattern "yyyy"
 *   line 3 onwards: one numeric observation per line
 *
 * Each loader is tied to one frequency so that the returned series carries the matching
 * type-level tag.
 */
object CSV {
    /**
     * Shared implementation behind the frequency-specific loaders.
     *
     * @param csv       path of the file to read
     * @param frequency frequency value stored in the resulting series
     * @param label     header text the file must start with
     * @throws IllegalArgumentException if the header does not equal `label` or the base-year line is missing
     * @throws NumberFormatException    if an observation line is not a valid double
     */
    private def load[F <: Frequency](csv: String, frequency: FrequencyValue[F], label: String): PeriodSeries[F] = {
      val source = Source.fromFile(csv)
      // Read eagerly into a strict List before closing: in Scala 2.12 `toSeq` on an Iterator
      // yields a lazy Stream, which would keep reading from the source after it is closed.
      val lines = try source.getLines().toList finally source.close()
      require(lines.headOption.contains(label), s"Cannot load Data which not $label with this method")
      require(lines.lengthCompare(2) >= 0, s"Missing base year line in $label data file")
      val base = DateTime.parse(lines(1), DateTimeFormat.forPattern("yyyy"))
      val data = lines.drop(2).map(_.toDouble).toArray
      PeriodSeries(data, frequency, base)
    }

    /** Loads a file whose header line is "Monthly". */
    def loadMonthlyData(csv: String): PeriodSeries[Frequency.Monthly] =
      load(csv, Frequency.Monthly, "Monthly")

    /** Loads a file whose header line is "Quarterly". */
    def loadQuarterlyData(csv: String): PeriodSeries[Frequency.Quarterly] =
      load(csv, Frequency.Quarterly, "Quarterly")

    /** Loads a file whose header line is "Yearly". */
    def loadYearlyData(csv: String): PeriodSeries[Frequency.Yearly] =
      load(csv, Frequency.Yearly, "Yearly")
}


### This error shows that types are not magic, although they may feel like it.
#### If the user cannot figure out a type, the compiler can't either

## Let's see the results of our  work

In [None]:
// Plotting support for PeriodSeries
implicit class PeriodSeriesPlotting[F <: Frequency](ps: PeriodSeries[F]) {
  // Pairs every observation with its timestamp: observation i lies i periods (in days) after the base.
  private def toTicks(): Seq[(DateTime, Double)] = {
    val stepDays = ps.frequency.days
    ps.data.indices.map { idx =>
      (ps.base.plusDays(stepDays * idx), ps.data(idx))
    }
  }

  // Builds a time plot titled `name` with a single column called `yname`.
  def plot(name: String, yname: String): SimpleTimePlot =
    new SimpleTimePlot {
      title = name
      data = toTicks.map(tick => Map(yname -> tick._2, "time" -> tick._1.toDate()))
      columns = Seq(yname)
    }

  // Convenience overload using the default title and column name.
  def plot(): SimpleTimePlot = plot("Timeseries", "y")
}

In [None]:
// Load the monthly sample file and plot it with the default title ("Timeseries") and column name ("y").
CSV.loadMonthlyData("acme-stocks/stocks-1.txt").plot()

### Great, now let's port the math from before into the PeriodSeries[F] implementation

In [None]:
 /**
  * Elementary statistics and element-wise arithmetic for a [[PeriodSeries]].
  * Every operation keeps the frequency tag `F`, so only same-frequency series can be combined.
  */
 implicit class PeriodSeriesMath[F <: Frequency](ps: PeriodSeries[F]) {
    import scala.math.Ordered._

    /** Number of observations. */
    def length: Int = ps.data.length

    /** First observation; throws on an empty series. */
    def first: Double = ps.data.head

    /** The date one period after the last observation, i.e. `base + days * length`. */
    def enddate: DateTime =
      ps.base.plusDays(ps.frequency.days * ps.data.length)

    /** Last observation; throws on an empty series. */
    def last: Double = ps.data.last

    /**
     * Population standard deviation of the observations: `sqrt(sum((x - mean)^2) / n)`.
     * The division by `n` belongs inside the square root; dividing the root by `n` instead
     * shrinks the result by a further factor of `sqrt(n)`.
     */
    def vol: Double = {
      val n = ps.data.length
      val mean = ps.data.sum / n
      val squaredDeviations = ps.data.map(a => math.pow(a - mean, 2)).sum
      math.sqrt(squaredDeviations / n)
    }

    /**
     * Applies `f` to every contiguous window of `window` observations.
     *
     * @param f      statistic computed on each window (each window is itself a series)
     * @param window number of observations per window; must be positive and at most `length`
     * @return a series with `length - window + 1` values, keeping this series' base and frequency
     * @throws IllegalArgumentException if `window` is not positive or exceeds the series length
     */
    def roll(f: PeriodSeries[F] => Double, window: Int = 2): PeriodSeries[F] = {
      require(window > 0, "Window must be positive")
      require(ps.data.length >= window, "Window too big for small dataset")
      // `sliding` yields every full window, including the final one ending at the last observation.
      val rolled = ps.data.sliding(window).map(w => f(ps.copy(data = w))).toArray
      PeriodSeries(rolled, ps.frequency, ps.base)
    }

    /**
     * Combines two series element-wise with `op` over the dates they share.
     *
     * The result starts at the later of the two base dates. Observations that precede that
     * date are skipped, so values are paired by date rather than by array index.
     *
     * @throws IllegalArgumentException if the two series do not overlap in time
     */
    def intersection(other: PeriodSeries[F], op: (Double, Double) => Double): PeriodSeries[F] = {
      val ord = Ordering[DateTime]
      val start = ord.max(ps.base, other.base)
      require(start <= ord.min(ps.enddate, other.enddate), "Cannot zip PeriodSeries that do not overlap")
      // Whole periods between a series' own base and the common start; zero when the bases match.
      def aligned(series: PeriodSeries[F]): Array[Double] = {
        val skipped = org.joda.time.Days.daysBetween(series.base, start).getDays / series.frequency.days
        series.data.drop(skipped)
      }
      val combined = aligned(ps).zip(aligned(other)).map { case (l, r) => op(l, r) }
      ps.copy(data = combined, base = start)
    }

    /** Rolling [[vol]] over windows of `window` observations. */
    def rollVol(window: Int = 10): PeriodSeries[F] = roll(_.vol, window)

    // Element-wise arithmetic over the overlapping dates.
    def +(other: PeriodSeries[F]): PeriodSeries[F] = intersection(other, _ + _)
    def -(other: PeriodSeries[F]): PeriodSeries[F] = intersection(other, _ - _)
    def *(other: PeriodSeries[F]): PeriodSeries[F] = intersection(other, _ * _)
    def /(other: PeriodSeries[F]): PeriodSeries[F] = intersection(other, _ / _)
 }

## Everything we have so far returns `PeriodSeries[F]`
### But what if we actually want to change frequencies?
### For `upSample`/`downSample` we need a richer relation; a Typeclass.

In [None]:
/**
 * Evidence relating two frequencies: `Small` is the one with the shorter period (fewer days),
 * `Large` the one with the longer period. An instance also exposes both runtime values.
 */
trait Gt[Small <: Frequency, Large <: Frequency] {
  def smaller: FrequencyValue[Small]
  def large: FrequencyValue[Large]
}
object Gt {
  import Frequency.{Daily, Monthly, Quarterly, Yearly}

  // Builds one evidence value from the two frequency values it relates.
  private def witness[S <: Frequency, L <: Frequency](fine: FrequencyValue[S], coarse: FrequencyValue[L]): Gt[S, L] =
    new Gt[S, L] {
      override def smaller: FrequencyValue[S] = fine
      override def large: FrequencyValue[L] = coarse
    }

  // One instance per ordered pair of distinct frequencies.
  implicit val dailyLtMonthly: Gt[Daily, Monthly] = witness(Daily, Monthly)
  implicit val dailyLtQuarterly: Gt[Daily, Quarterly] = witness(Daily, Quarterly)
  implicit val dailyLtYearly: Gt[Daily, Yearly] = witness(Daily, Yearly)
  implicit val monthlyLtYearly: Gt[Monthly, Yearly] = witness(Monthly, Yearly)
  implicit val monthlyLtQuarterly: Gt[Monthly, Quarterly] = witness(Monthly, Quarterly)
  implicit val quarterlyLtYearly: Gt[Quarterly, Yearly] = witness(Quarterly, Yearly)
}

### Now we can express upSampling and downSampling at the type level.

In [None]:
  // Interpolation
  /**
   * Frequency conversions for a [[PeriodSeries]]. The implicit [[Gt]] evidence restricts each
   * call to a legal direction (down-sampling to a longer period, up-sampling to a shorter one).
   */
  implicit class PeriodSeriesExtensions[F <: Frequency](ps: PeriodSeries[F]) {
    /**
     * `steps` evenly spaced points starting at `first` and stopping one step short of `second`.
     * The end point is left out because it is the start of the next segment (or is appended
     * once by the caller for the final observation); including it would emit interior
     * observations twice.
     */
    private def interpolate(first: Double, second: Double, steps: Int): Array[Double] =
      (0 until steps).map(k => first + k * (second - first) / steps).toArray

    /**
     * Converts to the longer-period frequency `FF` by averaging consecutive buckets.
     *
     * Each bucket holds `large.days / smaller.days` observations; a shorter trailing bucket is
     * averaged over the observations it actually has. No observation is skipped between buckets.
     *
     * @param newfreq target frequency value (the evidence `lt` supplies the value actually used)
     */
    def downSample[FF <: Frequency](newfreq: FrequencyValue[FF])(implicit lt: Gt[F, FF]) : PeriodSeries[FF] = {
      val step: Int = lt.large.days / lt.smaller.days
      val averaged: Array[Double] =
        if (step <= 0) Array.empty[Double]
        else ps.data.grouped(step).map(bucket => bucket.sum / bucket.length).toArray
      ps.copy(data = averaged, frequency = lt.large)
    }

    /**
     * Converts to the shorter-period frequency `FF` by linear interpolation between neighbours.
     *
     * For `n` observations and `steps = large.days / smaller.days` the result has
     * `(n - 1) * steps + 1` values: every original observation appears exactly once.
     *
     * @param newfreq target frequency value (the evidence `lt` supplies the value actually used)
     */
    def upSample[FF <: Frequency](newfreq: FrequencyValue[FF])(implicit lt: Gt[FF, F]): PeriodSeries[FF] = {
      val steps = lt.large.days / lt.smaller.days
      val points = ps.data
      val interpolated: Array[Double] =
        if (points.isEmpty) Array.empty[Double]
        else {
          val segments = (0 until points.length - 1).flatMap { i =>
            interpolate(points(i), points(i + 1), steps).toSeq
          }
          // The last observation has no successor, so it is appended as-is.
          (segments :+ points.last).toArray
        }
      ps.copy(data = interpolated, frequency = lt.smaller)
    }
  }

### Make sure everything works with the frequency-indexed PeriodSeries.

In [None]:
// Load the monthly sample series, then interpolate it to daily frequency and plot the result.
val stocks = CSV.loadMonthlyData("acme-stocks/stocks-1.txt")
stocks.upSample(Frequency.Daily).plot()

### Summary
1. ~Promoted frequency errors to compile-time.~
2. **Promote bad math to runtime errors.**
3. Provenance of operations.

In [None]:
// Bad!
// The rolling volatility is computed over interpolated (up-sampled) points rather than the loaded ones.
stocks.upSample(Frequency.Daily).rollVol(12).plot()

In [None]:
// Good
// The rolling volatility is computed on the loaded monthly values first; only its result is up-sampled.
stocks.rollVol(12).upSample(Frequency.Daily).plot()

# eDSL A: PeriodSeriesAlgebra

### Here's the AST (Abstract Syntax Tree) for our first eDSL. It reifies elementary statistics operations to a GADT.

**Reify** : (verb, transitive) to regard something abstract as if it were a concrete material thing

**GADT**  : An ADT, with extra type constraints

In [None]:
  /**
   * Syntax tree of series operations whose result has frequency `F`.
   * Building a tree performs no computation; `eval` interprets it into a concrete series.
   */
  sealed trait PeriodSeriesAlgebra[F <: Frequency] {
    /**
     * Interprets this tree bottom-up into a [[PeriodSeries]].
     *
     * @throws AssertionError if a `Roll` is applied directly to an `Upsample`
     */
    def eval: PeriodSeries[F] = this match {
      // Catch bad outcomes before they happen
      // This case must stay ahead of the general Roll case, which would otherwise match first.
      // Only a Roll whose immediate child is an Upsample is rejected.
      case Roll(Upsample(_, _, _), _, _) =>
        throw new AssertionError("Attempting to take a rolling window stats over an interpolated series, you probably want to switch the order of operations here")
      case Roll(self, f, window) => self.eval.roll(f, window)
      case Upsample(self, newfreq, gt) => self.eval.upSample(newfreq = newfreq)(gt)
      case Downsample(self, newfreq, gt) => self.eval.downSample(newfreq = newfreq)(gt)
      case Intersect(self, other, op) => self.eval.intersection(other.eval, op)
      // Leaf: the wrapped series is returned unchanged.
      case Pure(self) => self
    }
  }
  // Rolling statistic `f` over windows of `window` observations of `self`.
  case class Roll[F <: Frequency](self: PeriodSeriesAlgebra[F], f: PeriodSeries[F] => Double, window: Int) extends PeriodSeriesAlgebra[F]
  // Conversion from frequency F to FF; `gt` is evidence that FF has the shorter period.
  case class Upsample[F <: Frequency, FF <: Frequency](self: PeriodSeriesAlgebra[F], freq: FrequencyValue[FF], gt: Gt[FF, F]) extends PeriodSeriesAlgebra[FF]
  // Conversion from frequency F to FF; `gt` is evidence that F has the shorter period.
  case class Downsample[F <: Frequency, FF <: Frequency](self: PeriodSeriesAlgebra[F], freq: FrequencyValue[FF], gt: Gt[F, FF]) extends PeriodSeriesAlgebra[FF]
  // Element-wise combination of two same-frequency sub-trees with `op`.
  case class Intersect[F <: Frequency](self: PeriodSeriesAlgebra[F], other: PeriodSeriesAlgebra[F], op: (Double, Double) => Double) extends PeriodSeriesAlgebra[F]
  // Leaf node wrapping an already-computed series.
  case class Pure[F <: Frequency](p: PeriodSeries[F]) extends PeriodSeriesAlgebra[F]

### The GADT suspends computation of PeriodSeries[F] and allows us to treat logic like an object.
### Here's some syntactic sugar for our eDSL

In [None]:
  // Some Syntactic sugar for our eDSL
  implicit class PeridSeriesPure[F <: Frequency](ps: PeriodSeries[F]){
      // Lifts an already-computed series into the algebra as a leaf node.
      def reify: PeriodSeriesAlgebra[F] = Pure(ps)
  }
  implicit class PeriodSeriesAlgebraSyntax[F <: Frequency](self: PeriodSeriesAlgebra[F]) {
    // Shared builder for the element-wise arithmetic nodes below.
    private def combine(other: PeriodSeriesAlgebra[F])(op: (Double, Double) => Double): PeriodSeriesAlgebra[F] =
      Intersect(self, other, op)

    // Rolling volatility node; nothing is computed until the tree is evaluated.
    def rollVol(window: Int = 10): PeriodSeriesAlgebra[F] =
      Roll(self, (series: PeriodSeries[F]) => series.vol, window)

    // Frequency-conversion nodes; the implicit evidence restricts the legal directions.
    def upSample[FF <: Frequency](newfreq: FrequencyValue[FF])(implicit ev: Gt[FF, F]): PeriodSeriesAlgebra[FF] =
      Upsample[F, FF](self, newfreq, ev)
    def downSample[FF <: Frequency](newfreq: FrequencyValue[FF])(implicit ev: Gt[F, FF]): PeriodSeriesAlgebra[FF] =
      Downsample[F, FF](self, newfreq, ev)

    // Element-wise arithmetic nodes.
    def +(other: PeriodSeriesAlgebra[F]): PeriodSeriesAlgebra[F] = combine(other)((l, r) => l + r)
    def -(other: PeriodSeriesAlgebra[F]): PeriodSeriesAlgebra[F] = combine(other)((l, r) => l - r)
    def *(other: PeriodSeriesAlgebra[F]): PeriodSeriesAlgebra[F] = combine(other)((l, r) => l * r)
    def /(other: PeriodSeriesAlgebra[F]): PeriodSeriesAlgebra[F] = combine(other)((l, r) => l / r)

    // Evaluates the tree, then plots the resulting series.
    def plot() = self.eval.plot()
  }

### Is PeriodSeriesAlgebra[F] safer than PeriodSeries[F]?

In [None]:
// Builds Roll(Upsample(...)); evaluating that shape is rejected with an AssertionError.
stocks.reify.upSample(Frequency.Daily).rollVol().plot()

### Yes

In [None]:
// Builds Upsample(Roll(...)), a shape the evaluator accepts.
stocks.reify.rollVol().upSample(Frequency.Daily).plot()

### Questions
1. Can we catch any more bad PeriodSeries math at runtime? 
2. What about compile-time?
3. What about well-typed runtime errors (`Either` or `Try`)?

### Summary
1. ~Promoted frequency errors to compile-time.~
2. ~Promote bad math to runtime errors.~
3. Finally a simple trick will give us Provenance

### Scala functions lose their name
### Make a tiny eDSL to allow them to keep their names

In [None]:
  /** A function value of type `A` bundled with a human-readable description. */
  sealed trait Lambda[A] {
    // Text describing the wrapped function.
    def desc: String
    // The wrapped function itself.
    def f: A
  }
  // Named one-argument function.
  case class Unary[A,B](desc: String, f: A => B) extends Lambda[A => B]
  // Named two-argument function.
  case class Binary[A, B, C](desc: String, f: (A, B) => C) extends Lambda[(A, B) => C]

In [None]:
  /**
   * Syntax tree of series operations whose result has frequency `F`.
   * In this version the function-carrying nodes hold a named `Lambda` instead of a bare function.
   */
  sealed trait PeriodSeriesAlgebra[F <: Frequency] {
    /**
     * Interprets this tree bottom-up into a [[PeriodSeries]].
     *
     * @throws AssertionError if a `Roll` is applied directly to an `Upsample`
     */
    def eval: PeriodSeries[F] = this match {
      // This case must stay ahead of the general Roll case, which would otherwise match first.
      case Roll(Upsample(_, _, _), _, _) =>
        throw new AssertionError("Attempting to take a rolling window stats over an interpolated series, you probably want to switch the order of operations here")
      // Only the wrapped function is used here; the description is ignored by eval.
      case Roll(self, namedfun, window) => self.eval.roll(namedfun.f, window)
      case Upsample(self, newfreq, gt) => self.eval.upSample(newfreq = newfreq)(gt)
      case Downsample(self, newfreq, gt) => self.eval.downSample(newfreq = newfreq)(gt)
      case Intersect(self, other, namedop) => self.eval.intersection(other.eval, namedop.f)
      // Leaf: the wrapped series is returned unchanged.
      case Pure(self) => self
    }
  }
  // Rolling statistic (a named function) over windows of `window` observations of `self`.
  case class Roll[F <: Frequency](self: PeriodSeriesAlgebra[F], f: Lambda[PeriodSeries[F] => Double], window: Int) extends PeriodSeriesAlgebra[F]
  // Conversion from frequency F to FF; `gt` is evidence that FF has the shorter period.
  case class Upsample[F <: Frequency, FF <: Frequency](self: PeriodSeriesAlgebra[F], freq: FrequencyValue[FF], gt: Gt[FF, F]) extends PeriodSeriesAlgebra[FF]
  // Conversion from frequency F to FF; `gt` is evidence that F has the shorter period.
  case class Downsample[F <: Frequency, FF <: Frequency](self: PeriodSeriesAlgebra[F], freq: FrequencyValue[FF], gt: Gt[F, FF]) extends PeriodSeriesAlgebra[FF]
  // Element-wise combination of two same-frequency sub-trees with a named binary function.
  case class Intersect[F <: Frequency](self: PeriodSeriesAlgebra[F], other: PeriodSeriesAlgebra[F], op: Lambda[(Double, Double) => Double]) extends PeriodSeriesAlgebra[F]
  // Leaf node wrapping an already-computed series.
  case class Pure[F <: Frequency](p: PeriodSeries[F]) extends PeriodSeriesAlgebra[F]

  // Some Syntactic sugar for our eDSL
  implicit class PeridSeriesPure[F <: Frequency](ps: PeriodSeries[F]){
    // Lifts an already-computed series into the algebra as a leaf node.
    def reify: PeriodSeriesAlgebra[F] = Pure(ps)
  }
  implicit class PeriodSeriesAlgebraSyntax[F <: Frequency](self: PeriodSeriesAlgebra[F]) {
    // Shared builder for the named element-wise arithmetic nodes below.
    private def combine(other: PeriodSeriesAlgebra[F], label: String)(op: (Double, Double) => Double): PeriodSeriesAlgebra[F] =
      Intersect(self, other, Binary(label, op))

    // Rolling volatility node, tagged with its description.
    def rollVol(window: Int = 10): PeriodSeriesAlgebra[F] =
      Roll(self, Unary("Volatility", (series: PeriodSeries[F]) => series.vol), window)

    // Frequency-conversion nodes; the implicit evidence restricts the legal directions.
    def upSample[FF <: Frequency](newfreq: FrequencyValue[FF])(implicit ev: Gt[FF, F]): PeriodSeriesAlgebra[FF] =
      Upsample[F, FF](self, newfreq, ev)
    def downSample[FF <: Frequency](newfreq: FrequencyValue[FF])(implicit ev: Gt[F, FF]): PeriodSeriesAlgebra[FF] =
      Downsample[F, FF](self, newfreq, ev)

    // Element-wise arithmetic nodes, each carrying the name of its operation.
    def +(other: PeriodSeriesAlgebra[F]): PeriodSeriesAlgebra[F] = combine(other, "Add")((l, r) => l + r)
    def -(other: PeriodSeriesAlgebra[F]): PeriodSeriesAlgebra[F] = combine(other, "Sub")((l, r) => l - r)
    def *(other: PeriodSeriesAlgebra[F]): PeriodSeriesAlgebra[F] = combine(other, "Mult")((l, r) => l * r)
    def /(other: PeriodSeriesAlgebra[F]): PeriodSeriesAlgebra[F] = combine(other, "Div")((l, r) => l / r)

    // Evaluates the tree, then plots the resulting series.
    def plot() = self.eval.plot()
  }


## Provenance =  State

In [None]:
  // Provenance is just a trace
  // A State computation whose state is the list of operation descriptions recorded so far and
  // whose result is the evaluated series.
  type Provenance[F <: Frequency] = State[Seq[String], PeriodSeries[F]]

  implicit class PeriodSeriesEvalWithState[F <: Frequency](self: PeriodSeriesAlgebra[F]) {
    /**
     * Interprets the tree like `eval`, but additionally appends one description line to the
     * trace for every non-leaf node, after that node's children have been evaluated.
     *
     * @throws AssertionError if a `Roll` is applied directly to an `Upsample`
     */
    def evalWithProvenance: Provenance[F] = self match {
      // Must stay ahead of the general Roll case, which would otherwise match first.
      case Roll(Upsample(_, _, _), _, _) =>
        throw new AssertionError("Attempting to take a rolling window stats over an interpolated series, you probably want to switch the order of operations here")
      // Only a Unary-wrapped function is matched here.
      case Roll(self, Unary(desc, f), window) =>
        self.evalWithProvenance.flatMap { series =>
          State(st => (st :+ s"Roll with $desc and window = $window", series.roll(f, window)))
        }
      case Upsample(self, newfreq, gt) =>
        self.evalWithProvenance.flatMap { series =>
          val upsampled = series.upSample(newfreq)(gt)
          State(st => (st :+ s"Up sample to $newfreq", upsampled))
        }
      case Downsample(self, newfreq, gt) =>
        self.evalWithProvenance.flatMap { series =>
          val downsampled = series.downSample(newfreq)(gt)
          State(st => (st :+ s"Down sample to $newfreq", downsampled))
        }
      case Intersect(self, other, Binary(desc, f)) =>
        // The left operand is evaluated (and traced) before the right operand.
        self.evalWithProvenance.flatMap { series =>
          // The lambda parameter `other` shadows the pattern variable of the same name, so
          // `$other` in the message below interpolates the evaluated PeriodSeries, not the tree node.
          other.evalWithProvenance.flatMap { other =>
            State(st => (st :+ s"Combine with $desc and $other", series.intersection(other, f)))
          }
        }
      // Leaf: returns the wrapped series and adds nothing to the trace.
      case Pure(self) => State.pure(self)
    }

    // Runs the evaluation from an empty trace and returns the recorded descriptions.
    def provenance = evalWithProvenance.run(Seq()).value._1
    // Runs the evaluation from an empty trace and returns the resulting series.
    // NOTE(review): PeriodSeriesAlgebra already declares a member named `eval`; a member normally
    // takes precedence over an extension method, so this one may never be selected — confirm.
    def eval = evalWithProvenance.run(Seq()).value._2
  }

## Let's try this 

In [None]:
// Roll first, then up-sample (the order the evaluator accepts); evaluate the tree and plot the series.
stocks.reify.rollVol().upSample(Frequency.Daily).eval.plot()

// stocks.reify.upSample(Frequency.Daily).rollVol().provenance()
// Scala notebook doesn't like this, show it in IntelliJ

In [None]:
// Returns the trace of operation descriptions recorded while evaluating the pipeline.
stocks.reify.rollVol().upSample(Frequency.Daily).provenance
// Scala notebook doesn't like this, show it in IntelliJ

## Summary of PeriodSeriesAlgebra (eDSL A)
1. Promoted frequency errors to compile-time.
2. Promote bad math to runtime errors.
3. Finally a simple trick will give us Provenance

## Questions?