## Compile and load dependencies

In [None]:
%%bash
# Build the project uberjar with Leiningen. The classpath cell below loads a
# standalone jar from ../target/jvm/uberjar/, so run this cell first.
lein uberjar

In [None]:
%classpath add jar ../target/jvm/uberjar/hdsm-0.1.0-SNAPSHOT-standalone.jar
(clojure.lang.Compiler/loadFile "jupyter_helpers.clj")
(require '[clojure.java.io :as io]
         '[clojure.string :as string]
         '[clojure.pprint :refer [pprint print-table]]
         '[jupyter-helpers :refer [run-and-save-experiments load-experiment site-summary-table experiment-timeline experiments-summary-table
                                   display-html confusion-summary display-experiment-models get-best-experiment-label load-experiment-pair plot-lines plot-monitor-timelines]]
         '[hdsm.classifier.distributed.sites :refer [make-site-structure p-site t-site]]
         '[hdsm.classifier.moa-classifier :refer [adaptive-random-forest naive-bayes hoeffding-tree]]
         '[hdsm.dataset.base :refer [concat-datasets]]
         '[hdsm.dataset.probabilistic :refer [make-probabilistic-dataset next-bool-for-prob!]]
         '[hdsm.evaluation :refer [creation-monitor-timelines accuracy-removal-monitor-timelines
                                                            usage-removal-monitor-timelines]]
         '[hdsm.utils :refer [map-vals]])

## Experiment Setup

In [None]:
;; Dataset Configuration

(defn- cross-term-record
    "Draws one record: six boolean features followed by a class value.

     Each feature is obtained from `(next-bool-for-prob! rng 0.5)`, in order
     f1..f6. `term-pairs` is a collection of `[i j]` pairs of 0-based feature
     indices; the record counts as positive when both features of at least one
     pair are true. The class is then obtained by calling `next-bool-for-prob!`
     with 1 for a positive record and with 0 otherwise, so every record makes
     exactly seven calls on `rng` regardless of the outcome.

     Returns a lazy sequence of seven integers (1 for true, 0 for false):
     the six features followed by the class."
    [rng term-pairs]
    (let [features (vec (repeatedly 6 #(next-bool-for-prob! rng 0.5)))
          positive? (some (fn [[i j]] (and (features i) (features j)))
                          term-pairs)
          label (next-bool-for-prob! rng (if positive? 1 0))]
        (map #(if % 1 0) (conj features label))))

(defn record-fn-a
    "Record generator for concept A: positive when (f1 and f2) or (f3 and f4)."
    [rng]
    (cross-term-record rng [[0 1] [2 3]]))

(defn record-fn-b
    "Record generator for concept B: positive when (f1 and f2) or (f5 and f6)."
    [rng]
    (cross-term-record rng [[0 1] [4 5]]))

(defn dataset-fn
    "Builds the full dataset by concatenating three probabilistic segments, in
     order: `record-fn-a`, `record-fn-b`, then `record-fn-a` again. Each segment
     is created with `make-probabilistic-dataset`, passing 10000 as the second
     argument and 5, 0 and 1 respectively as the third."
    []
    ;; NOTE(review): 10000 is presumably the number of records per segment and
    ;; the last argument an RNG seed -- confirm against make-probabilistic-dataset.
    (let [segments [[record-fn-a 5]
                    [record-fn-b 0]
                    [record-fn-a 1]]]
        (concat-datasets
            (mapv (fn [[record-fn last-arg]]
                      (make-probabilistic-dataset record-fn 10000 last-arg))
                  segments))))
;; Name of the dataset; also used to build experiment labels and the output directory.
(def dataset-name "synthetic-cross-term-drift")
;; Number of feature columns per record (the class column is not counted).
(def feature-count 6)
;; Number of consecutive features grouped into each site.
(def features-per-site 1)
;; Trouble factors to evaluate, expressed as multiples of `features-per-site`.
(def trouble-factors (map (partial * features-per-site) [1]))
;; Base site structure: one p-site per group of `features-per-site` consecutive
;; feature indices, named :p-0, :p-1, ... in order.
;; `partition-all` (rather than `partition`) keeps a final, smaller group when
;; `feature-count` is not a multiple of `features-per-site`, so no feature is
;; silently left without a site. Zipping with `(range feature-count)` supplies
;; the site index and bounds the number of sites.
(def naive-base-site-structure
    (apply make-site-structure
           feature-count
           (map (fn [i site-features]
                    (p-site (keyword (str "p-" i)) site-features))
                (range feature-count)
                (partition-all features-per-site (range feature-count)))))

;; Build the dataset once, then rebind `dataset-fn` so that every later call
;; returns that same instance. Order matters: `dataset` must be built with the
;; original `dataset-fn` before the name is redefined.
(def dataset (dataset-fn))
(defn dataset-fn [] dataset)

In [None]:
;; Experiment Configuration

;; Both classifiers are bound to `hoeffding-tree`. `base-classifier` is passed to
;; `run-and-save-experiments`; `trouble-classifier` is stored in `base-setup`.
(def base-classifier hoeffding-tree)
(def trouble-classifier hoeffding-tree)

;; Settings shared by every experiment. The naive and HDSM experiment maps are
;; built by `assoc`-ing a label, an aggregation rule and a system config onto it.
(def base-setup
    {:dataset-description (keyword dataset-name)
     :dataset-fn dataset-fn
     :trouble-classifier trouble-classifier
     :base-site-structure naive-base-site-structure})

;; Aggregation rules to evaluate, keyed by the name that is embedded in each
;; experiment label. Only the :max-conf rule is enabled.
(def aggregation-rules
    {:max-conf {:type :max-conf}})

;; One naive-system experiment per aggregation rule: `base-setup` plus a label,
;; the aggregation rule and `:system-config :naive`.
;; The rule key is a keyword, so `str` keeps its leading colon in the label
;; (e.g. "<dataset-name>-naive:max-conf").
(def naive-experiments
    (map (fn [[rule-key rule]]
             (let [label (keyword (str dataset-name "-naive" rule-key))]
                 (assoc base-setup
                     :label label
                     :p-site-aggregation-rule rule
                     :system-config :naive)))
         aggregation-rules))

(defn- hdsm-system-config
    "Returns the HDSM system configuration map for `trouble-factor`.
     Every other setting in the map is a fixed value."
    [trouble-factor]
    {:site-window-size 1000
     :site-training-time 0
     :shared-sources? false
     :creation-window-size 1000
     :creation-time-threshold 500
     :removal-window-size 1000
     :removal-time-threshold 500
     :trouble-factor trouble-factor
     :creation-agreement-threshold {:type :smoothed-hoeffding-bound
                                    :r 1 :delta 0.001 :sharpness 5}
     :removal-accuracy-threshold {:type :hoeffding-bound
                                  :r 1 :delta 0.001}
     :removal-usage-threshold 0.05})

;; One HDSM experiment per (trouble factor, aggregation rule) combination:
;; `base-setup` plus a label, the aggregation rule and the HDSM system config.
;; The rule key is a keyword, so `str` keeps its leading colon in the label
;; (e.g. "<dataset-name>-hdsm:max-conf-tf1").
(def hdsm-experiments
    (for [trouble-factor trouble-factors
          [rule-key rule] aggregation-rules
          :let [label (keyword (str dataset-name "-hdsm" rule-key "-tf" trouble-factor))]]
        (assoc base-setup
            :label label
            :p-site-aggregation-rule rule
            :system-config (hdsm-system-config trouble-factor))))

;; Every experiment to run: the naive ones first, then the HDSM ones.
(def experiments
    (concat naive-experiments hdsm-experiments))

;; Directory for this dataset, under "workspace/".
(def output-dir
    (str "workspace/" dataset-name))

## Run Experiments

In [None]:
;; Create a BeakerX Output widget and leave it as the last expression so that it
;; is the value of this cell. The same widget is passed to
;; `run-and-save-experiments` as `:beaker-output`.
(import '[com.twosigma.beakerx.widget Output])
(def experiment-output (Output.))
experiment-output

In [None]:
;; Run every entry of `experiments` with `base-classifier`, passing `output-dir`,
;; the `experiment-output` widget as `:beaker-output` and a `:thread-count` of 2.
(run-and-save-experiments output-dir base-classifier experiments
                          :beaker-output experiment-output
                          :thread-count 2)

## Experiments Summary

In [None]:
;; Load the `:summary` entry from `output-dir`; it is reused by
;; `load-experiment-pair` further down.
(def experiments-summary
    (load-experiment output-dir :summary))

;; Pass the loaded summary to `experiments-summary-table`; the result is this cell's value.
(experiments-summary-table experiments-summary)

## Accuracy and Transmission Over Time 

In [None]:
;; Passed to `load-experiment-pair` when loading the experiments to compare.
;; NOTE(review): presumably a key path selecting accuracy with the first 1000
;; records skipped -- confirm against the helper.
(def performance-measure [:skip-1000 :accuracy])
;; Passed as `:partition-size` to `experiment-timeline`.
(def partition-size 100)

### Maximum Confidence Aggregation

In [None]:
;; Load the experiments for the `:max-conf` aggregation rule. The cells below
;; read the result as a map with `:hdsm` and `:naive` entries.
(def max-conf-experiments (load-experiment-pair output-dir experiments-summary performance-measure :max-conf))

In [None]:
;; Timeline for the HDSM experiment, with the remaining entries of the pair passed
;; alongside it. The x axis spans 0 to the number of HDSM results, and two :drift
;; events are supplied at 10000 and 20000.
(let [{hdsm-experiment :hdsm :as experiment-pair} max-conf-experiments
      other-experiments (dissoc experiment-pair :hdsm)
      result-count (count (:results hdsm-experiment))
      timeline (experiment-timeline hdsm-experiment
                                    other-experiments
                                    :partition-size partition-size
                                    :extra-events [[:drift 10000]
                                                   [:drift 20000]]
                                    :x-bounds [0 result-count]
                                    :y-bounds-acc [0.45 1.8]
                                    :y-bounds-data [0 0.7]
                                    :event-rows 7
                                    :event-top-padding 0.15
                                    :event-spacing 0.12
                                    :width 750
                                    :height 500)]
    (.display timeline))

## Site Usage Breakdown

In [None]:
;; Site summary for the naive experiment, called with `:skip-records 1000`.
(site-summary-table (:naive max-conf-experiments)
                    :skip-records 1000)

In [None]:
;; Site summary for the HDSM experiment, called with `:skip-records 1000` and
;; with `:collapse-trouble-sites` enabled.
(site-summary-table (:hdsm max-conf-experiments)
                    :skip-records 1000
                    :collapse-trouble-sites true)

## Monitor Timelines

In [None]:
;; Plot the monitor timelines of the HDSM experiment, supplying the same two
;; :drift events (10000 and 20000) as the accuracy timeline.
;; NOTE(review): `[3 2]` is presumably the plot grid layout -- confirm against
;; `plot-monitor-timelines`.
(plot-monitor-timelines (:hdsm max-conf-experiments) [3 2]
                        :width 750
                        :height 550
                        :event-rows 3
                        :event-top-padding 0.35
                        :event-spacing 0.3
                        :extra-events [[:drift 10000]
                                       [:drift 20000]])

In [None]:
;; The `:results` of the HDSM experiment; input to all monitor plots below.
(def hdsm-results (:results (:hdsm max-conf-experiments)))

### Creation Monitors - Agreement

In [None]:
;; Take `:proportion` from each creation-monitor timeline (via `map-vals`) and plot it.
(let [timelines (creation-monitor-timelines hdsm-results)
      proportions (map-vals :proportion timelines)]
    (plot-lines 100 proportions))

#### Creation Monitors - Agreement Thresholds

In [None]:
;; Take `:threshold` from each creation-monitor timeline (via `map-vals`) and plot it.
(let [timelines (creation-monitor-timelines hdsm-results)
      thresholds (map-vals :threshold timelines)]
    (plot-lines 100 thresholds))

### Removal Monitors - Usage

In [None]:
;; Take `:proportion` from each usage-removal-monitor timeline (via `map-vals`) and plot it.
(let [timelines (usage-removal-monitor-timelines hdsm-results)
      proportions (map-vals :proportion timelines)]
    (plot-lines 100 proportions))

#### Removal Monitors - Usage Thresholds

In [None]:
;; Take `:threshold` from each usage-removal-monitor timeline (via `map-vals`) and plot it.
(let [timelines (usage-removal-monitor-timelines hdsm-results)
      thresholds (map-vals :threshold timelines)]
    (plot-lines 100 thresholds))

### Removal Monitors - Accuracy

In [None]:
;; Take `:proportion` from each accuracy-removal-monitor timeline (via `map-vals`) and plot it.
(let [timelines (accuracy-removal-monitor-timelines hdsm-results)
      proportions (map-vals :proportion timelines)]
    (plot-lines 100 proportions))

#### Removal Monitors - Accuracy Thresholds

In [None]:
;; Take `:threshold` from each accuracy-removal-monitor timeline (via `map-vals`) and plot it.
(let [timelines (accuracy-removal-monitor-timelines hdsm-results)
      thresholds (map-vals :threshold timelines)]
    (plot-lines 100 thresholds))