In [None]:
%%bash
# Build the project's standalone uberjar; the next cell adds the resulting
# jar to the notebook classpath.
lein uberjar

In [None]:
%classpath add jar ../target/jvm/uberjar/hdsm-0.1.0-SNAPSHOT-standalone.jar
(clojure.lang.Compiler/loadFile "jupyter_helpers.clj")
(require '[clojure.java.io :as io]
         '[clojure.string :as string]
         '[clojure.pprint :refer [pprint print-table]]
         '[jupyter-helpers :refer [run-and-save-experiments load-experiment site-summary-table experiment-timeline experiments-summary-table
                                   display-html confusion-summary display-experiment-models get-best-experiment-label load-experiment-pair
                                   get-timing-evaluation timing-table get-experiment-block-accuracies experiment-block-accuracies-table
                                   save-data load-data]]
         '[hdsm.classifier.distributed.sites :refer [make-site-structure p-site t-site]]
         '[hdsm.classifier.moa-classifier :refer [adaptive-random-forest naive-bayes]]
         '[hdsm.evaluation :refer [get-order-summary]]
         '[hdsm.dataset.csv-dataset :refer [read-csv-dataset]]
         '[hdsm.utils.stats :refer [mann-whitney-u-test wilcoxon-signed-rank-test]])

## Experiment Setup

In [None]:
;; Dataset Configuration

(defn dataset-fn
    "Reads the wall-following-robot CSV via read-csv-dataset, passing a record
     limit of 999999 (intended to cover all records) and the column names
     sensor_0 .. sensor_23 as the numeric features."
    []
    (let [record-limit 999999 ;; All records
          ;; Numeric features (sensor_0 - sensor_23)
          sensor-columns (mapv #(str "sensor_" %) (range 24))]
        (read-csv-dataset "datasets/wall-following-robot/wall-following-robot-twice.csv"
                          record-limit
                          sensor-columns)))
;; Name used to build experiment labels, the dataset description and the output directory.
(def dataset-name "wfr-sensitivity-analysis")
(def feature-count 24) ;; Not including class
;; Size of each consecutive group of feature indices handed to one p-site.
(def features-per-site 4)

;; Value stored under :trouble-factor in the system config; kept equal to features-per-site.
(def trouble-factor features-per-site)
;; One p-site per consecutive group of `features-per-site` feature indices,
;; named :p-0, :p-1, ... in group order. Trailing features that do not fill a
;; whole group are dropped by `partition`.
(def p-sites
    (map-indexed (fn [site-index site-features]
                     (p-site (keyword (str "p-" site-index)) site-features))
                 (partition features-per-site (range feature-count))))

;; Load the dataset once, then rebind dataset-fn so that it returns the
;; already-loaded value instead of reading the CSV again on every call.
;; Order matters: the original dataset-fn must be called before it is replaced.
(def dataset (dataset-fn))
(defn dataset-fn [] dataset)

In [None]:
;; Experiment Configuration

;; Classifier handed to run-and-save-experiments alongside the experiment list.
(def base-classifier naive-bayes)
;; Classifier stored under :trouble-classifier in the shared experiment setup.
(def trouble-classifier adaptive-random-forest)

;; Settings shared by every experiment. Each parameter sweep below starts from
;; this map and adds its own :label and :system-config.
(def base-setup
    {:dataset-description (keyword dataset-name)
     :dataset-fn dataset-fn
     :trouble-classifier trouble-classifier
     :base-site-structure (apply make-site-structure feature-count p-sites)
     ;; Disable monitor logging because this is a long-running experiment
     ;; where detailed logging data is excessively large.
     :disable-monitor-logging true
     :p-site-aggregation-rule {:type :simple-voting}})

;; Baseline system configuration. Each sensitivity sweep below overrides one
;; group of these values (window sizes, time thresholds, removal usage
;; threshold, Hoeffding-bound delta, or sharpness) and keeps the rest as-is.
(def base-system-config
    {:site-window-size 1000
     :site-training-time 0
     :shared-sources? false
     :creation-window-size 1000
     :creation-time-threshold 500
     :removal-window-size 1000
     :removal-time-threshold 500
     :creation-agreement-threshold {:type :smoothed-hoeffding-bound
                                    :r 1 :delta 0.001 :sharpness 5}
     :removal-accuracy-threshold {:type :hoeffding-bound
                                  :r 1 :delta 0.001}
     :removal-usage-threshold 0.05
     :trouble-factor trouble-factor})

;; Window-size sweep: in each experiment the site, creation and removal
;; windows are all set to the same size; labels end in "-ws<size>".
(def window-sizes [500 1000 2000])
(def ws-experiments
    (map (fn [size]
             (let [label (keyword (str dataset-name "-hdsm" "-ws" size))
                   config (merge base-system-config
                                 {:site-window-size size
                                  :creation-window-size size
                                  :removal-window-size size})]
                 (assoc base-setup
                     :label label
                     :system-config config)))
         window-sizes))

;; Time-threshold sweep: the creation and removal time thresholds are varied
;; together; labels end in "-tt<threshold>".
(def threshold-times [100 500 1000])
(def tt-experiments
    (map (fn [time-limit]
             (-> base-setup
                 (assoc :label (keyword (str dataset-name "-hdsm" "-tt" time-limit)))
                 (assoc :system-config (assoc base-system-config
                                           :creation-time-threshold time-limit
                                           :removal-time-threshold time-limit))))
         threshold-times))

;; Removal-usage-threshold sweep: only :removal-usage-threshold differs from
;; the baseline config; labels end in "-rut<threshold>".
(def removal-usage-thresholds [0.01 0.05 0.15])
(def rut-experiments
    (map (fn [usage-threshold]
             (let [label (keyword (str dataset-name "-hdsm" "-rut" usage-threshold))
                   config (assoc base-system-config
                              :removal-usage-threshold usage-threshold)]
                 (merge base-setup {:label label
                                    :system-config config})))
         removal-usage-thresholds))

;; Hoeffding-bound sweep: the same :delta is written into both the creation
;; agreement threshold and the removal accuracy threshold; labels end in
;; "-hb<delta>".
(def hoeffding-bounds [0.000000001 0.001 0.1])
(def hb-experiments
    (map (fn [delta]
             (let [with-delta (fn [config threshold-key]
                                  (assoc-in config [threshold-key :delta] delta))
                   config (reduce with-delta
                                  base-system-config
                                  [:creation-agreement-threshold
                                   :removal-accuracy-threshold])]
                 (assoc base-setup
                     :label (keyword (str dataset-name "-hdsm" "-hb" delta))
                     :system-config config)))
         hoeffding-bounds))

;; Smoothing-factor sweep: only the :sharpness of the creation agreement
;; threshold differs from the baseline config; labels end in "-sf<factor>".
(def smoothing-factors [1 5 10])
(def sf-experiments
    (map (fn [sharpness]
             (let [label (keyword (str dataset-name "-hdsm" "-sf" sharpness))
                   config (update base-system-config
                                  :creation-agreement-threshold
                                  assoc :sharpness sharpness)]
                 (assoc base-setup
                     :label label
                     :system-config config)))
         smoothing-factors))

;; Full experiment list: every sweep, in order, followed by a second identical
;; pass, so each configuration appears twice.
(def experiments
    (let [single-pass (concat ws-experiments
                              tt-experiments
                              rut-experiments
                              hb-experiments
                              sf-experiments)]
        (concat single-pass single-pass)))

;; Directory that experiment results and derived summaries are written to and read from.
(def output-dir (str "workspace/" dataset-name))

## Run Experiments

In [None]:
;; Create a BeakerX Output widget and return it as the cell result
;; (presumably so it is rendered here); the run cell passes it as :beaker-output.
(import '[com.twosigma.beakerx.widget Output])
(def experiment-output (Output.))
experiment-output

In [None]:
;; Run every configured experiment with the base classifier, saving under
;; output-dir, reporting to the output widget, with a thread count of 1.
(run-and-save-experiments output-dir base-classifier experiments
                          :beaker-output experiment-output
                          :thread-count 1)

## Experiments Summary

In [None]:
;; Load the saved :summary entry from output-dir and show it as a table.
;; Later cells read each entry's :label from this summary.
(def experiments-summary
    (load-experiment output-dir :summary))

(experiments-summary-table experiments-summary)

## Visualisation

In [None]:
;; Load the saved results of every window-size ("-ws") experiment listed in
;; the summary, then report how many were loaded.
;;
;; The label prefix is derived from dataset-name in the same way the labels
;; were built, rather than repeating the dataset name as a string literal that
;; would silently match nothing if dataset-name were changed.
;;
;; NOTE: this rebinds `experiments`, replacing the list of experiment
;; configurations defined in the configuration cell with loaded results.
;; Re-evaluate the configuration cell before running the experiments again.
(def experiments
    (let [ws-label-prefix (str (keyword (str dataset-name "-hdsm" "-ws")))]
        (->> experiments-summary
             (map :label)
             (filter #(string/starts-with? (str %) ws-label-prefix))
             (map #(load-experiment output-dir %)))))
(count experiments)

In [None]:
;; Build and display the timeline for the loaded experiments: the last loaded
;; experiment is passed first, followed by a label -> experiment map of all of
;; them. The x axis spans the result count of the first loaded experiment.
(def partition-size 100)
(let [result-count (count (:results (first experiments)))
      experiments-by-label (zipmap (map :label experiments) experiments)
      timeline (experiment-timeline (last experiments)
                                    experiments-by-label
                                    :partition-size partition-size
                                    :x-bounds [0 result-count]
                                    :y-bounds-acc [0.35 1.2]
                                    :y-bounds-data [0 1.2]
                                    :event-rows 6
                                    :include-timing-plot? false)]
    (.display timeline))

## Timing Evaluation

In [None]:
;; Timing evaluation over each distinct experiment label in the summary.
;; The trailing 1000 is passed through to get-timing-evaluation unchanged
;; (NOTE(review): presumably a block/window size - confirm against the helper).
(def timing-evaluation
    (let [unique-labels (->> experiments-summary
                             (map :label)
                             distinct)] ;; Drop repeated experiment labels
        (get-timing-evaluation output-dir unique-labels 1000)))

In [None]:
;; Persist the timing evaluation under output-dir.
;; NOTE(review): the file name starts with ":" (unlike block-accuracies.edn);
;; presumably this mirrors keyword-named result files - confirm before changing.
(save-data (str output-dir "/:timing-summary.edn") timing-evaluation)

In [None]:
;; Show the timing evaluation computed above via the timing-table helper.
(timing-table timing-evaluation)

## Block-based Summaries

In [None]:
;; Compute block accuracies for each distinct experiment label in the summary
;; and save them under output-dir. The trailing 1000 is passed through to
;; get-experiment-block-accuracies unchanged (NOTE(review): presumably the
;; block size - confirm against the helper). The final nil keeps the cell
;; from returning the saved data as its result.
(def experiment-block-accuracies
    (let [unique-labels (->> experiments-summary
                             (map :label)
                             distinct)] ;; Drop repeated experiment labels
        (get-experiment-block-accuracies output-dir unique-labels 1000)))
(save-data (str output-dir "/block-accuracies.edn") experiment-block-accuracies)
nil

In [None]:
;; Rebind experiment-block-accuracies from the saved file, so the table below
;; can be produced without recomputing the accuracies.
(def experiment-block-accuracies (load-data (str output-dir "/block-accuracies.edn")))

In [None]:
;; Show the block accuracies via the experiment-block-accuracies-table helper.
(experiment-block-accuracies-table experiment-block-accuracies)