In [None]:
%%bash
# Build the project's standalone uberjar with Leiningen
# (presumably the jar that the next cell adds to the classpath).
lein uberjar

In [None]:
 %classpath add jar ../target/jvm/uberjar/ppdsp-0.1.0-SNAPSHOT-standalone.jar
(clojure.lang.Compiler/loadFile "jupyter_helpers.clj")
(require '[clojure.string :as string]
         '[clojure.pprint :refer [pprint print-table]]
         '[jupyter-helpers :refer [save-data load-data
                                   run-masking-experiments plot-lines]]
         '[ppdsp.classifier.moa-classifier :refer [inspectable-adaptive-random-forest]]
         '[ppdsp.dataset.base :refer [dataset-feature-count dataset-record-count]]
         '[ppdsp.dataset.csv-dataset :refer [read-csv-dataset]]
         '[ppdsp.masking.evaluation :refer [flatten-masking-experiment-recoveries
                                           unknown-record-relative-position
                                           add-combined-result
                                           get-cumulative-noise-sigma]]
         '[ppdsp.utils :refer [map-vals mean]])

## Experiment Configuration

In [None]:
;; Label used to build the workspace output paths for this experiment.
(def dataset-label "nyc-taxi-tree-depth")

;; Dataset read from the NYC taxi CSV file, with the listed columns passed
;; as the numeric features.
(def dataset
  (let [record-limit 99999999 ;; All records
        numeric-features ["pickup_longitude" "pickup_latitude"
                          "dropoff_longitude" "dropoff_latitude"
                          "pickup_day" "pickup_hour" "dist"]]
    (read-csv-dataset "datasets/nyc-taxi/nyc-50k.csv"
                      record-limit
                      numeric-features)))

In [None]:
;; Dimensions of the loaded dataset.
(def feature-count (dataset-feature-count dataset))
(def record-count (dataset-record-count dataset))

;; Independent-noise sigmas to evaluate and, for each of them, the value
;; returned by get-cumulative-noise-sigma for this dataset's record count.
(def independent-noise-sigmas [0.25])
(def cumulative-noise-sigmas
  (for [sigma independent-noise-sigmas]
    (get-cumulative-noise-sigma sigma record-count)))

;; Settings shared by every masking experiment in this notebook. The
;; experiment-specific configurations extend this map with an output file
;; and the noise sigmas.
(def base-configuration
  {;; Data and projection settings
   :dataset dataset
   :projection-feature-counts [feature-count]
   :projection-sigmas [1.0]
   :translations [0]
   ;; Classifier and evaluation settings
   :classifier-fns {:arf inspectable-adaptive-random-forest}
   :evaluations [:accuracy]
   ;; Execution settings
   :seed 1
   :threads-per-configuration 2
   :threads-per-evaluation 1})

;; Base configuration with the cumulative-noise sigmas applied and the
;; independent-noise sigma fixed at 0.0.
(def cumulative-noise-configuration
  (assoc base-configuration
         :output-file (str "workspace/" dataset-label "/cumulative.edn")
         :independent-noise-sigmas [0.0]
         :cumulative-noise-sigmas cumulative-noise-sigmas))

;; Base configuration with the independent-noise sigmas applied and the
;; cumulative-noise sigma fixed at 0.0.
(def independent-noise-configuration
  (assoc base-configuration
         :output-file (str "workspace/" dataset-label "/independent.edn")
         :independent-noise-sigmas independent-noise-sigmas
         :cumulative-noise-sigmas [0.0]))

;; Base configuration with both the independent-noise and the
;; cumulative-noise sigma fixed at 0.0.
(def rp-only-configuration
  (assoc base-configuration
         :output-file (str "workspace/" dataset-label "/rp-only.edn")
         :independent-noise-sigmas [0.0]
         :cumulative-noise-sigmas [0.0]))

## Run Experiments

In [None]:
;; Run the masking experiments described by cumulative-noise-configuration.
(run-masking-experiments cumulative-noise-configuration)

In [None]:
;; Run the masking experiments described by independent-noise-configuration.
(run-masking-experiments independent-noise-configuration)

In [None]:
;; Run the masking experiments described by rp-only-configuration.
(run-masking-experiments rp-only-configuration)

## Load and Extend Experiment Results

In [None]:
;; Data loaded from the cumulative-noise output file, and its :results entry.
(def output-cumulative
  (-> cumulative-noise-configuration :output-file load-data))
(def results-cumulative (get output-cumulative :results))

In [None]:
;; Data loaded from the independent-noise output file, and its :results entry.
(def output-independent
  (-> independent-noise-configuration :output-file load-data))
(def results-independent (get output-independent :results))

In [None]:
;; Data loaded from the rp-only output file, and its :results entry.
(def output-rp-only
  (-> rp-only-configuration :output-file load-data))
(def results-rp-only (get output-rp-only :results))

## ARF Tree Depth Over Time

In [None]:
;; Number of tree indices for which a depth series is plotted.
(def ensemble-size 10)

(defn tree-depth-plot
  "Displays a line plot with one series per tree index in
  [0, ensemble-size).

  Each series is built from the :tree-depths entries found under
  [:accuracy :arf :raw-results] of `result`, taking the value stored at
  that tree's index in every entry. The legend is hidden and the axes are
  labelled \"Records\" and \"Tree Depth\"."
  [result ensemble-size]
  (let [;; One :tree-depths value per raw result, shared by all series.
        depth-snapshots (map :tree-depths
                             (get-in result [:accuracy :arf :raw-results]))
        ;; Map of tree index -> that tree's depth at every snapshot.
        depths-by-tree (into {}
                             (for [tree-index (range ensemble-size)]
                               [tree-index
                                (map (fn [depths] (get depths tree-index))
                                     depth-snapshots)]))]
    ;; The setters are threaded so that each one is called on the value
    ;; returned by the previous call, ending with .display.
    (-> (plot-lines 100 depths-by-tree :init-width 1200)
        (.setShowLegend false)
        (.setYLabel "Tree Depth")
        (.setXLabel "Records")
        (.display))))

### RP (random projection only)

In [None]:
;; Plot tree depths for the first rp-only result.
(-> results-rp-only
    first
    (tree-depth-plot ensemble-size))

### RPIN (random projection with independent noise)

In [None]:
;; Plot tree depths for the result with the largest :independent-noise-sigma.
(let [noisiest-result (apply max-key :independent-noise-sigma results-independent)]
  (tree-depth-plot noisiest-result ensemble-size))

### RPCN (random projection with cumulative noise)

In [None]:
;; Plot tree depths for the result with the largest :cumulative-noise-sigma.
(let [noisiest-result (apply max-key :cumulative-noise-sigma results-cumulative)]
  (tree-depth-plot noisiest-result ensemble-size))