In [None]:
%%bash
# Build the project's standalone uberjar with Leiningen; the next cell adds
# the resulting jar (under ../target/jvm/uberjar/) to the kernel classpath.
lein uberjar

In [None]:
 %classpath add jar ../target/jvm/uberjar/ppdsp-0.1.0-SNAPSHOT-standalone.jar
(clojure.lang.Compiler/loadFile "jupyter_helpers.clj")
(require '[clojure.string :as string]
         '[clojure.pprint :refer [pprint print-table]]
         '[jupyter-helpers :refer [save-data load-data run-masking-experiments
                                   line-plot grid-plot-results]]
         '[ppdsp.classifier.moa-classifier :refer [adaptive-random-forest]]
         '[ppdsp.dataset.base :refer [dataset-feature-count dataset-record-count]]
         '[ppdsp.dataset.csv-dataset :refer [read-csv-dataset]]
         '[ppdsp.masking.evaluation :refer [flatten-masking-experiment-recoveries
                                           unknown-record-relative-position map-attack-results
                                           get-cumulative-noise-sigma]]
         '[ppdsp.utils :refer [map-vals mean]]
         '[ppdsp.utils.timing :refer [nano-to-seconds]])
(import '[java.awt Color])

## Experiment Configuration

In [None]:
;; Label identifying this experiment; it is used below to build the
;; workspace/<label>/... output file paths.
(def dataset-label "timing-nyc-taxi")

;; The NYC taxi dataset, read from CSV with the listed numeric features.
(def dataset
  (let [csv-path "datasets/nyc-taxi/nyc-50k.csv"
        ;; Large enough limit to take all records
        record-limit 99999999
        numeric-features ["pickup_longitude" "pickup_latitude"
                          "dropoff_longitude" "dropoff_latitude"
                          "pickup_day" "pickup_hour" "dist"]]
    (read-csv-dataset csv-path record-limit numeric-features)))

In [None]:
;; Dimensions of the loaded dataset.
(def feature-count
  (dataset-feature-count dataset))
(def record-count
  (dataset-record-count dataset))

;; Independent noise levels to evaluate.
(def independent-noise-sigmas [0.25])

;; One cumulative noise sigma per independent sigma, derived from the
;; independent sigma and the number of records in the dataset.
(def cumulative-noise-sigmas
  (for [sigma independent-noise-sigmas]
    (get-cumulative-noise-sigma sigma record-count)))

;; Settings shared by every masking experiment in this notebook; the
;; per-mask configurations below extend this map.
(def base-configuration
  (let [;; Candidate numbers of known records: one fewer than the number
        ;; of features, half the features (rounded up), and a single record.
        nearly-all-features (dec feature-count)
        half-of-features (int (Math/ceil (/ feature-count 2)))
        ;; distinct drops duplicates when the candidates coincide.
        known-record-counts (distinct [nearly-all-features half-of-features 1])]
    {:dataset dataset
     :projection-feature-counts [feature-count]
     :projection-sigmas [1.0]
     :translations [0]
     :known-record-counts known-record-counts
     :known-record-ranges [1]
     :classifier-fns {:arf adaptive-random-forest}
     :attack-count 1
     :attempt-count 1
     :threads-per-configuration 1
     :threads-per-evaluation 1
     :seed 1
     :evaluations [:privacy]}))

;; Base configuration specialised for cumulative noise only: independent
;; noise is fixed at 0.0 and the cumulative sigmas computed above are used.
(def cumulative-noise-configuration
  (assoc base-configuration
         :output-file (str "workspace/" dataset-label "/cumulative.edn")
         :independent-noise-sigmas [0.0]
         :cumulative-noise-sigmas cumulative-noise-sigmas
         :attack-strategies [:a-rp :a-rpcn :a-rpcn-1]))

;; Base configuration specialised for independent noise only: cumulative
;; noise is fixed at 0.0 and the independent sigmas defined above are used.
(def independent-noise-configuration
  (assoc base-configuration
         :output-file (str "workspace/" dataset-label "/independent.edn")
         :independent-noise-sigmas independent-noise-sigmas
         :cumulative-noise-sigmas [0.0]
         :attack-strategies [:a-rp :a-rpin :a-rpin-1]))

;; Base configuration with both noise sigmas fixed at 0.0, evaluated with
;; the :a-rp attack strategy only.
(def rp-only-configuration
  (assoc base-configuration
         :output-file (str "workspace/" dataset-label "/rp-only.edn")
         :independent-noise-sigmas [0.0]
         :cumulative-noise-sigmas [0.0]
         :attack-strategies [:a-rp]))

## Run Experiments

Run the set of experiments twice to allow for JVM warmup (the second set of results will overwrite the first).

In [None]:
;; Run every configuration twice, in order: the first pass is the warmup
;; pass and the second pass is the one whose results are kept.
;; reduce is used purely for sequential execution; it evaluates one
;; configuration at a time and yields the result of the final run, so the
;; cell's value is the same as that of the last run.
(let [configurations [cumulative-noise-configuration
                      independent-noise-configuration
                      rp-only-configuration]
      two-passes (concat configurations configurations)]
  (reduce (fn [_ configuration]
            (run-masking-experiments configuration))
          nil
          two-passes))

## Load and Extend Experiment Results

In [None]:
;; Saved output of the cumulative-noise experiments.
(def output-cumulative
  (load-data (:output-file cumulative-noise-configuration)))

;; Keep only the results whose cumulative sigma is one of the configured ones.
(def results-cumulative
  (let [wanted-sigmas (set cumulative-noise-sigmas)]
    (filter (fn [result]
              (contains? wanted-sigmas (:cumulative-noise-sigma result)))
            (:results output-cumulative))))

;; Flattened recoveries, each extended with the relative position of its
;; unknown record.
(def flat-results-cumulative
  (map (fn [recovery]
         (assoc recovery
                :unknown-record-relative-position
                (unknown-record-relative-position recovery)))
       (flatten-masking-experiment-recoveries results-cumulative)))

In [None]:
;; Saved output of the independent-noise experiments.
(def output-independent
  (load-data (:output-file independent-noise-configuration)))

;; Keep only the results whose independent sigma is one of the configured ones.
(def results-independent
  (let [wanted-sigmas (set independent-noise-sigmas)]
    (filter (fn [result]
              (contains? wanted-sigmas (:independent-noise-sigma result)))
            (:results output-independent))))

;; Flattened recoveries, each extended with the relative position of its
;; unknown record.
(def flat-results-independent
  (map (fn [recovery]
         (assoc recovery
                :unknown-record-relative-position
                (unknown-record-relative-position recovery)))
       (flatten-masking-experiment-recoveries results-independent)))

In [None]:
;; Saved output of the experiments that apply no added noise.
(def output-rp-only
  (load-data (:output-file rp-only-configuration)))

;; All results are kept; there is no noise sigma to filter on here.
(def results-rp-only
  (:results output-rp-only))

;; Flattened recoveries, each extended with the relative position of its
;; unknown record.
(def flat-results-rp-only
  (map (fn [recovery]
         (assoc recovery
                :unknown-record-relative-position
                (unknown-record-relative-position recovery)))
       (flatten-masking-experiment-recoveries results-rp-only)))

In [None]:
;; All flattened results in one sequence, each tagged under :mask with the
;; masking method that produced it (cumulative, then independent, then rp-only).
(def all-flat-results
  (mapcat (fn [[mask flat-results]]
            (map (fn [result] (assoc result :mask mask)) flat-results))
          [[:cumulative-noise flat-results-cumulative]
           [:independent-noise flat-results-independent]
           [:rp-only flat-results-rp-only]]))

## Attack Timing Comparison

In [None]:
;; Plot, for each mask, the log of the mean attack CPU time against the
;; number of known records, with one line per attack strategy.
(letfn [;; For one strategy's results: [known-record-count log-mean-cpu-nanos]
        ;; pairs, sorted by known-record-count.
        (log-mean-cpu-by-known-count [strategy-results]
          (->> strategy-results
               (group-by :known-record-count)
               (map-vals (fn [results]
                           (mean (map :mean-attempt-cpu-nanoseconds results))))
               (map-vals (fn [mean-nanos] (Math/log mean-nanos)))
               (sort-by first)))

        ;; Build the line plot for one grid cell; only the mask part of the
        ;; cell key is used (for the title).
        (plot-mask-timings [[_ mask] plot-results]
          (let [series-by-strategy (map-vals log-mean-cpu-by-known-count
                                             (group-by :strategy plot-results))
                ;; Strategies in descending key order.
                series (reverse (sort-by first series-by-strategy))
                colours (mapv (fn [hex] (Color/decode hex))
                              ["#7570b3" "#d95f02" "#1b9e77"])]
            (doto (line-plot series :colours colours)
              (.setShowLegend false)
              (.setTitle (str mask))
              (.setXLabel "Known records")
              (.setYLabel "Mean CPU time (log-nanoseconds)"))))]
  (grid-plot-results all-flat-results
                     :all :mask
                     plot-mask-timings
                     :plot-width 400
                     :plot-height 400))