In [None]:
%%bash
# Build the project's standalone uberjar with Leiningen; the next cell adds the
# resulting jar (under ../target/jvm/uberjar/) to the notebook classpath.
lein uberjar

In [None]:
 %classpath add jar ../target/jvm/uberjar/ppdsp-0.1.0-SNAPSHOT-standalone.jar
;; Load the notebook helper source file directly so that the jupyter-helpers
;; namespace is available to the require form below.
(clojure.lang.Compiler/loadFile "jupyter_helpers.clj")
;; Bring the plotting/reporting helpers and the ppdsp experiment functions used
;; throughout this notebook into scope.
(require '[clojure.string :as string]
         '[clojure.pprint :refer [pprint print-table]]
         '[jupyter-helpers :refer [save-data load-data display-table
                                   display-masking-error-plots run-masking-experiments
                                   attack-strategy-comparison-plots
                                   noise-accuracy-plot accuracy-privacy-tradeoff-comparison
                                   display-html accuracy-privacy-tradeoff-legend
                                   accuracy-privacy-tradeoff attack-strategy-comparison
                                   round-known-record-counts]]
         '[ppdsp.classifier.moa-classifier :refer [adaptive-random-forest]]
         '[ppdsp.dataset.base :refer [dataset-feature-count dataset-record-count]]
         '[ppdsp.dataset.csv-dataset :refer [read-csv-dataset]]
         '[ppdsp.masking.evaluation :refer [flatten-masking-experiment-recoveries
                                           unknown-record-relative-position
                                           add-combined-result
                                           get-cumulative-noise-sigma]]
         '[ppdsp.utils :refer [map-vals mean]])

## Experiment Configuration

In [None]:
;; Short label for this dataset; it names the output directory under workspace/.
(def dataset-label "rbf-f")

;; The CSV dataset under study.
;; NOTE(review): the second argument is presumably a record limit, set large
;; enough to take every record - confirm against read-csv-dataset.
(def dataset
  (let [record-limit 99999999
        ;; Numeric feature columns "att1" through "att10".
        numeric-features (mapv #(str "att" %) (range 1 11))]
    (read-csv-dataset "datasets/moa-generators/rbf-f.csv"
                      record-limit
                      numeric-features)))

In [None]:
;; Epsilon values for which the privacy results below are plotted and tabulated.
(def epsilons [0.1 0.2])

;; Dataset dimensions, as reported by the dataset helpers.
(def feature-count (dataset-feature-count dataset))
(def record-count (dataset-record-count dataset))

;; Sigma values used for the independent-noise experiments.
(def independent-noise-sigmas [0.05 0.1 0.25])

;; One cumulative sigma per independent sigma, derived from that sigma and the
;; number of records via get-cumulative-noise-sigma.
(def cumulative-noise-sigmas
  (for [independent-sigma independent-noise-sigmas]
    (get-cumulative-noise-sigma independent-sigma record-count)))

;; Settings shared by every experiment in this notebook. The noise-specific
;; configurations add their output file, noise sigmas and attack strategies
;; on top of this map.
(def base-configuration
  (let [all-but-one-feature (dec feature-count)
        half-the-features (int (Math/ceil (/ feature-count 2)))
        ;; distinct drops duplicates when these three counts coincide.
        known-record-counts (distinct [all-but-one-feature half-the-features 1])]
    {:dataset dataset
     :projection-feature-counts [feature-count]
     :projection-sigmas [1.0]
     :translations [0]
     :known-record-counts known-record-counts
     :known-record-ranges [1]
     :classifier-fns {:arf adaptive-random-forest}
     :attack-count 500
     :attempt-count 3
     :threads-per-configuration 4
     :threads-per-evaluation 1
     :seed 1
     :evaluations [:privacy :accuracy]}))

;; Base configuration specialised for cumulative noise: independent noise is
;; fixed at 0.0 and the cumulative sigmas are varied.
(def cumulative-noise-configuration
  (assoc base-configuration
         :output-file (str "workspace/" dataset-label "/cumulative.edn")
         :independent-noise-sigmas [0.0]
         :cumulative-noise-sigmas cumulative-noise-sigmas
         :attack-strategies [:a-rp :a-rpcn :a-rpcn-1]))

;; Base configuration specialised for independent noise: cumulative noise is
;; fixed at 0.0 and the independent sigmas are varied.
(def independent-noise-configuration
  (assoc base-configuration
         :output-file (str "workspace/" dataset-label "/independent.edn")
         :independent-noise-sigmas independent-noise-sigmas
         :cumulative-noise-sigmas [0.0]
         :attack-strategies [:a-rp :a-rpin :a-rpin-1]))

;; Base configuration with both noise sigmas fixed at 0.0 and only the :a-rp
;; attack strategy.
(def rp-only-configuration
  (assoc base-configuration
         :output-file (str "workspace/" dataset-label "/rp-only.edn")
         :independent-noise-sigmas [0.0]
         :cumulative-noise-sigmas [0.0]
         :attack-strategies [:a-rp]))

## Run Experiments

In [None]:
;; Run the experiments described by the cumulative-noise configuration.
;; NOTE(review): presumably writes its results to the configuration's
;; :output-file, which is loaded back later in this notebook - confirm in
;; jupyter-helpers.
(run-masking-experiments cumulative-noise-configuration)

In [None]:
;; Run the experiments described by the independent-noise configuration.
;; NOTE(review): presumably writes its results to the configuration's
;; :output-file, which is loaded back later in this notebook - confirm in
;; jupyter-helpers.
(run-masking-experiments independent-noise-configuration)

In [None]:
;; Run the experiments described by the rp-only configuration (no added noise).
;; NOTE(review): presumably writes its results to the configuration's
;; :output-file, which is loaded back later in this notebook - confirm in
;; jupyter-helpers.
(run-masking-experiments rp-only-configuration)

## Load and Extend Experiment Results

In [None]:
;; Saved output of the cumulative-noise experiments, read back from the
;; configuration's :output-file.
(def output-cumulative (load-data (:output-file cumulative-noise-configuration)))

;; Accuracy entry stored under [:original :accuracy] in the saved output.
(def original-accuracy-cumulative (-> output-cumulative :original :accuracy))

;; Results restricted to the cumulative sigmas configured in this notebook,
;; extended with combined results for [:a-rp :a-rpcn] and [:a-rp :a-rpcn-1].
;; The sigma set is built once here rather than once per result inside the
;; filter predicate.
(def results-cumulative
  (let [selected-sigmas (set cumulative-noise-sigmas)]
    (-> (filter #(contains? selected-sigmas (:cumulative-noise-sigma %))
                (:results output-cumulative))
        (add-combined-result :score [:a-rp :a-rpcn])
        (add-combined-result :score [:a-rp :a-rpcn-1]))))

;; Flattened recoveries, each annotated with its
;; :unknown-record-relative-position.
(def flat-results-cumulative
  (->> results-cumulative
       flatten-masking-experiment-recoveries
       (map #(assoc % :unknown-record-relative-position (unknown-record-relative-position %)))))

In [None]:
;; Saved output of the independent-noise experiments, read back from the
;; configuration's :output-file.
(def output-independent (load-data (:output-file independent-noise-configuration)))

;; Accuracy entry stored under [:original :accuracy] in the saved output.
(def original-accuracy-independent (-> output-independent :original :accuracy))

;; Results restricted to the independent sigmas configured in this notebook,
;; extended with combined results for [:a-rp :a-rpin] and [:a-rp :a-rpin-1].
;; The sigma set is built once here rather than once per result inside the
;; filter predicate.
(def results-independent
  (let [selected-sigmas (set independent-noise-sigmas)]
    (-> (filter #(contains? selected-sigmas (:independent-noise-sigma %))
                (:results output-independent))
        (add-combined-result :score [:a-rp :a-rpin])
        (add-combined-result :score [:a-rp :a-rpin-1]))))

;; Flattened recoveries, each annotated with its
;; :unknown-record-relative-position.
(def flat-results-independent
  (->> results-independent
       flatten-masking-experiment-recoveries
       (map #(assoc % :unknown-record-relative-position (unknown-record-relative-position %)))))

In [None]:
;; Saved output of the rp-only experiments, read back from the configuration's
;; :output-file.
(def output-rp-only (-> rp-only-configuration :output-file load-data))

;; Accuracy entry stored under [:original :accuracy] in the saved output.
(def original-accuracy-rp-only (get-in output-rp-only [:original :accuracy]))

;; All rp-only results are kept; no filtering is applied here.
(def results-rp-only (:results output-rp-only))

;; Flattened recoveries, each annotated with its
;; :unknown-record-relative-position.
(def flat-results-rp-only
  (map (fn [recovery]
         (assoc recovery
                :unknown-record-relative-position
                (unknown-record-relative-position recovery)))
       (flatten-masking-experiment-recoveries results-rp-only)))

## Comparison of Attack Strategies

In [None]:
;; For each epsilon, show the attack-strategy comparison over all cumulative
;; results (with legend), then the same comparison grouped by
;; :cumulative-noise-sigma (without legend).
(doseq [epsilon epsilons]
  (let [comparison-plots (fn [grouping show-legend?]
                           (attack-strategy-comparison-plots flat-results-cumulative grouping epsilon
                                                             :plot-width 400
                                                             :plot-height 400
                                                             :show-legend? show-legend?))]
    (display-html (str "<h3>Epsilon = " epsilon "</h3>"))
    (.display (comparison-plots :all true))
    (display-html "<h4>Breakdown by noise amount</h4>")
    (.display (comparison-plots :cumulative-noise-sigma false))))

In [None]:
;; For each epsilon, show the attack-strategy comparison over all independent
;; results (with legend), then the same comparison grouped by
;; :independent-noise-sigma (without legend).
(doseq [epsilon epsilons]
  (let [comparison-plots (fn [grouping show-legend?]
                           (attack-strategy-comparison-plots flat-results-independent grouping epsilon
                                                             :plot-width 400
                                                             :plot-height 400
                                                             :show-legend? show-legend?))]
    (display-html (str "<h3>Epsilon = " epsilon "</h3>"))
    (.display (comparison-plots :all true))
    (display-html "<h4>Breakdown by noise amount</h4>")
    (.display (comparison-plots :independent-noise-sigma false))))

In [None]:
;; Compare attack strategies on the cumulative results at the largest
;; configured known-record count, save the comparison, and show it as a table.
(let [largest-known-record-count (apply max (:known-record-counts base-configuration))
      comparison-file (str "workspace/" dataset-label "/cumulative-attack-strategies-comparison.edn")
      comparison (attack-strategy-comparison flat-results-cumulative epsilons
                                             :known-record-count largest-known-record-count)]
  (save-data comparison-file comparison)
  (display-table comparison))

In [None]:
;; Compare attack strategies on the independent results at the largest
;; configured known-record count, save the comparison, and show it as a table.
(let [largest-known-record-count (apply max (:known-record-counts base-configuration))
      comparison-file (str "workspace/" dataset-label "/independent-attack-strategies-comparison.edn")
      comparison (attack-strategy-comparison flat-results-independent epsilons
                                             :known-record-count largest-known-record-count)]
  (save-data comparison-file comparison)
  (display-table comparison))

In [None]:
;; Attack strategy used to represent each noise type in the tradeoff
;; comparisons and privacy plots that follow.
;; NOTE(review): presumably chosen by hand from the attack-strategy comparison
;; output earlier in this notebook - re-check after re-running the experiments.
(def best-cumulative-attack-strategy :a-rpcn-1)
(def best-independent-attack-strategy :a-rpin-1)

## Cumulative vs. Independent Noise

In [None]:
;; Display the legend built from the independent and cumulative noise sigmas.
(display-html (accuracy-privacy-tradeoff-legend independent-noise-sigmas cumulative-noise-sigmas))
;; End the cell with nil so its value is nil rather than display-html's result.
nil

In [None]:
;; For each epsilon, display the accuracy/privacy tradeoff comparison of the
;; cumulative, independent and rp-only results for the :arf classifier, using
;; the chosen attack strategy for each noise type.
(doseq [epsilon epsilons]
  (let [tradeoff-plot (accuracy-privacy-tradeoff-comparison
                       results-cumulative results-independent results-rp-only
                       :arf best-cumulative-attack-strategy best-independent-attack-strategy epsilon
                       :plot-width 400
                       :plot-height 400)]
    (.display tradeoff-plot)))

### Comparison of Square Distance From Origin

Performance is measured as the squared distance from the origin, i.e. the sum of the squares of (1) the probability of an ε-privacy breach and (2) the classification error.

In [None]:
;; Tabulate the accuracy/privacy tradeoff (as a square distance, one row per
;; noise level) at the largest configured known-record count, save it, and show
;; it as a table.
(let [largest-known-record-count (apply max (:known-record-counts base-configuration))
      comparison-file (str "workspace/" dataset-label "/mask-comparison.edn")
      comparison (accuracy-privacy-tradeoff
                  results-cumulative results-independent results-rp-only
                  :arf best-cumulative-attack-strategy best-independent-attack-strategy epsilons
                  :square-distance? true
                  :row-per-noise-level? true
                  :known-record-count largest-known-record-count)]
  (save-data comparison-file comparison)
  (display-table comparison))

## Effect of Cumulative Noise on Accuracy over Time

In [None]:
;; Plot accuracy for the :arf classifier from the cumulative, independent and
;; rp-only results, with an initial plot size of 800x400.
(noise-accuracy-plot results-cumulative results-independent results-rp-only :arf
                     :init-width 800
                     :init-height 400)

## Effect of Cumulative Noise on Privacy over Time

In [None]:
;; Plot masking error for the chosen cumulative attack strategy, restricted to
;; the largest cumulative sigma and the largest known-record count.
;; Both maxima are computed once up front instead of once per record inside the
;; filter predicates, and the three conditions run in a single filter pass.
(let [largest-cumulative-sigma (apply max cumulative-noise-sigmas)
      largest-known-record-count (apply max (:known-record-counts base-configuration))
      selected-recoveries (filter #(and (= best-cumulative-attack-strategy (:strategy %))
                                        (= largest-cumulative-sigma (:cumulative-noise-sigma %))
                                        (= largest-known-record-count (:known-record-count %)))
                                  flat-results-cumulative)]
  (display-masking-error-plots selected-recoveries
                               :cumulative-noise-sigma :known-record-count :strategy
                               :plot-width 500
                               :plot-height 500))