In [None]:
import json
from privacypacking.utils.utils import load_logs, global_metrics
import pandas as pd
from experiments.ray.analysis import load_ray_experiment, load_latest_ray_experiment, load_latest_scheduling_results, load_latest_scheduling_results, load_latest_ray_experiment
import plotly.express as px
from privacypacking.budget.curves import  LaplaceCurve, GaussianCurve, SubsampledGaussianCurve
from privacypacking.budget import Budget, Task, Block
from privacypacking.schedulers.metrics import OverflowRelevance, FlatRelevance
from privacypacking.budget.block_selection import RandomBlocks
from privacypacking.utils.plot import plot_budgets
import yaml
from pathlib import Path
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np
from experiments.ray.analysis import get_percentiles

In [None]:
# m = 1000
# T from 0.01 to 50
# Seed 1
# Profit grid.
# Show profit and delay
# Unlocking, blocks.

# Load one Ray experiment and plot realized profit against the scheduling
# step size T (rows with T <= 10 only, log-scaled x axis), one line per
# scheduler metric. The figure is the cell's last expression, so it is shown.
rdf = load_ray_experiment(
    Path("/home/pierre/privacypacking/logs/ray/run_and_report_2021-12-09_21-39-48")
)
px.line(
    data_frame=rdf.query("T <= 10").sort_values(by="T"),
    title="Realized profit depending on the scheduling step size<br><sup>Online PrivateKube workload, 1000 tasks per block on average, lifetime = 10 blocks</sup>",
    x="T",
    y="realized_profit",
    color="scheduler_metric",
    log_x=True,
    # range_y=[0,1000],
    width=800,
    height=600,
)

In [None]:
def map_metric_to_id(row):
    """Return the integer series id used in the gnuplot CSV for a metric name.

    Args:
        row: The scheduler metric name. Despite the parameter name, the value
            is used directly as the lookup key (the function is applied to the
            `scheduler_metric` column element by element).

    Returns:
        The integer id associated with the metric name.

    Raises:
        KeyError: If the name is not one of the known metrics (e.g. "Fcfs").
    """
    # Same table as the other export cells of this notebook, including
    # "Original DPF", so that a given metric always maps to the same id no
    # matter which cell's definition is currently live in the kernel.
    d = {
        "DominantShares": 0,
        "DynamicFlatRelevance": 1,
        "BatchOverflowRelevance": 2,
        "SoftKnapsack": 3,
        "Original DPF": 4,
    }
    return d[row]

# Export the profit-vs-T series for gnuplot: drop the Fcfs rows, tag every row
# with the numeric id of its scheduler metric, keep only the exported columns
# and sort by (id, T).
# The id column is added with assign() so that a new frame is built, instead
# of writing a column into the result of query() (chained assignment, which
# raises SettingWithCopyWarning).
gnuplot_df = (
    rdf.query("T <= 10 and scheduler_metric not in ['Fcfs']")
    .assign(id=lambda frame: frame.scheduler_metric.apply(map_metric_to_id))
    .loc[:, ["T", "realized_profit", "id", "n_tasks", "scheduler", "scheduler_metric"]]
    .sort_values(["id", "T"])
)
gnuplot_df.to_csv("/home/pierre/privacypacking-paper/results/privekube_workload/simulator_sk/privekube_workload_profits.csv", index=False)

In [None]:
# Reload the experiment, derive per-row delay statistics from the list of
# scheduling delays of the allocated tasks, and plot the average delay
# against the scheduling step size T (T <= 10, log-scaled x axis).
rdf = load_ray_experiment(Path("/home/pierre/privacypacking/logs/ray/run_and_report_2021-12-09_21-39-48"))

# Mean and 95th percentile of each row's delay list. `avg_delay` is also read
# by the export cell that follows; `p95` is not used in this cell.
rdf["avg_delay"] = rdf.allocated_tasks_scheduling_delays.apply(lambda delays: np.mean(delays))
rdf["p95"] = rdf.allocated_tasks_scheduling_delays.apply(lambda delays: np.percentile(delays, 95))

fig = px.line(
    rdf.query("T <= 10").sort_values(["T", "scheduler_metric"]),
    x="T",
    y="avg_delay",
    color="scheduler_metric",
    log_x=True,
    width=1200,
    height=1000,
    # range_y=[0,15_000],
    facet_col="metric_recomputation_period",
    facet_row="data_path",
    # NOTE(review): plotly documents facet_col_wrap as forced to 0 when
    # facet_row is set - confirm whether this argument is still wanted.
    facet_col_wrap=2,
    title="Average delay depending on the scheduling step size<br><sup>Online PrivateKube workload, 1000 tasks per block on average, lifetime = 10 blocks</sup>",
)
fig

In [None]:
def map_metric_to_id(row):
    """Map a scheduler metric name to the numeric id written to the CSV export.

    Args:
        row: The metric name itself (one element of the `scheduler_metric`
            column), used as the dictionary key.

    Returns:
        The integer id for that metric.

    Raises:
        KeyError: If the metric name has no id (e.g. "Fcfs").
    """
    # "Original DPF" is listed here as well so that this definition agrees
    # with the tables used by the other export cells of the notebook.
    d = {
        "DominantShares": 0,
        "DynamicFlatRelevance": 1,
        "BatchOverflowRelevance": 2,
        "SoftKnapsack": 3,
        "Original DPF": 4,
    }
    return d[row]

# Export the average-delay-vs-T series for gnuplot. Requires the `avg_delay`
# column, which this notebook computes on `rdf` in the preceding plot cell.
# assign() creates the id column on a new frame rather than mutating the
# result of query(), which avoids pandas' SettingWithCopyWarning.
gnuplot_df = (
    rdf.query("T <= 10 and scheduler_metric not in ['Fcfs']")
    .assign(id=lambda frame: frame.scheduler_metric.apply(map_metric_to_id))
    .loc[:, ["T", "avg_delay", "id", "n_tasks", "scheduler", "scheduler_metric"]]
    .sort_values(["id", "T"])
)
gnuplot_df.to_csv("/home/pierre/privacypacking-paper/results/privekube_workload/simulator_sk/privekube_workload_profits_T_delay.csv", index=False)

In [None]:
# Online, increasing load. T = 1 fixed. Profits.
# Running on ds9 at 22:53
rdf = load_ray_experiment(Path("/home/pierre/privacypacking/logs/ray/run_and_report_2021-12-09_22-52-48"))

# Baseline rows: one data_path of another experiment, relabelled as
# "Original DPF". assign() returns a new frame, so the label is not written
# into the result of query() (which would raise SettingWithCopyWarning).
dpf = (
    load_ray_experiment(Path("/home/pierre/privacypacking/logs/ray/run_and_report_2021-12-10_09-18-06"))
    .query("data_path == 'privatekube_event_g0.0_l0.5_p=grid'")
    .assign(scheduler_metric="Original DPF")
)

# Later cells of the notebook reuse this combined frame through `rdf`.
rdf = pd.concat([rdf, dpf])

px.line(
    rdf.sort_values(["mean_task_per_block", "scheduler_metric"]),
    x="mean_task_per_block",
    y="realized_profit",
    color="scheduler_metric",
    # log_x=True,
    width=800,
    height=600,
    # range_y=[0,1200],
    facet_col="normalize_by",
    facet_col_wrap=1,
    title="Realized profit depending on the number of tasks<br><sup>Online PrivateKube workload, T = 1, lifetime = 10 blocks</sup>",
)

In [None]:
def map_metric_to_id(row):
    """Translate a scheduler metric name into its integer series id.

    `row` is the metric name itself and is used as the lookup key; a name
    that is not listed raises KeyError.
    """
    # The position of a name in this tuple is its id.
    ordered_metrics = (
        "DominantShares",
        "DynamicFlatRelevance",
        "BatchOverflowRelevance",
        "SoftKnapsack",
        "Original DPF",
    )
    metric_ids = {name: position for position, name in enumerate(ordered_metrics)}
    return metric_ids[row]

# Export the profit-vs-load series for gnuplot: drop the Fcfs rows, add the
# numeric metric id, keep the exported columns and sort by
# (id, mean_task_per_block).
# assign() is used so the id column is created on a new frame instead of
# being written into the result of query() (SettingWithCopyWarning).
gnuplot_df = (
    rdf.query("scheduler_metric not in ['Fcfs']")
    .assign(id=lambda frame: frame.scheduler_metric.apply(map_metric_to_id))
    .loc[:, ["mean_task_per_block", "realized_profit", "id", "n_tasks", "scheduler", "scheduler_metric"]]
    .sort_values(["id", "mean_task_per_block"])
)
gnuplot_df.to_csv("/home/pierre/privacypacking-paper/results/privekube_workload/simulator_sk/privekube_workload_profits_ntasks.csv", index=False)

In [None]:
# TODO: add delays too now (update DPF too)
# rdf = load_ray_experiment(Path("/home/pierre/privacypacking/logs/ray/run_and_report_2021-12-09_22-52-48"))

# This cell does not load anything itself: it works on whatever frame `rdf`
# currently refers to in the kernel.
# Mean and 95th percentile of each row's list of scheduling delays.
rdf["avg_delay"] = rdf.allocated_tasks_scheduling_delays.apply(lambda delays: np.mean(delays))
rdf["p95"] = rdf.allocated_tasks_scheduling_delays.apply(lambda delays: np.percentile(delays, 95))

fig = px.line(
    rdf.sort_values(["mean_task_per_block", "scheduler_metric"]),
    x="mean_task_per_block",
    y="avg_delay",
    color="scheduler_metric",
    # log_x=True,
    width=1200,
    height=1000,
    # range_y=[0,15_000],
    facet_col="metric_recomputation_period",
    facet_row="data_path",
    # NOTE(review): plotly documents facet_col_wrap as forced to 0 when
    # facet_row is set - confirm whether this argument is still wanted.
    facet_col_wrap=2,
    # The x axis sweeps the load, so the subtitle states the fixed setting
    # (T = 1) rather than a fixed number of tasks per block.
    title="Average delay depending on the load size<br><sup>Online PrivateKube workload, T = 1, lifetime = 10 blocks</sup>",
)
fig

In [None]:
def map_metric_to_id(row):
    """Look up the integer id that stands for a scheduler metric name.

    The argument is the metric name (one value of the `scheduler_metric`
    column). Names without an entry raise KeyError.
    """
    names = [
        "DominantShares",
        "DynamicFlatRelevance",
        "BatchOverflowRelevance",
        "SoftKnapsack",
        "Original DPF",
    ]
    # Pair every name with its index: ids run from 0 to 4 in list order.
    lookup = dict(zip(names, range(len(names))))
    return lookup[row]

# Export the average-delay-vs-load series for gnuplot. Requires the
# `avg_delay` column, which this notebook computes on `rdf` in the preceding
# plot cell.
# assign() builds the id column on a new frame rather than writing into the
# result of query(), which avoids pandas' SettingWithCopyWarning.
gnuplot_df = (
    rdf.query("scheduler_metric not in ['Fcfs']")
    .assign(id=lambda frame: frame.scheduler_metric.apply(map_metric_to_id))
    .loc[:, ["mean_task_per_block", "avg_delay", "id", "n_tasks", "scheduler", "scheduler_metric"]]
    .sort_values(["id", "mean_task_per_block"])
)
gnuplot_df.to_csv("/home/pierre/privacypacking-paper/results/privekube_workload/simulator_sk/privekube_workload_profits_ntasks_delay.csv", index=False)


In [None]:
# Show the distinct mean_task_per_block values present in this experiment.
load_ray_experiment(
    Path("/home/pierre/privacypacking/logs/ray/run_and_report_2021-12-10_17-34-55")
)["mean_task_per_block"].unique()

In [None]:
# TODO: add a line for DPF original (merge)
rdf = load_ray_experiment(
    Path("/home/pierre/privacypacking/logs/ray/run_and_report_2021-12-10_09-18-06")
)

# rdf = pd.concat([rdf, load_ray_experiment(Path("/home/pierre/privacypacking/logs/ray/run_and_report_2021-12-10_17-34-55"))])

# Mean of each row's list of scheduling delays.
rdf["avg_delay"] = rdf.allocated_tasks_scheduling_delays.apply(
    lambda delays: np.mean(delays)
)

# Quick look at the distinct values present in the loaded frame.
print(rdf["avg_delay"].unique())
print(rdf["mean_task_per_block"].unique())
print(rdf["data_path"].unique())

# TODO: gather data from C2 too
#
# Realized profit against the load, one facet per data_path.
px.line(
    data_frame=rdf.sort_values(by=["mean_task_per_block", "scheduler_metric"]),
    title="Realized profit depending on the number of tasks<br><sup>Online PrivateKube workload, T = 1, lifetime = 10 blocks</sup>",
    x="mean_task_per_block",
    y="realized_profit",
    color="scheduler_metric",
    facet_col="data_path",
    facet_col_wrap=1,
    # log_x=True,
    # range_y=[0,1200],
    width=800,
    height=600,
)

In [None]:
# Online, increasing load. T = 1 fixed. No profits.
# Running on ds9 at 22:53
rdf = load_ray_experiment(Path("/home/pierre/privacypacking/logs/ray/run_and_report_2021-12-09_22-55-05"))
# Drop the SoftKnapsack rows of this run before merging in the other runs.
rdf = rdf[rdf.scheduler_metric != "SoftKnapsack"]

# Baseline rows: one data_path of another experiment, relabelled as
# "Original DPF". assign() returns a new frame, so the label is not written
# into the result of query() (which would raise SettingWithCopyWarning).
dpf = (
    load_ray_experiment(Path("/home/pierre/privacypacking/logs/ray/run_and_report_2021-12-10_09-18-06"))
    .query("data_path == 'privatekube_event_g0.0_l0.5_p=1'")
    .assign(scheduler_metric="Original DPF")
)

# Later cells of the notebook reuse this combined frame through `rdf`.
rdf = pd.concat(
    [
        rdf,
        load_ray_experiment(Path("/home/pierre/privacypacking/logs/ray/run_and_report_2021-12-10_12-26-26")),
        dpf,
    ]
)

px.line(
    rdf.sort_values(["mean_task_per_block", "scheduler_metric"]),
    x="mean_task_per_block",
    y="n_allocated_tasks",
    color="scheduler_metric",
    # log_x=True,
    width=800,
    height=600,
    # range_y=[0,1200],
    facet_col="normalize_by",
    facet_col_wrap=1,
    title="Number of allocated tasks (no profits) depending on the number of tasks<br><sup>Online PrivateKube workload,  T = 1, lifetime = 10 blocks</sup>",
)

In [None]:
def map_metric_to_id(row):
    """Return the numeric id associated with a scheduler metric name.

    `row` holds the metric name and serves as the dictionary key; an unlisted
    name raises KeyError.
    """
    id_by_metric = dict(
        [
            ("DominantShares", 0),
            ("DynamicFlatRelevance", 1),
            ("BatchOverflowRelevance", 2),
            ("SoftKnapsack", 3),
            ("Original DPF", 4),
        ]
    )
    return id_by_metric[row]

# Export the no-profit load sweep for gnuplot: drop the Fcfs rows, add the
# numeric metric id, keep the exported columns and sort by
# (id, mean_task_per_block).
# assign() is used so the id column is created on a new frame instead of
# being written into the result of query() (SettingWithCopyWarning).
# NOTE(review): the matching plot shows n_allocated_tasks, while this export
# writes realized_profit - confirm that this is the intended column for the
# no-profit CSV.
gnuplot_df = (
    rdf.query("scheduler_metric not in ['Fcfs']")
    .assign(id=lambda frame: frame.scheduler_metric.apply(map_metric_to_id))
    .loc[:, ["mean_task_per_block", "realized_profit", "id", "n_tasks", "scheduler", "scheduler_metric"]]
    .sort_values(["id", "mean_task_per_block"])
)
gnuplot_df.to_csv("/home/pierre/privacypacking-paper/results/privekube_workload/simulator_sk/privekube_workload_noprofits_ntasks.csv", index=False)

In [None]:
# This cell does not load anything itself: it works on whatever frame `rdf`
# currently refers to in the kernel.
# Mean and 95th percentile of each row's list of scheduling delays.
rdf["avg_delay"] = rdf.allocated_tasks_scheduling_delays.apply(lambda delays: np.mean(delays))
rdf["p95"] = rdf.allocated_tasks_scheduling_delays.apply(lambda delays: np.percentile(delays, 95))

fig = px.line(
    rdf.sort_values(["mean_task_per_block", "scheduler_metric"]),
    x="mean_task_per_block",
    y="avg_delay",
    color="scheduler_metric",
    # log_x=True,
    width=1200,
    height=1000,
    # range_y=[0,15_000],
    facet_col="metric_recomputation_period",
    facet_row="data_path",
    # NOTE(review): plotly documents facet_col_wrap as forced to 0 when
    # facet_row is set - confirm whether this argument is still wanted.
    facet_col_wrap=2,
    # The x axis sweeps the load, so the subtitle states the fixed setting
    # (T = 1) rather than a fixed number of tasks per block.
    title="Average delay depending on the load size<br><sup>Online PrivateKube workload, T = 1, lifetime = 10 blocks</sup>",
)
fig

In [None]:
def map_metric_to_id(row):
    """Convert a scheduler metric name to the id used in the exported CSV.

    The metric name is passed in as `row`. A name that has no id raises
    KeyError.
    """
    known_metrics = "DominantShares DynamicFlatRelevance BatchOverflowRelevance SoftKnapsack".split()
    # "Original DPF" contains a space, so it is appended separately.
    known_metrics.append("Original DPF")
    ids = {metric: index for index, metric in enumerate(known_metrics)}
    return ids[row]

# Export the no-profit average-delay-vs-load series for gnuplot. Requires the
# `avg_delay` column, which this notebook computes on `rdf` in the preceding
# plot cell.
# assign() builds the id column on a new frame rather than writing into the
# result of query(), which avoids pandas' SettingWithCopyWarning.
gnuplot_df = (
    rdf.query("scheduler_metric not in ['Fcfs']")
    .assign(id=lambda frame: frame.scheduler_metric.apply(map_metric_to_id))
    .loc[:, ["mean_task_per_block", "avg_delay", "id", "n_tasks", "scheduler", "scheduler_metric"]]
    .sort_values(["id", "mean_task_per_block"])
)
gnuplot_df.to_csv("/home/pierre/privacypacking-paper/results/privekube_workload/simulator_sk/privekube_workload_noprofits_ntasks_delay.csv", index=False)


In [None]:
# Temperature tuning
# Plot the number of allocated tasks against the load (mean_task_per_block),
# one line per temperature value, one facet per normalize_by value.
rdf = load_ray_experiment(Path("/home/pierre/privacypacking/logs/ray/run_and_report_2021-12-10_09-11-01"))
px.line(
    rdf.sort_values(["mean_task_per_block", "temperature"]),
    x="mean_task_per_block",
    y="n_allocated_tasks",
    color="temperature",
    # log_x=True,
    width=800,
    height=600,
    # range_y=[0,1200],
    facet_col="normalize_by",
    facet_col_wrap=1,
    # The x axis is the load, not the scheduling step size (the subtitle
    # fixes T = 1), so the title names the number of tasks.
    title="Number of allocated tasks (no profits) depending on the number of tasks<br><sup>Online PrivateKube workload,  T = 1, lifetime = 10 blocks</sup>",
)
