In [169]:
# Enable IPython's autoreload extension so edits to imported project modules are picked up live.
%load_ext autoreload 
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [170]:
import json
from privacypacking.utils.utils import load_logs, global_metrics
import pandas as pd
from experiments.ray.analysis import load_ray_experiment, load_latest_ray_experiment, load_latest_scheduling_results, load_latest_scheduling_results
import plotly.express as px
from privacypacking.budget.curves import  LaplaceCurve, GaussianCurve, SubsampledGaussianCurve
from privacypacking.budget import Budget, Task, Block
from privacypacking.schedulers.metrics import OverflowRelevance, FlatRelevance
from privacypacking.budget.block_selection import RandomBlocks
from privacypacking.utils.plot import plot_budgets
import yaml
from pathlib import Path
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np

In [171]:
# Load the most recent scheduling results into `df`; the call prints the path of each log file it reads.
# NOTE(review): `alphas=True` presumably yields one row per (task, block, alpha) — confirm in experiments.ray.analysis.
df = load_latest_scheduling_results(alphas=True)

/home/pierre/privacypacking/logs/exp_1119-103558/time_based_budget_unlocking_DominantShares/1119-103603_c2f0f5.json
/home/pierre/privacypacking/logs/exp_1119-103558/time_based_budget_unlocking_BatchOverflowRelevance/1119-103602_af29ea.json
/home/pierre/privacypacking/logs/exp_1119-103558/time_based_budget_unlocking_DynamicFlatRelevance/1119-103603_30e8dc.json
/home/pierre/privacypacking/logs/exp_1119-103558/time_based_budget_unlocking_FlatRelevance/1119-103603_b52fdf.json
/home/pierre/privacypacking/logs/exp_1119-103558/time_based_budget_unlocking_Fcfs/1119-103603_076d2b.json


In [172]:
# Build a lookup from a task's largest RDP epsilon to its task name.
#   key   = last entry of the task's `rdp_epsilons` list, formatted with 3 decimals
#   value = stem of the YAML file the task was read from (e.g. "gaussian_5.0")
# NOTE(review): two task files whose last epsilon formats to the same 3-decimal
# string would silently overwrite each other; iterating in sorted order at least
# makes the winner (and the dict order) reproducible across runs.
maxeps = {}
for task_file in sorted(Path("/home/pierre/privacypacking/data/mixed_curves_large/tasks").glob("*.yaml")):
    # Use a context manager so the file handle is closed as soon as it is parsed
    # (the bare `task_file.open("r")` left one handle open per task file).
    with task_file.open("r") as task_stream:
        task_dict = yaml.safe_load(task_stream)
    maxeps[f"{task_dict['rdp_epsilons'][-1]:.3f}"] = task_file.stem
maxeps

{'34.083': 'gaussian_5.0',
 '12.270': 'gaussian_3.0',
 '1.989': 'laplace_2.0',
 '5.453': 'gaussian_2.0',
 '1611676.972': 'subsampled_gaussian_5.0',
 '2.989': 'laplace_3.0',
 '679315.804': 'subsampled_gaussian_2.0',
 '3.989': 'laplace_4.0',
 '1085676.505': 'subsampled_gaussian_3.0',
 '4.989': 'laplace_5.0',
 '1385079.754': 'subsampled_gaussian_4.0',
 '21.813': 'gaussian_4.0'}

In [173]:
def get_task_name(s):
    """Turn an ``nblocks_maxeps`` key into a readable task label.

    `s` is split on "-" into exactly two pieces. The first piece is kept as is;
    the second is looked up in the module-level `maxeps` dict and replaced by
    the value found there.

    Raises ValueError when `s` does not contain exactly one "-", and KeyError
    when the second piece is not a key of `maxeps`.
    """
    block_part, eps_key = s.split("-")
    task_stem = maxeps[eps_key]
    return "{}-{}".format(block_part, task_stem)

In [174]:
# Add a readable `task` column by passing every `nblocks_maxeps` value through get_task_name.
# Note: this mutates `df` in place, so the cell that loads `df` must be re-run to get the raw frame back.
df["task"] = df["nblocks_maxeps"].apply(get_task_name)

In [175]:
# Allocation summary per (task, metric).
#   n_allocated = sum of the `allocated` flags
#   total       = number of non-null `allocated` entries
#   n_rejected  = total - n_allocated
# Each (id, metric) pair is counted once: duplicates on those two columns are
# dropped before aggregating.
# Named aggregation yields flat column names directly. The previous
# `agg([np.sum, "count"])` produced MultiIndex columns that then had to be
# copied and dropped, which triggered pandas' "dropping on a non-lexsorted
# multi-index" PerformanceWarning.
grouped = (
    df[["id", "task", "allocated", "metric"]]
    .drop_duplicates(subset=["id", "metric"])
    .groupby(["task", "metric"])
    .agg(n_allocated=("allocated", "sum"), total=("allocated", "count"))
    .reset_index()
)
grouped["n_rejected"] = grouped["total"] - grouped["n_allocated"]


dropping on a non-lexsorted multi-index without a level parameter may impact performance.



In [176]:
# Total number of allocated tasks per scheduler metric, i.e. `n_allocated` summed over all task types.
grouped[["metric", "n_allocated"]].groupby("metric").sum().reset_index()

Unnamed: 0,metric,n_allocated
,,
0.0,BatchOverflowRelevance,120.0
1.0,DominantShares,127.0
2.0,DynamicFlatRelevance,120.0
3.0,Fcfs,71.0
4.0,FlatRelevance,141.0


In [177]:
# One bar per scheduler metric; bar height is `n_allocated` summed over all task types.
px.bar(
    data_frame=(
        grouped[["metric", "n_allocated"]]
        .groupby("metric")
        .sum()
        .reset_index()
    ),
    x="metric",
    y="n_allocated",
    title="Total number of tasks allocated per scheduler",
    width=1000,
)

In [178]:
# Allocated vs. rejected counts for every task type, one facet per scheduler
# metric, laid out two facets per row.
px.bar(
    data_frame=grouped,
    x="task",
    y=["n_allocated", "n_rejected"],
    facet_col="metric",
    facet_col_wrap=2,
    height=600,
    title="Type of task allocated per scheduler",
)

In [179]:
# Sanity check for a fair comparison: every scheduler must be given the same
# workload. All demands (allocated or not) are plotted per block/alpha with one
# facet per metric, so the facets can be compared against each other.
px.bar(
    data_frame=df,
    x="blockid_alpha",
    y="normalized_epsilon",
    color="task",
    facet_col="metric",
    facet_col_wrap=1,
    range_y=[0, 20],
    height=500,
    title="All demands per block and alpha (workload)",
)

In [180]:
# Preview the first rows of `df` to see which columns are available for plotting.
df.head()

Unnamed: 0,alpha,blockid_alpha,epsilon,normalized_epsilon,id,hashed_id,allocated,scheduler,total_blocks,n_blocks,creation_time,scheduling_time,scheduling_delay,block,block_selection,totalblocks_scheduler_selection,metric,nblocks_maxeps,task
25635,0,000-00,0.0,0.0,1,66,True,time_based_budget_unlocking,30,1,0.021552,10.0,9.978448,0,LatestBlocksFirst,30-time_based_budget_unlocking-LatestBlocksFirst,BatchOverflowRelevance,1-1.989,1-laplace_2.0
38730,0,000-00,0.0,0.0,1,66,True,time_based_budget_unlocking,30,1,0.021552,10.0,9.978448,0,LatestBlocksFirst,30-time_based_budget_unlocking-LatestBlocksFirst,DynamicFlatRelevance,1-1.989,1-laplace_2.0
0,0,000-00,0.0,0.0,1,66,False,time_based_budget_unlocking,30,1,0.021552,,,0,LatestBlocksFirst,30-time_based_budget_unlocking-LatestBlocksFirst,DominantShares,1-1.989,1-laplace_2.0
39285,0,000-00,0.0,0.0,1,66,False,time_based_budget_unlocking,30,1,0.021552,,,0,LatestBlocksFirst,30-time_based_budget_unlocking-LatestBlocksFirst,FlatRelevance,1-1.989,1-laplace_2.0
52380,0,000-00,0.0,0.0,1,66,False,time_based_budget_unlocking,30,1,0.021552,,,0,LatestBlocksFirst,30-time_based_budget_unlocking-LatestBlocksFirst,Fcfs,1-1.989,1-laplace_2.0


In [182]:
# Animated view of the demands that were actually allocated: one facet per
# scheduler metric, one animation frame per distinct `scheduling_time`.
allocated_demands = df.query("allocated")

# Plotly Express orders animation frames by first appearance in the data, and
# `df` is not guaranteed to be ordered by `scheduling_time`. Passing the sorted
# distinct times as an explicit category order makes the animation play
# chronologically, without reordering the rows (so facet and legend order are
# unaffected).
scheduling_times = sorted(allocated_demands["scheduling_time"].dropna().unique())

px.bar(
    allocated_demands,
    x="blockid_alpha",
    y="normalized_epsilon",
    range_y=[0, 3],
    color="task",
    facet_col="metric",
    facet_col_wrap=1,
    height=1200,
    title="Allocated demands for each scheduler",
    animation_frame="scheduling_time",
    category_orders={"scheduling_time": scheduling_times},
)