In [None]:
%load_ext autoreload 
%autoreload 2

In [None]:
import json
from privacypacking.utils.utils import load_logs, global_metrics
import pandas as pd
from experiments.ray.analysis import load_ray_experiment, load_latest_ray_experiment, load_latest_scheduling_results, load_latest_scheduling_results
import plotly.express as px
from privacypacking.budget.curves import  LaplaceCurve, GaussianCurve, SubsampledGaussianCurve
from privacypacking.budget import Budget, Task, Block
from privacypacking.schedulers.metrics import OverflowRelevance, FlatRelevance
from privacypacking.budget.block_selection import RandomBlocks
from privacypacking.utils.plot import plot_budgets
import yaml
from pathlib import Path
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np

In [None]:
# Load the most recent scheduling results; every later cell reads from `df`.
# NOTE(review): `alphas=True` presumably yields one row per (task, block, alpha)
# demand, given the `blockid_alpha` column plotted below — confirm in the loader.
df = load_latest_scheduling_results(alphas=True)

In [None]:
# Number of distinct values present in the `allocated` column.
df.allocated.nunique()

In [None]:
# Build a lookup from the last entry of each task's `rdp_epsilons` list
# (formatted with three decimals) to the stem of the YAML file describing it.
# `get_task_name` uses this to turn "<nblocks>-<eps>" labels back into task names.
#
# NOTE(review): the directory is an absolute, machine-specific path; running the
# notebook elsewhere requires editing it.
maxeps = {}
# Sorted iteration makes the result deterministic: `glob` yields files in an
# unspecified order, and two tasks whose last epsilon formats to the same
# three-decimal key would otherwise overwrite each other arbitrarily.
for task_file in sorted(Path("/home/pierre/privacypacking/data/mixed_curves/tasks").glob("*.yaml")):
    # The context manager closes the handle as soon as the file is parsed
    # instead of leaking one open file per task definition.
    with task_file.open("r") as task_stream:
        task_dict = yaml.safe_load(task_stream)
    maxeps[f"{task_dict['rdp_epsilons'][-1]:.3f}"] = task_file.stem
maxeps

In [None]:
def get_task_name(s):
    """Translate a "<prefix>-<eps key>" label into "<prefix>-<task name>".

    The part after the dash is looked up in the module-level `maxeps` mapping
    and replaced by the value found there; the part before the dash is kept.

    Raises:
        ValueError: if `s` does not contain exactly one "-".
        KeyError: if the part after the dash is not a key of `maxeps`.
    """
    prefix, eps_key = s.split("-")
    return "{}-{}".format(prefix, maxeps[eps_key])

In [None]:
# Add a readable `task` label by translating each `nblocks_maxeps` value
# with `get_task_name` (which relies on the `maxeps` lookup).
df["task"] = df["nblocks_maxeps"].apply(get_task_name)

In [None]:
# Per (task, metric): how many task instances were allocated, submitted and rejected.
#
# Only one row per (id, metric) is kept before counting, so a task instance that
# appears on several rows of `df` is counted once per metric.
#
# Named aggregation with the string aggregators "sum"/"count" replaces the former
# `agg([np.sum, "count"])`: passing NumPy callables to `agg` is deprecated in
# recent pandas, and the old form also aggregated `id` needlessly and left
# MultiIndex columns behind. The resulting frame has flat columns:
# task, metric, n_allocated, total, n_rejected.
grouped = (
    df[["id", "task", "allocated", "metric"]]
    .drop_duplicates(subset=["id", "metric"])
    .groupby(["task", "metric"])
    .agg(
        n_allocated=("allocated", "sum"),   # allocated instances
        total=("allocated", "count"),       # all instances with a non-null flag
    )
    .reset_index()
)
grouped["n_rejected"] = grouped["total"] - grouped["n_allocated"]

In [None]:
# Total number of allocated tasks per metric, summed over all task types.
grouped[["metric", "n_allocated"]].groupby("metric").sum().reset_index()

In [None]:
# Bar chart of the per-metric totals: one bar per scheduler metric, whose height
# is `n_allocated` summed over every task type.
px.bar(
    grouped[["metric", "n_allocated"]].groupby("metric").sum().reset_index(),
    x="metric",
    y="n_allocated",
    title="Total number of tasks allocated per scheduler",
    width=1000,
)

In [None]:
# Allocated vs. rejected counts for each task type, one facet per scheduler
# metric (two facets per row).
px.bar(
    grouped,
    x="task",
    y=["n_allocated", "n_rejected"],
    title="Type of task allocated per scheduler",
    facet_col="metric",
    facet_col_wrap=2,
    height=600,
)

In [None]:
# Sanity check: the workload must be identical for every scheduler so that the
# comparison is fair. All demands (allocated or not) are drawn per block/alpha,
# coloured by task type, with one facet row per scheduler metric; the facets
# should look the same.
px.bar(
    df,
    x="blockid_alpha",
    y="normalized_epsilon",
    range_y=[0, 20],
    color="task",
    facet_col="metric",
    facet_col_wrap=1,
    height=500,
    title="All demands per block and alpha (workload)",
)

In [None]:
# Number of distinct values present in the `scheduling_time` column.
df.scheduling_time.nunique()

In [None]:
# df = df.sample(frac=1).reset_index(drop=True)

In [None]:
# Peek at the first rows to check the columns available at this point.
df.head()

In [None]:
# Label of the form "<task>#<id>"; used below both as the colour dimension of
# the final plot and as the key of `cached_colors`.
df["task_and_id"] = df["task"] + "#" + df["id"].apply(str)

In [None]:
class LazyMap:
    """Mapping-like stub that answers every lookup with the same colour entry.

    Only item access and `copy()` are implemented. Nothing in the visible
    cells instantiates this class.
    """

    def __getitem__(self, task_and_id):
        """Return a new ``[0.5, "rgb(165,0,38)"]`` list; the key is ignored."""
        position, rgb = 0.5, "rgb(165,0,38)"
        return [position, rgb]

    def copy(self):
        """Return this same instance rather than an actual copy."""
        return self
        
        

In [None]:
def get_color(task_and_id):
    """Return the colour entry used for a task label.

    Args:
        task_and_id: label of the form "<task>#<id>". Currently ignored, so
            every task gets the same entry.

    Returns:
        A new two-element list ``[0.5, "rgb(0,255,0)"]`` (pure green).

    The green channel was previously written as 256, which is outside the
    valid 0-255 range of an RGB component.

    NOTE(review): the result is used as a value of `color_discrete_map`, which
    normally expects plain colour strings; confirm that a ``[number, colour]``
    pair is really what is wanted here.
    """
    return [0.5, "rgb(0,255,0)"]

In [None]:
# Colour entry for every allocated task label, computed once up front and
# handed to the final plot as its `color_discrete_map`.
cached_colors = {
    label: get_color(label)
    for label in df.query("allocated").task_and_id
}

In [None]:
# Number of distinct allocated task labels that received a colour entry.
len(cached_colors)

In [None]:
# Allocated demands only, stacked per block/alpha in order of task creation,
# with one facet row per scheduler metric. Each task instance is its own colour
# group (`task_and_id`), coloured through the pre-computed `cached_colors`.
# NOTE(review): the x range `20 * 5` presumably stands for blocks x alphas —
# confirm against the experiment configuration.
px.bar(
    df.query("allocated").sort_values("creation_time"),
    x="blockid_alpha",
    y="normalized_epsilon",
    hover_name="id",
    range_y=[0, 3],
    range_x=[0, 20 * 5],
    color="task_and_id",
    facet_col="metric",
    facet_col_wrap=1,
    height=1200,
    title="Allocated demands for each scheduler",
    color_discrete_map=cached_colors,
)