In [None]:
# Enable the IPython autoreload extension in mode 2 so that edits made to
# imported project modules are picked up live, without restarting the kernel.
%load_ext autoreload 
%autoreload 2

In [None]:
import json
from privacypacking.utils.utils import load_logs, global_metrics
import pandas as pd
from experiments.ray.analysis import load_tasks, load_ray_experiment, load_latest_ray_experiment, load_latest_scheduling_results, load_latest_scheduling_results, load_latest_ray_experiment, load_scheduling_queue
import plotly.express as px
from privacypacking.budget.curves import  LaplaceCurve, GaussianCurve, SubsampledGaussianCurve
from privacypacking.budget import Budget, Task, Block
from privacypacking.schedulers.metrics import OverflowRelevance, FlatRelevance
from privacypacking.budget.block_selection import RandomBlocks
from privacypacking.utils.plot import plot_budgets
import yaml
from pathlib import Path
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np

In [None]:
from omegaconf import OmegaConf
from pathlib import Path
from collections import defaultdict

In [None]:
# Reference budget shared by the cells below: every task curve is divided by
# this budget's epsilon at the matching alpha to obtain a normalized demand.
block = Budget.from_epsilon_delta(
    delta=1e-5,
    epsilon=10,
)

In [None]:
# Bare expression: the cell output shows the repr of the reference budget.
block

In [None]:
def load_task_dir(path: str) -> pd.DataFrame:
    """Collect the RDP curves of every task stored as YAML in a directory.

    Each ``*.yaml`` file directly under ``path`` is loaded with OmegaConf and
    is read through its ``alphas`` and ``rdp_epsilons`` entries, which are
    iterated in parallel. Every (alpha, epsilon) pair becomes one row, and the
    epsilon is also divided by ``block.epsilon(alpha)``, where ``block`` is the
    notebook-level reference budget. Pairs for which ``block.epsilon(alpha)``
    is not strictly positive are skipped, which also keeps the division well
    defined.

    Args:
        path: Directory containing one YAML file per task.

    Returns:
        A long-format DataFrame with the columns ``alphas``, ``rdp_epsilons``,
        ``normalized_epsilons`` and ``task`` (the YAML file name). The columns
        exist even when no row was produced, so a later ``groupby("task")``
        does not fail on an empty or mistyped directory.
    """
    columns = ["alphas", "rdp_epsilons", "normalized_epsilons", "task"]
    rows = []
    # glob() yields files in filesystem order; sorting makes the row order
    # (and therefore the legend order of the plots) reproducible.
    for curve_file in sorted(Path(path).glob("*.yaml")):
        d = OmegaConf.load(curve_file)
        for alpha, epsilon in zip(d["alphas"], d["rdp_epsilons"]):
            # Look the capacity up once: it is both the filter and the divisor.
            capacity = block.epsilon(alpha)
            if capacity > 0:
                rows.append(
                    {
                        "alphas": alpha,
                        "rdp_epsilons": epsilon,
                        "normalized_epsilons": epsilon / capacity,
                        "task": curve_file.name,
                    }
                )
    # Passing `columns` fixes the schema even when `rows` is empty.
    return pd.DataFrame(rows, columns=columns)

In [None]:
def sigma_range(
    sigma_min: float = 0.000001,
    sigma_max: float = 10,
    num: int = 30,
) -> pd.DataFrame:
    """Build the curves of a sweep of ``GaussianCurve`` objects over sigma.

    ``num`` values of sigma are taken evenly spaced between ``sigma_min`` and
    ``sigma_max`` (both included, via ``np.linspace``). For each sigma a
    ``GaussianCurve`` is created and its ``alphas`` / ``epsilons`` attributes
    are iterated in parallel. Every (alpha, epsilon) pair becomes one row, and
    the epsilon is also divided by ``block.epsilon(alpha)``, where ``block`` is
    the notebook-level reference budget. Pairs for which
    ``block.epsilon(alpha)`` is not strictly positive are skipped, which also
    keeps the division well defined.

    Args:
        sigma_min: Smallest sigma of the sweep.
        sigma_max: Largest sigma of the sweep.
        num: Number of sigma values in the sweep.

    Returns:
        A long-format DataFrame with the columns ``alphas``, ``rdp_epsilons``,
        ``normalized_epsilons`` and ``task`` (the sigma of the curve). The
        columns exist even when no row was produced.
    """
    columns = ["alphas", "rdp_epsilons", "normalized_epsilons", "task"]
    rows = []
    for sigma in np.linspace(sigma_min, sigma_max, num):
        gaussian = GaussianCurve(sigma=sigma)
        for alpha, epsilon in zip(gaussian.alphas, gaussian.epsilons):
            # Look the capacity up once: it is both the filter and the divisor.
            capacity = block.epsilon(alpha)
            if capacity > 0:
                rows.append(
                    {
                        "alphas": alpha,
                        "rdp_epsilons": epsilon,
                        "normalized_epsilons": epsilon / capacity,
                        "task": sigma,
                    }
                )
    # Passing `columns` fixes the schema even when `rows` is empty.
    return pd.DataFrame(rows, columns=columns)

In [None]:
# Choose the curves to analyze. The commented-out lines load task curves from
# YAML directories (absolute paths specific to one machine); the active line
# uses the Gaussian sigma sweep instead.
# df = load_task_dir("/home/pierre/privacypacking/data/mixed_curves/tasks")
# df = load_task_dir("/home/pierre/privacypacking/data/privatekube_event_g0.0_l0.5_p=grid/tasks")
df = sigma_range()

In [None]:
# For every task, keep the single row whose normalized epsilon is the lowest;
# the alpha of that row is the task's "best alpha".
normalized_by_task = df.groupby("task")["normalized_epsilons"]
indx = normalized_by_task.idxmin()
best_alpha = df.loc[indx]

In [None]:
# Normalized epsilon as a function of alpha, one line per task, with both axes
# on a logarithmic scale. The title lets the figure stand alone when the
# notebook is skimmed, like the scatter plot of the best alphas.
px.line(
    df,
    x="alphas",
    y="normalized_epsilons",
    color="task",
    log_y=True,
    log_x=True,
    title="Normalized epsilon per alpha for each task",
)

In [None]:
# One marker per task, placed at the alpha where its normalized epsilon is the
# lowest; both axes use a logarithmic scale.
px.scatter(
    data_frame=best_alpha,
    title="Epsilon for the best alpha of each task",
    x="alphas",
    y="normalized_epsilons",
    color="task",
    log_x=True,
    log_y=True,
)