In [None]:
%load_ext autoreload 
%autoreload 2

In [None]:
import json
from privacypacking.utils.utils import load_logs, global_metrics
import pandas as pd
from experiments.ray.analysis import load_tasks, load_ray_experiment, load_latest_ray_experiment, load_latest_scheduling_results, load_latest_scheduling_results, load_latest_ray_experiment, load_scheduling_queue
import plotly.express as px
from privacypacking.budget.curves import  LaplaceCurve, GaussianCurve, SubsampledGaussianCurve
from privacypacking.budget import Budget, Task, Block
from privacypacking.schedulers.metrics import OverflowRelevance, FlatRelevance
from privacypacking.budget.block_selection import RandomBlocks
from privacypacking.utils.plot import plot_budgets
import yaml
from pathlib import Path
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np

In [None]:
from omegaconf import OmegaConf
from pathlib import Path
from collections import defaultdict

In [None]:
# Reference block budget built from an (epsilon=10, delta=1e-8) guarantee.
# The curve-tabulating helpers in this notebook read this module-level `block`
# (via `block.epsilon(alpha)`) to filter alphas and to normalize task epsilons.
block = Budget.from_epsilon_delta(epsilon=10, delta=1e-8)

In [None]:
# Display the reference block budget as the cell output.
block

In [None]:
def load_task_dir(path: str) -> pd.DataFrame:
    """Load every `*.yaml` task curve found directly under `path` into one long DataFrame.

    Each file is read with `OmegaConf.load` and must expose two parallel
    sequences, `alphas` and `rdp_epsilons`. Only the alphas for which the
    module-level `block` budget is strictly positive are kept.

    Args:
        path: Directory containing the task files (non-recursive `*.yaml` glob).

    Returns:
        A DataFrame with one row per retained (task, alpha) pair and columns
        `alphas`, `rdp_epsilons`, `normalized_epsilons` (the task epsilon
        divided by the block epsilon at that alpha) and `task` (the file name).
        When nothing matches, the DataFrame is empty and has no columns.
    """
    dict_list = defaultdict(list)
    # Sort the matches: glob order follows the filesystem listing, which would
    # otherwise make row order (and plot legend order) differ between machines.
    for curve_file in sorted(Path(path).glob("*.yaml")):
        d = OmegaConf.load(curve_file)
        for alpha, epsilon in zip(d["alphas"], d["rdp_epsilons"]):
            # Look the block budget up once; it is used both as filter and divisor.
            block_epsilon = block.epsilon(alpha)
            if block_epsilon > 0:
                dict_list["alphas"].append(alpha)
                dict_list["rdp_epsilons"].append(epsilon)
                dict_list["normalized_epsilons"].append(epsilon / block_epsilon)
                dict_list["task"].append(curve_file.name)
    return pd.DataFrame(dict_list)

In [None]:
def sigma_range(min=0.01, max=100) -> pd.DataFrame:
    """Tabulate `LaplaceCurve` epsilons over a geometric sweep of noise scales.

    One curve is built for each of the 30 values of `np.geomspace(min, max, 30)`.
    For every alpha of the curve where the module-level `block` budget is
    strictly positive, one row is recorded.

    Args:
        min: Smallest noise scale of the sweep (name kept for caller
            compatibility even though it shadows the builtin).
        max: Largest noise scale of the sweep.

    Returns:
        A DataFrame with columns `alphas`, `rdp_epsilons`,
        `normalized_epsilons` (curve epsilon / block epsilon) and `task`
        (the noise scale the row belongs to).
    """
    columns = defaultdict(list)
    for noise in np.geomspace(min, max, 30):
        laplace = LaplaceCurve(laplace_noise=noise)
        for order, rdp_eps in zip(laplace.alphas, laplace.epsilons):
            capacity = block.epsilon(order)
            if capacity > 0:
                row = {
                    "alphas": order,
                    "rdp_epsilons": rdp_eps,
                    "normalized_epsilons": rdp_eps / capacity,
                    "task": noise,
                }
                for column, value in row.items():
                    columns[column].append(value)
    return pd.DataFrame(columns)

In [None]:
from autodp.mechanism_zoo import ExponentialMechanism, RandresponseMechanism, GaussianSVT_Mechanism
from privacypacking.budget.utils import ALPHAS

In [None]:
# NOTE(review): `m` is not referenced by any other cell visible in this notebook;
# presumably a scratch check that ExponentialMechanism can be instantiated.
# Confirm before removing.
m = ExponentialMechanism(eps=1.0)

In [None]:
def autodp_range(min=0.1, max=100, mechanism=GaussianSVT_Mechanism) -> pd.DataFrame:
    """Tabulate RDP epsilons of an autodp mechanism over a (sigma, k) grid.

    The grid is the product of 10 geometrically spaced values in `[min, max]`
    (passed to the mechanism as `sigma`) and `k` in `np.arange(1, 100, step=10)`;
    `c` is fixed to 100 and `rdp_c_1` to False. Each mechanism is queried with
    `get_RDP(alpha)` for every alpha in `ALPHAS`, and a row is kept when the
    module-level `block` budget at that alpha is strictly positive.

    Args:
        min: Smallest `sigma` of the sweep (name kept for caller compatibility
            even though it shadows the builtin).
        max: Largest `sigma` of the sweep.
        mechanism: Callable invoked as `mechanism(params={...}, rdp_c_1=False)`
            whose result exposes `get_RDP(alpha)`.

    Returns:
        A DataFrame with columns `alphas`, `rdp_epsilons`,
        `normalized_epsilons` (mechanism epsilon / block epsilon) and `task`
        (the `(sigma, k)` tuple the row belongs to).
    """
    columns = defaultdict(list)
    for noise in np.geomspace(min, max, 10):
        for cutoff in np.arange(1, 100, step=10):
            svt_params = {"sigma": noise, "k": cutoff, "c": 100}
            fitted = mechanism(params=svt_params, rdp_c_1=False)
            for order in ALPHAS:
                # The mechanism is queried before the budget filter, as before.
                rdp_eps = fitted.get_RDP(order)
                capacity = block.epsilon(order)
                if capacity > 0:
                    row = {
                        "alphas": order,
                        "rdp_epsilons": rdp_eps,
                        "normalized_epsilons": rdp_eps / capacity,
                        "task": (noise, cutoff),
                    }
                    for column, value in row.items():
                        columns[column].append(value)
    return pd.DataFrame(columns)

In [None]:
def subsampled_range(min=0.01, max=100) -> pd.DataFrame:
    """Tabulate `SubsampledGaussianCurve` epsilons over a (sigma, sampling) grid.

    The grid is the product of 10 geometrically spaced sigmas in `[min, max]`
    and 10 geometrically spaced sampling probabilities in `[1e-8, 1]`; every
    curve uses `steps=1`. A row is kept for each alpha of the curve where the
    module-level `block` budget is strictly positive.

    Args:
        min: Smallest sigma of the sweep (name kept for caller compatibility
            even though it shadows the builtin).
        max: Largest sigma of the sweep.

    Returns:
        A DataFrame with columns `alphas`, `rdp_epsilons`,
        `normalized_epsilons` (curve epsilon / block epsilon) and `task`
        (the `(sigma, sampling)` tuple the row belongs to).
    """
    columns = defaultdict(list)
    for noise in np.geomspace(min, max, 10):
        for rate in np.geomspace(1e-8, 1, 10):
            subsampled = SubsampledGaussianCurve(
                sigma=noise, sampling_probability=rate, steps=1
            )
            for order, rdp_eps in zip(subsampled.alphas, subsampled.epsilons):
                capacity = block.epsilon(order)
                if capacity > 0:
                    row = {
                        "alphas": order,
                        "rdp_epsilons": rdp_eps,
                        "normalized_epsilons": rdp_eps / capacity,
                        "task": (noise, rate),
                    }
                    for column, value in row.items():
                        columns[column].append(value)
    return pd.DataFrame(columns)

In [None]:
# Choose the data source for the cells below by leaving exactly one assignment
# to `df` uncommented.
# NOTE(review): the two `load_task_dir` examples use absolute paths under one
# user's home directory and will not resolve on other machines.
# df = load_task_dir("/home/pierre/privacypacking/data/mixed_curves/tasks")
# df = load_task_dir("/home/pierre/privacypacking/data/privatekube_event_g0.0_l0.5_p=grid/tasks")
# df = sigma_range()
# df = subsampled_range()
df = autodp_range()

In [None]:
# For each task, find the row label of its smallest normalized epsilon, then
# keep exactly those rows: one "best alpha" row per task.
# Earlier alternative, kept for reference:
# df.groupby("task").agg({"normalized_epsilons": "min", "alphas": "first"})
indx = df.groupby('task')['normalized_epsilons'].idxmin()
best_alpha = df.loc[indx]

In [None]:
# One line per task: normalized epsilon against alpha, both axes logarithmic.
px.line(
    df,
    x="alphas",
    y="normalized_epsilons",
    color="task",
    log_y=True,
    log_x=True,
)

In [None]:
# One point per task: the alpha that minimizes the task's normalized epsilon
# and the value reached there, both axes logarithmic.
px.scatter(
    best_alpha,
    x="alphas",
    y="normalized_epsilons",
    color="task",
    log_y=True,
    log_x=True,
    title="Epsilon for the best alpha of each task",
)

In [None]:
# Build a "zoo" of curves to compare: 100 Laplace curves followed by
# 10 x 10 x 2 = 200 subsampled Gaussian curves.
curve_zoo = []

# Laplace curves over 100 geometrically spaced noise scales in [0.01, 10].
# NOTE(review): this loop used to also construct `GaussianCurve(sigma=sigma)` on
# every iteration without ever using it. That dead construction is removed; if
# plain Gaussian curves were meant to be part of the zoo, append them here.
for sigma in np.geomspace(0.01, 10, 100):
    curve_zoo.append(LaplaceCurve(laplace_noise=sigma))

# Subsampled Gaussian curves over a grid of 10 sigmas, 10 sampling
# probabilities in [1e-5, 1] and the two step counts 1 and 51.
for sigma in np.geomspace(0.01, 10, 10):
    for sampling in np.geomspace(1e-5, 1, 10):
        for steps in np.arange(1, 100, step=50):
            curve_zoo.append(
                SubsampledGaussianCurve(
                    sigma=sigma, sampling_probability=sampling, steps=steps
                )
            )


In [None]:
def zoo_df(zoo: list) -> pd.DataFrame:
    """Flatten a list of curves into one long DataFrame, one row per kept alpha.

    Every curve must expose parallel `alphas` and `epsilons` sequences. An
    alpha is kept only when the module-level `block` budget at that alpha is
    strictly positive.

    Args:
        zoo: Curves to tabulate; their position in the list identifies them.

    Returns:
        A DataFrame with columns `alphas`, `rdp_epsilons`,
        `normalized_epsilons` (curve epsilon / block epsilon) and `task`
        (the curve's list position as a float). An empty `zoo` yields an
        empty DataFrame with no columns.
    """
    columns = defaultdict(list)
    for position, member in enumerate(zoo):
        task_id = float(position)
        for order, rdp_eps in zip(member.alphas, member.epsilons):
            capacity = block.epsilon(order)
            if capacity > 0:
                row = {
                    "alphas": order,
                    "rdp_epsilons": rdp_eps,
                    "normalized_epsilons": rdp_eps / capacity,
                    "task": task_id,
                }
                for column, value in row.items():
                    columns[column].append(value)
    return pd.DataFrame(columns)

In [None]:
# Rebind `df` to the curve-zoo table (this replaces the `df` built earlier in
# the notebook), then draw one line per curve: normalized epsilon against
# alpha, both axes logarithmic.
df = zoo_df(curve_zoo)
px.line(
    df,
    x="alphas",
    y="normalized_epsilons",
    color="task",
    log_y=True,
    log_x=True,
)

In [None]:
# Recompute the per-task best-alpha rows for the current `df` (overwrites the
# earlier `indx` / `best_alpha`) and plot one point per task.
indx = df.groupby('task')['normalized_epsilons'].idxmin()
best_alpha = df.loc[indx]
px.scatter(
    best_alpha,
    x="alphas",
    y="normalized_epsilons",
    color="task",
    log_y=True,
    log_x=True,
    title="Epsilon for the best alpha of each task",
)

In [None]:
# Per-task minimum, maximum and spread of the normalized epsilon, written onto
# every row of that task.
# `transform` returns a Series aligned with `df`'s own row index. Assigning the
# result of `.agg(...)` instead would align *task labels* against *row labels*:
# row k would receive task k's value and every other row would be NaN.
df["epsilon_min"] = df.groupby("task")["normalized_epsilons"].transform("min")
df["epsilon_max"] = df.groupby("task")["normalized_epsilons"].transform("max")
df["epsilon_range"] = df["epsilon_max"] - df["epsilon_min"]


In [None]:
# Distribution of `epsilon_min`, restricted to rows where both `epsilon_min`
# and `epsilon_max` are below 1.
px.histogram(
    df.query("epsilon_min < 1 and epsilon_max < 1"),
    x="epsilon_min",
    # nbins=100,
)

In [None]:
# Distribution of `epsilon_max`, restricted to rows where both `epsilon_min`
# and `epsilon_max` are below 1.
px.histogram(
    df.query("epsilon_min < 1 and epsilon_max < 1"),
    x="epsilon_max",
    # nbins=100,
)

In [None]:
# Distribution of `epsilon_range` (max minus min), restricted to rows where
# both `epsilon_min` and `epsilon_max` are below 1.
px.histogram(
    df.query("epsilon_min < 1 and epsilon_max < 1"),
    x="epsilon_range",
    # nbins=100,
)

In [None]:
# Distribution of `epsilon_range` with the looser filter: only `epsilon_min`
# has to be below 1, so rows whose `epsilon_max` is 1 or more are included.
px.histogram(
    df.query("epsilon_min < 1"),
    x="epsilon_range",
    # nbins=100,
)