In [73]:
import pandas as pd
import numpy as np
from privacypacking.utils.utils import LOGS_PATH
from experiments.ray.analysis import load_ray_experiment
import plotly.express as px

In [87]:
# Load the first Ray run into `df`; the cells below query and plot it.
# NOTE: later cells rebind `logs` and `df` to other runs, so re-run this cell
# before re-running the plots that immediately follow it.
logs = LOGS_PATH.joinpath("ray/run_and_report_2021-10-03_22-31-54")
df = load_ray_experiment(logs)

In [22]:
# Column labels of the loaded experiment results.
df.columns

Index(['scheduler', 'scheduler_N', 'scheduler_metric', 'frequency_file',
       'n_allocated_tasks', 'total_tasks', 'realized_profit', 'n_tasks',
       'maximum_profit', 'scheduling_time', 'task_scheduling_times',
       'time_this_iter_s', 'done', 'timesteps_total', 'episodes_total',
       'training_iteration', 'experiment_id', 'date', 'timestamp',
       'time_total_s', 'pid', 'hostname', 'node_ip', 'config',
       'time_since_restore', 'timesteps_since_restore',
       'iterations_since_restore', 'trial_id'],
      dtype='object')

 ### Mice and Elephants example
 
- Both types of tasks request 1 block with probability 0.75 and 10 blocks with probability 0.25
- Block Selection Policy: Latest First
- Tasks arrive following a Poisson distribution
- A block arrives every 1 second
- The experiment lasts for 10 seconds

#### DPF - BudgetUnlocking + Dominant_Shares
- Experiment using different values of N and different percentages of mice tasks



In [89]:
# Rows scored with the dominant_shares metric, ordered by workload file and
# then by N; the plot draws one line per scheduler_N value.
q1 = (
    df.query("scheduler_metric == 'dominant_shares'")
    .sort_values(by=["frequency_file", "scheduler_N"])
)
px.line(
    q1,
    x="frequency_file",
    y="n_allocated_tasks",
    color="scheduler_N",
)

# q2 = df.query("scheduler_metric == 'dominant_shares' and scheduler_N == 120 and frequency_file == 'mice_80.yaml'").sort_values(["task_scheduling_times"])
# x = pd.DataFrame(q2["task_scheduling_times"])
# x = np.array(x.values[0][0])
# x1 = x[np.where(x <= 0.001)]
# x2 = x[np.where(x <= 0.01)]
# x3 = x[np.where(x <= 0.1)]
# x4 = x[np.where(x <= 1)]

# px.bar(q2, x=[0.001, 0.01, 0.1, 1], y=[len(x1), len(x2), len(x3), len(x4)])

#### BudgetUnlocking - Dominant_Shares vs FlatRelevance
- N = 120

In [108]:
# Rows with N = 120 scored with either dominant_shares or flat_relevance,
# ordered by workload file; the plot draws one line per metric.
q = (
    df.query("(scheduler_metric == 'dominant_shares' or scheduler_metric == 'flat_relevance') and scheduler_N == 120")
    .sort_values(by=["frequency_file"])
)
px.line(
    q,
    x="frequency_file",
    y="n_allocated_tasks",
    color="scheduler_metric",
)

#### BudgetUnlocking + Dominant_Shares vs ThresholdUpdating (NaiveAverage) + FlatRelevance
- N = 120

In [132]:
# Load a second run and overlay it on `q`, the N = 120 dominant_shares /
# flat_relevance selection built in an earlier cell from the first run.
# NOTE: this cell depends on `q` already existing and rebinds `df`; re-running
# the cell that builds `q` after this one would query this run instead.
logs = LOGS_PATH.joinpath("ray/run_and_report_2021-10-03_23-52-45")
df = load_ray_experiment(logs)
# A single sort is enough (the original applied the same sort twice).
q1 = df.sort_values(["frequency_file"])

# Stack both selections and order them by workload file for plotting.
X = pd.concat([q, q1]).sort_values(["frequency_file"])
# One legend entry per (scheduler, metric) combination.
X["color"] = X["scheduler"] + "_" + X["scheduler_metric"]
px.line(X, x="frequency_file", y="n_allocated_tasks", color="color")

### Questions:
- Why are dominant_shares and flat_relevance identical in BudgetUnlocking? Can we find an example where they differ?
- Naive average threshold updating is unstable when big tasks and small tasks arrive with roughly the same probability. Are there better threshold mechanisms?


### Multiblock DPF Killer example

In [144]:
# Load the run for this section and plot allocated tasks against N,
# one colour per scheduler metric.
logs = LOGS_PATH.joinpath("ray/run_and_report_2021-10-04_00-20-36")
df = load_ray_experiment(logs)

q1 = df.sort_values(by=["scheduler_N"])
px.scatter(
    q1,
    x="scheduler_N",
    y="n_allocated_tasks",
    color="scheduler_metric",
)

In [198]:
# Load another run and tag every row with scheduler_N = 0, then compare it
# with the N = 12 rows of `df` (the run loaded in the previous cell).
logs = LOGS_PATH.joinpath("ray/run_and_report_2021-10-04_00-29-08")
df1 = load_ray_experiment(logs).assign(scheduler_N=0)
df2 = df.query("scheduler_N == 12")

# scheduler_N is cast to str so it can be concatenated into the legend label.
X = pd.concat([df1, df2]).astype({"scheduler_N": "str"})
X["color"] = (
    X["scheduler"]
    + "_"
    + X["scheduler_metric"]
    + "_"
    + X["scheduler_N"]
)
px.scatter(
    X,
    x="scheduler_N",
    y="n_allocated_tasks",
    color="color",
)