## Experiment Setup

### Algorithm
    - Scheduling time T: 1 (in block-time, e.g., once per day if blocks arrive once per day)
    - Scheduling policy: FCFS
    - No budget unlocking: data-lifetime=0.1, n=1 (at time 0.1 the block is fully unlocked)

### Workload
    - Total number of blocks: 400 (initial blocks=1)
    - Blocks arrival time: 1 (e.g. 1 day)
    - Task lifetime: 1 (in block-time, e.g. 1 day; after that the task can no longer run)
    - Budget demand (epsilon): 0.5 (fixed for all incoming tasks for now)
    - Block budget initial capacity: 10
    - Number of tasks: (See task arrival time below)
    - Number of blocks: 1,2,3,4,5,6,7,8,9,10,11,12 months
    - Number of distinct queries: 2
    - Query types: Count only

In [None]:
from privacypacking.utils.utils import load_logs, LOGS_PATH
import pandas as pd
from experiments.ray.analysis import load_ray_experiment
import plotly.express as px
from privacypacking.budget import Budget, Task, Block
from privacypacking.utils.plot import plot_budgets
import plotly.graph_objects as go
from plotly.offline import plot,iplot
from plotly.subplots import make_subplots
import numpy as np

# Load the privacy-task workload CSV.
tasks = pd.read_csv("/home/kelly/privacypacking/data/covid19/covid19_workload/privacy_tasks.csv")
# read_csv (without index_col) already yields a 0..n-1 index, so the index
# can be copied directly into an explicit `id` column.
tasks['id'] = tasks.index

# Histogram of the `submit_time` column.
fig = px.histogram(
    tasks,
    x="submit_time",
    nbins=400,
    title="arrival time",
)
fig.update_layout(bargap=0.2)
fig.show()


In [None]:
# Histogram of the `n_blocks` column of the workload.
# update_layout returns the figure it was called on, so the calls can be chained.
fig = px.histogram(
    tasks, x="n_blocks", nbins=400, title="Requested numbers of Blocks"
).update_layout(bargap=0.2)
fig.show()


In [None]:
def get_df(path):
    """Load the ray experiment stored under ``LOGS_PATH/ray/<path>``.

    Args:
        path: Name of the experiment directory, relative to the ``ray``
            folder inside ``LOGS_PATH``.

    Returns:
        Whatever ``load_ray_experiment`` returns for that directory.
    """
    experiment_dir = LOGS_PATH.joinpath("ray/" + path)
    return load_ray_experiment(experiment_dir)

def plot_profit(df):
    """Return a line chart of ``realized_profit`` per ``planner``.

    The y-axis starts at 0 and ends 1000 above the largest realized profit.
    """
    y_upper = df['realized_profit'].max() + 1000
    return px.line(
        df, x="planner", y="realized_profit", title="Total profit", range_y=[0, y_upper]
    )

def plot_num_allocated_tasks(df):
    """Return a scatter plot of ``n_allocated_tasks`` per ``key``.

    The largest value of the ``total_tasks`` column is shown in the title.
    """
    heading = f"Num allocated tasks - total tasks={df['total_tasks'].max()}"
    return px.scatter(df, x="key", y="n_allocated_tasks", color='key', title=heading)

def plot_errors(df):
    """Return a scatter plot of ``error`` against ``num_requested_blocks``, coloured by ``key``."""
    scatter_options = dict(
        x="num_requested_blocks",
        y="error",
        color='key',
        title="Error wrt to the ground truth",
    )
    return px.scatter(df, **scatter_options)

def plot_results(df):
    """Return a scatter plot of ``result`` against ``num_requested_blocks``, coloured by ``key``."""
    scatter_options = dict(
        x="num_requested_blocks",
        y="result",
        color='key',
        title="Result",
    )
    return px.scatter(df, **scatter_options)

def plot_planning_time(df):
    """Return a scatter plot of ``planning_time`` against ``num_requested_blocks``, coloured by ``key``."""
    scatter_options = dict(
        x="num_requested_blocks",
        y="planning_time",
        color='key',
        title="Planning time",
    )
    return px.scatter(df, **scatter_options)

def plot_bu(df, planner):
    """Print summary metrics and return a bar chart of consumed budget per block.

    Only the block list of the first row of ``df['blocks']`` is plotted.
    Consumed budget is each block's own initial epsilon minus its current
    epsilon, so the chart stays correct when the initial capacity is not 10.

    Args:
        df: Frame with ``realized_profit``, ``n_allocated_tasks`` and
            ``blocks`` columns; each block is a mapping with ``id``,
            ``initial_budget['epsilon']`` and ``budget['epsilon']``.
        planner: Planner name, used only in the printed text and the title.

    Returns:
        The plotly express bar figure.

    Raises:
        ValueError: If the first row contains no blocks.
    """
    print(f"For planner = {planner}: \n  Total Profit: {df['realized_profit'].values}")
    print(f"             \n  Num-allocated: {df['n_allocated_tasks'].values}")

    rows = [
        {
            'id': b['id'],
            'initial_budget': b['initial_budget']['epsilon'],
            'budget': b['budget']['epsilon'],
        }
        for b in df['blocks'].values[0]
    ]
    if not rows:
        raise ValueError("No blocks to plot")

    # One frame for all blocks instead of one single-row frame per block.
    blocks_df = pd.DataFrame(rows)
    # Turn the current budget into consumed budget.
    blocks_df['budget'] = blocks_df['initial_budget'] - blocks_df['budget']
    return px.bar(
        blocks_df,
        x="id",
        y="budget",
        # Scale the axis to the largest initial capacity among the blocks.
        range_y=[0, blocks_df['initial_budget'].max()],
        title=f"Budget Utilization for planner={planner}",
    )

    
def plot_bu_planner(df, initial_budget=10):
    """Return a scatter plot of the mean consumed block budget per planner.

    Args:
        df: Frame with ``blocks`` and ``planner`` columns; each entry of
            ``blocks`` is an iterable of mappings exposing
            ``budget['epsilon']``.
        initial_budget: Capacity that each block's current epsilon is
            subtracted from; also the upper bound of the y-axis.

    Returns:
        The plotly express scatter figure.

    Raises:
        ValueError: If ``df`` contains no blocks at all.
    """
    rows = []
    for blocks, planner in zip(df['blocks'], df['planner']):
        for block in blocks:
            rows.append({"budget": block['budget']['epsilon'], "planner": planner})
    if not rows:
        raise ValueError("No blocks to plot")

    # One frame for all blocks instead of one single-row frame per block.
    usage = pd.DataFrame(rows)
    # Turn the current budget into consumed budget, then average per planner.
    usage['budget'] = initial_budget - usage['budget']
    usage = usage.groupby('planner')['budget'].mean().reset_index()
    return px.scatter(
        usage,
        x="planner",
        y="budget",
        range_y=[0, initial_budget],
        title="Budget Utilization",
    )

def get_tasks_information(df):
    """Flatten the per-experiment task lists into one row per task.

    Args:
        df: Frame with a ``tasks`` column (each entry an iterable of task
            mappings with ``id``, ``result``, ``error``, ``planning_time``
            and ``num_blocks``) and a ``key`` column.

    Returns:
        A DataFrame with columns ``id``, ``result``, ``error``,
        ``planning_time``, ``num_requested_blocks`` and ``key``, indexed
        0..n-1, with ``result``, ``error`` and ``planning_time`` cast to
        float. ``None`` when there are no tasks at all.
    """
    rows = []
    for tasks, key in zip(df['tasks'], df['key']):
        for task in tasks:
            rows.append({
                "id": task['id'],
                "result": task['result'],
                "error": task['error'],
                "planning_time": task['planning_time'],
                "num_requested_blocks": task['num_blocks'],
                "key": key,
            })
    if not rows:
        return None

    # A single frame gives every row a unique index label; concatenating
    # one-row frames would label every row 0.
    info = pd.DataFrame(rows)
    for column in ('result', 'error', 'planning_time'):
        info[column] = info[column].astype(float)
    return info

def all_plots(path):
    """Load the experiment stored under ``path`` and display its allocated-tasks plot.

    Args:
        path: Experiment directory name, forwarded to ``get_df``.
    """
    df = get_df(path)
    # plot_num_allocated_tasks plots by a 'key' column; when the loaded frame
    # does not have one, fall back to the planner name as the key.
    if 'key' not in df.columns:
        df = df.assign(key=df['planner'])
    iplot(plot_num_allocated_tasks(df))


In [None]:
# Load one experiment run; the planner name alone serves as the grouping key.
df = get_df("run_and_report_2022-11-09_17-25-58")
df['key'] = df['planner']
df = df.drop(columns=['planner', 'cache'])

# Per-experiment metrics and per-task details are kept in separate frames.
metrics = df[['key', 'n_allocated_tasks', 'total_tasks']]
df = df[['key', 'tasks']]

# One row per task, then averaged over rows sharing id, key and requested block count.
tasks = get_tasks_information(df)
grouped_tasks = tasks.groupby(['id', 'key', 'num_requested_blocks'])
tasks = grouped_tasks.mean().reset_index()

iplot(plot_num_allocated_tasks(metrics))
for make_plot in (plot_errors, plot_planning_time, plot_results):
    iplot(make_plot(tasks))


    