## Experiment Setup

### Algorithm
    - Scheduling time T: 1 (in block-time, e.g. once per day if blocks arrive once per day)
    - Scheduling policy: FCFS
    - No budget unlocking: data-lifetime=0.1, n=1 (each block is fully unlocked at time 0.1)

### Workload
    - Total number of blocks: 400 (initial blocks=1)
    - Blocks arrival time: 1 (e.g. 1 day)
    - Task lifetime: 1 (in block-time, e.g. 1 day; after that the task can no longer run)
    - Budget demand (epsilon): 0.5 (fixed for all incoming tasks for now)
    - Block budget initial capacity: 10
    - Number of tasks: (See task arrival time below)
    - Number of blocks: 1,2,3,4,5,6,7,8,9,10,11,12 months
    - Number of distinct queries: 2
    - Query types: Count only

In [26]:
from privacypacking.utils.utils import load_logs, LOGS_PATH
import pandas as pd
from experiments.ray.analysis import load_ray_experiment
import plotly.express as px
from privacypacking.budget import Budget, Task, Block
from privacypacking.utils.plot import plot_budgets
import plotly.graph_objects as go
from plotly.offline import plot,iplot
from plotly.subplots import make_subplots
import numpy as np

# Load the task workload and give every task a sequential id equal to its
# row position, then plot when each task was submitted.
tasks = pd.read_csv("/home/kelly/privacypacking/data/covid19/covid19_workload/privacy_tasks.csv")
# reset_index returns a new frame, so the result has to be assigned;
# drop=True keeps the old index from being added as an extra column.
tasks = tasks.reset_index(drop=True)
tasks['id'] = tasks.index
# print(tasks)
px.line(
    tasks,
    x="id",
    y="submit_time",
    title="arrival time per task",
)

In [27]:
# Distribution of the number of blocks requested per task (one bin per block count up to 400).
px.histogram(tasks, x="n_blocks", nbins=400, title="Requested numbers of Blocks")

In [21]:
def get_df(path):
    """Load the results of one Ray experiment run.

    Args:
        path: Run directory, relative to the ``ray`` folder under ``LOGS_PATH``.

    Returns:
        The object produced by ``load_ray_experiment`` for that directory
        (the plotting helpers in this notebook use it as a DataFrame).
    """
    run_dir = LOGS_PATH.joinpath("ray/" + path)
    return load_ray_experiment(run_dir)

def plot_profit(df):
    """Return a line chart of ``realized_profit`` per ``planner``.

    The y-axis starts at 0 and extends 1000 above the largest profit.
    """
    y_upper = df['realized_profit'].max() + 1000
    return px.line(df, x="planner", y="realized_profit", title="Total profit", range_y=[0, y_upper])

def plot_tasks(df):
    """Return a scatter plot of ``n_allocated_tasks`` per ``planner``.

    The y-axis starts at 0 and extends 1000 above the largest count.
    """
    column = "n_allocated_tasks"
    y_range = [0, df[column].max() + 1000]
    figure = px.scatter(df, x="planner", y=column, title="Num allocated tasks", range_y=y_range)
    return figure


def plot_realized_budget(df):
    """Return a scatter plot of ``realized_budget`` per ``planner``.

    The y-axis starts at 0 and extends 1000 above the largest budget.
    """
    plot_kwargs = {
        "x": "planner",
        "y": "realized_budget",
        "title": "Total budget",
        "range_y": [0, df['realized_budget'].max() + 1000],
    }
    return px.scatter(df, **plot_kwargs)

def plot_bu(df, planner, block_capacity=10):
    """Print summary metrics and return a per-block consumed-budget bar chart.

    Args:
        df: Experiment results; only the first row's ``blocks`` entry is
            plotted, while ``realized_profit`` and ``n_allocated_tasks`` are
            printed for all rows.
        planner: Planner name, used in the printed summary and the title.
        block_capacity: Budget each block starts with. Consumed budget is
            computed as ``block_capacity - remaining`` and the y-axis spans
            ``[0, block_capacity]``. Defaults to 10.

    Returns:
        The plotly bar figure (x = block id, y = consumed epsilon).
    """
    print(f"For planner = {planner}: \n  Total Profit: {df['realized_profit'].values}")
    print(f"             \n  Num-allocated: {df['n_allocated_tasks'].values}")

    # Build the frame in one go instead of concatenating one-row frames.
    blocks_df = pd.DataFrame(
        [
            {
                'id': b['id'],
                'initial_budget': b['initial_budget']['epsilon'],
                'budget': b['budget']['epsilon'],
            }
            for b in df['blocks'].values[0]
        ],
        columns=['id', 'initial_budget', 'budget'],
    )
    # 'budget' holds the remaining epsilon; flip it into the consumed amount.
    blocks_df['budget'] = block_capacity - blocks_df['budget']
    return px.bar(
        blocks_df,
        x="id",
        y="budget",
        range_y=[0, block_capacity],
        title=f"Budget Utilization for planner={planner}",
    )

    
def plot_bu_planner(df, block_capacity=10):
    """Return a scatter plot of the mean consumed block budget per planner.

    Args:
        df: Experiment results with a ``blocks`` column (iterable of block
            dicts exposing ``block['budget']['epsilon']``) and a ``planner``
            column.
        block_capacity: Budget each block starts with. Consumed budget is
            computed as ``block_capacity - remaining`` and the y-axis spans
            ``[0, block_capacity]``. Defaults to 10.

    Returns:
        The plotly scatter figure (x = planner, y = mean consumed epsilon).
    """
    # One row per (planner, block), built in a single DataFrame construction.
    rows = [
        {"budget": block['budget']['epsilon'], "planner": planner}
        for blocks, planner in zip(df['blocks'], df['planner'])
        for block in blocks
    ]
    usage = pd.DataFrame(rows, columns=["budget", "planner"])
    # 'budget' holds the remaining epsilon; flip it into the consumed amount.
    usage['budget'] = block_capacity - usage['budget']
    usage = usage.groupby('planner')['budget'].mean().reset_index()
    return px.scatter(
        usage,
        x="planner",
        y="budget",
        range_y=[0, block_capacity],
        title="Budget Utilization",
    )

def get_task_results(df):
    """Flatten per-task query results into one row per task and add error columns.

    Args:
        df: Experiment results with a ``tasks`` column (iterable of task
            dicts) and a ``planner`` column. Each task dict must provide
            ``id``, ``num_blocks`` and the three ``result_*`` entries read
            below.

    Returns:
        A DataFrame sorted by ``error_no_planner_no_cache_dp`` (ascending),
        with a leading 1-based ``newId`` rank column and two absolute-error
        columns measured against ``result_no_planner_no_cache_no_dp``.
        Returns ``None`` when there are no tasks at all.
    """
    rows = [
        {
            "id": task['id'],
            "result_no_planner_no_cache_dp": task['result_no_planner_no_cache_dp'],
            "result_no_planner_no_cache_no_dp": task['result_no_planner_no_cache_no_dp'],
            "result_planner_cache_dp": task['result_planner_cache_dp'],
            "num_requested_blocks": task['num_blocks'],
            "planner": planner,
        }
        for tasks, planner in zip(df['tasks'], df['planner'])
        for task in tasks
    ]
    if not rows:
        return None

    # A single construction gives every row its own index label and avoids
    # creating one throwaway DataFrame per task.
    results = pd.DataFrame(rows)
    for column in (
        'result_no_planner_no_cache_dp',
        'result_no_planner_no_cache_no_dp',
        'result_planner_cache_dp',
    ):
        results[column] = results[column].astype(float)

    baseline = results['result_no_planner_no_cache_no_dp']
    results['error_no_planner_no_cache_dp'] = np.abs(
        results['result_no_planner_no_cache_dp'] - baseline
    )
    results['error_planner_cache_dp'] = np.abs(
        results['result_planner_cache_dp'] - baseline
    )

    # Stable sort so tasks with equal error keep their input order, which
    # makes the newId ranking reproducible across runs.
    # (Sort by 'num_requested_blocks' instead to order tasks by request size.)
    results = results.sort_values('error_no_planner_no_cache_dp', kind='mergesort')
    results.insert(0, 'newId', range(1, 1 + len(results)))
    return results


def plot_results(df, planner):
    """Return a marker plot comparing the three result variants per task.

    Args:
        df: Task results with a ``newId`` column and the three ``result_*``
            columns listed below.
        planner: Planner name shown in the figure title.

    Returns:
        A plotly figure with one marker trace per result variant, all drawn
        against the secondary y-axis.
    """
    # (column, marker colour, marker size, legend label), in drawing order.
    # Sizes shrink so later traces stay visible on top of earlier ones.
    series = (
        ('result_no_planner_no_cache_no_dp', 'green', 14, "NoDP NoPlanner NoCache"),
        ('result_no_planner_no_cache_dp', 'LightSkyBlue', 10, "DP NoPlanner NoCache"),
        ('result_planner_cache_dp', 'red', 6, "DP Planner Cache"),
    )

    fig = make_subplots(specs=[[{"secondary_y": True}]])
    for column, color, size, label in series:
        trace = go.Scatter(
            x=df['newId'],
            y=df[column],
            marker={'color': color, 'size': size},
            line={'color': 'black', 'width': 10},
            mode='markers',
            name=label,
        )
        fig.add_trace(trace, secondary_y=True)

    fig.update_layout(title_text=f"Results for planner={planner}")
    fig.update_xaxes(title_text="task id")
    return fig

def plot_errors(df, planner):
    """Return a marker plot of the two absolute-error columns per task.

    Args:
        df: Task results with ``newId``, ``error_no_planner_no_cache_dp``
            and ``error_planner_cache_dp`` columns.
        planner: Planner name shown in the figure title.

    Returns:
        A plotly figure with one marker trace per error column, both drawn
        against the secondary y-axis.
    """
    # (column, marker colour, marker size, legend label), in drawing order.
    series = (
        ('error_no_planner_no_cache_dp', 'LightSkyBlue', 10, "DP NoPlanner NoCache"),
        ('error_planner_cache_dp', 'red', 6, "DP Planner Cache"),
    )

    fig = make_subplots(specs=[[{"secondary_y": True}]])
    for column, color, size, label in series:
        trace = go.Scatter(
            x=df['newId'],
            y=df[column],
            marker={'color': color, 'size': size},
            line={'color': 'black', 'width': 10},
            mode='markers',
            name=label,
        )
        fig.add_trace(trace, secondary_y=True)

    fig.update_layout(title_text=f"Errors for planner={planner}")
    fig.update_xaxes(title_text="task id")
    return fig


# def plot_errors_cdf(df):
#     df = get_errors_planner(df)
#     df['error'] = np.abs(df['result_no_planner_no_cache_dp']-df['result_planner_cache_dp'])
#     p = px.ecdf(
#         df,
#         x="error",
#         log_x=False,
#         color='planner',
#         range_x=[0, 1],
#         title=f"CDF of errors",
#     )
#     return p

def all_plots(path):
    """Load one experiment run and display its per-task result and error plots.

    Args:
        path: Run directory passed to ``get_df``.

    For each planner of interest, the matching rows are flattened with
    ``get_task_results`` and shown via ``plot_results`` / ``plot_errors``.
    A planner without any task results is reported and skipped.
    """
    df = get_df(path)
    # Aggregate plots, currently disabled:
    #     print(df['tasks'].values)
    #     iplot(plot_profit(df))
    #     iplot(plot_realized_budget(df))
    #     iplot(plot_tasks(df))
    #     iplot(plot_bu_planner(df))
    #     iplot(plot_errors_cdf(df))
    for planner in ["PerBlockPlanner"]:
        planner_rows = df.query(f"planner=='{planner}'")
        # iplot(plot_bu(planner_rows, planner))
        task_results = get_task_results(planner_rows)
        if task_results is None:
            # get_task_results returns None when there are no tasks; the
            # plot helpers cannot index into None, so skip this planner.
            print(f"No task results for planner={planner}; skipping plots")
            continue
        iplot(plot_results(task_results, planner))
        iplot(plot_errors(task_results, planner))


In [22]:
# Display the per-task result and error plots for the selected experiment run.
# Earlier run, kept for reference:
# all_plots("run_and_report_2022-11-02_19-18-14/")
all_plots("run_and_report_2022-11-02_19-29-55/")