In [143]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [144]:
import mlflow
import pandas as pd

from cookiemonster.utils import LOGS_PATH

In [145]:
from mlflow.tracking import MlflowClient

# Both the fluent mlflow API and the explicit client read from the
# "mlflow" directory under LOGS_PATH.
tracking_uri = str(LOGS_PATH / "mlflow")
mlflow.set_tracking_uri(tracking_uri)

client = MlflowClient(tracking_uri=tracking_uri)

In [146]:

# experiment_name="bias_detection_09-09_11-39"
# experiment_name="bias_detection_09-09_18-06" # Shuffled?
# experiment_name="bias_detection_09-10_11-01"
# experiment_name="bias_detection_09-10_11-14" # 60. Good but CM has small bias.
# experiment_name="bias_detection_09-10_11-27" # 45. Good
# experiment_name="bias_detection_09-10_11-38" # 120 Barely more than IPA (more overshooting?) and starts to go in steady state, crossing ARA. But CM itself is not much better than IPA.
# experiment_name="bias_detection_09-10_16-17" # 120 with more schedules, 200. Just higher error in steady state. Better strategy would be to wait.

# experiment_name="bias_detection_09-11_15-55" # 60, fixed sensitivity.
# experiment_name="bias_detection_09-11_18-17" # 60, 400 queries
# experiment_name="bias_detection_09-11_20-46" # 60, 250 queries

# Experiment whose runs are analysed in the rest of the notebook.
experiment_name = "bias_detection_09-12_22-18"

# experiment_name="bias_detection_criteo_09-10_13-46"
# experiment_name="bias_detection_criteo_09-10_15-32" # Actually PATCG. CM stays below 0.05 the whole time. But not too bad? Not much overestimation. Slow exp though.

# Ids of every run logged under the selected experiment.
runs = mlflow.search_runs(experiment_names=[experiment_name])
run_ids = runs["run_id"].tolist()
run_ids

['918eeeaba43c4e76b4639b4bbc3567d4',
 '2d178459ac0e4aae8c0217e0391c36a9',
 '30eb650409fe49b3baeafc1ada4127de',
 'b2b4bd309345494d91d6ddc10ff9eff0',
 'cdcb87a818704fa9b8ca9d0e0c6fa017',
 'f2e8e5e047a44e149aaf01d2f5fe8b3a',
 '68e8b70cd0ae4b56a221c608ec9097a5',
 '919473ee39cc46e3b4f9db2c6462f0dc']

In [147]:
metric_names = ["rmsre", "rmsre_prediction"]
param_names = ["user.baseline", "user.bias_detection_knob", "dataset.num_days_per_epoch", "user.is_monotonic_scalar_query"]

# One record per (run, step): the metric values at that position in each
# history, plus the run-level params repeated on every record.
data = []
for run_id in run_ids:
    run_params = client.get_run(run_id).data.params
    metric_histories = {
        name: client.get_metric_history(run_id, name) for name in metric_names
    }

    # The first metric drives the number of steps; a shorter (or empty)
    # history for another metric yields None for the missing positions.
    num_steps = len(metric_histories[metric_names[0]])
    for step in range(num_steps):
        step_data = {"step": step}
        for name in metric_names:
            history = metric_histories[name]
            step_data[name] = history[step].value if step < len(history) else None
        step_data.update({name: run_params[name] for name in param_names})
        data.append(step_data)

df = pd.DataFrame(data)
df

Unnamed: 0,step,rmsre,rmsre_prediction,user.baseline,user.bias_detection_knob,dataset.num_days_per_epoch,user.is_monotonic_scalar_query
0,0,0.015541,,cookiemonster,0,7,True
1,1,0.015557,,cookiemonster,0,7,True
2,2,0.015565,,cookiemonster,0,7,True
3,3,0.015510,,cookiemonster,0,7,True
4,4,0.015628,,cookiemonster,0,7,True
...,...,...,...,...,...,...,...
3195,395,0.351672,0.509952,cookiemonster,2,7,True
3196,396,0.338774,0.540779,cookiemonster,2,7,True
3197,397,0.370116,0.637756,cookiemonster,2,7,True
3198,398,0.372278,0.664601,cookiemonster,2,7,True


In [148]:
# Expose the logged params under short column names that are usable in
# DataFrame.query expressions (the originals contain dots).
for short_name in ("is_monotonic_scalar_query", "bias_detection_knob"):
    df[short_name] = df[f"user.{short_name}"]
df["num_days_per_epoch"] = df["dataset.num_days_per_epoch"]

def get_baseline_name(row):
    """Return the display baseline for a run row.

    The baseline name is returned as-is when the bias detection knob is the
    string "0"; any other knob value gets a "_bias" suffix. The comparison is
    a plain string comparison, so values such as "0.0" or the integer 0 are
    treated as non-zero.
    """
    suffix = "" if row["user.bias_detection_knob"] == "0" else "_bias"
    return row["user.baseline"] + suffix

# Row-wise: derive the baseline label from the run's baseline and knob params.
df["baseline"] = df.apply(get_baseline_name, axis=1)



In [149]:
# Selection knobs for the analysis. All params are compared as strings,
# which is how they appear in the frame.
is_monotonic_scalar_query = "True"
bias_detection_knob = "0.5"
rmsre_prediction_cutoff = 0.05

# Keep the runs without bias detection (knob "0") plus the selected knob,
# restricted to 7-day epochs and the selected query type.
knob_selected = df["bias_detection_knob"].isin(["0", bias_detection_knob])
weekly_epochs = df["num_days_per_epoch"] == "7"
query_type_selected = df["is_monotonic_scalar_query"] == is_monotonic_scalar_query

query_df = df[knob_selected & weekly_epochs & query_type_selected]

In [150]:
# Baselines present after filtering.
query_df["baseline"].unique()

array(['cookiemonster', 'ipa', 'cookiemonster_base', 'cookiemonster_bias'],
      dtype=object)

In [151]:
# Knob values present after filtering.
query_df["bias_detection_knob"].unique()

array(['0', '0.5'], dtype=object)

In [152]:
# Steps are 0-based, so the count is the largest step plus one.
num_queries = query_df["step"].max() + 1
num_queries

400

In [153]:
from plotting.macros import *
from plotting.plot_template import *


In [154]:
# Keyword arguments for the `cdf` plotting helper (full RMSRE CDF panel).
cdf_args = dict(
    df=query_df,
    metric="rmsre",
    x_axis=None,
    x_axis_title=RMSRE_CDF_X,
    y_axis_title=RMSRE_Y,
    ordering=None,
    log_y=False,
    x_range=[1, 100],
    showlegend=True,
    marker_pos=0.98,
    baselines_order=BIAS_BASELINES_ORDER,
)

In [155]:
baseline = COOKIEMONSTER_BIAS
metric = "rmsre"
unsorted_metric = "rmsre_prediction"

# Rows of the bias-detection baseline, ordered by their actual RMSRE.
csv_name = csv_mapping[baseline]
group = query_df.query("baseline == @csv_name").sort_values(by=[metric])

# The percentile denominator counts every row, including those whose RMSRE
# is missing; only rows with an RMSRE are actually plotted.
len_values = group.shape[0]
start = 1
group = group.dropna(subset=[metric])
stop = group.shape[0]

# Predicted bounds, listed in the order of the sorted actual RMSRE.
values = group[unsorted_metric].values
cumulative_probabilities = np.arange(start, stop + 1) / float(len_values) * 100

# Styling shared by the two overlay traces.
overlay_style = dict(
    legendgroup=baseline,
    marker_symbol=symbol_map[baseline],
    showlegend=True,
    mode="lines",
)

# Predicted RMSRE bound for each query, drawn semi-transparent.
trace_unsorted = go.Scatter(
    x=cumulative_probabilities,
    y=values,
    name=f"{baseline} (RMSRE bound)",
    marker_color="rgba(128, 0, 128, 0.2)",
    line=dict(dash=lines_map[baseline]),
    **overlay_style,
)

# Horizontal line at the configured cutoff.
trace_cutoff = go.Scatter(
    x=cumulative_probabilities,
    y=[rmsre_prediction_cutoff] * len(cumulative_probabilities),
    name=f"{baseline} (RMSRE bound cutoff)",
    marker_color="black",
    line=dict(dash=lines_map[baseline], width=1),
    **overlay_style,
)

In [156]:
# Full RMSRE CDF for every baseline, overlaid with the predicted RMSRE bound
# and the cutoff line built in the previous cell.
full_cdfs = go.Figure()
full_cdfs.add_trace(trace_unsorted)
full_cdfs.add_trace(trace_cutoff)
for trace in cdf(**cdf_args):
    full_cdfs.add_trace(trace)
# Title previously read "RMRSRE"; the metric is RMSRE.
full_cdfs.update_layout(title=f"CDF of RMSRE. Bias detection knob: {bias_detection_knob}. #queries {num_queries}")
full_cdfs

In [157]:

def apply_bias_detection(row, cutoff=None):
    """Return the row's RMSRE, or None when its predicted bound exceeds the cutoff.

    Args:
        row: Mapping or Series providing "rmsre_prediction" and "rmsre_original".
        cutoff: Threshold on "rmsre_prediction". When omitted, the notebook
            global ``rmsre_prediction_cutoff`` is read at call time, so
            existing ``df.apply(apply_bias_detection, axis=1)`` calls keep
            working unchanged.

    Returns:
        ``row["rmsre_original"]`` when the query is kept, ``None`` when it is
        dropped.
    """
    if cutoff is None:
        cutoff = rmsre_prediction_cutoff

    prediction = row["rmsre_prediction"]
    # A falsy prediction (None, 0) is always kept, and NaN compares False
    # against any cutoff, so rows without a prediction are never dropped.
    if prediction and prediction > cutoff:
        return None
    return row["rmsre_original"]

# Copy of the filtered frame in which "rmsre" is blanked for the queries
# rejected by apply_bias_detection; the untouched value is kept in
# "rmsre_original".
chopped_df = query_df.assign(rmsre_original=query_df["rmsre"])
chopped_df["rmsre"] = chopped_df.apply(apply_bias_detection, axis=1)
chopped_df = chopped_df.drop(columns=["rmsre_prediction"])

In [158]:
# Same CDF settings as the full plot, but on the chopped frame and without
# a legend entry per trace.
cut_cdf_args = {**cdf_args, "df": chopped_df, "showlegend": False}

cut_cdfs_fig = go.Figure()
for cut_trace in cdf(**cut_cdf_args):
    cut_cdfs_fig.add_trace(cut_trace)

cut_cdfs_title = f"CDF of RMSRE. Bias detection knob: {bias_detection_knob}. rmsre_prediction_cutoff: {rmsre_prediction_cutoff}. is_monotonic_scalar_query: {is_monotonic_scalar_query}. #queries {num_queries}"
cut_cdfs_fig.update_layout(title=cut_cdfs_title)
cut_cdfs_fig

In [159]:
# Same chopping as above, but over every knob / epoch length of the
# selected query type. Note this rebinds `chopped_df`.
selected_query_type = df.query(f"is_monotonic_scalar_query == '{is_monotonic_scalar_query}'")
chopped_df = selected_query_type.assign(rmsre_original=selected_query_type["rmsre"])
chopped_df["rmsre"] = chopped_df.apply(apply_bias_detection, axis=1)
chopped_df = chopped_df.drop(columns=["rmsre_prediction"])

In [160]:
def fake_detection_knob(row):
    """Return the knob label to display for a row.

    Rows of the "cookiemonster_bias" baseline get their knob value as a
    string. The "cookiemonster", "cookiemonster_base" and "ipa" baselines get
    "N/A". Any other baseline yields None.
    """
    baseline_name = row["baseline"]
    if baseline_name == "cookiemonster_bias":
        return str(row["bias_detection_knob"])
    if baseline_name in ("cookiemonster", "cookiemonster_base", "ipa"):
        return "N/A"
    return None
        

# Overwrite the knob column with its display label ("N/A" for the baselines without bias detection).
chopped_df["bias_detection_knob"] = chopped_df.apply(fake_detection_knob, axis=1)

In [161]:
rmsre_prediction_cutoffs = [0.016, 0.05, 0.1, 0.2]
# Placeholder x-axis labels for the baselines that have no cutoff
# (presumably whitespace-only so that only the middle one prints a label).
fake_cutoff_labels = [" ", "N/A", "  "]

dfs = []

# Baselines without a cutoff: RMSRE is left untouched and each one is tagged
# with its placeholder label.
for baseline, fake_cutoff_label in zip(DEFAULT_BASELINES_ORDER, fake_cutoff_labels):
    csv_name = csv_mapping[baseline]
    base_df = query_df.query(f"baseline == '{csv_name}'").copy()
    base_df["rmsre_original"] = base_df["rmsre"]
    base_df = base_df.drop(columns=["rmsre_prediction"])
    base_df["rmsre_prediction_cutoff"] = fake_cutoff_label
    dfs.append(base_df)

# Bias-detection baseline: one copy per cutoff, with the RMSRE blanked (NaN)
# wherever the predicted bound exceeds that cutoff.
bias_df = query_df.query("baseline == 'cookiemonster_bias'")
predicted_bound = pd.to_numeric(bias_df["rmsre_prediction"])

# The loop variable is deliberately NOT `rmsre_prediction_cutoff`: reusing
# that name overwrote the configured cutoff and left later cells reading 0.2.
for cutoff in rmsre_prediction_cutoffs:
    cutoff_df = bias_df.copy()
    cutoff_df["rmsre_original"] = cutoff_df["rmsre"]
    # Missing predictions compare False against the cutoff, so they are kept.
    cutoff_df["rmsre"] = cutoff_df["rmsre"].mask(predicted_bound > cutoff)
    cutoff_df = cutoff_df.drop(columns=["rmsre_prediction"])
    cutoff_df["rmsre_prediction_cutoff"] = str(cutoff)
    dfs.append(cutoff_df)

cutoffs_df = pd.concat(dfs, ignore_index=True)




In [162]:
# Display the concatenated baseline + per-cutoff frame.
cutoffs_df

Unnamed: 0,step,rmsre,user.baseline,user.bias_detection_knob,dataset.num_days_per_epoch,user.is_monotonic_scalar_query,is_monotonic_scalar_query,bias_detection_knob,num_days_per_epoch,baseline,rmsre_original,rmsre_prediction_cutoff
0,0,0.015541,cookiemonster,0,7,True,True,0,7,cookiemonster,0.015541,
1,1,0.015557,cookiemonster,0,7,True,True,0,7,cookiemonster,0.015557,
2,2,0.015565,cookiemonster,0,7,True,True,0,7,cookiemonster,0.015565,
3,3,0.015510,cookiemonster,0,7,True,True,0,7,cookiemonster,0.015510,
4,4,0.015628,cookiemonster,0,7,True,True,0,7,cookiemonster,0.015628,
...,...,...,...,...,...,...,...,...,...,...,...,...
2795,395,,cookiemonster,0.5,7,True,True,0.5,7,cookiemonster_bias,0.239818,0.2
2796,396,,cookiemonster,0.5,7,True,True,0.5,7,cookiemonster_bias,0.215505,0.2
2797,397,,cookiemonster,0.5,7,True,True,0.5,7,cookiemonster_bias,0.239574,0.2
2798,398,,cookiemonster,0.5,7,True,True,0.5,7,cookiemonster_bias,0.238823,0.2


In [163]:
variable = "rmsre_prediction_cutoff"

# Keyword arguments for the `boxes` plotting helper: RMSRE grouped by cutoff
# label on the x axis.
boxes_args = dict(
    df=cutoffs_df,
    metric="rmsre",
    x_axis=variable,
    x_axis_title="RMSRE bound cutoff",
    y_axis_title=RMSRE_Y,
    ordering=(variable, "str"),
    showlegend=False,
    baselines_order=BIAS_BASELINES_ORDER,
    baselines_with_nqueries=BIAS_BASELINES_ORDER,
    nqueries_style="percentage",
    hspace=0,
    vspace=0.1,
    n_total_queries=num_queries,
)

cutoff_boxes_fig = go.Figure()
for box_trace in boxes(**boxes_args):
    cutoff_boxes_fig.add_trace(box_trace)

cutoff_boxes_title = f"Quartiles for RMSRE depending on rmsre_prediction_cutoff. bias detection knob: {bias_detection_knob}. is_monotonic_scalar_query: {is_monotonic_scalar_query}."
cutoff_boxes_fig.update_layout(title=cutoff_boxes_title)
cutoff_boxes_fig

In [164]:

global_metric_names = ["avg_budget"]
param_names = ["user.baseline", "user.bias_detection_knob", "dataset.num_days_per_epoch", "user.is_monotonic_scalar_query"]

# One record per run: its params plus the first logged value of each
# run-level metric.
data = []
for run_id in run_ids:
    run_params = client.get_run(run_id).data.params
    datapoint = {name: run_params[name] for name in param_names}

    for metric_name in global_metric_names:
        first_entry = client.get_metric_history(run_id, metric_name)[0]
        datapoint[metric_name] = first_entry.value

    data.append(datapoint)

budget_df = pd.DataFrame(data)
budget_df["baseline"] = budget_df.apply(get_baseline_name, axis=1)

# Short, query-friendly aliases, mirroring the per-step frame.
for short_name in ("is_monotonic_scalar_query", "bias_detection_knob"):
    budget_df[short_name] = budget_df[f"user.{short_name}"]
budget_df["num_days_per_epoch"] = budget_df["dataset.num_days_per_epoch"]


In [165]:

# Keyword arguments for the `bars` plotting helper: average budget per
# baseline. The knob filter follows the `bias_detection_knob` setting rather
# than a hard-coded '0.5', so it stays in sync with the RMSRE panels.
bars_args = {
    "df": budget_df.query(
        f"bias_detection_knob == '0' or bias_detection_knob == '{bias_detection_knob}'"
    ),
    "metric": "avg_budget",
    "x_axis": "num_days_per_epoch",
    "x_axis_title": "",
    "y_axis_title": BUDGET_CONSUMPTION_Y_AVG,
    "ordering": None,
    "log_y": False,
    "showlegend": False,
    "baselines_order": BIAS_BASELINES_ORDER,
}

budget_fig = go.Figure()
for trace in bars(**bars_args):
    budget_fig.add_trace(trace)
# The title used to be a copy of the box-plot title ("Quartiles for RMSRE ...").
budget_fig.update_layout(title=f"Average budget consumption. bias detection knob: {bias_detection_knob}.")
budget_fig

In [166]:
def make_bias_plots(
    args,
    cols,
    legend,
    axis_title_font_size,
    axis_tick_font_size,
    output_path,
    height=None,
    width=None,
    titles=None,
):
    """Draw the four bias-detection panels in one row, save and show the figure.

    Panels, left to right:
      1. ``bars(**bars_args)`` (narrower column, x tick labels hidden),
      2. ``cdf(**cdf_args)`` plus the ``trace_unsorted`` / ``trace_cutoff`` overlays,
      3. ``cdf(**cut_cdf_args)``,
      4. ``boxes(**boxes_args)``.

    Besides its parameters, the function reads the notebook globals
    ``trace_unsorted``, ``trace_cutoff``, ``fake_cutoff_labels`` and
    ``rmsre_prediction_cutoffs``, so the cells defining them must run first.

    Args:
        args: Sequence of exactly four dicts, in the order
            ``(bars_args, cdf_args, cut_cdf_args, boxes_args)``. Their
            ``x_axis_title``, ``y_axis_title``, ``x_range``, ``y_range`` and
            ``log_y`` entries (when present) also configure the panel axes.
        cols: Number of subplot columns. The column widths are defined for
            four panels, so this is expected to be 4.
        legend: Extra legend layout options, merged over the defaults.
        axis_title_font_size: Dict with optional "x" / "y" title font sizes.
        axis_tick_font_size: Dict with optional "x" / "y" tick font sizes.
        output_path: Destination passed to ``fig.write_image``.
        height: Figure height, forwarded to the layout.
        width: Figure width, forwarded to the layout.
        titles: Optional subplot titles.
    """
    bars_args, cdf_args, cut_cdf_args, boxes_args = args
    column_widths = [0.25 * 0.3, 0.25, 0.25, 0.25]  # First subplot is smaller

    fig = make_subplots(
        rows=1,
        cols=cols,
        specs=[[{"type": "xy"}] * cols],
        horizontal_spacing=0.08,
        subplot_titles=titles,
        column_widths=column_widths,
        row_heights=[1],
    )

    # Panel 1: budget bars, without x tick labels.
    for trace in bars(**bars_args):
        fig.add_trace(trace, row=1, col=1)
    fig.update_xaxes(showticklabels=False, row=1, col=1)

    # Panel 2: full CDF with the predicted-bound and cutoff overlays.
    fig.add_trace(trace_unsorted, row=1, col=2)
    fig.add_trace(trace_cutoff, row=1, col=2)
    for trace in cdf(**cdf_args):
        fig.add_trace(trace, row=1, col=2)

    # Panel 3: CDF after dropping the queries above the cutoff.
    for trace in cdf(**cut_cdf_args):
        fig.add_trace(trace, row=1, col=3)

    # Panel 4: RMSRE quartiles per cutoff label.
    for trace in boxes(**boxes_args):
        fig.add_trace(trace, row=1, col=4)

    # Fix the category order on the box-plot axis (column 4). This used to
    # target column 3, the numeric CDF, where a category order does nothing.
    # Cutoffs are stored as strings in the frame, so they are stringified
    # here to match the placeholder labels.
    fig.update_xaxes(
        categoryorder="array",
        categoryarray=fake_cutoff_labels + [str(cutoff) for cutoff in rmsre_prediction_cutoffs],
        row=1,
        col=4,
    )

    # Per-panel axis titles, fonts, ranges and scale, taken from each args dict.
    for i in range(cols):
        panel_args = args[i]
        fig.update_xaxes(
            title=panel_args.get("x_axis_title"),
            tickfont=dict(size=axis_tick_font_size.get("x")),
            title_font=dict(size=axis_title_font_size.get("x")),
            showgrid=True,
            range=panel_args.get("x_range"),
            row=1,
            col=i + 1,
        )
        fig.update_yaxes(
            title=panel_args.get("y_axis_title"),
            title_standoff=0,
            tickfont=dict(size=axis_tick_font_size.get("y")),
            title_font=dict(size=axis_title_font_size.get("y")),
            type="log" if panel_args.get("log_y") else "linear",
            range=panel_args.get("y_range"),
            showgrid=True,
            row=1,
            col=i + 1,
            domain=[0, 1],  # Use more space
        )

    fig.update_layout(
        legend={
            "title": None,
            "font": {"size": 20},
            "traceorder": "reversed",
            **legend,
        },
        template=TEMPLATE,
        showlegend=True,
        height=height,
        width=width,
        margin=dict(t=0, b=0),
    )
    fig.write_image(f"{output_path}", engine="kaleido")
    fig.show()



In [167]:
# Horizontal legend placed above the panels.
legend_layout = dict(
    yanchor="bottom",
    y=1.1,
    xanchor="left",
    x=0.2,
    orientation="h",
)

figs_args = dict(
    axis_title_font_size={"x": 18, "y": 18},
    axis_tick_font_size={"x": 14, "y": 14},
    legend=legend_layout,
    output_path="bias_plots.pdf",
    height=300,
    width=1500,
)
# make_plots(
#            [full_cdfs, cut_cdfs_fig, cutoff_boxes_fig, budget_fig], cols=4, **figs_args)

panel_args = [bars_args, cdf_args, cut_cdf_args, boxes_args]
make_bias_plots(panel_args, cols=4, **figs_args)