In [434]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [435]:
import mlflow
import pandas as pd

from cookiemonster.utils import LOGS_PATH

In [436]:
from mlflow.tracking import MlflowClient

# The MLflow store lives in the "mlflow" sub-directory of the project log path.
# The same URI is given to the client object and to the module-level API.
mlflow_store = LOGS_PATH.joinpath("mlflow")
tracking_uri = str(mlflow_store)

client = MlflowClient(tracking_uri=tracking_uri)
mlflow.set_tracking_uri(tracking_uri)

In [437]:

# --- Experiment selection ---------------------------------------------------
# Exactly one `experiment_name` assignment must be active. The commented-out
# names below are earlier experiments, kept together with the notes taken when
# they were analysed.
# NOTE(review): the leading numbers in the notes (45 / 60 / 120) are not
# explained in this notebook -- confirm what they denote before relying on them.

# experiment_name="bias_detection_09-09_11-39"
# experiment_name="bias_detection_09-09_18-06" # Shuffled?
# experiment_name="bias_detection_09-10_11-01"
# experiment_name="bias_detection_09-10_11-14" # 60. Good but CM has small bias.
# experiment_name="bias_detection_09-10_11-27" # 45. Good
# experiment_name="bias_detection_09-10_11-38" # 120 Barely more than IPA (more overshooting?) and starts to go in steady state, crossing ARA. But CM itself is not much better than IPA.
# experiment_name="bias_detection_09-10_16-17" # 120 with more schedules, 200. Just higher error in steady state. Better strategy would be to wait.

# experiment_name="bias_detection_09-11_15-55" # 60, fixed sensitivity.
# experiment_name="bias_detection_09-11_18-17" # 60, 400 queries
# experiment_name="bias_detection_09-11_20-46" # 60, 250 queries

# Currently analysed experiment.
experiment_name="bias_detection_09-12_22-18"


# Criteo-named experiments (the second one is noted as actually being PATCG).
# experiment_name="bias_detection_criteo_09-10_13-46"
# experiment_name="bias_detection_criteo_09-10_15-32" # Actually PATCG. CM stays below 0.05 the whole time. But not too bad? Not much overestimation. Slow exp though.





# Look up every run of the selected experiment and keep only the run ids,
# which are what the MlflowClient calls in the following cells take.
runs = mlflow.search_runs(experiment_names=[experiment_name])
run_ids = list(runs.run_id)
# Display the ids as the cell output.
run_ids

['918eeeaba43c4e76b4639b4bbc3567d4',
 '2d178459ac0e4aae8c0217e0391c36a9',
 '30eb650409fe49b3baeafc1ada4127de',
 'b2b4bd309345494d91d6ddc10ff9eff0',
 'cdcb87a818704fa9b8ca9d0e0c6fa017',
 'f2e8e5e047a44e149aaf01d2f5fe8b3a',
 '68e8b70cd0ae4b56a221c608ec9097a5',
 '919473ee39cc46e3b4f9db2c6462f0dc']

In [438]:
# Build one record per (run, step): the step index, the value of each metric at
# that position in its history, and the run parameters of interest.
metric_names = ["rmsre", "rmsre_prediction"]
param_names = ["user.baseline", "user.bias_detection_knob", "dataset.num_days_per_epoch", "user.is_monotonic_scalar_query"]

data = []
for run_id in run_ids:
    run_params = client.get_run(run_id).data.params
    metric_histories = {
        name: client.get_metric_history(run_id, name) for name in metric_names
    }

    # The first metric drives the number of steps. Metrics with a shorter
    # history are padded with None.
    # NOTE(review): histories are aligned by list position, not by the `step`
    # recorded in MLflow -- confirm both metrics are logged at every step.
    n_steps = len(metric_histories[metric_names[0]])
    for step in range(n_steps):
        step_data = {"step": step}
        for name in metric_names:
            history = metric_histories[name]
            step_data[name] = history[step].value if step < len(history) else None
        step_data.update((name, run_params[name]) for name in param_names)
        data.append(step_data)

df = pd.DataFrame(data)
df

Unnamed: 0,step,rmsre,rmsre_prediction,user.baseline,user.bias_detection_knob,dataset.num_days_per_epoch,user.is_monotonic_scalar_query
0,0,0.015541,,cookiemonster,0,7,True
1,1,0.015557,,cookiemonster,0,7,True
2,2,0.015565,,cookiemonster,0,7,True
3,3,0.015510,,cookiemonster,0,7,True
4,4,0.015628,,cookiemonster,0,7,True
...,...,...,...,...,...,...,...
3195,395,0.351672,0.509952,cookiemonster,2,7,True
3196,396,0.338774,0.540779,cookiemonster,2,7,True
3197,397,0.370116,0.637756,cookiemonster,2,7,True
3198,398,0.372278,0.664601,cookiemonster,2,7,True


In [439]:
# Copy the dotted MLflow parameter columns to short names. The short names are
# the ones used by the filter expressions and plots further down.
for short_name, mlflow_name in (
    ("is_monotonic_scalar_query", "user.is_monotonic_scalar_query"),
    ("bias_detection_knob", "user.bias_detection_knob"),
    ("num_days_per_epoch", "dataset.num_days_per_epoch"),
):
    df[short_name] = df[mlflow_name]

def get_baseline_name(row):
    """Return the baseline label for one row.

    The label is the value of ``user.baseline``. When the row's
    ``user.bias_detection_knob`` is anything other than the string ``"0"``,
    the suffix ``"_bias"`` is appended.
    """
    baseline = row["user.baseline"]
    detection_disabled = row["user.bias_detection_knob"] == "0"
    return baseline if detection_disabled else baseline + "_bias"

# axis=1 passes each row to get_baseline_name, yielding one label per row.
df["baseline"] = df.apply(get_baseline_name, axis=1)



In [440]:
is_monotonic_scalar_query = "True"
bias_detection_knob = "1"

# The parameter columns hold strings, so every comparison is against a string.
# Keep the runs without bias detection ("0") plus the selected knob value,
# restricted to 7-day epochs and the selected query type.
knob_matches = df["bias_detection_knob"].isin(["0", bias_detection_knob])
weekly_epochs = df["num_days_per_epoch"] == "7"
query_type_matches = df["is_monotonic_scalar_query"] == is_monotonic_scalar_query
query_df = df[knob_matches & weekly_epochs & query_type_matches]

In [441]:
# Sanity check: baseline labels present after filtering.
query_df.baseline.unique()

array(['cookiemonster', 'ipa', 'cookiemonster_base', 'cookiemonster_bias'],
      dtype=object)

In [442]:
# Sanity check: only "0" and the selected knob value should remain.
query_df.bias_detection_knob.unique()

array(['0', '1'], dtype=object)

In [443]:
# Steps are 0-based indices, so the query count is the largest step plus one.
last_step = query_df["step"].max()
num_queries = last_step + 1
num_queries

400

In [444]:
from plotting.macros import *
from plotting.plot_template import *


In [445]:
# Keyword arguments for the `cdf` trace builder, applied to the filtered runs.
cdf_args = dict(
    df=query_df,
    metric="rmsre",
    x_axis=None,
    x_axis_title=RMSRE_CDF_X,
    y_axis_title=RMSRE_Y,
    ordering=None,
    log_y=False,
    x_range=[1, 100],
    showlegend=True,
    marker_pos=0.98,
    baselines_order=BIAS_BASELINES_ORDER,
)

In [446]:
# Layout options for `make_plots`. The call itself is currently disabled (see
# the commented line at the end), so this dict is not consumed in this cell.
legend_layout = dict(yanchor="top", y=1.2, xanchor="left", x=0.2, orientation="h")

figs_args = dict(
    axis_title_font_size=dict(x=18, y=18),
    axis_tick_font_size=dict(x=14, y=14),
    legend=legend_layout,
    output_path=f"cdf_rmsre_{experiment_name}.png",
    height=300,
    width=1500,
)
# make_plots([(cdf, cdf_args)], cols=1, **figs_args)


In [447]:
# CDF of the RMSRE metric for the filtered runs: add every trace returned by
# `cdf` to a single figure.
fig = go.Figure()
for trace in cdf(**cdf_args):
    fig.add_trace(trace)
# Title typo fixed ("RMRSRE" -> "RMSRE") so it matches the other figure titles.
fig.update_layout(title=f"CDF of RMSRE. Bias detection knob: {bias_detection_knob}. #queries {num_queries}")

fig

In [448]:
# Default threshold on the predicted RMSRE above which a result is discarded.
rmsre_prediction_cutoff = 0.1

def apply_bias_detection(row, cutoff=None):
    """Return the row's RMSRE, or None when bias detection rejects the query.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Must provide ``"rmsre_prediction"`` and ``"rmsre_original"``.
    cutoff : float, optional
        Rejection threshold. When omitted, the module-level
        ``rmsre_prediction_cutoff`` is read at call time, which keeps
        ``df.apply(apply_bias_detection, axis=1)`` working unchanged. Passing
        it explicitly (``df.apply(apply_bias_detection, axis=1, cutoff=0.2)``)
        avoids depending on whatever value that global currently holds.

    Returns
    -------
    ``None`` if the prediction is strictly greater than the cutoff, otherwise
    ``row["rmsre_original"]``.
    """
    if cutoff is None:
        cutoff = rmsre_prediction_cutoff

    predicted = row["rmsre_prediction"]
    # A missing prediction never triggers rejection: None is falsy, and NaN
    # compares False against any cutoff.
    if predicted and predicted > cutoff:
        return None
    return row["rmsre_original"]

# Work on a fresh frame: keep the untouched metric as `rmsre_original`, blank
# out `rmsre` wherever bias detection rejects the query, then drop the
# prediction column once it has been consumed.
chopped_df = query_df.assign(rmsre_original=query_df["rmsre"])
chopped_df["rmsre"] = chopped_df.apply(apply_bias_detection, axis=1)
chopped_df = chopped_df.drop(columns=["rmsre_prediction"])

In [449]:
# Same CDF as before, but over the bias-filtered frame. The arguments are
# copied with `df` overridden instead of mutating `cdf_args` in place, so
# re-running the unfiltered CDF cell afterwards still plots `query_df`.
chopped_cdf_args = {**cdf_args, "df": chopped_df}

fig = go.Figure()
for trace in cdf(**chopped_cdf_args):
    fig.add_trace(trace)
fig.update_layout(title=f"CDF of RMSRE. Bias detection knob: {bias_detection_knob}. rmsre_prediction_cutoff: {rmsre_prediction_cutoff}. is_monotonic_scalar_query: {is_monotonic_scalar_query}. #queries {num_queries}")

fig

In [450]:
# Rebuild the bias-filtered frame from the full `df`, restricted only by query
# type, so that every knob value is present rather than only the one selected
# for `query_df`.
monotonic_df = df[df["is_monotonic_scalar_query"] == is_monotonic_scalar_query]
chopped_df = monotonic_df.assign(rmsre_original=monotonic_df["rmsre"])
chopped_df["rmsre"] = chopped_df.apply(apply_bias_detection, axis=1)
chopped_df = chopped_df.drop(columns=["rmsre_prediction"])

In [451]:
def fake_detection_knob(row):
    """Return the value to show on the knob axis for one row.

    Rows from the bias-detection baseline keep their knob value (as a string).
    The reference baselines have no meaningful knob, so they are mapped to
    their display constants instead. An earlier version used the numeric
    placeholders 0 / -1 / -2 for them. Any other baseline yields ``None``.
    """
    baseline = row["baseline"]
    if baseline == "cookiemonster_bias":
        return str(row["bias_detection_knob"])

    reference_labels = {
        "cookiemonster": COOKIEMONSTER,
        "cookiemonster_base": COOKIEMONSTER_BASE,
        "ipa": IPA,
    }
    return reference_labels.get(baseline)

# Overwrites the knob column in place: after this line it holds knob strings
# for the bias-detection rows and display labels for the reference baselines.
chopped_df["bias_detection_knob"] = chopped_df.apply(fake_detection_knob, axis=1)

In [452]:
# Inspect the relabelled frame; rejected queries show up as missing `rmsre`.
chopped_df

Unnamed: 0,step,rmsre,user.baseline,user.bias_detection_knob,dataset.num_days_per_epoch,user.is_monotonic_scalar_query,is_monotonic_scalar_query,bias_detection_knob,num_days_per_epoch,baseline,rmsre_original
0,0,0.015541,cookiemonster,0,7,True,True,Cookie Monster,7,cookiemonster,0.015541
1,1,0.015557,cookiemonster,0,7,True,True,Cookie Monster,7,cookiemonster,0.015557
2,2,0.015565,cookiemonster,0,7,True,True,Cookie Monster,7,cookiemonster,0.015565
3,3,0.015510,cookiemonster,0,7,True,True,Cookie Monster,7,cookiemonster,0.015510
4,4,0.015628,cookiemonster,0,7,True,True,Cookie Monster,7,cookiemonster,0.015628
...,...,...,...,...,...,...,...,...,...,...,...
3195,395,,cookiemonster,2,7,True,True,2,7,cookiemonster_bias,0.351672
3196,396,,cookiemonster,2,7,True,True,2,7,cookiemonster_bias,0.338774
3197,397,,cookiemonster,2,7,True,True,2,7,cookiemonster_bias,0.370116
3198,398,,cookiemonster,2,7,True,True,2,7,cookiemonster_bias,0.372278


In [453]:
# Box plot of RMSRE with one group per value of the relabelled knob column.
variable = "bias_detection_knob"
boxes_args = dict(
    df=chopped_df,
    metric="rmsre",
    x_axis=variable,
    # x_axis_title=x_axis_title,
    y_axis_title=RMSRE_Y,
    ordering=(variable, "str"),
    # ordering=None,
    showlegend=True,
    baselines_order=BIAS_BASELINES_ORDER,
    # NOTE(review): `show_nqueries` receives the same list as `baselines_order`;
    # confirm this is the type `boxes` expects.
    show_nqueries=BIAS_BASELINES_ORDER,
    hspace=0,
    vspace=0.05,
    n_total_queries=num_queries,
)

fig = go.Figure()
# fig.update_yaxes(type="log")

for trace in boxes(**boxes_args):
    fig.add_trace(trace)

knob_plot_title = (
    f"Quartiles for RMSRE depending on bias detection knob. "
    f"rmsre_prediction_cutoff: {rmsre_prediction_cutoff}. "
    f"is_monotonic_scalar_query: {is_monotonic_scalar_query}."
)
fig.update_layout(title=knob_plot_title)
fig

In [454]:
# Sweep the rejection threshold: build one frame per reference baseline plus
# one frame per cutoff for the bias-detection baseline, all tagged in the
# `rmsre_prediction_cutoff` column so they can share one box plot.
#
# The sweep uses its own names (`cutoff`, `cutoff_df`) and thresholds with a
# vectorised mask. The notebook-level `rmsre_prediction_cutoff` and
# `chopped_df`, which other cells read for plot titles and plot data, are
# therefore no longer overwritten by this cell.
rmsre_prediction_cutoffs = [0.016, 0.05, 0.1, 0.2, 0.5, 1, 5]

dfs = []

# Reference baselines: metric left untouched, labelled by display name.
for baseline in DEFAULT_BASELINES_ORDER:
    csv_name = csv_mapping[baseline]
    base_df = (
        query_df.query(f"baseline == '{csv_name}'")
        .assign(
            rmsre_original=lambda frame: frame["rmsre"],
            rmsre_prediction_cutoff=baseline,
        )
        .drop(columns=["rmsre_prediction"])
    )
    dfs.append(base_df)

# Bias-detection baseline: the selection does not depend on the cutoff, so it
# is done once outside the loop.
bias_df = query_df.query("baseline == 'cookiemonster_bias'")
# to_numeric turns missing predictions (None) into NaN; NaN > cutoff is False,
# so rows without a prediction are never rejected.
predicted_rmsre = pd.to_numeric(bias_df["rmsre_prediction"])

for cutoff in rmsre_prediction_cutoffs:
    cutoff_df = bias_df.assign(
        rmsre_original=bias_df["rmsre"],
        # Blank out the metric where the prediction strictly exceeds the cutoff.
        rmsre=bias_df["rmsre"].mask(predicted_rmsre > cutoff),
        rmsre_prediction_cutoff=str(cutoff),
    ).drop(columns=["rmsre_prediction"])
    dfs.append(cutoff_df)

cutoffs_df = pd.concat(dfs, ignore_index=True)


In [455]:
# Inspect the combined frame; `rmsre_prediction_cutoff` holds either a
# baseline display name or the cutoff as a string.
cutoffs_df

Unnamed: 0,step,rmsre,user.baseline,user.bias_detection_knob,dataset.num_days_per_epoch,user.is_monotonic_scalar_query,is_monotonic_scalar_query,bias_detection_knob,num_days_per_epoch,baseline,rmsre_original,rmsre_prediction_cutoff
0,0,0.015541,cookiemonster,0,7,True,True,0,7,cookiemonster,0.015541,Cookie Monster
1,1,0.015557,cookiemonster,0,7,True,True,0,7,cookiemonster,0.015557,Cookie Monster
2,2,0.015565,cookiemonster,0,7,True,True,0,7,cookiemonster,0.015565,Cookie Monster
3,3,0.015510,cookiemonster,0,7,True,True,0,7,cookiemonster,0.015510,Cookie Monster
4,4,0.015628,cookiemonster,0,7,True,True,0,7,cookiemonster,0.015628,Cookie Monster
...,...,...,...,...,...,...,...,...,...,...,...,...
3995,395,0.279969,cookiemonster,1,7,True,True,1,7,cookiemonster_bias,0.279969,5
3996,396,0.262152,cookiemonster,1,7,True,True,1,7,cookiemonster_bias,0.262152,5
3997,397,0.278160,cookiemonster,1,7,True,True,1,7,cookiemonster_bias,0.278160,5
3998,398,0.287517,cookiemonster,1,7,True,True,1,7,cookiemonster_bias,0.287517,5


In [456]:
# Box plot of RMSRE with one group per cutoff label (reference baselines
# first, then each threshold), in the order the rows were concatenated.
variable = "rmsre_prediction_cutoff"
boxes_args = dict(
    df=cutoffs_df,
    metric="rmsre",
    x_axis=variable,
    # x_axis_title=x_axis_title,
    y_axis_title=RMSRE_Y,
    # ordering=(variable, "str"),
    ordering=None,
    showlegend=True,
    baselines_order=BIAS_BASELINES_ORDER,
    show_nqueries=BIAS_BASELINES_ORDER,
    hspace=0,
    vspace=0.05,
    n_total_queries=num_queries,
)

fig = go.Figure()
# fig.update_yaxes(type="log")

for trace in boxes(**boxes_args):
    fig.add_trace(trace)

cutoff_plot_title = (
    f"Quartiles for RMSRE depending on rmsre_prediction_cutoff. "
    f"bias detection knob: {bias_detection_knob}. "
    f"is_monotonic_scalar_query: {is_monotonic_scalar_query}."
)
fig.update_layout(title=cutoff_plot_title)
fig

In [457]:

# How is this metric even defined for IPA? Avg across epochs, and some early epochs are not exhausted?
# df.groupby(["baseline", "bias_detection_knob"]).avg_budget.max()

# One record per run: the run parameters plus the first logged value of each
# run-level metric.
# NOTE(review): this rebinds `data` and `df`, replacing the per-step frame
# built earlier; the cells above cannot be re-run after this one without
# rebuilding it.
data = []
global_metric_names = ["avg_budget"]
param_names = ["user.baseline", "user.bias_detection_knob", "dataset.num_days_per_epoch", "user.is_monotonic_scalar_query"]

for run_id in run_ids:
    run_params = client.get_run(run_id).data.params
    datapoint = {name: run_params[name] for name in param_names}
    datapoint.update(
        (name, client.get_metric_history(run_id, name)[0].value)
        for name in global_metric_names
    )
    data.append(datapoint)

df = pd.DataFrame(data)
df
    


Unnamed: 0,user.baseline,user.bias_detection_knob,dataset.num_days_per_epoch,user.is_monotonic_scalar_query,avg_budget
0,cookiemonster,0.0,7,True,0.357483
1,ipa,0.0,7,True,0.967086
2,cookiemonster_base,0.0,7,True,0.564601
3,cookiemonster,0.1,7,True,0.37668
4,cookiemonster,1.0,7,True,0.49436
5,cookiemonster,0.2,7,True,0.384054
6,cookiemonster,0.5,7,True,0.427219
7,cookiemonster,2.0,7,True,0.615387
