In [421]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [422]:
import mlflow
import pandas as pd

from cookiemonster.utils import LOGS_PATH

In [423]:
from mlflow.tracking import MlflowClient

# Use the "mlflow" entry under LOGS_PATH as the tracking location for both the
# module-level mlflow API and an explicit client instance.
tracking_uri = str(LOGS_PATH.joinpath("mlflow"))
mlflow.set_tracking_uri(tracking_uri)

client = MlflowClient(tracking_uri=tracking_uri)

In [424]:
# Experiment selection: exactly one `experiment_name` assignment is active.
# The commented-out names are alternative experiments with their notes.

# experiment_name="bias_detection_09-09_11-39"
# experiment_name="bias_detection_09-09_18-06" # Shuffled?
# experiment_name="bias_detection_09-10_11-01"
# experiment_name="bias_detection_09-10_11-14" # 60. Good but CM has small bias.
# experiment_name="bias_detection_09-10_11-27" # 45. Good
# experiment_name="bias_detection_09-10_11-38" # 120 Barely more than IPA (more overshooting?) and starts to go in steady state, crossing ARA. But CM itself is not much better than IPA.
# experiment_name="bias_detection_09-10_16-17" # 120 with more schedules, 200. Just higher error in steady state. Better strategy would be to wait.

# experiment_name="bias_detection_09-11_15-55" # 60, fixed sensitivity.
experiment_name = "bias_detection_09-11_18-17"  # 60, 400 queries
# experiment_name="bias_detection_09-11_20-46" # 60, 250 queries

# experiment_name="bias_detection_criteo_09-10_13-46"
# experiment_name="bias_detection_criteo_09-10_15-32" # Actually PATCG. CM stays below 0.05 the whole time. But not too bad? Not much overestimation. Slow exp though.

# Fetch the runs recorded under the selected experiment and keep their ids.
runs = mlflow.search_runs(experiment_names=[experiment_name])
run_ids = runs["run_id"].tolist()
run_ids

['a10e016bd9e34e26aa7771554cbd1207',
 '800f22cafebe4b16ab914cbe5a50b46e',
 'e194b391dbbb4b62b0a60a74128a9e8a',
 'a3d588e4cb3e49c6905df50a0d3d161c',
 '4c0f4ffe1d2244529ebac993ea3262eb',
 '389b9bf29ecf4fd8969038428328688f',
 '410fe8ddb6394067b626ff83a28f0e4b',
 'c811c5cf70024edbbfb786a1df386701',
 '0cef22ec4d974d16ae5043f5e772b58a',
 'a7230e3a60ea4698b5ad63db0b1da057',
 '902eff92349940ca96dc634b622e11f6',
 '3c3f697724ec47fd9182e79891488a63']

In [425]:
# Flatten the MLflow metric histories into one record per (run, step), with the
# selected run parameters repeated on every record.
metric_names = ["rmsre", "rmsre_prediction"]
param_names = [
    "user.baseline",
    "user.bias_detection_knob",
    "dataset.num_days_per_epoch",
    "user.is_monotonic_scalar_query",
]
data = []

for run_id in run_ids:
    run_params = client.get_run(run_id).data.params
    metric_histories = {
        name: client.get_metric_history(run_id, name) for name in metric_names
    }

    # The first metric decides how many records the run contributes. `step` is
    # the position in the returned history list, not a logged step attribute.
    num_steps = len(metric_histories[metric_names[0]])
    for step in range(num_steps):
        step_data = {"step": step}

        # A metric whose history is shorter than `step` is recorded as None.
        for metric_name in metric_names:
            history = metric_histories[metric_name]
            step_data[metric_name] = history[step].value if step < len(history) else None

        for param_name in param_names:
            step_data[param_name] = run_params[param_name]

        data.append(step_data)

df = pd.DataFrame(data)
df

Unnamed: 0,step,rmsre,rmsre_prediction,user.baseline,user.bias_detection_knob,dataset.num_days_per_epoch,user.is_monotonic_scalar_query
0,0,0.015557,,cookiemonster_base,0,7,True
1,1,0.015541,,cookiemonster_base,0,7,True
2,2,0.015541,,cookiemonster_base,0,7,True
3,3,0.015533,,cookiemonster_base,0,7,True
4,4,0.015604,,cookiemonster_base,0,7,True
...,...,...,...,...,...,...,...
4795,395,0.231644,1.443730,cookiemonster,0.5,7,False
4796,396,0.229040,1.580992,cookiemonster,0.5,7,False
4797,397,0.241727,1.548726,cookiemonster,0.5,7,False
4798,398,0.238767,1.344885,cookiemonster,0.5,7,False


In [426]:
# Copy the dotted MLflow parameter columns to short names; the short names are
# the ones the later `df.query` filters refer to.
for short_name, mlflow_name in [
    ("is_monotonic_scalar_query", "user.is_monotonic_scalar_query"),
    ("bias_detection_knob", "user.bias_detection_knob"),
    ("num_days_per_epoch", "dataset.num_days_per_epoch"),
]:
    df[short_name] = df[mlflow_name]

def get_baseline_name(row):
    """Return the baseline label for one row.

    A row whose ``user.bias_detection_knob`` equals the string ``"0"`` keeps
    its ``user.baseline`` value unchanged; any other row gets that value with
    ``"_bias"`` appended.
    """
    knob_is_off = row["user.bias_detection_knob"] == "0"
    base = row["user.baseline"]
    return base if knob_is_off else base + "_bias"

# Derive the `baseline` column by applying get_baseline_name to every row.
df["baseline"] = df.apply(get_baseline_name, axis=1)



In [427]:
# Filter settings. All values are compared as strings inside the query below.
is_monotonic_scalar_query = "False"
bias_detection_knob = "1"

# Keep rows with knob "0" or the selected knob, 7 days per epoch, and the
# selected monotonic-query flag.
row_filter = (
    f"(bias_detection_knob == '0' or bias_detection_knob == '{bias_detection_knob}')"
    " and num_days_per_epoch == '7'"
    f" and is_monotonic_scalar_query == '{is_monotonic_scalar_query}'"
)
query_df = df.query(row_filter)

In [428]:
# Display the distinct baseline labels present in the filtered frame.
query_df.baseline.unique()

array(['cookiemonster', 'cookiemonster_base', 'ipa', 'cookiemonster_bias'],
      dtype=object)

In [429]:
# Display the distinct knob values present in the filtered frame.
query_df.bias_detection_knob.unique()

array(['0', '1'], dtype=object)

In [430]:
# Steps are numbered from 0, so the largest step plus one is the query count.
num_queries = query_df["step"].max() + 1
num_queries

400

In [431]:
from plotting.macros import *
from plotting.plot_template import *


In [432]:
# Keyword arguments passed to `cdf` to plot the "rmsre" metric of the filtered
# frame.
cdf_args = dict(
    df=query_df,
    metric="rmsre",
    x_axis=None,
    x_axis_title=RMSRE_CDF_X,
    y_axis_title=RMSRE_Y,
    ordering=None,
    log_y=False,
    x_range=[1, 100],
    showlegend=True,
    marker_pos=0.98,
    baselines_order=BIAS_BASELINES_ORDER,
)

In [433]:
# Layout options for `make_plots`; the call itself is currently commented out.
figs_args = dict(
    axis_title_font_size=dict(x=18, y=18),
    axis_tick_font_size=dict(x=14, y=14),
    legend=dict(
        yanchor="top",
        y=1.2,
        xanchor="left",
        x=0.2,
        orientation="h",
    ),
    output_path=f"cdf_rmsre_{experiment_name}.png",
    height=300,
    width=1500,
)
# make_plots([(cdf, cdf_args)], cols=1, **figs_args)


In [434]:
# Build the RMSRE CDF figure: `cdf` yields the traces, each of which is added to
# a fresh figure.
fig = go.Figure()
for trace in cdf(**cdf_args):
    fig.add_trace(trace)

# Title spelling fixed ("RMRSRE" -> "RMSRE") to match the metric name and the
# title of the later chopped-data figure.
fig.update_layout(title=f"CDF of RMSRE. Bias detection knob: {bias_detection_knob}. #queries {num_queries}")

fig

In [435]:
rmsre_prediction_cutoff = 0.2

def apply_bias_detection(row, cutoff=None):
    """Return the row's original RMSRE, or None when its prediction is too high.

    Args:
        row: Mapping-like row providing ``rmsre_prediction`` and
            ``rmsre_original``.
        cutoff: Threshold for ``rmsre_prediction``. When omitted, the
            module-level ``rmsre_prediction_cutoff`` is read at call time, so
            existing ``df.apply(apply_bias_detection, axis=1)`` calls behave
            as before.

    Returns:
        None when a prediction is present and strictly greater than the
        cutoff; otherwise ``row["rmsre_original"]``.
    """
    if cutoff is None:
        cutoff = rmsre_prediction_cutoff

    predicted = row["rmsre_prediction"]
    # Explicit missing-value check (None / NaN / pd.NA) instead of truthiness,
    # so a prediction of exactly 0.0 is not mistaken for "no prediction".
    if pd.notna(predicted) and predicted > cutoff:
        return None
    return row["rmsre_original"]

# Work on a copy of the filtered frame: keep the raw metric as `rmsre_original`,
# replace `rmsre` with the output of apply_bias_detection, then drop the
# prediction column.
chopped_df = query_df.assign(rmsre_original=query_df["rmsre"])
chopped_df["rmsre"] = chopped_df.apply(apply_bias_detection, axis=1)
chopped_df = chopped_df.drop(columns=["rmsre_prediction"])

In [436]:
# Re-plot the CDF on the chopped frame by swapping the frame in the shared
# argument dict (note: this mutates `cdf_args`).
cdf_args["df"] = chopped_df

chopped_title = (
    f"CDF of RMSRE. Bias detection knob: {bias_detection_knob}. "
    f"rmsre_prediction_cutoff: {rmsre_prediction_cutoff}. "
    f"is_monotonic_scalar_query: {is_monotonic_scalar_query}. "
    f"#queries {num_queries}"
)

fig = go.Figure()
for chopped_trace in cdf(**cdf_args):
    fig.add_trace(chopped_trace)
fig.update_layout(title=chopped_title)

fig

In [437]:
# Same chopping as before, but starting from the full frame filtered only on the
# monotonic-query flag.
monotonic_rows = df.query(f"is_monotonic_scalar_query == '{is_monotonic_scalar_query}'")
chopped_df = monotonic_rows.assign(rmsre_original=monotonic_rows["rmsre"])
chopped_df["rmsre"] = chopped_df.apply(apply_bias_detection, axis=1)
chopped_df = chopped_df.drop(columns=["rmsre_prediction"])

In [438]:
def fake_detection_knob(row):
    """Return the value to store in ``bias_detection_knob`` for one row.

    ``cookiemonster_bias`` rows keep their knob value, converted to ``str``.
    The ``cookiemonster``, ``cookiemonster_base`` and ``ipa`` baselines are
    replaced by the ``COOKIEMONSTER``, ``COOKIEMONSTER_BASE`` and ``IPA``
    constants respectively. Any other baseline yields ``None``.
    """
    baseline = row["baseline"]

    if baseline == "cookiemonster_bias":
        label = str(row["bias_detection_knob"])
    elif baseline == "cookiemonster":
        label = COOKIEMONSTER
    elif baseline == "cookiemonster_base":
        label = COOKIEMONSTER_BASE
    elif baseline == "ipa":
        label = IPA
    else:
        label = None

    return label

# Overwrite the knob column with the per-row label from fake_detection_knob.
chopped_df["bias_detection_knob"] = chopped_df.apply(fake_detection_knob, axis=1)

In [439]:
# Display the relabelled frame for inspection.
chopped_df

Unnamed: 0,step,rmsre,user.baseline,user.bias_detection_knob,dataset.num_days_per_epoch,user.is_monotonic_scalar_query,is_monotonic_scalar_query,bias_detection_knob,num_days_per_epoch,baseline,rmsre_original
400,0,0.015557,cookiemonster,0,7,False,False,Cookie Monster,7,cookiemonster,0.015557
401,1,0.015541,cookiemonster,0,7,False,False,Cookie Monster,7,cookiemonster,0.015541
402,2,0.015541,cookiemonster,0,7,False,False,Cookie Monster,7,cookiemonster,0.015541
403,3,0.015533,cookiemonster,0,7,False,False,Cookie Monster,7,cookiemonster,0.015533
404,4,0.015604,cookiemonster,0,7,False,False,Cookie Monster,7,cookiemonster,0.015604
...,...,...,...,...,...,...,...,...,...,...,...
4795,395,,cookiemonster,0.5,7,False,False,0.5,7,cookiemonster_bias,0.231644
4796,396,,cookiemonster,0.5,7,False,False,0.5,7,cookiemonster_bias,0.229040
4797,397,,cookiemonster,0.5,7,False,False,0.5,7,cookiemonster_bias,0.241727
4798,398,,cookiemonster,0.5,7,False,False,0.5,7,cookiemonster_bias,0.238767


In [440]:
# Box plot of the "rmsre" metric with the relabelled knob column on the x axis.
variable = "bias_detection_knob"
boxes_args = dict(
    df=chopped_df,
    metric="rmsre",
    x_axis=variable,
    # x_axis_title=x_axis_title,
    y_axis_title=RMSRE_Y,
    ordering=(variable, "str"),
    # ordering=None,
    showlegend=True,
    baselines_order=BIAS_BASELINES_ORDER,
)

fig = go.Figure()
# fig.update_yaxes(type="log")
for box_trace in boxes(**boxes_args):
    fig.add_trace(box_trace)
fig