In [16]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [17]:
# Imports and Plotly configuration for the whole notebook.
import pandas as pd
import plotly.io as pio
# Use the "notebook" renderer by default when Plotly figures are displayed.
pio.renderers.default = "notebook"
import plotly.offline as pyo
# Initialise Plotly's notebook integration.
# NOTE(review): connected=True presumably loads plotly.js from a CDN rather than
# embedding it in the notebook — confirm against the Plotly docs.
pyo.init_notebook_mode(connected=True)
# Helpers that load the experiment results and build the figures used below.
from utils import analyze_results, plot_cdf_accuracy, plot_null_reports_analysis

# Thoughts on driving forces behind IPA, User-Epoch ARA, and Cookie Monster

## System Differences
1. IPA runs out of budget quickly. Each advertiser gets a single budget shared across all of their queries, and pays the query budget in each epoch every time a query is run. So, all else fixed, a lot of queries on the same set of epochs will result in insufficient budget.
2. Each advertiser query gets its own per-epoch budget in User-Epoch ARA. User-Epoch ARA pays the query-epsilon amount for each epoch that it scans. So, it lasts longer than IPA.
3. Each advertiser query gets its own per-epoch budget in Cookie Monster. However, unlike User-Epoch ARA, Cookie Monster only pays the query-epsilon amount in an epoch if a relevant impression is found within that epoch. So, its block budgets last longer than User-Epoch ARA's.

## Toggles to exploit differences
1. Running lots of queries is going to show IPA run out of budget fast. We don't have to be clever about how we do this -- with the Criteo dataset, it happens regardless of the evaluation strategy.
2. Forcing User-Epoch ARA to scan a lot of epochs will force it to run out of budget faster than Cookie Monster. So, for users who convert more than once, a mixture of reducing epoch size and increasing attribution window size should result in Cookie Monster performing better. Specifically, we'd want to run three evaluations: fix epoch size and vary attribution window size; vary epoch size and fix attribution window size; and vary epoch size and vary attribution window size. That said, the Criteo data comes with attribution window already fixed to 30 days. So, we only have epoch size to play with.

## How to exploit these differences
Initially, we thought simply taking the n largest advertisers (advertisers with the most conversions, and thus the biggest query pool size) would be a good set. However, in the dataset we have, we cap an individual user's contribution per query to 1, and the share of users who convert multiple times on a given advertiser is extremely low. Define the multi-conversion rate as `(number of users who convert more than once for an advertiser) / (number of users who convert for an advertiser)`. The highest multi-conversion rate we have is 8.2877%. The largest advertisers did not necessarily yield the best set of multi-conversion users.

So, now, we pivot to experimenting on the n advertisers with a decently large query pool and a high multi-conversion rate. This should increase the chances that a user runs out of budget for a particular epoch: they convert more than once for an advertiser and, with an appropriately sized epsilon and epoch budget, the multiple conversions can yield insufficient budgets.

# Varying Epoch Sizes

In [18]:
# Load the results of the varying-epoch-size experiment.
# NOTE(review): the meaning of "bias" and parallelize=False is defined by
# analyze_results in utils — confirm there before changing either argument.
path = "ray/criteo/bias_varying_epoch_size"
results = analyze_results(path, "bias", parallelize=False)
# Preview up to the first 20 rows of the loaded frame.
results.head(20)

Unnamed: 0,destination,workload_size,requested_workload_size,fraction_queries_without_null_reports,null_report_bias_average_relative_accuracy,fraction_queries_relatively_accurate_e2e,e2e_bias_average_relative_accuracy,e2e_rmsre_accuracy,baseline,num_days_per_epoch,initial_budget,e2e_bias_relative_accuracies,null_report_bias_relative_accuracies,rmsre_accuracies,num_days_attribution_window
0,319A2412BDB0EF669733053640B80112,100,100,0.03,0.03,0.03,0.029755,0.029168,ipa,1,1.0,"[0.9894225434154292, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[0.9648367282611808, 0, 0, 0, 0, 0, 0, 0, 0, 0...",30
1,9D9E93D1D461D7BAE47FB67EC0E01B62,100,100,0.03,0.03,0.03,0.02986,0.029544,ipa,1,1.0,"[0.998273202493338, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[0.9819357309890772, 0, 0, 0, 0, 0, 0, 0, 0, 0...",30
2,9FF550C0B17A3C493378CB6E2DEEE6E4,100,100,0.03,0.03,0.03,0.029242,0.029461,ipa,1,1.0,"[0.9543052624328366, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[0.9811213582057979, 0, 0, 0, 0, 0, 0, 0, 0, 0...",30
3,F122B91F6D102E4630817566839A4F1F,100,100,0.03,0.03,0.03,0.029646,0.029489,ipa,1,1.0,"[0.9795314504454479, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[0.9810955324304296, 0, 0, 0, 0, 0, 0, 0, 0, 0...",30
0,319A2412BDB0EF669733053640B80112,100,100,0.0,0.569333,0.02,0.572723,0.568309,user_epoch_ara,1,1.0,"[0.5070163032626382, 0.5573698384139916, 0.548...","[0.4783715012722646, 0.5582959641255605, 0.572...","[0.4771876566450166, 0.5572105404417251, 0.571...",30
1,9D9E93D1D461D7BAE47FB67EC0E01B62,100,100,0.02,0.956351,0.62,0.957917,0.953329,user_epoch_ara,1,1.0,"[0.9859594122454542, 0.9901542650713051, 0.955...","[1.0, 1.0, 0.9951219512195122, 0.9941037735849...","[0.9819357309890772, 0.9833904257291395, 0.982...",30
2,9FF550C0B17A3C493378CB6E2DEEE6E4,100,100,0.03,0.980143,0.92,0.978057,0.972425,user_epoch_ara,1,1.0,"[0.9824948390023155, 0.9899614760597881, 0.979...","[1.0, 0.993734335839599, 0.9883268482490273, 0...","[0.9811213582057979, 0.9815840909085226, 0.978...",30
3,F122B91F6D102E4630817566839A4F1F,100,100,0.02,0.952325,0.43,0.950367,0.948374,user_epoch_ara,1,1.0,"[0.9955471725996532, 0.9580175275159448, 0.980...","[1.0, 1.0, 0.9857142857142858, 0.9983249581239...","[0.9810955324304296, 0.9800012072455051, 0.977...",30
0,319A2412BDB0EF669733053640B80112,100,100,0.0,0.566182,0.01,0.568469,0.565198,user_epoch_ara,7,1.0,"[0.4583676248753358, 0.6127843702604009, 0.599...","[0.4783715012722646, 0.5582959641255605, 0.572...","[0.4771876566450166, 0.5572105404417251, 0.571...",30
1,9D9E93D1D461D7BAE47FB67EC0E01B62,100,100,0.02,0.954286,0.57,0.955635,0.951347,user_epoch_ara,7,1.0,"[0.9929095447920452, 0.9872471371133565, 0.995...","[1.0, 1.0, 0.9951219512195122, 0.9941037735849...","[0.9819357309890772, 0.9833904257291395, 0.982...",30


In [19]:
# Copy of `results` in which `destination` holds only the first five characters
# of the original ID; these short labels are what the per-destination plots
# below list in `category_orders`.
#
# Built as one chained expression so the cell is idempotent (no in-place
# mutation), with a vectorized string slice instead of a row-wise apply.
# The resulting columns and their order are the same as before: the shortened
# `destination` ends up as the last column.
destination_abb = (
    results
    .assign(destination_abb=results["destination"].str[:5])
    .drop(columns=["destination"])
    .rename(columns={"destination_abb": "destination"})
)

In [20]:

# For every epoch size present in the results, build the figure returned by
# plot_cdf_accuracy over all destinations together and save it as a PNG.
for epoch_size in results["num_days_per_epoch"].unique():
    print(epoch_size)
    image_path = f"criteo/e2e_rmsre_rel_acc_epoch_{epoch_size}.png"
    fig = plot_cdf_accuracy(
        df=results,
        by_destination=False,
        epoch_size=epoch_size,
        log_y=False,
    )
    fig.write_image(image_path)

1


7


In [21]:
# Fixed ordering of the abbreviated destination labels, passed to
# plot_cdf_accuracy as `category_orders`.
destination_order = {"destination": ["319A2", "F122B", "9D9E9", "9FF55"]}

# Same sweep over epoch sizes as the pooled plot, but broken out by destination
# and using the frame with abbreviated destination IDs.
for epoch_size in destination_abb["num_days_per_epoch"].unique():
    print(epoch_size)
    image_path = f"criteo/e2e_rmsre_dest_rel_acc_epoch_{epoch_size}.png"
    fig = plot_cdf_accuracy(
        df=destination_abb,
        by_destination=True,
        epoch_size=epoch_size,
        log_y=False,
        category_orders=destination_order,
    )
    fig.write_image(image_path)

1


7


### Results
Turns out, varying epoch size does not impact accuracy all that much.


# Varying Number of Queries

In [22]:
# Load the results of the varying-number-of-queries experiment.
# This rebinds `path` and `results`, replacing the epoch-size data loaded earlier.
path = "ray/criteo/bias_varying_num_queries"
# NOTE(review): `t` is forwarded to analyze_results; presumably it is the
# threshold used to count a query as "relatively accurate" — confirm in utils.
# The epoch-size experiment above does not pass `t`, so it uses whatever
# default analyze_results defines; verify the two sections are comparable.
t = .90
results = analyze_results(path, "bias", parallelize=False, t=t)
# Preview up to the first 20 rows of the loaded frame.
results.head(20)

Unnamed: 0,destination,workload_size,requested_workload_size,fraction_queries_without_null_reports,null_report_bias_average_relative_accuracy,fraction_queries_relatively_accurate_e2e,e2e_bias_average_relative_accuracy,e2e_rmsre_accuracy,baseline,num_days_per_epoch,initial_budget,e2e_bias_relative_accuracies,null_report_bias_relative_accuracies,rmsre_accuracies,num_days_attribution_window
0,319A2412BDB0EF669733053640B80112,375,375,0.0,0.156188,0.0,0.156435,0.155847,user_epoch_ara,7,1.0,"[0.24461792960660012, 0.15984102003711842, 0.0...","[0.1968911917098446, 0.16455696202531644, 0.13...","[0.19609362124314733, 0.16382475759653858, 0.1...",30
1,9D9E93D1D461D7BAE47FB67EC0E01B62,375,375,0.0,0.305049,0.0,0.305246,0.304903,user_epoch_ara,7,1.0,"[0.2685738508013813, 0.27723426893839065, 0.25...","[0.29281045751633983, 0.27176781002638517, 0.2...","[0.29257978060197665, 0.2715396406558336, 0.25...",30
2,9FF550C0B17A3C493378CB6E2DEEE6E4,375,375,0.0,0.27151,0.0,0.273026,0.271287,user_epoch_ara,7,1.0,"[0.2750243455070117, 0.19963425092160525, 0.20...","[0.274798927613941, 0.1994535519125683, 0.2102...","[0.2745623755502844, 0.19923098295363806, 0.20...",30
3,F122B91F6D102E4630817566839A4F1F,375,375,0.0,0.282076,0.0,0.282543,0.281872,user_epoch_ara,7,1.0,"[0.27863701696655296, 0.32813146418886885, 0.2...","[0.2703101920236337, 0.2885431400282885, 0.238...","[0.27002474001568055, 0.2882746894569618, 0.23...",30
0,319A2412BDB0EF669733053640B80112,135,135,0.0,0.461663,0.0,0.465091,0.461132,cookiemonster,7,1.0,"[0.3729608070765298, 0.46254506645527316, 0.44...","[0.40458015267175573, 0.45964125560538116, 0.4...","[0.40354275067548406, 0.4587536394285223, 0.44...",30
1,9D9E93D1D461D7BAE47FB67EC0E01B62,135,135,0.0,0.8848,0.57037,0.883153,0.880997,cookiemonster,7,1.0,"[0.8393926412124645, 0.823789986930529, 0.5212...","[0.8326797385620915, 0.828125, 0.5181818181818...","[0.831707434202966, 0.8273243109686276, 0.5177...",30
2,9FF550C0B17A3C493378CB6E2DEEE6E4,135,135,0.125926,0.750074,0.42963,0.743822,0.743514,cookiemonster,7,1.0,"[0.6612023029872252, 0.48020968691071453, 0.59...","[0.657103825136612, 0.4654654654654654, 0.5927...","[0.6565845229871884, 0.46506289128055944, 0.59...",30
3,F122B91F6D102E4630817566839A4F1F,135,135,0.022222,0.799952,0.385185,0.796553,0.79516,cookiemonster,7,1.0,"[0.7627356186502293, 0.721118953216062, 0.5091...","[0.7578659370725034, 0.7510853835021708, 0.517...","[0.757129080942436, 0.7502832844643783, 0.5172...",30
0,319A2412BDB0EF669733053640B80112,75,75,0.0,0.757105,0.28,0.757352,0.755173,user_epoch_ara,7,1.0,"[0.48269336942255947, 0.5637765956969145, 0.57...","[0.4783715012722646, 0.5582959641255605, 0.572...","[0.4771876566450166, 0.5572105404417251, 0.571...",30
1,9D9E93D1D461D7BAE47FB67EC0E01B62,75,75,0.026667,0.958032,1.0,0.958501,0.954712,user_epoch_ara,7,1.0,"[0.9888609551728, 0.976051440131495, 0.9809849...","[1.0, 1.0, 0.9951219512195122, 0.9941037735849...","[0.9819357309890772, 0.9833904257291395, 0.982...",30


In [23]:
# Copy of `results` in which `destination` holds only the first five characters
# of the original ID; these short labels are what the per-destination plots
# below list in `category_orders`.
#
# Built as one chained expression so the cell is idempotent (no in-place
# mutation), with a vectorized string slice instead of a row-wise apply.
# The resulting columns and their order are the same as before: the shortened
# `destination` ends up as the last column.
destination_abb = (
    results
    .assign(destination_abb=results["destination"].str[:5])
    .drop(columns=["destination"])
    .rename(columns={"destination_abb": "destination"})
)

In [24]:

# For every requested workload size present in the results, build the figure
# returned by plot_cdf_accuracy over all destinations together and save it as a PNG.
for workload_size in results["requested_workload_size"].unique():
    print(workload_size)
    image_path = f"criteo/e2e_rmsre_rel_acc_ws_{workload_size}.png"
    fig = plot_cdf_accuracy(
        df=results,
        by_destination=False,
        workload_size=workload_size,
        log_y=False,
    )
    fig.write_image(image_path)

375


135


75


450


300


225


In [25]:
# Fixed ordering of the abbreviated destination labels, passed to
# plot_cdf_accuracy as `category_orders`.
destination_order = {"destination": ["319A2", "F122B", "9D9E9", "9FF55"]}

# Same sweep over workload sizes as the pooled plot, but broken out by
# destination and using the frame with abbreviated destination IDs.
for workload_size in destination_abb["requested_workload_size"].unique():
    print(workload_size)
    image_path = f"criteo/e2e_rmsre_dest_rel_acc_ws_{workload_size}.png"
    fig = plot_cdf_accuracy(
        df=destination_abb,
        by_destination=True,
        workload_size=workload_size,
        log_y=False,
        category_orders=destination_order,
    )
    fig.write_image(image_path)

375


135


75


450


300


225


In [26]:
# Run the null-reports analysis on the abbreviated-destination frame. In notebook
# order, `destination_abb` at this point was built from the
# varying-number-of-queries results.
# NOTE(review): save_dir is presumably the directory the helper writes its
# figures to — confirm in utils.
plot_null_reports_analysis(df=destination_abb, save_dir="criteo")