In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import plotly.io as pio
pio.renderers.default = "notebook"
import plotly.offline as pyo
import plotly.express as px
pyo.init_notebook_mode(connected=True)
from utils import analyze_results, plot_null_reports_analysis

In [7]:
# conversions = pd.read_csv("../data/criteo/archive/criteo_query_pool_large_conversions.csv")
# advertiser_query_counts = (conversions[["partner_id", "key"]]
#     .drop_duplicates()
#     .groupby(["partner_id"])
#     .key
#     .count()
#     .rename_axis("destination"))
# advertiser_query_counts = advertiser_query_counts.reset_index()

In [6]:
path = "ray/criteo/large/bias_varying_workload_size"
t = .90
results = analyze_results(path, "bias", parallelize=False, t=t)


0 sum for ('319A2412BDB0EF669733053640B80112',), __116
0 sum for ('319A2412BDB0EF669733053640B80112',), __290
0 sum for ('9D9E93D1D461D7BAE47FB67EC0E01B62',), __118
0 sum for ('9D9E93D1D461D7BAE47FB67EC0E01B62',), __228
0 sum for ('9FF550C0B17A3C493378CB6E2DEEE6E4',), __185
0 sum for ('9FF550C0B17A3C493378CB6E2DEEE6E4',), __42
0 sum for ('9FF550C0B17A3C493378CB6E2DEEE6E4',), __231
0 sum for ('E3DDEB04F8AFF944B11943BB57D2F620',), __162
0 sum for ('E3DDEB04F8AFF944B11943BB57D2F620',), __163
0 sum for ('E3DDEB04F8AFF944B11943BB57D2F620',), __164
0 sum for ('E3DDEB04F8AFF944B11943BB57D2F620',), __165
0 sum for ('319A2412BDB0EF669733053640B80112',), __116
0 sum for ('9D9E93D1D461D7BAE47FB67EC0E01B62',), __118
0 sum for ('9FF550C0B17A3C493378CB6E2DEEE6E4',), __185
0 sum for ('E3DDEB04F8AFF944B11943BB57D2F620',), __163
0 sum for ('319A2412BDB0EF669733053640B80112',), __116
0 sum for ('319A2412BDB0EF669733053640B80112',), __290
0 sum for ('9D9E93D1D461D7BAE47FB67EC0E01B62',), __118
0 sum for (

In [None]:
# results = pd.merge(advertiser_query_counts, results, how="inner", on="destination")
# results = results.loc[(results.requested_workload_size - results.key < 5)] # distance between buckets
# results

In [8]:
requested_workload_sizes = results.requested_workload_size.unique()
advertisers = results.destination.unique()
baselines = results.baseline.unique()
attribution_windows = results.num_days_attribution_window.unique()

records = []
for requested_workload_size in requested_workload_sizes:
    for advertiser in advertisers:
        for baseline in baselines:
            for attribution_window in attribution_windows:
                section = results[
                    (results.baseline == baseline) &
                    (results.destination == advertiser) &
                    (results.requested_workload_size == requested_workload_size) &
                    (results.num_days_attribution_window == attribution_window)
                ]
                for _, row in section.iterrows():
                    accuracies = zip(row.e2e_bias_relative_accuracies, row.null_report_bias_relative_accuracies)
                    for i, dimension in enumerate(accuracies):
                        records.append({
                            "e2e_bias_accuracy": dimension[0],
                            "null_report_bias_accuracy": dimension[1],
                            "requested_workload_size": requested_workload_size,
                            "advertiser": advertiser,
                            "baseline": baseline,
                            "attribution_window": attribution_window,
                        })
p = pd.DataFrame.from_records(records)

In [28]:
workload_size = 320
attribution_window = 90
ds = p.loc[(p.requested_workload_size == workload_size) & (p.e2e_bias_accuracy >= 0) & (p.attribution_window == attribution_window)]
ds.head()

Unnamed: 0,e2e_bias_accuracy,null_report_bias_accuracy,requested_workload_size,advertiser,baseline,attribution_window
318,0.31715,0.282383,320,319A2412BDB0EF669733053640B80112,cookiemonster,90
319,0.216845,0.236641,320,319A2412BDB0EF669733053640B80112,cookiemonster,90
320,0.151697,0.141256,320,319A2412BDB0EF669733053640B80112,cookiemonster,90
321,0.204232,0.20082,320,319A2412BDB0EF669733053640B80112,cookiemonster,90
322,0.174216,0.1939,320,319A2412BDB0EF669733053640B80112,cookiemonster,90


## Thoughts on driving forces behind IPA, User-Epoch ARA and Cookie Monster

### System Differences
1. IPA really suffers quickly. Each advertiser gets a budget across all their queries. It pays query budget in each epoch each time a query is run. So, all else fixed, a lot of queries on the same set of epochs will result in insufficient budget.
2. Each advertiser query gets their own per-epoch budget in User-Epoch ARA. User-Epoch ARA pays query-epsilon amount for each epoch that is scans. So, it lasts longer than IPA.
3. Each advertiser query gets their own per-epoch budget in Cookie Monster. However, unlike User-Epoch ARA, Cookie Monster only pays query-epsilon amount in an epoch if there is a relevant impression found within that epoch. So, it lasts longer than User-Epoch ARA.

### Toggles to exploit differences
1. Running lots of queries is going to show IPA run out of budget fast. We don't have to be clever about how we do this -- with the Criteo dataset, it happens a lot.
2. Forcing User-Epoch ARA to scan a lot of epochs will force it to run out of budget faster than Cookie Monster. So, a mixture of reducing epoch size and increasing attribution window size should result in Cookie Monster performing better.

In [18]:

for log_y in [True, False]:
    title = (
        f"Zoomed in CDF for relative accuracy (workload size {workload_size} attribution window {attribution_window})"
        if log_y else
        f"CDF for relative accuracy (workload size {workload_size} attribution window {attribution_window})"
    )
    filename = (
        f"cdf_zoomed_relative_accuracy_ws_{workload_size}_aw_{attribution_window}"
        if log_y else
        f"cdf_relative_accuracy_ws_{workload_size}_aw_{attribution_window}"
    )
    figcdf = px.ecdf(
        ds,
        y="e2e_bias_accuracy",
        orientation='h',
        color="baseline",
        log_y=log_y,
        
    )
    figcdf.update_layout(
        title=title,
        xaxis_title="proportion of queries",
        yaxis_title="relative accuracy"
    )
    figcdf.show()
    figcdf.write_image(f"./large/{filename}.png")

In [29]:
for log_y in [True, False]:
    title = (
        f"Zoomed in CDF for null report bias accuracy (workload size {workload_size} attribution window {attribution_window})"
        if log_y else
        f"CDF for null report accuracy (workload size {workload_size} attribution window {attribution_window})"
    )
    filename = (
        f"cdf_zoomed_null_report_bias_relative_accuracy_ws_{workload_size}_as_{attribution_window}"
        if log_y else
        f"cdf_null_report_bias_relative_accuracy_ws_{workload_size}_as_{attribution_window}"
    )
    figcdf = px.ecdf(
        ds,
        y="null_report_bias_accuracy",
        orientation='h',
        color="baseline",
        log_y=log_y,
        
    )
    figcdf.update_layout(
        title=title,
        xaxis_title="proportion of queries",
        yaxis_title="relative accuracy"
    )
    figcdf.show()
    figcdf.write_image(f"./large/{filename}.png")

In [1]:
print(p[p.e2e_bias_accuracy < 0].shape)
print(p.shape)

NameError: name 'p' is not defined

In [49]:
# for requested_workload_size in m.requested_workload_size.unique():
#     chunk = m.loc[(m.requested_workload_size == requested_workload_size)]
#     fig = px.line(
#         chunk,
#         x="proportion_of_queries",
#         y="relative_accuracy",
#         range_y=[0, 1],
#         range_x=[0, 1],
#         color="baseline",
#         markers=True,
#         title=f"average relative accuracy per proportion of queries by baseline for workload size {requested_workload_size}"

#     )
#     pyo.iplot(fig)

Can you also plot the following graph: x axis: advertisers (sorted in descending order
of the number of queries in their workload); y axis: fraction of queries that the
advertiser x is able to execute by the end of his experiment with a target relative
accuracy t. This t could be (say) 90% if the workload generation had provisioned the
epsilons for absolute accuracy 95% with 99% probability. You can take a sample of
advertisers, you dont have to run experiments for absolutely all of them!

In [11]:
sorted_df = results.sort_values(by=["key", "baseline"], ascending=False)

KeyError: 'key'

In [16]:
workload_sizes = [5, 10, 15, 20, 25, 30]
for requested_workload_size in workload_sizes:
    content = sorted_df.loc[sorted_df.requested_workload_size == requested_workload_size]
    fig1 = px.bar(
        content,
        x="destination",
        y="fraction_queries_relatively_accurate_e2e",
        color="baseline",
        barmode="group",
        title=f"Frac. queries with >= {t * 100}% rel. accuracy by destination (workload size {requested_workload_size})"
    )
    fig2 = px.bar(
        content,
        x="destination",
        y="e2e_bias_average_relative_accuracy",
        color="baseline",
        barmode="group",
        title=f"Avg. rel. accuracy accross queries by destination (workload size {requested_workload_size})"
    )
    pyo.iplot(fig1)
    pyo.iplot(fig2)
    fig1.write_image(f"./large/e2e_bias_fraction_relative_accuracy_ws_{requested_workload_size}.png")
    fig2.write_image(f"./large/e2e_bias_average_relative_accuracy_ws_{requested_workload_size}.png")

NameError: name 'sorted_df' is not defined

In [27]:
for destination in results.groupby(['destination']).destination.unique():
    advertiser = results[results['destination'].isin(destination)]
    plot_null_reports_analysis(advertiser, save_dir="large")