In [None]:
import json
import math
import matplotlib.lines as mlines
import matplotlib.pyplot as plt
import pandas as pd
from scipy.stats import ks_2samp

In [None]:
# LTR, no defense, no manipulation.
# Plots mean AUC against the number of queries, one line per
# (dataset, ranker, click model) combination, and saves the figure as a PDF.

ltr_MQ2007 = pd.read_csv("../output/ltr_MQ2007_metrics.csv")
ltr_MSLR10K = pd.read_csv("../output/ltr_MSLR10K_metrics.csv")

datasets = {"MQ2007": ltr_MQ2007, "MSLR-WEB10K": ltr_MSLR10K}
linestyle = {"linear": "-", "neural": "--"}
marker = {"informational": "o", "navigational": "^"}
color = {"MQ2007": "#ff7f0e", "MSLR-WEB10K": "#1f77b4"}

for dataset_name, metrics in datasets.items():
    # Per-experiment AUC statistics; dropping the outer column level leaves the
    # plain describe() names ("count", "mean", "std", ...).
    stats = metrics[["name", "auc"]].groupby("name").describe()
    stats.columns = stats.columns.droplevel()

    # Experiment names are "_"-separated: the first token is the ranker, and
    # the click model and query count sit right before the "query" token.
    name_parts = [name.split("_") for name in stats.index]
    stats["query"] = [int(parts[parts.index("query") - 1]) for parts in name_parts]
    stats["click_model"] = [parts[parts.index("query") - 2] for parts in name_parts]
    stats["model"] = [parts[0] for parts in name_parts]
    stats["data"] = dataset_name

    # Keep only the eps=inf runs with at most 16 queries, excluding the random baseline.
    wanted = (
        stats.index.str.contains("eps_inf")
        & (stats["query"] <= 16)
        & (stats["model"] != "random")
    )
    stats = stats[wanted][["mean", "query", "model", "click_model", "data"]].reset_index(drop=True)

    # The style dictionaries list exactly the rankers / click models to draw.
    for model, model_style in linestyle.items():
        for click_model, click_marker in marker.items():
            selected = (stats["model"] == model) & (stats["click_model"] == click_model)
            line_df = stats[selected].sort_values(by=["query"])
            plt.plot(
                line_df["query"].astype(str).tolist(),
                line_df["mean"].tolist(),
                linestyle=model_style,
                marker=click_marker,
                color=color[dataset_name],
            )

plt.xlabel('Number of queries')
plt.ylabel('Mean AUC')
plt.tight_layout()

# Proxy artists: one legend entry per line style, marker and dataset colour.
legend_entries = (
    [mlines.Line2D([], [], color='black', linestyle=style, label=name) for name, style in linestyle.items()]
    + [mlines.Line2D([], [], color='black', marker=symbol, label=name) for name, symbol in marker.items()]
    + [mlines.Line2D([], [], color=shade, label=name) for name, shade in color.items()]
)
plt.legend(handles=legend_entries)

plt.savefig("../plots/plain_metrics.pdf", bbox_inches='tight')
plt.show()


In [None]:
# LTR, no defense, with manipulation.
# Plots mean AUC (error bars: one standard deviation, upper bar clipped so it
# does not extend past 1.0) against the number of queries per user, for runs
# with and without ADM.

# Alternative inputs (MQ2007 and the NN manipulation runs), currently disabled:
# df = pd.read_csv("../output/ltr_MQ2007_metrics.csv")
# df_manipulated = pd.read_csv("../output/ltr_MQ2007_multibatch_manipulation_metrics.csv")
# df_nn_manipulated = pd.read_csv("../output/ltr_MQ2007_nn_manipulation_metrics.csv")
# df_nn_manipulated = pd.read_csv("../output/ltr_MSLR10K_nn_manipulation_metrics.csv")
# datasets = {"no ADM": df, "with ADM": df_manipulated, "with ADM+NN": df_nn_manipulated}
# color = {"with ADM+NN": "#d62728", "with ADM": "#ff7f0e", "no ADM": "#1f77b4"}

df = pd.read_csv("../output/ltr_MSLR10K_metrics_new.csv")
df_manipulated = pd.read_csv("../output/ltr_MSLR10K_metrics_manipulation_new.csv")

datasets = {"no ADM": df, "with ADM": df_manipulated}
linestyle = {"linear": "-", "neural": "--"}
marker = {"informational": "o", "navigational": "^"}
color = {"with ADM": "#ff7f0e", "no ADM": "#1f77b4"}
# Horizontal shift of the first line of each dataset; every further line of
# the same dataset moves right by xoffset_delta so the error bars do not overlap.
xoffsets = {"no ADM": -0.4, "with ADM": 0}
xoffset_delta = 0.2

plt.axhline(y=0.5, color='grey', linestyle='-', alpha=0.5, linewidth=0.5)
for dataset_name, metrics in datasets.items():
    xoffset = xoffsets[dataset_name]

    stats = metrics[["name", "auc"]].groupby("name").describe()
    stats.columns = stats.columns.droplevel()

    # "<model>_..._<click model>_<n>_query_...": parse the tokens around "query".
    name_parts = [name.split("_") for name in stats.index]
    stats["query"] = [int(parts[parts.index("query") - 1]) for parts in name_parts]
    stats["click_model"] = [parts[parts.index("query") - 2] for parts in name_parts]
    stats["model"] = [parts[0] for parts in name_parts]
    stats["data"] = dataset_name

    name_matches = stats.index.str.contains("eps_inf") | stats.index.str.contains("manipulated")
    wanted = (
        name_matches
        & (stats["query"] <= 16)
        & (stats["query"] >= 1)
        & (stats["model"] != "random")
    )
    stats = stats[wanted][["mean", "std", "query", "model", "click_model", "data"]].reset_index(drop=True)

    for model, model_style in linestyle.items():
        for click_model, click_marker in marker.items():
            selected = (stats["model"] == model) & (stats["click_model"] == click_model)
            line_df = stats[selected].sort_values(by=["query"])
            mean = line_df["mean"].tolist()
            std = line_df["std"].tolist()
            # Shorten the upper bar by however much mean + std exceeds 1.0.
            upper_errbar = [s - max(0, s + m - 1.0) for m, s in zip(mean, std)]

            eb = plt.errorbar(
                [q + xoffset for q in line_df["query"].tolist()],
                mean,
                yerr=[std, upper_errbar],
                linestyle=model_style,
                marker=click_marker,
                color=color[dataset_name],
                markersize=5,
                elinewidth=0.5,
                capsize=2,
            )
            # Draw the vertical error bars with the same dash pattern as the line.
            eb[-1][0].set_linestyle(model_style)
            xoffset += xoffset_delta

plt.xticks([1, 4, 8, 12, 16])
plt.xlabel('Number of queries per user')
plt.ylabel('Mean AUC')
plt.tight_layout()

# Proxy artists: dataset colours first, then line styles, then markers.
legend_entries = (
    [mlines.Line2D([], [], color=shade, label=name) for name, shade in color.items()]
    + [mlines.Line2D([], [], color='black', linestyle=style, label=name) for name, style in linestyle.items()]
    + [mlines.Line2D([], [], color='black', marker=symbol, label=name) for name, symbol in marker.items()]
)
# Denser legend layout, currently disabled:
# plt.legend(handles=legend_entries, ncols=3, columnspacing=0.5, handlelength=1.5, labelspacing=0.25, borderpad=0.3, borderaxespad=0.1, fontsize="small")
plt.legend(handles=legend_entries, ncols=3, fontsize="small")

plt.savefig("../plots/MSLR10K_ADM_metrics_eb.pdf", bbox_inches='tight')
plt.show()


In [None]:
# LTR, DP.
# For each ranker, plots mean AUC (error bars: one standard deviation, upper
# bar clipped so it does not extend past 1.0) against epsilon, with and
# without ADM, and saves one PDF per ranker.

df_normal = pd.read_csv("../output/ltr_MSLR10K_DP_metrics_new_sens_0.5.csv")
df_manipulated = pd.read_csv("../output/ltr_MSLR10K_metrics_manipulation_new_sens_0.5.csv")
datasets = {"with ADM": df_manipulated, "no ADM": df_normal}
linestyle = {"with ADM": "-", "no ADM": "--"}
marker = {"informational": "o", "navigational": "^"}
color = {4: "#ff7f0e", 8: "#2ca02c", 12: "#1f77b4", 16: "#9467bd"}
# Horizontal shift of the first line of each dataset; each further line of the
# same dataset moves right by xoffset_delta.
xoffsets = {"with ADM": -0.15, "no ADM": -0.10}

xoffset_delta = 0.1
# Evenly spaced positions for the epsilon values (every plotted series is
# expected to contain exactly len(xpos) of them).
xpos = [1, 2, 3, 4, 5]

# The per-experiment summary does not depend on the ranker being plotted, so
# it is computed once per dataset rather than once per figure.
summaries = {}
for dataset_name, metrics in datasets.items():
    stats = metrics[["name", "auc"]].groupby("name").describe()
    stats.columns = stats.columns.droplevel()
    # Experiment names are "_"-separated; epsilon follows the "eps" token, the
    # click model and query count precede the "query" token, the ranker is first.
    name_parts = [name.split("_") for name in stats.index]
    stats["eps"] = [float(parts[parts.index("eps") + 1]) for parts in name_parts]
    stats["query"] = [int(parts[parts.index("query") - 1]) for parts in name_parts]
    stats["click_model"] = [parts[parts.index("query") - 2] for parts in name_parts]
    stats["model"] = [parts[0] for parts in name_parts]
    stats = stats[(stats["eps"] != math.inf) & (stats["query"] <= 16) & (stats["model"] != "random")]
    summaries[dataset_name] = stats[["mean", "std", "query", "model", "click_model", "eps"]].reset_index(drop=True)

for model in ["linear", "neural"]:
    plt.axhline(y=0.5, color='grey', linestyle='-', alpha=0.5, linewidth=0.5)

    # Epsilon labels of the most recently plotted series. Tracking them here
    # (instead of reading whatever the last loop iteration left behind) keeps
    # the tick labels valid even when the final combination has no data.
    tick_labels = None

    for dataset_name, stats in summaries.items():
        xoffset = xoffsets[dataset_name]

        # Only the informational click model is drawn in this figure.
        for click_model in ["informational"]:
            for query in [4, 8, 12, 16]:
                selected = (
                    (stats["model"] == model)
                    & (stats["click_model"] == click_model)
                    & (stats["query"] == query)
                )
                line_df = stats[selected].sort_values(by=["eps"])

                mean = line_df["mean"].tolist()
                if not mean:
                    # No runs for this combination: skip it without using up an offset slot.
                    continue

                std = line_df["std"].tolist()
                # Shorten the upper bar by however much mean + std exceeds 1.0.
                upper_errbar = [s - max(0, s + m - 1.0) for m, s in zip(mean, std)]

                eb = plt.errorbar([x + xoffset for x in xpos], mean, yerr=[std, upper_errbar],
                                  linestyle=linestyle[dataset_name], marker=marker[click_model], color=color[query],
                                  linewidth=1, alpha=0.9, markersize=5, elinewidth=0.5, capsize=2)
                # Draw the vertical error bars with the same dash pattern as the line.
                eb[-1][0].set_linestyle(linestyle[dataset_name])

                tick_labels = line_df["eps"].astype(str).tolist()
                xoffset += xoffset_delta

    if tick_labels is not None:
        plt.xticks(xpos, tick_labels)
    else:
        # Nothing was plotted for this ranker; keep the positions without labels.
        plt.xticks(xpos)
    plt.xlabel('Epsilon')
    plt.ylabel('Mean AUC')
    plt.tight_layout()

    # Proxy artists: query-count colours, then dataset line styles. No marker
    # entries are added because a single click model is drawn.
    legend_entries = []
    for label, value in color.items():
        legend_entries.append(mlines.Line2D([], [], color=value, label=f"{label} {'queries' if label > 1 else 'query'}"))
    for label, value in linestyle.items():
        legend_entries.append(mlines.Line2D([], [], color='black', linestyle=value, label=label))
    plt.legend(handles=legend_entries)

    plt.savefig(f"../plots/MSLR10K_DP_{model}_metrics_new.pdf", bbox_inches='tight')
    plt.show()


In [None]:
# LTR, manipulation with normally distributed noise.
# Plots mean AUC (error bars: one standard deviation, upper bar clipped so it
# does not extend past 1.0) against the standard deviation parsed from the
# experiment name (0 when the name has no "std" token).

df_manipulated_zero = pd.read_csv("../output/ltr_MSLR10K_metrics_manipulation_new.csv")
df_manipulated_normal = pd.read_csv("../output/ltr_MSLR10K_metrics_manipulation_new_sens_0.5_std_mean_0.0.csv")
df = pd.concat([df_manipulated_zero, df_manipulated_normal])
df = df[["name", "auc"]].groupby("name").describe()
df.columns = df.columns.droplevel()

# Experiment names are "_"-separated; split each one once and read the tokens
# next to the "eps", "std" and "query" markers.
name_parts = [name.split("_") for name in df.index]
df["eps"] = [float(parts[parts.index("eps") + 1]) for parts in name_parts]
df["adm_std"] = [float(0 if "std" not in parts else parts[parts.index("std") + 1]) for parts in name_parts]
df["query"] = [int(parts[parts.index("query") - 1]) for parts in name_parts]
df["click_model"] = [parts[parts.index("query") - 2] for parts in name_parts]
df["model"] = [parts[0] for parts in name_parts]
df = df[(df["eps"] == math.inf) & (df["query"] >= 4) & (df["query"] <= 16) & (df["model"] != "random")]
df = df[["mean", "std", "query", "model", "click_model", "adm_std"]].reset_index(drop=True)

linestyle = {"linear": "-", "neural": "--"}
marker = {"informational": "o", "navigational": "^"}
color = {4: "#ff7f0e", 8: "#2ca02c", 12: "#1f77b4", 16: "#9467bd"}
# A single running offset is shared by all lines so that no two error bars overlap.
xoffset = -0.35
xoffset_delta = 0.05
# Evenly spaced positions for the standard deviations (every plotted series is
# expected to contain exactly len(xpos) of them).
xpos = [1, 2, 3, 4, 5]

# Labels of the most recently plotted series. Tracking them here (instead of
# reading whatever the last loop iteration left behind) keeps the tick labels
# valid even when the final combination has no data.
tick_labels = None

plt.axhline(y=0.5, color='grey', linestyle='-', alpha=0.5, linewidth=0.5)
for model in ["linear", "neural"]:
    for query in [4, 8, 12, 16]:
        for click_model in ["informational", "navigational"]:
            selected = (df["model"] == model) & (df["click_model"] == click_model) & (df["query"] == query)
            line_df = df[selected].sort_values(by=["adm_std"])

            mean = line_df["mean"].tolist()
            if not mean:
                # No runs for this combination: skip it without using up an offset slot.
                continue

            std = line_df["std"].tolist()
            # Shorten the upper bar by however much mean + std exceeds 1.0.
            upper_errbar = [s - max(0, s + m - 1.0) for m, s in zip(mean, std)]

            eb = plt.errorbar([x + xoffset for x in xpos], mean, yerr=[std, upper_errbar],
                              linestyle=linestyle[model], marker=marker[click_model], color=color[query],
                              linewidth=1, alpha=0.9, markersize=3, elinewidth=0.5, capsize=2)
            # Draw the vertical error bars with the same dash pattern as the line.
            eb[-1][0].set_linestyle(linestyle[model])

            tick_labels = line_df["adm_std"].astype(str).tolist()
            xoffset += xoffset_delta

if tick_labels is not None:
    plt.xticks(xpos, tick_labels)
else:
    # Nothing was plotted; keep the positions without labels.
    plt.xticks(xpos)
plt.xlabel('Standard Deviation')
plt.ylabel('Mean AUC')
plt.tight_layout()

# Proxy artists: query-count colours, then ranker line styles, then click-model markers.
legend_entries = []
for label, value in color.items():
    legend_entries.append(mlines.Line2D([], [], color=value, label=f"{label} {'queries' if label > 1 else 'query'}"))
for label, value in linestyle.items():
    legend_entries.append(mlines.Line2D([], [], color='black', linestyle=value, label=label))
for label, value in marker.items():
    legend_entries.append(mlines.Line2D([], [], color='black', marker=value, label=label))
plt.legend(handles=legend_entries, loc="lower right", ncols=4, columnspacing=0.5,
           labelspacing=0.1, handlelength=1.5, borderaxespad=0.1, fontsize="small")

plt.savefig(f"../plots/MSLR10K_ADM_std.pdf", bbox_inches='tight')
plt.show()


In [None]:
# LTR, DP, with manipulation.
# Plots mean AUC against epsilon (as categorical labels), one line per
# (ranker, click model, query count) combination.

linestyle = {"linear": "-", "neural": "--"}
marker = {"informational": "o", "navigational": "^"}
color = {4: "#ff7f0e", 8: "#2ca02c", 12: "#1f77b4", 16: "#9467bd"}

stats = pd.read_csv("../output/ltr_MSLR10K_manipulation_DP_metrics.csv")
stats = stats[["name", "auc"]].groupby("name").describe()
stats.columns = stats.columns.droplevel()

# Experiment names are "_"-separated; epsilon follows the "eps" token, the
# click model and query count precede the "query" token, the ranker is first.
name_parts = [name.split("_") for name in stats.index]
stats["eps"] = [float(parts[parts.index("eps") + 1]) for parts in name_parts]
stats["query"] = [int(parts[parts.index("query") - 1]) for parts in name_parts]
stats["click_model"] = [parts[parts.index("query") - 2] for parts in name_parts]
stats["model"] = [parts[0] for parts in name_parts]

# Finite epsilon only, at most 16 queries, no random baseline.
wanted = (stats["eps"] != math.inf) & (stats["query"] <= 16) & (stats["model"] != "random")
stats = stats[wanted][["mean", "query", "model", "click_model", "eps"]].reset_index(drop=True)

# The style dictionaries list exactly the rankers, click models and query
# counts to draw, in drawing order.
for model, model_style in linestyle.items():
    for click_model, click_marker in marker.items():
        for query, query_color in color.items():
            selected = (
                (stats["model"] == model)
                & (stats["click_model"] == click_model)
                & (stats["query"] == query)
            )
            line_df = stats[selected].sort_values(by=["eps"])
            plt.plot(
                line_df["eps"].astype(str).tolist(),
                line_df["mean"].tolist(),
                linestyle=model_style,
                marker=click_marker,
                color=query_color,
            )

plt.xlabel('Epsilon')
plt.ylabel('Mean AUC')
plt.tight_layout()

# Proxy artists: query-count colours, then ranker line styles, then click-model markers.
legend_entries = (
    [
        mlines.Line2D([], [], color=shade, label=f"{count} {'queries' if count > 1 else 'query'}")
        for count, shade in color.items()
    ]
    + [mlines.Line2D([], [], color='black', linestyle=style, label=name) for name, style in linestyle.items()]
    + [mlines.Line2D([], [], color='black', marker=symbol, label=name) for name, symbol in marker.items()]
)
plt.legend(handles=legend_entries, ncols=2)

plt.savefig("../plots/MSLR10K_DP_manipulation_metrics.pdf", bbox_inches='tight')
plt.show()


In [None]:
# LTR, pruning.
# Plots mean AUC against the prune percentage on MQ2007. Going by the file
# names, the 4/8/12/16-query curves are read from the manipulated runs and the
# 1-query curve from the runs without manipulation.

linestyle = {"linear": "-", "neural": "--"}
marker = {"informational": "o", "navigational": "^"}
color = {1: "#d62728", 4: "#ff7f0e", 8: "#2ca02c", 12: "#1f77b4", 16: "#9467bd"}


def _pruning_summary(frames):
    """Return per-experiment mean AUC with parsed columns for the given raw frames.

    The frames are concatenated, AUC is summarised per experiment name, the
    random baseline is dropped, and ``pct`` (0.0 when the name does not contain
    "prune"), ``query`` and ``click_model`` are parsed from the "_"-separated name.
    """
    stats = pd.concat(frames)[["name", "auc"]].groupby("name").describe()
    stats.columns = stats.columns.droplevel()
    stats["model"] = stats.index.map(lambda x: x.split("_")[0]).values
    # Copy the filtered rows so the columns added below are written to an
    # independent frame instead of a slice (avoids SettingWithCopyWarning).
    stats = stats[stats["model"] != "random"].copy()
    stats["pct"] = stats.index.map(lambda x: 0.0 if "prune" not in x else float(x.split("_")[x.split("_").index("prune") + 1])).values
    stats["query"] = stats.index.map(lambda x: int(x.split("_")[x.split("_").index("query") - 1])).values
    stats["click_model"] = stats.index.map(lambda x: x.split("_")[x.split("_").index("query") - 2]).values
    return stats[["mean", "query", "model", "click_model", "pct"]].reset_index(drop=True)


def _plot_pruning_lines(stats, queries):
    """Draw one line per (ranker, click model, query count) for the given query counts."""
    for model in ["linear", "neural"]:
        for click_model in ["informational", "navigational"]:
            for query in queries:
                line_df = stats[(stats["model"] == model) & (stats["click_model"] == click_model) & (stats["query"] == query)].sort_values(by=["pct"])
                plt.plot(line_df["pct"].astype(str).tolist(), line_df["mean"].tolist(), linestyle=linestyle[model], marker=marker[click_model], color=color[query])


# Multi-query curves: pruned runs plus the unpruned multibatch runs (pct = 0.0).
df = pd.read_csv("../output/ltr_MQ2007_manipulated_pruned_metrics.csv")
df2 = pd.read_csv("../output/ltr_MQ2007_multibatch_manipulation_metrics.csv")
df = _pruning_summary([df, df2])
_plot_pruning_lines(df, [4, 8, 12, 16])

# Single-query curve: pruned runs plus the unpruned 1-query eps=inf runs (pct = 0.0).
df = pd.read_csv("../output/ltr_MQ2007_pruned_metrics.csv")
df2 = pd.read_csv("../output/ltr_MQ2007_metrics.csv")
df2 = df2[df2["name"].str.contains("1_query_eps_inf")]
df = _pruning_summary([df, df2])
_plot_pruning_lines(df, [1])

plt.xlabel('Prune percentage')
plt.ylabel('Mean AUC')
plt.tight_layout()

# Proxy artists: query-count colours, then ranker line styles, then click-model markers.
legend_entries = []
for label, value in color.items():
    legend_entries.append(mlines.Line2D([], [], color=value, label=f"{label} {'queries' if label > 1 else 'query'}"))
for label, value in linestyle.items():
    legend_entries.append(mlines.Line2D([], [], color='black', linestyle=value, label=label))
for label, value in marker.items():
    legend_entries.append(mlines.Line2D([], [], color='black', marker=value, label=label))
plt.legend(handles=legend_entries, ncols=2, columnspacing=0.5, handlelength=1.5, labelspacing=0.25, borderpad=0.3, borderaxespad=0.3)

plt.savefig("../plots/MQ2007_pruned_metrics.pdf", bbox_inches='tight')
plt.show()


In [None]:
# Recommender results without DP (eps = inf): summary tables of AUC and of the
# "est_user_emb_err" value stored in extra_data, followed by one-sided
# two-sample Kolmogorov-Smirnov tests on AUC between attack variants.
raw_df = pd.read_csv("../output/rec_ML100K_metrics.csv")
# extra_data holds a JSON object per row; rows without the key get None.
raw_df["emb_err"] = raw_df["extra_data"].apply(lambda x: json.loads(x).get("est_user_emb_err", None))

# Print describe() statistics per experiment name, restricted to the eps=inf runs.
for column in ["auc", "emb_err"]:
    df = raw_df[["name", column]].groupby("name").describe()
    df.columns = df.columns.droplevel()
    df = df.reset_index()
    df = df[df["name"].str.contains("eps_inf")]
    print(df.to_string())

df = raw_df
embedding_dim = 64
epsilons = [math.inf]
metric = "auc"
for epsilon in epsilons:
    print(f"Epsilon {epsilon}:")
    # Vs random
    for model in ["FCF", "FNCF"]:
        # Named `variant` rather than `type` so the builtin is not rebound at
        # notebook scope for every later cell.
        for variant in ["simple", "joint"]:
            print(f"{model} {variant} vs Random {metric} p-value:", ks_2samp(
                    df[df["name"] == f"{model}_{variant}_emb_{embedding_dim}_eps_{epsilon}"].loc[:, metric],
                    df[df["name"] == f"Random_emb_{embedding_dim}_eps_{epsilon}"].loc[:, metric],
                    alternative="less",
                ).pvalue)

    # FCF
    print(f"FCF joint vs FCF simple {metric} p-value:", ks_2samp(
            df[df["name"] == f"FCF_joint_emb_{embedding_dim}_eps_{epsilon}"].loc[:, metric],
            df[df["name"] == f"FCF_simple_emb_{embedding_dim}_eps_{epsilon}"].loc[:, metric],
            alternative="less",
        ).pvalue)

    # FNCF: each pair is (first sample, second sample) passed to ks_2samp in that order.
    pairs = [("joint", "simple"), ("joint_model", "joint"), ("simple_model", "simple"), ("joint", "simple_model")]
    for model1, model2 in pairs:
        pvalue = ks_2samp(
            df[df["name"] == f"FNCF_{model1}_emb_{embedding_dim}_eps_{epsilon}"].loc[:, metric],
            df[df["name"] == f"FNCF_{model2}_emb_{embedding_dim}_eps_{epsilon}"].loc[:, metric],
            alternative="less",
        ).pvalue
        print(f"FNCF {model1} vs FNCF {model2} {metric} p-value: {pvalue:.2e}")



In [None]:
# Rec, DP.
# Plots mean AUC (error bars: one standard deviation) against epsilon for each
# recommender attack variant on ML100K.
rec_stats = pd.read_csv("../output/rec_ML100K_metrics.csv")[["name", "auc"]].groupby("name").describe()
rec_stats.columns = rec_stats.columns.droplevel()
rec_stats = rec_stats.reset_index()

# Epsilon is the last "_"-separated token of the experiment name.
rec_stats["eps"] = rec_stats["name"].map(lambda name: float(name.rsplit("_", 1)[-1]))
# Leave out the runs without DP (eps = inf) and the eps = 250 runs.
rec_stats = rec_stats[~rec_stats["eps"].isin([math.inf, 250])]
# print(rec_stats.to_string())

# Running horizontal shift so the error bars of different variants do not overlap.
xoffset = -0.15
xoffset_delta = 0.05
xpos = [1, 2, 3, 4, 5]

# Substring of the experiment name -> (line colour, legend label), in drawing order.
models = {
    "FCF_simple": ("black", "FCF"),
    "FCF_joint": ("#9467bd", "FCF with user embedding"),
    "FNCF_simple_emb": ("#1f77b4", "FNCF"),
    "FNCF_joint_emb": ("#2ca02c", "FNCF with user embedding"),
    "FNCF_simple_model": ("#ff7f0e", "FNCF with model parameters"),
    "FNCF_joint_model": ("#d62728", "FNCF with user embedding + model parameters"),
}
epsilons = ["1.0", "10.0", "20.0", "100.0", "500.0"]

plt.axhline(y=0.5, color='grey', linestyle='-', alpha=0.5, linewidth=0.5)

for model_name, (line_color, legend_label) in models.items():
    line_df = rec_stats[rec_stats["name"].str.contains(model_name)].sort_values(by=["eps"])
    shifted_x = [x + xoffset for x in xpos]
    plt.errorbar(
        shifted_x,
        line_df["mean"].tolist(),
        yerr=line_df["std"].tolist(),
        marker="o",
        color=line_color,
        label=legend_label,
        markersize=5,
        elinewidth=0.5,
        capsize=2,
        linewidth=1.0,
    )
    xoffset += xoffset_delta

plt.xticks(xpos, epsilons)
plt.xlabel('Epsilon')
plt.ylabel('Mean AUC')
plt.tight_layout()
plt.legend(fontsize="small", loc="upper left")

plt.savefig("../plots/ML100K_DP_metrics_new.pdf", bbox_inches='tight')

plt.show()

In [None]:
# Recommender with pruning: mean AUC against the prune percentage on ML100K.
# The eps=inf runs from the plain metrics file are concatenated with the
# pruned runs before summarising.
pruned_runs = pd.read_csv("../output/rec_ML100K_pruned_metrics.csv")
plain_runs = pd.read_csv("../output/rec_ML100K_metrics.csv")
plain_runs = plain_runs[plain_runs["name"].str.contains("eps_inf")]

prune_stats = pd.concat([plain_runs, pruned_runs])[["name", "auc"]].groupby("name").describe()
prune_stats.columns = prune_stats.columns.droplevel()
prune_stats = prune_stats.reset_index()

print(prune_stats.to_string())

# X labels; each curve's means are taken in the row order of the summary
# (grouped by experiment name) and paired with these labels positionally.
prune_pct = ["0.0", "0.1", "0.3", "0.5", "0.7", "0.9", "0.99"]

# (substring of the experiment name, line colour, legend label), in drawing order.
# The plain FCF curve ("FCF_simple", black, label "FCF") is currently disabled.
curves = [
    ("FCF_joint", "#9467bd", "FCF with user embedding"),
    ("FNCF_simple_emb", "#1f77b4", "FNCF"),
    ("FNCF_joint_emb", "#2ca02c", "FNCF with user embedding"),
    ("FNCF_simple_model", "#ff7f0e", "FNCF with model parameters"),
    ("FNCF_joint_model", "#d62728", "FNCF with user embedding + model parameters"),
]
for name_pattern, line_color, legend_label in curves:
    curve_means = prune_stats[prune_stats["name"].str.contains(name_pattern)]["mean"].tolist()
    plt.plot(prune_pct, curve_means, marker="o", color=line_color, label=legend_label)

plt.xlabel('Prune percentage')
plt.ylabel('Mean AUC')
plt.tight_layout()
plt.legend(labelspacing=0.25)

plt.savefig("../plots/ML100K_pruned_metrics.pdf", bbox_inches='tight')

plt.show()

In [None]:
# Appendix: LTR metrics
# Prints LaTeX table rows with the mean of each metric for the navigational
# click model, followed in parentheses by the informational one.

metrics = ["f1", "auc", "auc-pr"]
table_stats = pd.read_csv("../output/ltr_MSLR10K_DP_metrics.csv")
# describe() over several columns yields (metric, statistic) column pairs.
table_stats = table_stats[["name"] + metrics].groupby("name").describe().reset_index()

for model_name in ["linear_pdgd", "neural_16_8_pdgd"]:
    for epsilon in [1.0, 10.0, 100.0, 250.0, 500.0, math.inf]:
        for num_query in [1, 4, 8, 12, 16]:
            nav_df = table_stats[table_stats["name"] == f"{model_name}_navigational_{num_query}_query_eps_{epsilon}"]
            inf_df = table_stats[table_stats["name"] == f"{model_name}_informational_{num_query}_query_eps_{epsilon}"]

            cells = []
            for metric in metrics:
                nav_value = nav_df[(metric, )]["mean"].tolist()[0]
                inf_value = inf_df[(metric, )]["mean"].tolist()[0]
                cells.append(f" & {nav_value:.2f} ({inf_value:.2f})")
            res = "".join(cells) + " \\\\"
            print(f"{model_name.split('_')[0]} & {epsilon} & {num_query}{res}")
    print("\\hline")

In [None]:
# LaTeX table rows for the manipulated LTR runs (DP runs concatenated with the
# multibatch manipulation runs): mean of each metric for the navigational
# click model, followed in parentheses by the informational one.
metrics = ["f1", "auc", "auc-pr"]
manip_stats = pd.concat([
    pd.read_csv("../output/ltr_MSLR10K_manipulation_DP_metrics.csv"),
    pd.read_csv("../output/ltr_MSLR10K_multibatch_manipulation_metrics.csv"),
])
# describe() over several columns yields (metric, statistic) column pairs.
manip_stats = manip_stats[["name"] + metrics].groupby("name").describe().reset_index()

print(manip_stats.to_string())
for model_name in ["linear_pdgd", "neural_16_8_pdgd"]:
    for epsilon in [1.0, 10.0, 100.0, 250.0, 500.0, math.inf]:
        for num_query in [4, 8, 12, 16]:
            nav_df = manip_stats[manip_stats["name"] == f"{model_name}_navigational_{num_query}_query_eps_{epsilon}"]
            inf_df = manip_stats[manip_stats["name"] == f"{model_name}_informational_{num_query}_query_eps_{epsilon}"]

            cells = []
            for metric in metrics:
                nav_value = nav_df[(metric, )]["mean"].tolist()[0]
                inf_value = inf_df[(metric, )]["mean"].tolist()[0]
                cells.append(f" & {nav_value:.2f} ({inf_value:.2f})")
            res = "".join(cells)
            print(f"{model_name.split('_')[0]} & {epsilon} & {num_query}{res} \\\\")
    print("\\hline")

In [None]:
# Appendix: Rec
# Prints one LaTeX table row per attack variant with the one-sided two-sample
# K-S p-value (AUC of the variant vs. the Random runs) for each epsilon;
# p-values below 0.05 are set in bold.

rec_metrics = pd.read_csv("../output/rec_ML100K_metrics.csv")

for model_name in ["FCF_simple", "FCF_joint", "FNCF_simple", "FNCF_joint", "FNCF_simple_model", "FNCF_joint_model"]:
    p_values = [
        ks_2samp(
            rec_metrics[rec_metrics["name"] == f"{model_name}_emb_64_eps_{epsilon}"]["auc"].tolist(),
            rec_metrics[rec_metrics["name"] == f"Random_emb_64_eps_{epsilon}"]["auc"].tolist(),
            alternative="less",
        ).pvalue
        for epsilon in [1.0, 10.0, 20.0, 100.0, 500.0]
    ]

    eps_str = "".join(
        (" & \\textbf{" + f"{p:.2e}" + "}") if p < 0.05 else f" & {p:.2e}"
        for p in p_values
    )
    print(f"{model_name}{eps_str} \\\\")


In [None]:
# Appendix: K-S p-values for LTR
# Prints one LaTeX table row per (ranker, click model, query count) with the
# one-sided two-sample K-S p-value (AUC of the ranker vs. the random runs) for
# each epsilon; p-values below 0.05 are set in bold.
import warnings
# Silence RuntimeWarning for the rest of the session (not only for this cell).
warnings.filterwarnings("ignore", category=RuntimeWarning)
df = pd.read_csv("../output/ltr_MSLR10K_DP_metrics_new_sens_0.5.csv")
for model_name in ["linear_pdgd", "neural_16_8_pdgd"]:
    for click_model in ["navigational", "informational"]:
        for num_query in [1, 4, 8, 12, 16]:
            p_values = []
            for epsilon in [1.0, 10.0, 20.0, 100.0, 500.0]:
                pvalue = ks_2samp(
                    df[df["name"] == f"{model_name}_{click_model}_{num_query}_query_eps_{epsilon}"].loc[:, "auc"],
                    df[df["name"] == f"random_{click_model}_{num_query}_query_eps_{epsilon}"].loc[:, "auc"],
                    alternative="less",
                ).pvalue
                p_values.append(pvalue)

            eps_str = ""
            for p in p_values:
                if p < 0.05:
                    eps_str += " & \\textbf{" + f"{p:.2e}" + "}"
                else:
                    eps_str += f" & {p:.2e}"
            print(f"{'linear' if model_name == 'linear_pdgd' else 'neural'} & {click_model[:3]} & {num_query}{eps_str} \\\\")
        # Escaped backslash: "\h" is not a valid escape sequence.
        print("\\hline")

In [None]:
# K-S p-values for the manipulated LTR runs.
# Prints one LaTeX table row per (ranker, click model, query count) with the
# one-sided two-sample K-S p-value for each epsilon, comparing the AUC of the
# manipulated runs (df) with the random runs of the DP metrics file (df2);
# p-values below 0.05 are set in bold.
df = pd.read_csv("../output/ltr_MSLR10K_metrics_manipulation_new_sens_0.5.csv")
df2 = pd.read_csv("../output/ltr_MSLR10K_DP_metrics_new_sens_0.5.csv")
for model_name in ["linear_pdgd", "neural_16_8_pdgd"]:
    for click_model in ["navigational", "informational"]:
        for num_query in [4, 8, 12, 16]:
            p_values = []
            for epsilon in [1.0, 10.0, 20.0, 100.0, 500.0]:
                pvalue = ks_2samp(
                    df[df["name"] == f"{model_name}_{click_model}_{num_query}_query_eps_{epsilon}"].loc[:, "auc"],
                    df2[df2["name"] == f"random_{click_model}_{num_query}_query_eps_{epsilon}"].loc[:, "auc"],
                    alternative="less",
                ).pvalue
                p_values.append(pvalue)

            eps_str = ""
            for p in p_values:
                if p < 0.05:
                    eps_str += " & \\textbf{" + f"{p:.2e}" + "}"
                else:
                    eps_str += f" & {p:.2e}"
            print(f"{'linear' if model_name == 'linear_pdgd' else 'neural'} & {click_model[:3]} & {num_query}{eps_str} \\\\")
        # Escaped backslash: "\h" is not a valid escape sequence.
        print("\\hline")