In [2]:
import matplotlib.lines as mlines
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# LTR, no defense, no manipulation
#
# Plots the mean AUC against the number of queries for every
# (dataset, model, click model) combination, using only runs whose name
# contains "eps_inf".
# Run names are underscore-separated: the first token is the model, the token
# right before "query" is the number of queries and the token before that is
# the click model.

ltr_MQ2007 = pd.read_csv("../output/ltr_MQ2007_metrics.csv")
ltr_MSLR10K = pd.read_csv("../output/ltr_MSLR10K_metrics.csv")

datasets = {"MQ2007": ltr_MQ2007, "MSLR-WEB10K": ltr_MSLR10K}
linestyle = {"linear": "-", "neural": "--"}
marker = {"informational": "o", "navigational": "^"}
color = {"MQ2007": "#ff7f0e", "MSLR-WEB10K": "#1f77b4"}

# Collect every line before drawing so that all of them share one sorted x axis.
plot_lines = []
for dataset_name, metrics in datasets.items():
    # Only the mean is plotted, so aggregate just that instead of a full describe().
    summary = metrics.groupby("name")["auc"].mean().rename("mean").reset_index()

    # Split each run name once and read every field from the same token list.
    tokens = summary["name"].str.split("_")
    summary["query"] = tokens.map(lambda t: int(t[t.index("query") - 1]))
    summary["click_model"] = tokens.map(lambda t: t[t.index("query") - 2])
    summary["model"] = tokens.map(lambda t: t[0])

    keep = (
        summary["name"].str.contains("eps_inf")
        & (summary["query"] <= 16)
        & (summary["model"] != "random")
    )
    summary = summary[keep]

    for model in ["linear", "neural"]:
        for click_model in ["informational", "navigational"]:
            selected = summary[(summary["model"] == model) & (summary["click_model"] == click_model)]
            plot_lines.append((dataset_name, model, click_model, selected.sort_values(by=["query"])))

# Plotting strings directly makes the tick order depend on which line is drawn
# first; fixed positions keep the evenly spaced ticks but always in numeric order.
query_ticks = sorted({q for *_, line_df in plot_lines for q in line_df["query"].tolist()})
x_position = {query: position for position, query in enumerate(query_ticks)}

# A dedicated figure, so the cell never draws onto a figure left over from elsewhere.
fig, ax = plt.subplots()
for dataset_name, model, click_model, line_df in plot_lines:
    ax.plot(
        [x_position[q] for q in line_df["query"].tolist()],
        line_df["mean"].tolist(),
        linestyle=linestyle[model],
        marker=marker[click_model],
        color=color[dataset_name],
    )
ax.set_xticks(list(x_position.values()), labels=[str(query) for query in query_ticks])

ax.set_xlabel('Number of queries')
ax.set_ylabel('Mean AUC')
fig.tight_layout()

# One proxy handle per visual encoding: line style = model, marker = click model,
# colour = dataset.
legend_entries = []
for label, value in linestyle.items():
    legend_entries.append(mlines.Line2D([], [], color='black', linestyle=value, label=label))
for label, value in marker.items():
    legend_entries.append(mlines.Line2D([], [], color='black', marker=value, label=label))
for label, value in color.items():
    legend_entries.append(mlines.Line2D([], [], color=value, label=label))
ax.legend(handles=legend_entries)

fig.savefig("../plots/plain_metrics.pdf", bbox_inches='tight')
plt.show()


In [None]:
# LTR, no defense, with manipulation
#
# Compares the mean AUC on MSLR-WEB10K between the plain runs ("no ADM") and the
# multibatch-manipulation runs ("with ADM"), using only runs whose name contains
# "eps_inf" and that use more than 1 and at most 16 queries.
# Run names are underscore-separated: the first token is the model, the token
# right before "query" is the number of queries and the token before that is
# the click model.

ltr_MSLR10K = pd.read_csv("../output/ltr_MSLR10K_metrics.csv")
ltr_MSLR10K_manipulated = pd.read_csv("../output/ltr_MSLR10K_multibatch_manipulation_metrics.csv")

datasets = {"no ADM": ltr_MSLR10K, "with ADM": ltr_MSLR10K_manipulated}
linestyle = {"linear": "-", "neural": "--"}
marker = {"informational": "o", "navigational": "^"}
color = {"with ADM": "#ff7f0e", "no ADM": "#1f77b4"}

# Collect every line before drawing so that all of them share one sorted x axis.
plot_lines = []
for dataset_name, metrics in datasets.items():
    # Only the mean is plotted, so aggregate just that instead of a full describe().
    summary = metrics.groupby("name")["auc"].mean().rename("mean").reset_index()

    # Split each run name once and read every field from the same token list.
    tokens = summary["name"].str.split("_")
    summary["query"] = tokens.map(lambda t: int(t[t.index("query") - 1]))
    summary["click_model"] = tokens.map(lambda t: t[t.index("query") - 2])
    summary["model"] = tokens.map(lambda t: t[0])

    keep = (
        summary["name"].str.contains("eps_inf")
        & (summary["query"] <= 16)
        & (summary["query"] > 1)
        & (summary["model"] != "random")
    )
    summary = summary[keep]

    for model in ["linear", "neural"]:
        for click_model in ["informational", "navigational"]:
            selected = summary[(summary["model"] == model) & (summary["click_model"] == click_model)]
            plot_lines.append((dataset_name, model, click_model, selected.sort_values(by=["query"])))

# Plotting strings directly makes the tick order depend on which line is drawn
# first; fixed positions keep the evenly spaced ticks but always in numeric order.
query_ticks = sorted({q for *_, line_df in plot_lines for q in line_df["query"].tolist()})
x_position = {query: position for position, query in enumerate(query_ticks)}

# A dedicated figure, so the cell never draws onto a figure left over from elsewhere.
fig, ax = plt.subplots()
for dataset_name, model, click_model, line_df in plot_lines:
    ax.plot(
        [x_position[q] for q in line_df["query"].tolist()],
        line_df["mean"].tolist(),
        linestyle=linestyle[model],
        marker=marker[click_model],
        color=color[dataset_name],
    )
ax.set_xticks(list(x_position.values()), labels=[str(query) for query in query_ticks])

ax.set_xlabel('Number of queries')
ax.set_ylabel('Mean AUC')
fig.tight_layout()

# One proxy handle per visual encoding: line style = model, marker = click model,
# colour = with/without ADM.
legend_entries = []
for label, value in linestyle.items():
    legend_entries.append(mlines.Line2D([], [], color='black', linestyle=value, label=label))
for label, value in marker.items():
    legend_entries.append(mlines.Line2D([], [], color='black', marker=value, label=label))
for label, value in color.items():
    legend_entries.append(mlines.Line2D([], [], color=value, label=label))
ax.legend(handles=legend_entries, loc="upper right", ncols=3, columnspacing=0.5, fontsize="small")

fig.savefig("../plots/MSLR10K_ADM_metrics.pdf", bbox_inches='tight')
plt.show()


In [None]:
# LTR, DP, no manipulation
#
# Plots the mean AUC on MSLR-WEB10K against epsilon for every
# (model, click model, number of queries) combination, skipping the runs whose
# epsilon is infinite.
# Run names are underscore-separated: the first token is the model, the token
# right after "eps" is epsilon, the token right before "query" is the number of
# queries and the token before that is the click model.
import math

linestyle = {"linear": "-", "neural": "--"}
marker = {"informational": "o", "navigational": "^"}
color = {1: "#d62728", 4: "#ff7f0e", 8: "#2ca02c", 12: "#1f77b4", 16: "#9467bd"}

metrics = pd.read_csv("../output/ltr_MSLR10K_metrics.csv")
# Only the mean is plotted, so aggregate just that instead of a full describe().
summary = metrics.groupby("name")["auc"].mean().rename("mean").reset_index()

# Split each run name once and read every field from the same token list.
tokens = summary["name"].str.split("_")
summary["eps"] = tokens.map(lambda t: float(t[t.index("eps") + 1]))
summary["query"] = tokens.map(lambda t: int(t[t.index("query") - 1]))
summary["click_model"] = tokens.map(lambda t: t[t.index("query") - 2])
summary["model"] = tokens.map(lambda t: t[0])

keep = (summary["eps"] != math.inf) & (summary["query"] <= 16) & (summary["model"] != "random")
summary = summary.loc[keep, ["mean", "query", "model", "click_model", "eps"]].reset_index(drop=True)

# Collect every line before drawing so that all of them share one sorted x axis.
plot_lines = []
for model in ["linear", "neural"]:
    for click_model in ["informational", "navigational"]:
        for query in [1, 4, 8, 12, 16]:
            selected = summary[
                (summary["model"] == model)
                & (summary["click_model"] == click_model)
                & (summary["query"] == query)
            ]
            plot_lines.append((model, click_model, query, selected.sort_values(by=["eps"])))

# Plotting strings directly makes the tick order depend on which line is drawn
# first; fixed positions keep the evenly spaced ticks but always in numeric order.
eps_ticks = sorted({eps for *_, line_df in plot_lines for eps in line_df["eps"].tolist()})
x_position = {eps: position for position, eps in enumerate(eps_ticks)}
# Format the labels through pandas so they read the same as `astype(str)` would.
eps_labels = pd.Series(eps_ticks, dtype=float).astype(str).tolist()

# A dedicated figure, so the cell never draws onto a figure left over from elsewhere.
fig, ax = plt.subplots()
for model, click_model, query, line_df in plot_lines:
    ax.plot(
        [x_position[eps] for eps in line_df["eps"].tolist()],
        line_df["mean"].tolist(),
        linestyle=linestyle[model],
        marker=marker[click_model],
        color=color[query],
    )
ax.set_xticks(list(x_position.values()), labels=eps_labels)

ax.set_xlabel('Epsilon')
ax.set_ylabel('Mean AUC')
fig.tight_layout()

# One proxy handle per visual encoding: colour = number of queries,
# line style = model, marker = click model.
legend_entries = []
for label, value in color.items():
    legend_entries.append(mlines.Line2D([], [], color=value, label=f"{label} {'queries' if label > 1 else 'query'}"))
for label, value in linestyle.items():
    legend_entries.append(mlines.Line2D([], [], color='black', linestyle=value, label=label))
for label, value in marker.items():
    legend_entries.append(mlines.Line2D([], [], color='black', marker=value, label=label))
ax.legend(handles=legend_entries, ncols=2, columnspacing=0.5)

fig.savefig("../plots/MSLR10K_DP_metrics.pdf", bbox_inches='tight')
plt.show()


In [None]:
# LTR, pruning
#
# Plots the mean AUC on MQ2007 against the prune percentage for every
# (model, click model, number of queries) combination.
# NOTE(review): going by the file names, the lines for 4-16 queries come from the
# manipulated runs and the line for 1 query from the unmanipulated runs - confirm
# that this mix is intended.
# Run names are underscore-separated: the first token is the model, the token
# right after "prune" is the prune percentage, the token right before "query" is
# the number of queries and the token before that is the click model.
import math  # not used in this cell; left in place in case other cells rely on it

linestyle = {"linear": "-", "neural": "--"}
marker = {"informational": "o", "navigational": "^"}
color = {1: "#d62728", 4: "#ff7f0e", 8: "#2ca02c", 12: "#1f77b4", 16: "#9467bd"}


def _token_near(name, anchor, offset):
    """Return the underscore-separated token `offset` places away from `anchor` in `name`."""
    tokens = name.split("_")
    return tokens[tokens.index(anchor) + offset]


def _prune_pct(name):
    """Return the prune percentage of a run name, or 0.0 when "prune" does not occur in it."""
    if "prune" not in name:
        return 0.0
    return float(_token_near(name, "prune", 1))


def _summarise_runs(frames):
    """Concatenate metric frames and return the mean AUC per run with its parsed fields.

    The result has the columns mean, query, model, click_model and pct; runs whose
    model is "random" are dropped.
    """
    # Only the mean is plotted, so aggregate just that instead of a full describe().
    runs = pd.concat(frames).groupby("name")["auc"].mean().rename("mean").reset_index()
    runs["model"] = runs["name"].map(lambda name: name.split("_")[0])
    # Drop the random runs before parsing the remaining fields, and copy so the
    # assignments below do not write into a slice (SettingWithCopyWarning).
    runs = runs[runs["model"] != "random"].copy()
    runs["pct"] = runs["name"].map(_prune_pct)
    runs["query"] = runs["name"].map(lambda name: int(_token_near(name, "query", -1)))
    runs["click_model"] = runs["name"].map(lambda name: _token_near(name, "query", -2))
    return runs[["mean", "query", "model", "click_model", "pct"]].reset_index(drop=True)


manipulated_runs = _summarise_runs([
    pd.read_csv("../output/ltr_MQ2007_manipulated_pruned_metrics.csv"),
    pd.read_csv("../output/ltr_MQ2007_multibatch_manipulation_metrics.csv"),
])

pruned_metrics = pd.read_csv("../output/ltr_MQ2007_pruned_metrics.csv")
plain_metrics = pd.read_csv("../output/ltr_MQ2007_metrics.csv")
# From the unpruned file only the runs matching "1_query_eps_inf" serve as the 0.0 point.
plain_metrics = plain_metrics[plain_metrics["name"].str.contains("1_query_eps_inf")]
plain_runs = _summarise_runs([pruned_metrics, plain_metrics])

# Collect every line before drawing so that all of them share one sorted x axis.
plot_lines = []
for runs, queries in ((manipulated_runs, [4, 8, 12, 16]), (plain_runs, [1])):
    for model in ["linear", "neural"]:
        for click_model in ["informational", "navigational"]:
            for query in queries:
                selected = runs[
                    (runs["model"] == model)
                    & (runs["click_model"] == click_model)
                    & (runs["query"] == query)
                ]
                plot_lines.append((model, click_model, query, selected.sort_values(by=["pct"])))

# Plotting strings directly makes the tick order depend on which line is drawn
# first; fixed positions keep the evenly spaced ticks but always in numeric order.
pct_ticks = sorted({pct for *_, line_df in plot_lines for pct in line_df["pct"].tolist()})
x_position = {pct: position for position, pct in enumerate(pct_ticks)}
# Format the labels through pandas so they read the same as `astype(str)` would.
pct_labels = pd.Series(pct_ticks, dtype=float).astype(str).tolist()

# A dedicated figure, so the cell never draws onto a figure left over from elsewhere.
fig, ax = plt.subplots()
for model, click_model, query, line_df in plot_lines:
    ax.plot(
        [x_position[pct] for pct in line_df["pct"].tolist()],
        line_df["mean"].tolist(),
        linestyle=linestyle[model],
        marker=marker[click_model],
        color=color[query],
    )
ax.set_xticks(list(x_position.values()), labels=pct_labels)

ax.set_xlabel('Prune percentage')
ax.set_ylabel('Mean AUC')
fig.tight_layout()

# One proxy handle per visual encoding: colour = number of queries,
# line style = model, marker = click model.
legend_entries = []
for label, value in color.items():
    legend_entries.append(mlines.Line2D([], [], color=value, label=f"{label} {'queries' if label > 1 else 'query'}"))
for label, value in linestyle.items():
    legend_entries.append(mlines.Line2D([], [], color='black', linestyle=value, label=label))
for label, value in marker.items():
    legend_entries.append(mlines.Line2D([], [], color='black', marker=value, label=label))
ax.legend(handles=legend_entries, ncols=2, columnspacing=0.5, handlelength=1.5, labelspacing=0.25, borderpad=0.3, borderaxespad=0.3)

fig.savefig("../plots/MQ2007_pruned_metrics.pdf", bbox_inches='tight')
plt.show()
