In [None]:
import json
import math
import matplotlib.lines as mlines
import matplotlib.pyplot as plt
import pandas as pd
from scipy.stats import ks_2samp

In [None]:
# Build the body of a LaTeX table from the `auc` column of the MSLR10K
# query-noise result files. Results are grouped by run name; for every
# (model, adm) pair the table gets one row of means followed by one row of
# "(std)" cells, with columns ordered by eps, then click, then query count.
#
# Earlier variant kept for reference (single MQ2008 file, other query counts):
# df = pd.read_csv("../output/ltr_metrics_mq2008_noise.csv")
# queries = [4, 8, 16]
queries = [12, 24, 48]
dfs = []
for query in queries:
    df = pd.read_csv(f"../output/ltr_metrics_mslr10k_{query}_query_noise.csv")
    # describe() yields count/mean/std/... per run name; dropping the outer
    # column level leaves plain "mean", "std", ... columns.
    df = df[["name", "auc"]].groupby("name").describe()
    df.columns = df.columns.droplevel()
    dfs.append(df)
df = pd.concat(dfs)

# The experiment settings are encoded in the underscore-separated run name.
df["model"] = df.index.map(lambda x: "_".join(x.split("_")[0:2])).values
df["click"] = df.index.map(lambda x: x.split("_")[-6]).values
df["query"] = df.index.map(lambda x: int(x.split("_")[-5])).values
df["adm"] = df.index.map(lambda x: float(x.split("_")[-1])).values
df["eps"] = df.index.map(lambda x: float(x.split("_")[-2])).values
# Only the two extreme adm settings are tabulated.
df = df[df["adm"].isin([0.0, 1.0])]

model_map = {
    "linear_pdgd": "Linear",
    "neural_4": "Neural 4",
    "neural_8": "Neural 8",
    "neural_16": "Neural 16",
}
adm_map = {
    0.0: "None",
    1.0: "\\name"
}
round_num = 2  # number of decimals printed for every cell
line = ""
for model in ["linear_pdgd", "neural_4", "neural_8", "neural_16"]:
    line += "\\multirow{4}{*}{\\makecell{" + model_map[model] + "}}"
    for adm in [0.0, 1.0]:
        line += " & \\multirow{2}{*}{\\makecell{" + adm_map[adm] + "}}"
        # Collect the mean row and the std row in a single pass so each
        # table cell needs only one DataFrame lookup.
        mean_cells = ""
        std_cells = ""
        for eps in [math.inf, 500, 100, 20, 1]:
            for click in ["informational", "navigational"]:
                for query in queries:
                    selected = df[
                        (df["model"] == model)
                        & (df["adm"] == adm)
                        & (df["click"] == click)
                        & (df["eps"] == eps)
                        & (df["query"] == query)
                    ]
                    if selected.empty:
                        raise IndexError(
                            f"no results for model={model!r}, adm={adm}, "
                            f"click={click!r}, eps={eps}, query={query}"
                        )
                    row = selected.iloc[0]
                    # Fixed-point formatting always prints `round_num`
                    # decimals and leaves NaN/inf untouched; padding a
                    # str(round(...)) result by hand turned "nan" into "nan0".
                    mean_cells += f" & {row['mean']:.{round_num}f}"
                    std_cells += f" & ({row['std']:.{round_num}f})"
        # The std row starts with two empty cells (under model and adm).
        line += mean_cells + " \\\\\n &" + std_cells + " \\\\\n"
    line += "\\midrule\n"
print(line)


In [None]:
# Ratio of the total number of returned item ids to the number of user ids,
# presumably the mean number of items per user (helper semantics live in the
# project's `dataset` module; confirm there).
from dataset import Steam200K

d = Steam200K()
# Query one user at a time and add up the length of the first returned entry.
cnt = sum(len(d.get_item_ids_for_users([u])[0]) for u in d.get_all_user_ids())
# Last expression of the cell, so the notebook displays the ratio.
cnt / len(d.get_all_user_ids())

In [None]:
# Build the body of a LaTeX table from the `auc` column of the partitioned
# CV result files: one group of rows per CV backbone, one row per adm
# setting, and one "mean (std)" cell per (model, multiplier) pair at eps == 1.
cv_models = ["resnet", "regnet", "densenet", "mnasnet"]
multipliers = [1, 2, 4]
adm_map = {
    "no_adm": "None",
    "adm_FGSM_0.1": "FGSM",
    "adm_opt": "\\name",
}
cv_model_map = {
    "resnet": "ResNet18",
    "regnet": "RegNet Y 800MF",
    "densenet": "DenseNet121",
    "mnasnet": "MNasNet 1.3",
}
round_num = 3  # number of decimals printed for every mean and std
print_str = ""
for cv_model in cv_models:
    dfs = []
    for multiplier in multipliers:
        df = pd.read_csv(f"../output/ltr_cv_{cv_model}_x{multiplier}_partitioned_metrics.csv")
        # describe() yields count/mean/std/... per run name; dropping the
        # outer column level leaves plain "mean", "std", ... columns.
        df = df[["name", "auc"]].groupby("name").describe()
        df.columns = df.columns.droplevel()
        df["multiplier"] = multiplier
        dfs.append(df)
    df = pd.concat(dfs)
    # Drop every run whose name contains "neural2_2_".
    df = df[(df.index.map(lambda x: "neural2_2_" not in x))]
    # The experiment settings are encoded in the underscore-separated run name.
    df["model"] = df.index.map(lambda x: x.split("_")[0]).values
    df["adm"] = df.index.map(lambda x: "_".join(x.split("_")[5:])).values
    df["eps"] = df.index.map(lambda x: float(x.split("_")[4])).values

    print_str += "\\midrule\n\\multirow{3}{*}{\\makecell{" + cv_model_map[cv_model] + "}}"
    for adm in ["no_adm", "adm_FGSM_0.1", "adm_opt"]:
        print_str += f" & {adm_map[adm]}"
        for model in ["linear", "neural2", "neural4", "neural8"]:
            for multiplier in multipliers:
                selected = df[
                    (df["model"] == model)
                    & (df["adm"] == adm)
                    & (df["multiplier"] == multiplier)
                    & (df["eps"] == 1)
                ]
                if selected.empty:
                    raise IndexError(
                        f"no results for cv_model={cv_model!r}, model={model!r}, "
                        f"adm={adm!r}, multiplier={multiplier}, eps=1"
                    )
                row = selected.iloc[0]
                # Fixed-point formatting always prints `round_num` decimals
                # and leaves NaN/inf untouched; padding a str(round(...))
                # result by hand turned "nan" into "nan00".
                num = f"{row['mean']:.{round_num}f}"
                std = f"{row['std']:.{round_num}f}"
                # To highlight the adm_opt row, wrap the cell text below in
                # "\\textbf{" ... "}" when adm == "adm_opt".
                print_str += " & " + num + f" ({std})"
        print_str += " \\\\\n"
print(print_str)