In [1]:
import argparse
import pickle
from pathlib import Path

import fraud_eagle as feagle
import fraudar
import networkx as nx
import numpy as np
import pandas as pd
import rsd

from rev2 import rev2compute
from utils import load_data, split_data_by_time

import seaborn as sns
import matplotlib.pyplot as plt

import sklearn.metrics

In [2]:
def get_results(data_name="alpha", alg="fraudar", attack_name="ilp_attack", budget=100, frac=0.2):
    """Load the pickled scores of one attack run together with the ground truth.

    The result file is looked up at
    ``../res/{attack_name}/{alg}-{data_name}/{budget}-{frac}.pkl``.

    Returns
    -------
    pandas.DataFrame or None
        One column per entry of the pickled sequence (labelled 0, 1, ...) plus a
        ``"truth"`` column, indexed by the keys of the per-entry mappings.
        ``None`` (after printing the missing path) when the file does not exist.
    """
    data_nw_df, data_gt_df = load_data(data_name=data_name)
    result_path = Path(f"../res/{attack_name}/{alg}-{data_name}/{budget}-{frac}.pkl")
    if not result_path.exists():
        # Report which run is missing so the caller can see the gap.
        print(result_path)
        return None

    # NOTE(review): pickle.load executes arbitrary code; only point this at
    # result files produced by this project.
    with open(result_path, "rb") as handle:
        raw_scores = pickle.load(handle)

    # Key every entry of the pickled sequence by its position.
    columns = dict(enumerate(raw_scores))
    # 1 means fraudster, 0 means benign
    # (the transform maps label -1 -> 1 and label +1 -> 0; presumably labels are +/-1 -- confirm).
    columns["truth"] = dict(zip(data_gt_df["id"], (1 - data_gt_df["label"]) / 2))

    return pd.DataFrame(
        {name: {user: per_user[user] for user in per_user} for name, per_user in columns.items()}
    )

In [3]:
def get_metrics(df_res, splits=4, K=None):
    """Compute precision, recall and F1 of the top-ranked users for each split.

    Parameters
    ----------
    df_res : pandas.DataFrame or None
        Frame with one score column per split (labelled ``0 .. splits-1``) and a
        ``"truth"`` column where 1 marks a fraudster and 0 a benign user.
        The frame is not modified.
    splits : int
        Number of split columns to evaluate.
    K : int or None
        Number of top-ranked users to flag as fraudsters. When ``None``, the
        number of true fraudsters among the scored users of the split is used.

    Returns
    -------
    dict or None
        ``{split: {"precision": ..., "recall": ..., "f1": ...}}``, or ``None``
        when ``df_res`` is ``None``.
    """
    if df_res is None:
        return None

    # Users without a ground-truth entry are counted as fraudsters
    # (presumably the accounts injected by the attack -- TODO confirm).
    # Filled once, on a separate Series, so the caller's frame stays untouched.
    truth = df_res["truth"].fillna(1)

    ret = {}
    for sp in range(splits):
        # Only users that actually received a score in this split are evaluated.
        scored = df_res[sp].notna()
        scores = df_res.loc[scored, sp]
        y_true = truth[scored]

        t = y_true.sum() if K is None else K
        # method="min" gives tied scores the same (best) rank, so every user
        # tied at the cut-off is flagged and more than `t` users may be predicted.
        y_pred = (scores.rank(ascending=False, method="min") <= t) * 1

        ret[sp] = {
            "precision": sklearn.metrics.precision_score(y_true=y_true, y_pred=y_pred),
            "recall": sklearn.metrics.recall_score(y_true=y_true, y_pred=y_pred),
            "f1": sklearn.metrics.f1_score(y_true=y_true, y_pred=y_pred),
        }
    return ret

In [4]:
# Experiment grid. Only a subset of what the project supports is enabled here:
# algorithms available: fraudar, rsd, rev2; datasets available: alpha, otc, amazon, epinions.
alg_list = ["fraudar", "rsd"]
data_list = ["alpha"]
# Budgets are floats because they are formatted into the result file name (e.g. "100.0-0.2.pkl").
budget_list = [float(b) for b in (100, 200, 300, 400)]
frac_list = [0.2, 0.4, 0.6, 0.8]

In [7]:
# Dump one result frame to CSV for manual inspection.
# NOTE: `attack_res` is created by the cell that follows in this file (it was
# executed as In [6], this one as In [7]); on a fresh kernel run that cell first.
sample_key = ("alpha", "fraudar", 400.0, 0.2)
attack_res[sample_key].to_csv("al.csv")

In [6]:
# Load the ILP-attack scores for every (dataset, algorithm, budget, fraction) combination.
# Entries are None for runs whose result file is missing.
attack_res = {
    (dataset, algo, budget, frac): get_results(
        data_name=dataset, alg=algo, attack_name="ilp_attack", budget=budget, frac=frac
    )
    for dataset in data_list
    for algo in alg_list
    for budget in budget_list
    for frac in frac_list
}

# One small frame per combination: rows are the metrics, columns are the splits.
dict_met = {
    combo: pd.DataFrame.from_dict(get_metrics(df_res=scores, splits=4, K=None))
    for combo, scores in attack_res.items()
}

# Passing the mapping lets concat use its keys as the outer index levels.
df_met = pd.concat(dict_met)
display(df_met)

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,0,1,2,3
alpha,fraudar,100.0,0.2,precision,0.357513,0.408654,0.380682,0.335526
alpha,fraudar,100.0,0.2,recall,0.862500,1.000000,0.848101,0.698630
alpha,fraudar,100.0,0.2,f1,0.505495,0.580205,0.525490,0.453333
alpha,fraudar,100.0,0.4,precision,0.364103,0.413953,0.380682,0.335526
alpha,fraudar,100.0,0.4,recall,0.845238,1.000000,0.807229,0.662338
alpha,...,...,...,...,...,...,...,...
alpha,rsd,400.0,0.6,recall,0.733871,0.744186,0.772358,0.752137
alpha,rsd,400.0,0.6,f1,0.733871,0.744186,0.772358,0.752137
alpha,rsd,400.0,0.8,precision,0.764286,0.765517,0.798561,0.781955
alpha,rsd,400.0,0.8,recall,0.764286,0.765517,0.798561,0.781955


In [8]:
# Name the five index levels so later cells can refer to them by name.
index_level_names = ["data", "algo", "budget", "frac", "metrics"]
df_met.index = df_met.index.set_names(index_level_names)

In [9]:
# Per-split metrics of fraudar on alpha, with the mean over the splits appended as a last column.
fraudar_alpha = df_met.loc[("alpha", "fraudar", slice(None), slice(None))]
pd.concat([fraudar_alpha, fraudar_alpha.mean(axis=1)], axis=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,0,1,2,3,0
budget,frac,metrics,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
100.0,0.2,precision,0.357513,0.408654,0.380682,0.335526,0.370594
100.0,0.2,recall,0.8625,1.0,0.848101,0.69863,0.852308
100.0,0.2,f1,0.505495,0.580205,0.52549,0.453333,0.516131
100.0,0.4,precision,0.364103,0.413953,0.380682,0.335526,0.373566
100.0,0.4,recall,0.845238,1.0,0.807229,0.662338,0.828701
100.0,0.4,f1,0.508961,0.585526,0.517375,0.445415,0.514319
100.0,0.6,precision,0.357513,0.424658,0.380682,0.335526,0.374595
100.0,0.6,recall,0.784091,1.0,0.770115,0.62963,0.795959
100.0,0.6,f1,0.491103,0.596154,0.509506,0.437768,0.508633
100.0,0.8,precision,0.367347,0.37766,0.380682,0.335526,0.365304


In [10]:
# Mean F1 over the splits for fraudar on alpha, laid out as frac (rows) x budget (columns).
# Relies on the index levels of df_met already carrying their names.
split_mean = df_met.mean(axis=1).to_frame().reset_index()
mean_by_budget = split_mean.pivot(index=["data", "algo", "frac", "metrics"], columns=["budget"])
mean_by_budget.loc[("alpha", "fraudar", slice(None), "f1")].droplevel(0, axis=1)

budget,100.0,200.0,300.0,400.0
frac,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.2,0.516131,0.505482,0.496743,0.518857
0.4,0.514319,0.472821,0.509872,0.46731
0.6,0.508633,0.469065,0.461841,0.431768
0.8,0.482625,0.460793,0.491473,0.423988
