In [1]:
import argparse
import pickle
from pathlib import Path

import fraud_eagle as feagle
import fraudar
import networkx as nx
import numpy as np
import pandas as pd
import rsd

from rev2 import rev2compute
from utils import load_data, split_data_by_time

import seaborn as sns
import matplotlib.pyplot as plt

import sklearn.metrics

import warnings
from sklearn.exceptions import UndefinedMetricWarning
warnings.filterwarnings(action='ignore', category=UndefinedMetricWarning)

In [2]:
def get_results(data_name="alpha", alg="fraudar", attack_name="ilp_attack", budget=100, frac=0.2):
    """Load the pickled per-split scores of one attack run and attach ground-truth labels.

    The scores are read from
    ``../res/{attack_name}/{alg}-{data_name}/{budget}-{frac}.pkl``. The pickle is
    expected to hold a sequence with one ``{user id: score}`` mapping per split.

    Args:
        data_name: Dataset name, forwarded to ``load_data`` and used in the path.
        alg: Detector name, used in the path.
        attack_name: Attack directory name under ``../res``.
        budget: Attack budget, used verbatim in the file name.
        frac: Attack fraction, used verbatim in the file name.

    Returns:
        A DataFrame indexed by user id with one column per split (``0..n-1``)
        plus a ``"truth"`` column (1 = fraudster, 0 = benign), or ``None`` when
        the result file does not exist (its path is printed in that case).
    """
    result_path = Path(f"../res/{attack_name}/{alg}-{data_name}/{budget}-{frac}.pkl")
    # Check for the result file first so a missing run does not pay for loading the dataset.
    if not result_path.exists():
        print(result_path)
        return None

    # NOTE(review): pickle.load can execute arbitrary code; only read result files you produced.
    with open(result_path, "rb") as fp:
        per_split = pickle.load(fp)

    _, data_gt_df = load_data(data_name=data_name)

    scores = dict(zip(range(len(per_split)), per_split))
    # 1 means fraudster, 0 means benign: (1 - label) / 2 maps label -1 to 1 and label +1 to 0.
    # Presumably labels are +/-1 -- TODO confirm against load_data.
    scores["truth"] = dict(zip(data_gt_df["id"], (1 - data_gt_df["label"]) / 2))

    df = pd.DataFrame({sp: {u: scores[sp][u] for u in scores[sp]} for sp in scores})

    # Ids starting with "usock" or "udummy" are always labelled as fraudsters
    # (presumably accounts injected by the attack -- verify against the attack code).
    # str() keeps the prefix test safe when some ids are not strings.
    injected = [str(u).startswith(("usock", "udummy")) for u in df.index]
    df.loc[injected, "truth"] = 1
    return df

In [3]:
def get_metrics(df_res, splits=4, K=None, th=0.5):
    """Compute precision, recall and F1 for each split of one result frame.

    A user is predicted fraudulent when its score in the split column is
    ``>= th``. Rows with a missing score or a missing label are excluded from
    that split. Ids starting with ``"usock"`` or ``"udummy"`` are always
    treated as fraudsters (label 1), even when ``df_res`` has no label for them.

    The input frame is not modified.

    Args:
        df_res: DataFrame indexed by user id with one column per split
            (``0..splits-1``) and a ``"truth"`` column, or ``None``.
        splits: Number of split columns to evaluate.
        K: Currently unused; kept so existing callers keep working.
        th: Decision threshold applied to the scores.

    Returns:
        ``{split: {"precision": ..., "recall": ..., "f1": ...}}``, or ``None``
        when ``df_res`` is ``None``.
    """
    if df_res is None:
        return None

    # Build the label vector once, on a local copy, instead of rewriting the
    # caller's frame on every loop iteration.
    injected = np.array(
        [str(u).startswith(("usock", "udummy")) for u in df_res.index], dtype=bool
    )
    if "truth" in df_res.columns:
        truth = df_res["truth"]
    else:
        truth = pd.Series(np.nan, index=df_res.index, dtype=float)
    truth = truth.mask(injected, 1)

    ret = {}
    for sp in range(splits):
        split_scores = df_res[sp]
        # Same rows that dropna() on the [split, "truth"] pair would keep.
        usable = split_scores.notna() & truth.notna()
        y_true = truth[usable]
        y_pred = split_scores[usable] >= th

        ret[sp] = {
            "precision": sklearn.metrics.precision_score(y_true=y_true, y_pred=y_pred),
            "recall": sklearn.metrics.recall_score(y_true=y_true, y_pred=y_pred),
            "f1": sklearn.metrics.f1_score(y_true=y_true, y_pred=y_pred),
        }
    return ret

In [33]:
# Experiment grid evaluated in this run.
# Detectors: "fbox" is also a known name but is left out of this run.
alg_list = ["fraudar", "rsd", "rev2", "sg"]
# Datasets: only "amazon" here; the other known names are "alpha", "otc" and "epinions".
data_list = ["amazon"]
# Attack budgets 1000.0 .. 4000.0 and attack fractions 0.0 .. 0.8 (kept as floats,
# since both values are formatted into the result file names).
budget_list = [1000.0 * step for step in range(1, 5)]
frac_list = [tenths / 10 for tenths in range(0, 10, 2)]

In [28]:
# Result frame for every (dataset, detector, budget, fraction) combination of the grid.
# get_results returns None (and prints the path) for combinations with no result file.
attack_res = {
    (dataset, detector, budget, fraction): get_results(
        data_name=dataset,
        alg=detector,
        attack_name="ilp_large",
        budget=budget,
        frac=fraction,
    )
    for dataset in data_list
    for detector in alg_list
    for budget in budget_list
    for fraction in frac_list
}

../res/ilp_large/rev2-amazon/4000.0-0.2.pkl
../res/ilp_large/rev2-amazon/4000.0-0.4.pkl
../res/ilp_large/rev2-amazon/4000.0-0.6.pkl
../res/ilp_large/rev2-amazon/4000.0-0.8.pkl


In [85]:
# attack_res[("epinions", "fraudar", 400.0, 0.2)].to_csv("ar.csv")

In [61]:
# Per-detector decision thresholds: get_metrics predicts "fraudster" when score >= threshold.
th_dict = {
    "fraudar": 0.5,
    "rsd": 1.97,
    "rev2": 0.1,
    "fbox": 0.5,
    "sg": 0.5,
}

# One metrics frame (rows: precision/recall/f1, columns: splits) per configuration.
# get_metrics returns None for configurations without a result file; those are turned
# into an explicit empty frame rather than relying on DataFrame.from_dict(None).
dict_met = {
    (d, a, b, f): pd.DataFrame.from_dict(
        get_metrics(df_res=res, splits=4, K=None, th=th_dict[a]) or {}
    )
    for (d, a, b, f), res in attack_res.items()
}

# Concatenate only the configurations that produced metrics: concatenating empty
# frames is deprecated in recent pandas and adds no rows anyway. The dict keys become
# the outer index levels, so no separate keys= argument is needed.
available_met = {key: met for key, met in dict_met.items() if not met.empty}
df_met = pd.concat(available_met) if available_met else pd.DataFrame()
display(df_met)

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,0,1,2,3
amazon,fraudar,1000.0,0.0,precision,0.081395,0.076923,0.093702,0.090909
amazon,fraudar,1000.0,0.0,recall,0.443038,0.302326,0.309645,0.317992
amazon,fraudar,1000.0,0.0,f1,0.137525,0.122642,0.143868,0.141395
amazon,fraudar,1000.0,0.2,precision,0.081395,0.076923,0.093702,0.090909
amazon,fraudar,1000.0,0.2,recall,0.372340,0.270833,0.287736,0.299213
amazon,...,...,...,...,...,...,...,...
amazon,sg,4000.0,0.6,recall,0.019305,0.022654,0.021220,0.000000
amazon,sg,4000.0,0.6,f1,0.036496,0.041916,0.038278,0.000000
amazon,sg,4000.0,0.8,precision,0.333333,0.280000,0.195122,0.000000
amazon,sg,4000.0,0.8,recall,0.015674,0.018970,0.018307,0.000000


In [62]:
def get_alg_data_met(df_met, alg="rev2", data="alpha", met="f1"):
    """Tabulate one metric of one detector on one dataset by attack fraction and budget.

    Each row of ``df_met`` is averaged across its columns (the splits), and the
    averages are pivoted so that rows are attack fractions and columns are budgets.

    The input frame is not modified.

    Args:
        df_met: Metrics frame whose 5-level row index is
            (dataset, detector, budget, fraction, metric name), in that order.
        alg: Detector to select.
        data: Dataset to select.
        met: Metric name to select (e.g. ``"f1"``, ``"precision"``, ``"recall"``).

    Returns:
        DataFrame with one row per fraction, labelled ``"no-attack"`` for
        fraction 0 and ``"baseline-{fraction}"`` otherwise, and a two-level
        column index ``(0, budget)``.

    Raises:
        KeyError: If the requested (data, alg, met) combination is absent.
    """
    level_names = ["data", "algo", "budget", "frac", "metrics"]

    # Name the index levels on the averaged Series rather than on the caller's frame.
    mean_scores = df_met.mean(axis=1)
    mean_scores.index = mean_scores.index.set_names(level_names)

    pivoted = mean_scores.to_frame().reset_index().pivot(
        index=["data", "algo", "frac", "metrics"], columns=["budget"]
    )
    # Select by level name; the selected levels are dropped, leaving "frac" as the index.
    rdf = pivoted.xs((data, alg, met), level=["data", "algo", "metrics"])
    rdf.index = [f"baseline-{i}" if i > 0 else "no-attack" for i in rdf.index]
    return rdf

In [65]:
# Precision of RSD on the Amazon data, averaged over splits: rows are attack fractions, columns are budgets.
get_alg_data_met(df_met, data="amazon", alg="rsd", met="precision")

Unnamed: 0_level_0,0,0,0,0
budget,1000.0,2000.0,3000.0,4000.0
no-attack,0.287317,0.275116,0.275116,0.275116
baseline-0.2,0.282357,0.264919,0.275116,0.275116
baseline-0.4,0.282357,0.277105,0.277105,0.275116
baseline-0.6,0.282357,0.282357,0.277105,0.277105
baseline-0.8,0.282357,0.287317,0.276408,0.281979
