In [1]:
import argparse
import pickle
from pathlib import Path

import fraud_eagle as feagle
import fraudar
import networkx as nx
import numpy as np
import pandas as pd
import rsd

from rev2 import rev2compute
from utils import load_data, split_data_by_time

import seaborn as sns
import matplotlib.pyplot as plt

import sklearn.metrics

import warnings
from sklearn.exceptions import UndefinedMetricWarning
warnings.filterwarnings(action='ignore', category=UndefinedMetricWarning)

In [2]:
# Detection algorithms to evaluate. The open-ended slice keeps every entry; it
# can be narrowed to run only a subset.
alg_list = ["fraudar", "rsd", "rev2", "sg"][0:]
# Datasets to evaluate. The slice currently keeps only the last one ("epinions").
data_list = ["alpha", "otc", "amazon", "epinions"][-1:]
# Budgets passed to get_results; each value is part of a result file name.
budget_list = [float(step * 1000) for step in range(1, 5)]
# Fractions; not referenced by the cells visible in this part of the notebook.
frac_list = [0.0, 0.2, 0.4, 0.6, 0.8]

In [3]:
def get_results(data_name="alpha", alg="fraudar", attack_name="ilp_attack", budget=100):
    """Load the pickled scores of one run and attach ground-truth labels.

    The scores are read from ``../res/{attack_name}/{alg}-{data_name}/{budget}.pkl``.
    From the way the object is indexed below, the pickle is expected to hold one
    entry per split, each entry mapping an algorithm name to a ``{user: score}``
    mapping.

    Args:
        data_name: Dataset name; forwarded to ``load_data`` and used in the path.
        alg: Algorithm name used in the result directory name.
        attack_name: Name of the result sub-directory under ``../res``.
        budget: Budget value used as the pickle file stem.

    Returns:
        A DataFrame indexed by user whose columns are ``(algorithm, split)`` score
        columns plus one ``(algorithm, "truth")`` column per entry of ``alg_list``
        (1 means fraudster, 0 means benign). Returns ``None`` (after printing the
        path) when the result file does not exist.
    """
    p = Path(f"../res/{attack_name}/{alg}-{data_name}/{budget}.pkl")
    if not p.exists():
        print(p)
        return None

    # NOTE: pickle.load can execute arbitrary code; only read result files that
    # were produced by trusted runs.
    with open(p, "rb") as fp:
        split_scores = pickle.load(fp)

    # The dataset is loaded only once we know there is a result file to join
    # with, so missing runs do not pay for the load.
    _, data_gt_df = load_data(data_name=data_name)

    # Key the per-split entries by their position: 0, 1, 2, ...
    scores = dict(enumerate(split_scores))

    # 1 means fraudster, 0 means benign. (1 - label) / 2 maps a label of -1 to 1
    # and a label of 1 to 0 -- assumes labels are +/-1; TODO confirm with load_data.
    truth_by_user = dict(zip(data_gt_df["id"], (1 - data_gt_df["label"]) / 2))
    # Every algorithm gets the same truth column so it can be paired with that
    # algorithm's scores later; the mapping is only read, so sharing it is safe.
    scores["truth"] = {a: truth_by_user for a in alg_list}

    df = pd.DataFrame({
        (a, sp): dict(per_alg[a])
        for a in alg_list
        for sp, per_alg in scores.items()
        if a in per_alg
    })

    # Users whose id starts with "usock" or "udummy" are always labelled 1
    # (presumably the accounts injected by the attack; they are not expected to
    # appear in the ground-truth table).
    injected = [
        isinstance(u, str) and u.startswith(("usock", "udummy"))
        for u in df.index
    ]
    df.loc[injected, (slice(None), "truth")] = 1
    return df

In [4]:
def get_metrics(df_res, splits=4, K=None, th_dict=None):
    """Compute precision, recall and F1 per (algorithm, split) at fixed thresholds.

    Args:
        df_res: Frame with ``(algorithm, split)`` score columns and
            ``(algorithm, "truth")`` label columns, as built by ``get_results``.
            ``None`` is passed through.
        splits: Number of splits; split ids ``0 .. splits - 1`` are looked up.
        K: Unused. Kept only for backward compatibility; it previously fed a
            computation whose result was discarded.
        th_dict: Mapping from algorithm name to score threshold. A user is
            predicted positive when its score is ``>=`` the threshold.

    Returns:
        A dict keyed by ``(algorithm, split)`` whose values are dicts with the
        keys ``"precision"``, ``"recall"`` and ``"f1"``; ``None`` when ``df_res``
        is ``None``. Columns missing from ``df_res`` are skipped.

    Raises:
        TypeError: If a threshold is needed but ``th_dict`` is ``None``.
    """
    if df_res is None:
        return None
    ret = {}
    # Split-major order is kept on purpose: the insertion order of ``ret``
    # determines the column order of frames built from it.
    for sp in range(splits):
        for a in alg_list:
            if (a, sp) not in df_res.columns:
                continue
            if th_dict is None:
                raise TypeError("th_dict must map each algorithm name to a score threshold")

            # Keep only users that have both a score and a truth label.
            scored = df_res[[(a, sp), (a, "truth")]].dropna()
            y_true = scored[(a, "truth")]
            y_pred = scored[(a, sp)] >= th_dict[a]

            ret[(a, sp)] = {
                "precision": sklearn.metrics.precision_score(y_true=y_true, y_pred=y_pred),
                "recall": sklearn.metrics.recall_score(y_true=y_true, y_pred=y_pred),
                "f1": sklearn.metrics.f1_score(y_true=y_true, y_pred=y_pred),
            }
    return ret

In [5]:
# Result frame for every (dataset, algorithm, budget) combination of the
# "sockfarm_large_single" attack. get_results prints the path and returns None
# for combinations whose result file is missing.
attack_res = {
    (dataset, algorithm, budget): get_results(
        data_name=dataset,
        alg=algorithm,
        attack_name="sockfarm_large_single",
        budget=budget,
    )
    for dataset in data_list
    for algorithm in alg_list
    for budget in budget_list
}

../res/sockfarm_large_single/rsd-epinions/3000.0.pkl
../res/sockfarm_large_single/rsd-epinions/4000.0.pkl


In [6]:
# attack_res[("amazon", "rev2", 400.0)].to_csv("ar.csv")

In [9]:
def get_sockfarm_alg_data_met(df_met, data="alpha", met="f1"):
    """Return one metric of one dataset as an algorithm-by-budget table.

    ``df_met`` must carry the index levels ``data``, ``algo``, ``budget`` and
    ``metrics``. The frame is pivoted so that ``budget`` moves into the columns
    (below the original column labels). The rows for the requested ``data`` and
    ``met`` are then selected, and each remaining row label is prefixed with
    ``"sockfarm-"``.
    """
    flat = df_met.reset_index()
    wide = flat.pivot(index=["data", "algo", "metrics"], columns=["budget"])
    # Fix the dataset and metric levels and keep every algorithm.
    selection = wide.loc[(data, slice(None), met)]
    selection.index = ["sockfarm-{}".format(label) for label in selection.index]
    return selection

In [20]:
# Score thresholds used when computing metrics.
# Outer key: the algorithm in the attack_res key (the `alg` given to get_results).
# Inner key: the detection algorithm whose scores are compared with the threshold.
def_dict = dict(
    fraudar=dict(fraudar=0.9, rsd=1.96, rev2=0.03, sg=0.5),  # rev2=0.026
    rsd=dict(fraudar=0.5, rsd=0.51, rev2=0.026, sg=0.5),
    rev2=dict(fraudar=0.6, rsd=1.91, rev2=0.5, sg=0.5),
    sg=dict(fraudar=0.9, rsd=1.4, rev2=0.15, sg=0.5),
)


# One metrics table per (dataset, algorithm, budget) run. get_metrics returns
# None for runs whose result file was missing; DataFrame.from_dict turns that
# into an empty frame, which contributes no rows to the concatenation below.
dict_met = {
    (d, a, b): pd.DataFrame.from_dict(
        get_metrics(df_res=attack_res[(d, a, b)], splits=4, K=None, th_dict=def_dict[a])
    )
    for d, a, b in attack_res
}
df_met = pd.concat(dict_met, keys=dict_met.keys())
df_met.index = df_met.index.set_names(["data", "algo", "budget", "metrics"])
df_met.columns = pd.MultiIndex.from_tuples(df_met.columns)
# Average over splits: group the (algorithm, split) columns by algorithm.
# DataFrame.mean(axis=1, level=0) was removed in pandas 2.0; the transposed
# groupby is the equivalent that works on both old and new pandas.
# sort=False keeps the algorithms in first-appearance order, as before.
df_met = df_met.T.groupby(level=0, sort=False).mean().T

dmet = "epinions"

# One table per metric for the selected dataset.
for metric_name in ("f1", "recall", "precision"):
    display(get_sockfarm_alg_data_met(df_met, data=dmet, met=metric_name))

Unnamed: 0_level_0,fraudar,fraudar,fraudar,fraudar,rsd,rsd,rsd,rsd,rev2,rev2,rev2,rev2,sg,sg,sg,sg
budget,1000.0,2000.0,3000.0,4000.0,1000.0,2000.0,3000.0,4000.0,1000.0,2000.0,3000.0,4000.0,1000.0,2000.0,3000.0,4000.0
sockfarm-fraudar,0.033639,0.03249,0.031423,0.030428,0.11928,0.09803,0.083752,0.073386,0.088937,0.087044,0.085236,0.083507,0.004962,0.004605,0.004298,0.004032
sockfarm-rev2,0.033639,0.03249,0.031423,0.030428,0.14583,0.120021,0.102599,0.089923,0.125709,0.10657,0.092731,0.081909,0.004962,0.004605,0.004298,0.004032
sockfarm-rsd,0.033639,0.03249,,,0.277813,0.250903,,,0.092,0.090133,,,0.004962,0.004605,,
sockfarm-sg,0.033639,0.03249,0.031423,0.030428,0.273944,0.238449,0.210816,0.190559,0.012403,0.010205,0.008703,0.007601,0.004962,0.004605,0.004298,0.004032


Unnamed: 0_level_0,fraudar,fraudar,fraudar,fraudar,rsd,rsd,rsd,rsd,rev2,rev2,rev2,rev2,sg,sg,sg,sg
budget,1000.0,2000.0,3000.0,4000.0,1000.0,2000.0,3000.0,4000.0,1000.0,2000.0,3000.0,4000.0,1000.0,2000.0,3000.0,4000.0
sockfarm-fraudar,0.137138,0.107837,0.089167,0.076149,0.075451,0.060551,0.050776,0.043811,0.554454,0.439234,0.365028,0.312882,0.008875,0.006994,0.005791,0.00495
sockfarm-rev2,0.137138,0.107837,0.089167,0.076149,0.094789,0.07579,0.063393,0.054596,0.093945,0.073561,0.061263,0.052368,0.008875,0.006994,0.005791,0.00495
sockfarm-rsd,0.137138,0.107837,,,0.345597,0.273961,,,0.596522,0.472509,,,0.008875,0.006994,,
sockfarm-sg,0.137138,0.107837,0.089167,0.076149,0.255492,0.203364,0.168549,0.14548,0.006293,0.005167,0.0044,0.003838,0.008875,0.006994,0.005791,0.00495


Unnamed: 0_level_0,fraudar,fraudar,fraudar,fraudar,rsd,rsd,rsd,rsd,rev2,rev2,rev2,rev2,sg,sg,sg,sg
budget,1000.0,2000.0,3000.0,4000.0,1000.0,2000.0,3000.0,4000.0,1000.0,2000.0,3000.0,4000.0,1000.0,2000.0,3000.0,4000.0
sockfarm-fraudar,0.019199,0.019199,0.019199,0.019199,0.465989,0.465989,0.465989,0.465989,0.048352,0.048352,0.048352,0.048352,0.003447,0.003447,0.003447,0.003447
sockfarm-rev2,0.019199,0.019199,0.019199,0.019199,0.461441,0.461441,0.461441,0.461441,0.196892,0.200786,0.200786,0.200317,0.003447,0.003447,0.003447,0.003447
sockfarm-rsd,0.019199,0.019199,,,0.232471,0.232471,,,0.049845,0.049845,,,0.003447,0.003447,,
sockfarm-sg,0.019199,0.019199,0.019199,0.019199,0.306145,0.306145,0.305363,0.30595,0.488095,0.488095,0.488095,0.488095,0.003447,0.003447,0.003447,0.003447
