# Notebook 04 — Benchmark Comparison

Compares SubFraudGMM against six anomaly detection baselines
(DeepSAD, DevNet, FEAWAD, PReNet, REPEN, XGBOD) and a simple ranking baseline.

Pre-computed result CSVs are loaded from `../results/`.

> **ADBench baseline results** (DeepSAD, DevNet, FEAWAD, PReNet, REPEN, XGBOD) were generated
> using **ADBench** (Han et al., NeurIPS 2022): https://github.com/Minqi824/ADBench
>
> The ADBench library is **not included** in this repository. Results are pre-computed
> and stored in `results/`.

In [None]:
import glob
import os

import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
import pandas as pd
from matplotlib.patches import Rectangle

# Path to the pre-computed result CSVs (one file per model; the file stem is
# used as the model name) and the directory where figures are written.
folder_path = '../results'
output_path = 'figures'
os.makedirs(output_path, exist_ok=True)

# Risk Indicator comparison configuration
indicator = 'Risk Indicator'
groups = ['RT', 'Demais']
yscale = 'log' if indicator == 'Rank' else 'linear'

# Load all result CSVs. glob returns paths in an arbitrary, OS-dependent
# order, so sort them to keep the row order of df_all reproducible.
csv_files = sorted(glob.glob(os.path.join(folder_path, '*.csv')))
if not csv_files:
    raise FileNotFoundError(f'No CSV files found in "{folder_path}".')

required_cols = ['ID', 'Produto', 'Grupo', indicator]
dfs = []
for file in csv_files:
    model_name = os.path.splitext(os.path.basename(file))[0]
    df = pd.read_csv(file)
    missing = [c for c in required_cols if c not in df.columns]
    if missing:
        # Skip files missing required columns (e.g. metrics_df.csv)
        continue
    df['Modelo'] = model_name
    dfs.append(df)

# Fail with an actionable message instead of pandas' generic
# "No objects to concatenate" when every file was skipped.
if not dfs:
    raise ValueError(
        f'No CSV file in "{folder_path}" contains the required columns '
        f'{required_cols}.'
    )

df_all = pd.concat(dfs, ignore_index=True)

print(f"Loaded {len(dfs)} model result files.")
print(f"Models: {sorted(df_all['Modelo'].unique())}")
print(f"Products: {sorted(df_all['Produto'].dropna().unique())}")

In [None]:
# Build the binary fraud label straight from the 'Grupo' column: rows whose
# group is 'RT' get 1, every other row gets 0. This removes the dependency
# on the external fraudes.csv file.
df_all["fraude"] = df_all["Grupo"].eq("RT").astype(int)
df_all.head()

In [None]:
# Sanity check on the SubFraudGMM rows: count records that carry the fraud
# label while belonging to the 'Demais' group, then report the smallest Rank
# among the SubFraudGMM records labelled as fraud.
labelled_fraud = df_all["fraude"].eq(1)
from_subfraudgmm = df_all["Modelo"].eq("SubFraudGMM")
in_demais = df_all["Grupo"].eq("Demais")

df_filtrado = df_all[labelled_fraud & from_subfraudgmm & in_demais]
print("Mismatches (Grupo=Demais but labelled fraud):", len(df_filtrado))

min_rank = df_all.loc[labelled_fraud & from_subfraudgmm, "Rank"].min()
print("Minimum Rank for confirmed fraud records (SubFraudGMM):", min_rank)

In [None]:
def compute_precision_recall(df_group):
    """Compute cumulative top-k metrics for one (Model, Product) group.

    For every distinct value ``k`` of the 'Rank' column, the top-k set is the
    set of rows with ``Rank <= k``.

    Args:
        df_group: DataFrame with a 'Grupo' column ('RT' marks fraud) and a
            'Rank' column. An optional 'fraude' column (1 = positive) is
            OR-ed with the 'RT' flag; missing values in it count as 0.

    Returns:
        DataFrame with one row per distinct rank, sorted by ``k``:
            - 'k': the rank threshold.
            - 'Recall@k': share of all 'RT' rows that fall in the top-k set
              (NaN when the group has no 'RT' rows).
            - 'PctPositivoTopK': share of the top-k rows that are positive
              ('RT' or fraude == 1). When 'fraude' mirrors 'Grupo' this is
              the precision at k.

    Raises:
        Whatever ``Series.astype(int)`` raises when 'Rank' holds values that
        cannot be converted to int (for example missing values).
    """
    if 'fraude' in df_group.columns:
        fraude_col = df_group['fraude'].fillna(0).astype(int)
    else:
        # Index the fallback like df_group; a bare pd.Series(0) would have a
        # single-element index and misalign in the boolean OR below.
        fraude_col = pd.Series(0, index=df_group.index)

    is_rt = df_group['Grupo'] == 'RT'
    is_positive = is_rt | (fraude_col == 1)
    total_rt = is_rt.sum()

    ranks = df_group['Rank'].astype(int)

    # Count rows / RT rows / positive rows per distinct rank, then accumulate
    # over ascending rank. The cumulative value at rank k equals the count
    # over "Rank <= k" without rescanning the group for every k.
    cumulative = (
        pd.DataFrame({
            'k': ranks.to_numpy(),
            'size': np.ones(len(ranks), dtype=int),
            'rt': is_rt.astype(int).to_numpy(),
            'positive': is_positive.astype(int).to_numpy(),
        })
        .groupby('k')
        .sum()
        .sort_index()
        .cumsum()
    )

    if total_rt > 0:
        recall_at_k = (cumulative['rt'] / total_rt).to_numpy()
    else:
        recall_at_k = np.full(len(cumulative), np.nan)

    return pd.DataFrame({
        'k': cumulative.index.to_numpy(),
        'Recall@k': recall_at_k,
        # Every distinct rank contributes at least one row, so size > 0.
        'PctPositivoTopK': (cumulative['positive'] / cumulative['size']).to_numpy(),
    })


# Compute metrics for every (Model, Product) pair
metrics_list = []
for (model, produto), df_group in df_all.groupby(['Modelo', 'Produto']):
    df_metrics = compute_precision_recall(df_group)
    df_metrics['Modelo'] = model
    df_metrics['Produto'] = produto
    metrics_list.append(df_metrics)

metrics_df = pd.concat(metrics_list, ignore_index=True)
print("Sample metrics:")
print(metrics_df.head())

In [None]:
# Primary benchmark table: keep only the rows computed at the k = 10 cutoff,
# write them to the results folder and echo them.
at_k10 = metrics_df['k'].eq(10)
metrics_k10 = metrics_df.loc[at_k10]
metrics_k10.to_csv("../results/metrics_df.csv", index=False)
print(metrics_k10)

In [None]:
import glob
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib import cm

# Rank visualisation — reload CSVs to ensure df_all has correct 'Rank' column
folder_path = '../results'
indicator = 'Rank'
dim1 = ['RT', 'Demais']
yscale = 'log' if indicator == 'Rank' else 'linear'

# Sorted so that the load order does not depend on the file system.
csv_files = sorted(glob.glob(os.path.join(folder_path, '*.csv')))
required_cols = ['ID', 'Produto', 'Grupo', indicator]
dfs = []
for file in csv_files:
    model_name = os.path.splitext(os.path.basename(file))[0]
    df = pd.read_csv(file)
    if not all(c in df.columns for c in required_cols):
        continue
    df['Modelo'] = model_name
    dfs.append(df)

# Fail with an actionable message instead of pandas' generic
# "No objects to concatenate" when nothing usable was found.
if not dfs:
    raise ValueError(
        f'No CSV file in "{folder_path}" contains the required columns '
        f'{required_cols}.'
    )
df_all = pd.concat(dfs, ignore_index=True)

# One figure per product: for every model, a box for the RT group (red
# shades) next to a box for the Demais group (blue shades).
produtos = df_all['Produto'].dropna().unique()
for produto in produtos:
    df_produto = df_all[df_all['Produto'] == produto]
    modelos_brutos = df_produto['Modelo'].dropna().unique().tolist()

    # SubFraudGMM and RankingSimples come first; the rest alphabetically.
    prioridade = [m for m in ["SubFraudGMM", "RankingSimples"] if m in modelos_brutos]
    outros = sorted([m for m in modelos_brutos if m not in prioridade])
    models = prioridade + outros

    n = len(models)
    reds = cm.Reds(np.linspace(0.4, 0.8, n))
    blues = cm.Blues(np.linspace(0.4, 0.8, n))

    data, positions, colors, labels = [], [], [], []
    for i, model in enumerate(models):
        for j, grupo in enumerate(dim1):
            vals = df_produto.loc[
                (df_produto['Modelo'] == model) &
                (df_produto['Grupo'] == grupo),
                indicator
            ].dropna().values
            if yscale == 'log':
                # Non-positive values cannot be drawn on a log axis.
                vals = vals[vals > 0]
            data.append(vals)
            positions.append(i * len(dim1) + j)
            colors.append(reds[i] if grupo == 'RT' else blues[i])
            labels.append(f'{model}\n{grupo}')

    fig, ax = plt.subplots(figsize=(14, 6))
    bp = ax.boxplot(data, positions=positions, widths=0.6, patch_artist=True, showfliers=True)
    for patch, color in zip(bp['boxes'], colors):
        patch.set_facecolor(color)

    if yscale == 'log':
        ax.set_yscale('log')
    ax.set_xticks(positions)
    ax.set_xticklabels(labels, fontsize=7, rotation=45, ha='right')
    ax.set_xlabel('Model / Group')
    ax.set_ylabel(indicator)
    ax.set_title(f'Rank Distribution by Model and Fraud Group — {produto}')
    ax.grid(axis='y', linestyle='--', linewidth=0.5)
    fig.tight_layout()
    # output_path is not set in this cell; it must already exist in the
    # notebook namespace.
    fig.savefig(os.path.join(output_path, f'rank_{produto}.pdf'))
    plt.show()
    # Release the figure so that one-figure-per-product does not pile up
    # open figures when run outside an inline notebook backend.
    plt.close(fig)

In [None]:
from sklearn.metrics import (
    roc_auc_score,
    average_precision_score,
    precision_score,
    recall_score,
    f1_score,
    roc_curve,
)


def compute_auc_metrics(df_group):
    """Compute AUC-ROC and AUC-PR for a (Model, Product) sub-group.

    Grupo == 'RT' is the positive class (1); every other group is 0.
    'Risk Indicator' is used as the anomaly score.

    Args:
        df_group: DataFrame with 'Grupo' and 'Risk Indicator' columns.

    Returns:
        pd.Series with the entries 'AUC-ROC' and 'AUC-PR'.
    """
    y_true = (df_group['Grupo'] == 'RT').astype(int).values
    scores = df_group['Risk Indicator'].values
    auc_roc = roc_auc_score(y_true, scores)
    auc_pr = average_precision_score(y_true, scores)
    return pd.Series({'AUC-ROC': auc_roc, 'AUC-PR': auc_pr})


def compute_at_k(df_group, k=50):
    """Compute Precision@k, Recall@k and F1@k using top-k by Risk Indicator.

    The k rows with the highest 'Risk Indicator' are predicted positive and
    all other rows negative; the three scores compare that prediction with
    the Grupo == 'RT' label.

    Args:
        df_group: DataFrame with 'Grupo' and 'Risk Indicator' columns.
        k: Number of top-scored rows flagged as positive. When k exceeds
            the number of rows, every row is flagged.

    Returns:
        pd.Series with the entries 'Precision@k', 'Recall@k' and 'F1@k'.
    """
    y_true = (df_group['Grupo'] == 'RT').astype(int).values
    scores = df_group['Risk Indicator'].values
    order = np.argsort(scores)[::-1]
    preds = np.zeros_like(y_true)
    # Slicing already clamps to the array length, so no separate branch is
    # needed for k larger than the group.
    preds[order[:k]] = 1
    p = precision_score(y_true, preds, zero_division=0)
    r = recall_score(y_true, preds, zero_division=0)
    f = f1_score(y_true, preds, zero_division=0)
    return pd.Series({'Precision@k': p, 'Recall@k': r, 'F1@k': f})


# Reload df_all with Risk Indicator
folder_path = '../results'
# Sorted so that the load order does not depend on the file system.
csv_files = sorted(glob.glob(os.path.join(folder_path, '*.csv')))
required_cols = ['ID', 'Produto', 'Grupo', 'Risk Indicator']
dfs = []
for file in csv_files:
    model_name = os.path.splitext(os.path.basename(file))[0]
    df = pd.read_csv(file)
    if not all(c in df.columns for c in required_cols):
        continue
    df['Modelo'] = model_name
    dfs.append(df)

# Fail with an actionable message instead of pandas' generic
# "No objects to concatenate" when nothing usable was found.
if not dfs:
    raise ValueError(
        f'No CSV file in "{folder_path}" contains the required columns '
        f'{required_cols}.'
    )
df_all = pd.concat(dfs, ignore_index=True)

results_auc = []
for (model, product), group in df_all.groupby(['Modelo', 'Produto']):
    # Skip groups that contain a single 'Grupo' value; presumably because
    # the AUC metrics need both classes present.
    if group['Grupo'].nunique() < 2:
        continue
    aucs = compute_auc_metrics(group)
    atks = compute_at_k(group, k=100)
    results_auc.append({'Modelo': model, 'Produto': product, **aucs.to_dict(), **atks.to_dict()})

metrics_df_auc = pd.DataFrame(results_auc)

# Plot AUC-ROC curves per product. Missing products are dropped (as in the
# rank plots) so that no empty "roc_nan.pdf" figure is produced.
for product in df_all['Produto'].dropna().unique():
    fig = plt.figure(figsize=(8, 6))
    sub_prod = df_all[df_all['Produto'] == product]
    for model in sorted(sub_prod['Modelo'].unique()):
        grp = sub_prod[sub_prod['Modelo'] == model]
        if grp['Grupo'].nunique() < 2:
            continue
        y_true = (grp['Grupo'] == 'RT').astype(int).values
        scores = grp['Risk Indicator'].values
        fpr, tpr, _ = roc_curve(y_true, scores)
        auc = roc_auc_score(y_true, scores)
        plt.plot(fpr, tpr, label=f"{model} (AUC={auc:.2f})")
    # Diagonal of a random classifier, for reference.
    plt.plot([0, 1], [0, 1], '--', color='gray')
    plt.title(f"ROC Curve Comparison — {product}")
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.legend(loc="lower right")
    plt.tight_layout()
    # output_path is not set in this cell; it must already exist in the
    # notebook namespace.
    plt.savefig(os.path.join(output_path, f'roc_{product}.pdf'))
    plt.show()
    # Release the figure so that one-figure-per-product does not pile up
    # open figures when run outside an inline notebook backend.
    plt.close(fig)

In [None]:
# Detailed summary of Risk Indicator and Rank statistics by Product, Group
# and Model. The named-aggregation spec is assembled programmatically:
# median / mean / std / max / min for each of the two columns, followed by
# the number of non-missing Risk Indicator values.
stat_funcs = {
    'mediana': 'median',
    'media': 'mean',
    'std': 'std',
    'max': 'max',
    'min': 'min',
}
agg_spec = {}
for suffix, column in (('risk', 'Risk Indicator'), ('rank', 'Rank')):
    for prefix, func in stat_funcs.items():
        agg_spec[f'{prefix}_{suffix}'] = (column, func)
agg_spec['qtd_casos'] = ('Risk Indicator', 'count')

grouped = df_all.groupby(['Produto', 'Grupo', 'Modelo'])
resultados_detalhados = grouped.agg(**agg_spec).reset_index()

print("Detailed Results Summary:")
print(resultados_detalhados)
resultados_detalhados.to_csv('../results/resultados_detalhados.csv', index=False)

In [None]:
import pandas as pd

# Median Risk Indicator and Rank by Model and Product (fraud group RT only).
# The output columns get their display names directly through the
# named-aggregation spec.
median_spec = {
    'Mediana Risk Indicator': ('Risk Indicator', 'median'),
    'Mediana Rank': ('Rank', 'median'),
}
medians = (
    df_all.groupby(['Produto', 'Grupo', 'Modelo'])
    .agg(**median_spec)
    .reset_index()
)

rt_only = medians['Grupo'].eq('RT')
df_rt = medians.loc[rt_only].drop(columns='Grupo')

# Products as rows, models as columns.
pivot_risk, pivot_rank = (
    df_rt.pivot(index='Produto', columns='Modelo', values=value_col).round(3)
    for value_col in ('Mediana Risk Indicator', 'Mediana Rank')
)

# Add overall median row (median across products, per model).
pivot_risk.loc['Overall Median'] = pivot_risk.median(numeric_only=True)
pivot_rank.loc['Overall Median'] = pivot_rank.median(numeric_only=True)


def style_heatmap(df, title, cmap):
    """Return a Styler for *df* with a caption and a per-row colour gradient.

    Args:
        df: Table to render.
        title: Caption shown with the table.
        cmap: Matplotlib colormap name used for the gradient.

    Returns:
        The pandas Styler; values use 3 decimals and missing cells show '-'.
    """
    styler = df.style.set_caption(title).format(precision=3, na_rep='-')
    # The gradient is applied one row at a time, so colours compare the
    # models within a row only.
    for row_label in df.index:
        single_row = pd.IndexSlice[[row_label], :]
        styler = styler.background_gradient(axis=1, cmap=cmap, subset=single_row)
    return styler


styled_risk = style_heatmap(
    pivot_risk,
    "Median Risk Indicator by Product and Model (Fraud Group: RT)",
    cmap='Reds',
)
styled_rank = style_heatmap(
    pivot_rank,
    "Median Rank by Product and Model (Fraud Group: RT)",
    cmap='Reds_r',
)
display(styled_risk)
display(styled_rank)

In [None]:
import glob
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib import cm

# Risk Indicator distribution across all groups and products
folder_path = '../results'
indicator = 'Risk Indicator'
groups = ['RT', 'Demais']

# Sorted so that the load order does not depend on the file system.
csv_files = sorted(glob.glob(os.path.join(folder_path, '*.csv')))
required_cols = ['ID', 'Produto', 'Grupo', indicator]
dfs = []
for file in csv_files:
    model = os.path.splitext(os.path.basename(file))[0]
    df = pd.read_csv(file)
    if not all(c in df.columns for c in required_cols):
        continue
    df['Modelo'] = model
    dfs.append(df)

# Fail with an actionable message instead of pandas' generic
# "No objects to concatenate" when nothing usable was found.
if not dfs:
    raise ValueError(
        f'No CSV file in "{folder_path}" contains the required columns '
        f'{required_cols}.'
    )
df_all = pd.concat(dfs, ignore_index=True)

models = sorted(df_all['Modelo'].unique())
# Drop missing products first: NaN cannot be ordered against product names
# and would never match the equality filter below anyway.
products = sorted(df_all['Produto'].dropna().unique())
n_models = len(models)

reds = cm.Reds(np.linspace(0.4, 0.8, n_models))
blues = cm.Blues(np.linspace(0.4, 0.8, n_models))

# Grid of panels, one per product, two per row.
n_cols = 2
n_rows = int(np.ceil(len(products) / n_cols))
fig, axes = plt.subplots(n_rows, n_cols, figsize=(12, 4 * n_rows), sharey=True)
axes = axes.flatten()

for idx, produto in enumerate(products):
    ax = axes[idx]
    data, positions, colors, labels = [], [], [], []
    for i, model in enumerate(models):
        for j, grp in enumerate(groups):
            vals = df_all.loc[
                (df_all['Modelo'] == model) &
                (df_all['Grupo'] == grp) &
                (df_all['Produto'] == produto),
                indicator
            ].dropna().values
            data.append(vals)
            positions.append(i * len(groups) + j)
            colors.append(reds[i] if grp == 'RT' else blues[i])
            labels.append(f'{model}\n{grp}')

    bp = ax.boxplot(data, positions=positions, widths=0.5, patch_artist=True, showfliers=False)
    for patch, color in zip(bp['boxes'], colors):
        patch.set_facecolor(color)
    ax.set_xticks(positions)
    ax.set_xticklabels(labels, fontsize=6, rotation=45, ha='right')
    ax.set_title(f'{produto}')
    ax.set_ylabel(indicator)
    ax.grid(axis='y', linestyle='--', linewidth=0.5)

# Hide unused subplots
for ax in axes[len(products):]:
    ax.set_visible(False)

fig.suptitle('Risk Indicator Distribution by Model and Fraud Group', y=1.02)
fig.tight_layout()
# The suptitle sits above the canvas (y > 1); bbox_inches='tight' expands
# the saved area so that the title is not cropped from the PDF.
# output_path is not set in this cell; it must already exist in the
# notebook namespace.
fig.savefig(
    os.path.join(output_path, 'risk_indicator_distribution.pdf'),
    bbox_inches='tight',
)
plt.show()
plt.close(fig)

In [None]:
import glob
import os

import matplotlib.pyplot as plt
import numpy as np  # needed for np.linspace below; was missing from this cell
import pandas as pd
import seaborn as sns
from matplotlib import cm

# Risk Indicator distribution — fraud group (RT) only, faceted by product
folder_path = '../results'
indicator = 'Risk Indicator'

# Sorted so that the load order does not depend on the file system.
csv_files = sorted(glob.glob(os.path.join(folder_path, '*.csv')))
required_cols = ['ID', 'Produto', 'Grupo', indicator]
dfs = []
for file in csv_files:
    model = os.path.splitext(os.path.basename(file))[0]
    df = pd.read_csv(file)
    if not all(c in df.columns for c in required_cols):
        continue
    df['Modelo'] = model
    dfs.append(df)

# Fail with an actionable message instead of pandas' generic
# "No objects to concatenate" when nothing usable was found.
if not dfs:
    raise ValueError(
        f'No CSV file in "{folder_path}" contains the required columns '
        f'{required_cols}.'
    )
df_all = pd.concat(dfs, ignore_index=True)

# Restrict to fraud group
df_rt = df_all[df_all['Grupo'] == 'RT']

# One shade of red per model, keyed by model name.
models = sorted(df_rt['Modelo'].unique())
reds = cm.Reds(np.linspace(0.4, 0.8, len(models)))
palette = dict(zip(models, reds))

sns.set_style("whitegrid")
# NOTE(review): recent seaborn releases appear to warn when `palette` is
# passed without `hue` — confirm against the installed version.
g = sns.catplot(
    data=df_rt,
    x='Modelo',
    y=indicator,
    col='Produto',
    col_wrap=2,
    kind='box',
    # Fix the x-axis order explicitly; otherwise it follows the order in
    # which the model files happened to be loaded.
    order=models,
    palette=palette,
    sharey=True,
    height=4,
    aspect=1,
)
g.set_titles("Product: {col_name}")
g.set_xticklabels(rotation=45, ha='right')
g.set_axis_labels("Model", indicator)
plt.tight_layout()
# output_path is not set in this cell; it must already exist in the
# notebook namespace.
plt.savefig(os.path.join(output_path, 'risk_indicator_rt_facet.pdf'))
plt.show()