In [None]:
import os
from glob import glob
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from pandas.api.types import CategoricalDtype
from sklearn.metrics import RocCurveDisplay, auc, mean_absolute_error, roc_curve

from label_flip_revised.utils import create_dir

In [None]:
# Project root = parent of the directory the notebook kernel was started in.
PATH_ROOT = Path.cwd().absolute().parent
print('Root:', PATH_ROOT)

In [None]:
# Destination folder used by later cells for the pivot CSVs and the heatmap.
# Kept as a plain string because it is passed to os.path.join further down.
path_output = os.fspath(PATH_ROOT / 'results_plot')
print('Output:', path_output)
# NOTE(review): create_dir is a project helper; presumably it creates the
# directory when it does not exist yet — confirm.
create_dir(path_output)

In [None]:
def get_pivot_by_difficulty(df: pd.DataFrame, threshold: float=None) -> pd.DataFrame:
    """Count detector alarms per poisoning rate and dataset difficulty.

    A row raises an alarm (prediction 1) when ``1 - Similarity >= threshold``.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain the columns 'Difficulty', 'Rate' and 'Similarity'.
        The frame is not modified.
    threshold : float, optional
        Decision threshold on ``1 - Similarity``. When None, the candidates
        ``np.arange(0.05, 0.5, 0.01)`` are scanned in ascending order and the
        first one that flags at most one clean (``Rate == 0``) 'Normal' row
        is used; if none qualifies, the last candidate is used.

    Returns
    -------
    pd.DataFrame
        Indexed by 'Poisoning Rate', with the columns 'Easy', 'Normal' and
        'Hard' (in that order). Each cell holds the number of flagged rows.

    Raises
    ------
    KeyError
        If a required column is missing, or one of the three difficulty
        levels does not occur in ``df``.
    """
    # Explicit copy: adding a column to a bare column-slice of ``df`` triggers
    # pandas' SettingWithCopyWarning.
    df_ = df[['Difficulty', 'Rate', 'Similarity']].copy()
    distance = 1 - df_['Similarity']

    if threshold is None:
        # Only clean rows of 'Normal' difficulty drive the automatic choice.
        clean_normal = (df_['Rate'] == 0) & (df_['Difficulty'] == 'Normal')
        for candidate in np.arange(0.05, 0.5, 0.01):
            threshold = candidate
            if (distance[clean_normal] >= threshold).sum() <= 1:
                break
    print('Threshold:', threshold)

    # The alarm column is called 'FPR' in the output although it is summed
    # into a count, not a rate.
    df_['FPR'] = (distance >= threshold).astype(int)
    df_ = df_.rename(columns={'Difficulty': 'Dataset Difficulty', 'Rate': 'Poisoning Rate'})
    df_grouped = (
        df_.groupby(['Dataset Difficulty', 'Poisoning Rate'])['FPR']
        .sum()
        .reset_index()
    )
    # Keyword arguments are required: pandas >= 2.0 rejects positional
    # arguments to DataFrame.pivot.
    df_pivot = df_grouped.pivot(index='Poisoning Rate', columns='Dataset Difficulty', values='FPR')
    return df_pivot[['Easy', 'Normal', 'Hard']]

In [None]:
# Checking the threshold for each attack: with no explicit threshold the
# helper picks one automatically and prints it next to the pivot table.
baseline_dir = os.path.join(PATH_ROOT, 'results', 'synth', 'baseline')
paths = [
    os.path.join(baseline_dir, filename)
    for filename in ('synth_alfa_svm_knndefense.csv', 'synth_falfa_nn_knndefense.csv')
]

for csv_path in paths:
    df_ = pd.read_csv(csv_path)
    pivot_table = get_pivot_by_difficulty(df_)
    print(pivot_table)

In [None]:
# One fixed threshold shared by both attacks.
threshold = 0.32
baseline_dir = os.path.join(PATH_ROOT, 'results', 'synth', 'baseline')

pivot_tables = []
for att in ['falfa_nn', 'alfa_svm']:
    df_ = pd.read_csv(os.path.join(baseline_dir, f'synth_{att}_knndefense.csv'))
    pivot_table = get_pivot_by_difficulty(df_, threshold)
    # Keep the per-attack table on disk before averaging.
    pivot_table.to_csv(os.path.join(path_output, f'synth_pivot_baseline_difficulty_{att}.csv'))
    pivot_tables.append(pivot_table.reset_index())

# Average the per-attack counts for each poisoning rate.
# NOTE(review): the factor 2 presumably rescales counts out of 50 to a
# percentage — confirm. Rounding happens before the scaling, so every value
# ends up even.
stacked_pivots = pd.concat(pivot_tables, ignore_index=True)
pivot_1 = stacked_pivots.groupby('Poisoning Rate').mean().round() * 2

print(pivot_1)

In [None]:
def get_pivot_by_noise(df: pd.DataFrame, threshold: float=None) -> pd.DataFrame:
    """Count detector alarms per poisoning rate and label-noise rate.

    A row raises an alarm (prediction 1) when ``1 - Similarity >= threshold``.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain the columns 'Noise', 'Rate' and 'Similarity'.
        The frame is not modified.
    threshold : float, optional
        Decision threshold on ``1 - Similarity``. When None, the candidates
        ``np.arange(0.05, 0.5, 0.01)`` are scanned in ascending order and the
        first one that flags at most one clean (``Rate == 0``) noise-free
        (``Noise == 0``) row is used; if none qualifies, the last candidate
        is used.

    Returns
    -------
    pd.DataFrame
        Indexed by 'Poisoning Rate', with one column per distinct value of
        'Noise' (column axis named 'Noise Label Rate'). Each cell holds the
        number of flagged rows.

    Raises
    ------
    KeyError
        If a required column is missing from ``df``.
    """
    # Explicit copy: adding a column to a bare column-slice of ``df`` triggers
    # pandas' SettingWithCopyWarning.
    df_ = df[['Noise', 'Rate', 'Similarity']].copy()
    distance = 1 - df_['Similarity']

    if threshold is None:
        # Only clean, noise-free rows drive the automatic choice.
        clean_noise_free = (df_['Rate'] == 0) & (df_['Noise'] == 0.)
        for candidate in np.arange(0.05, 0.5, 0.01):
            threshold = candidate
            if (distance[clean_noise_free] >= threshold).sum() <= 1:
                break
    print('Threshold:', threshold)

    # The alarm column is called 'FPR' in the output although it is summed
    # into a count, not a rate.
    df_['FPR'] = (distance >= threshold).astype(int)
    df_ = df_.rename(columns={'Noise': 'Noise Label Rate', 'Rate': 'Poisoning Rate'})
    df_grouped = (
        df_.groupby(['Noise Label Rate', 'Poisoning Rate'])['FPR']
        .sum()
        .reset_index()
    )
    # Keyword arguments are required: pandas >= 2.0 rejects positional
    # arguments to DataFrame.pivot.
    df_pivot = df_grouped.pivot(index='Poisoning Rate', columns='Noise Label Rate', values='FPR')
    return df_pivot

In [None]:
# Checking the threshold for each attack: with no explicit threshold the
# helper picks one automatically and prints it next to the pivot table.
noisy_baseline_dir = os.path.join(PATH_ROOT, 'results', 'synth_noisy', 'baseline')
paths = [
    os.path.join(noisy_baseline_dir, filename)
    for filename in ('synth_alfa_svm_knndefense.csv', 'synth_falfa_nn_knndefense.csv')
]

for csv_path in paths:
    df_ = pd.read_csv(csv_path)
    pivot_table = get_pivot_by_noise(df_)
    print(pivot_table)

In [None]:
# One fixed threshold shared by both attacks.
threshold = 0.37
noisy_baseline_dir = os.path.join(PATH_ROOT, 'results', 'synth_noisy', 'baseline')

pivot_tables = []
for att in ['falfa_nn', 'alfa_svm']:
    df_ = pd.read_csv(os.path.join(noisy_baseline_dir, f'synth_{att}_knndefense.csv'))
    pivot_table = get_pivot_by_noise(df_, threshold)
    # Keep the per-attack table on disk before averaging.
    pivot_table.to_csv(os.path.join(path_output, f'synth_pivot_baseline_noisy_{att}.csv'))
    pivot_tables.append(pivot_table.reset_index())

# Average the per-attack counts for each poisoning rate.
# NOTE(review): the factor 2 presumably rescales counts out of 50 to a
# percentage — confirm. Rounding happens before the scaling, so every value
# ends up even.
stacked_pivots = pd.concat(pivot_tables, ignore_index=True)
pivot_2 = stacked_pivots.groupby('Poisoning Rate').mean().round() * 2

print(pivot_2)

In [None]:
# Baseline subfigures at top row.
# Subplot titles are kept; the x-axis labels are cleared on purpose.

FONTSIZE = 13
FIGSIZE = (8, 4)
TITLES = ['By Difficulties', 'By Label Noise Rates']

plt.rcParams["font.size"] = FONTSIZE
fig, axes = plt.subplots(1, 2, sharey=True, figsize=FIGSIZE)

# Both panels share the colour scale (0-100) and integer annotations; only
# the right-hand panel draws the colour bar.
heatmap_style = dict(annot=True, fmt='.0f', cmap="Greens", vmin=0, vmax=100)

sns.heatmap(pivot_1, ax=axes[0], cbar=False, **heatmap_style)
axes[0].set_ylabel('Poisoning Rate', fontsize=FONTSIZE+1)

sns.heatmap(pivot_2, ax=axes[1], **heatmap_style)
axes[1].set_ylabel('')

for ax, title in zip(axes, TITLES):
    ax.set(xlabel=None)
    ax.set_title(title, fontsize=FONTSIZE+2)

plt.tight_layout(pad=0.6)
plot_heatmap = os.path.join(path_output, 'synth_heatmap_baseline.svg')
plt.savefig(plot_heatmap, dpi=300)
print(f'Save to: {plot_heatmap}')