In [7]:
import os
from glob import glob
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from pandas.api.types import CategoricalDtype
from sklearn.metrics import RocCurveDisplay, auc, mean_absolute_error, roc_curve

from label_flip_revised.utils import create_dir

In [8]:
PATH_ROOT = Path(os.getcwd()).absolute().parent
print('Root:', PATH_ROOT)

Root: /home/lukec/workspace/diva_01


In [9]:
path_output = os.path.join(PATH_ROOT, 'results_plot')
print('Output:', path_output)
create_dir(path_output)

Output: /home/lukec/workspace/diva_01/results_plot


In [10]:
def get_pivot_by_difficulty(df: pd.DataFrame, threshold: float=None) -> pd.DataFrame: 
    df_ = df[['Difficulty', 'Rate', 'Similarity']]

    if threshold is None:
        for i in np.arange(0.05, 0.2, 0.01):
            threshold = i
            results = (1 - df_['Similarity']) >= threshold
            df_['Prediction'] = results.astype(int)
            count = df_['Prediction'][(df_['Rate'] == 0) & (df_['Difficulty'] == 'Easy')].sum()
            if count == 0:
                break
    else:
        results = (1 - df_['Similarity']) >= threshold
        df_['Prediction'] = results.astype(int)
    print('Threshold:', threshold)

    df_ = df_.rename(columns = {'Difficulty':'Dataset Difficulty', 'Rate':'Poisoning Rate', 'Prediction': 'FPR'})
    df_grouped = df_.groupby(['Dataset Difficulty', 'Poisoning Rate']).sum()
    df_grouped = df_grouped.reset_index()
    df_pivot = df_grouped.pivot('Poisoning Rate', 'Dataset Difficulty', 'FPR')
    df_pivot = df_pivot[['Easy', 'Normal', 'Hard']]
    return df_pivot

In [11]:
paths = [] 
paths.append(os.path.join(PATH_ROOT, 'results', 'synth', 'baseline', 'synth_alfa_svm_knndefense.csv'))
paths.append(os.path.join(PATH_ROOT, 'results', 'synth', 'baseline', 'synth_falfa_nn_knndefense.csv'))

# Checking the threshold for each attack
for p in paths:
    df_ = pd.read_csv(p)
    pivot_table = get_pivot_by_difficulty(df_)
    print(pivot_table)

Threshold: 0.16000000000000003
Dataset Difficulty  Easy  Normal  Hard
Poisoning Rate                        
0.00                   0      40    50
0.05                  10      46    50
0.10                  21      49    50
0.15                  23      50    50
0.20                  27      50    50
0.25                  29      50    50
0.30                  31      50    50
0.35                  33      50    50
0.40                  35      50    50
Threshold: 0.16000000000000003
Dataset Difficulty  Easy  Normal  Hard
Poisoning Rate                        
0.00                   0      40    50
0.05                   4      46    50
0.10                  21      49    50
0.15                  25      50    50
0.20                  36      50    50
0.25                  31      49    50
0.30                  38      50    50
0.35                  29      49    50
0.40                  33      48    50


In [13]:
pivot_tables = []

for att in ['falfa_nn', 'alfa_svm']:
    df_ = pd.read_csv(os.path.join(PATH_ROOT, 'results', 'synth', 'baseline', f'synth_{att}_knndefense.csv'))
    pivot_table = get_pivot_by_difficulty(df_, threshold=0.16)
    pivot_table.to_csv(os.path.join(path_output, f'synth_pivot_baseline_difficulty_{att}.csv'))

    pivot_table = pivot_table.reset_index()
    pivot_tables.append(pivot_table)
        
pivot_1 = pd.concat(pivot_tables, ignore_index=True).groupby('Poisoning Rate').mean().round()
pivot_1 = pivot_1 * 2

print(pivot_1)

Threshold: 0.16
Threshold: 0.16
Dataset Difficulty  Easy  Normal   Hard
Poisoning Rate                         
0.00                 0.0    80.0  100.0
0.05                14.0    92.0  100.0
0.10                42.0    98.0  100.0
0.15                48.0   100.0  100.0
0.20                64.0   100.0  100.0
0.25                60.0   100.0  100.0
0.30                68.0   100.0  100.0
0.35                62.0   100.0  100.0
0.40                68.0    98.0  100.0
