In [1]:
import os
from glob import glob
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import linear_model
from sklearn.metrics import RocCurveDisplay, auc, mean_squared_error, roc_curve
from sklearn.model_selection import RandomizedSearchCV
from sklearn.utils.fixes import loguniform
from scipy.interpolate import interp1d

from label_flip_revised.utils import create_dir

In [2]:
PATH_ROOT = Path(os.getcwd()).absolute().parent
print(PATH_ROOT)

/home/lukec/workspace/diva_01


In [3]:
path_output = os.path.join(PATH_ROOT, 'results_plot')
print(path_output)
create_dir(path_output)

/home/lukec/workspace/diva_01/results_plot


In [4]:
df = pd.read_csv(os.path.join(PATH_ROOT, 'results', 'real', 'real_falfa_nn_db.csv'))

print(df.shape)

(90, 38)


In [5]:
datanames = df['Data.Base'].unique()
print(datanames)

['breastcancer_std' 'texture_subset_std' 'ringnorm_subset_std'
 'australian_std' 'cmc_std' 'yeast_subset_std' 'abalone_subset_std'
 'phoneme_subset_std' 'banknote_std' 'htru2_subset_std']


In [6]:
print(df.columns)

Index(['Data', 'F1', 'F1 SD', 'F1v', 'F2', 'F3', 'F4', 'N1', 'N2', 'N2 SD',
       'N3', 'N3 SD', 'N4', 'N4 SD', 'T1', 'T1 SD', 'LSC', 'L1', 'L2', 'L3',
       'T2', 'T3', 'T4', 'C1', 'C2', 'Density', 'ClsCoef', 'Hubs', 'Hubs SD',
       'Path.Train', 'Path.Poison', 'Path.Test', 'Rate', 'Train.Clean',
       'Test.Clean', 'Train.Poison', 'Test.Poison', 'Data.Base'],
      dtype='object')


In [7]:
workspace_names = [f'diva_{i:02d}' for i in range(1,6)]
workspace_names

['diva_01', 'diva_02', 'diva_03', 'diva_04', 'diva_05']

In [8]:
attacks = ['falfa_nn', 'alfa_svm', 'rand_svm', 'poison_svm']

In [9]:
for workspace in workspace_names:
    tables = []
    for att in attacks:
        df_ = pd.read_csv(os.path.join(PATH_ROOT.parent, workspace, 'results', 'real', f'real_{att}_db.csv'))
        tables.append(df_[['Data.Base', 'Train.Clean', 'Test.Clean']].groupby('Data.Base').mean().reset_index())
results = pd.concat(tables, ignore_index=True).groupby('Data.Base').mean().reset_index()

In [10]:
results.to_csv(os.path.join(path_output, 'table_real_acc.csv'))

In [11]:
results[['Train.Clean', 'Test.Clean']] = results[['Train.Clean', 'Test.Clean']] * 100
results

Unnamed: 0,Data.Base,Train.Clean,Test.Clean
0,abalone_subset_std,79.921875,76.5
1,australian_std,91.485507,81.884058
2,banknote_std,100.0,100.0
3,breastcancer_std,99.340659,94.95614
4,cmc_std,79.902377,77.542373
5,htru2_subset_std,94.796875,92.625
6,phoneme_subset_std,89.6875,85.5625
7,ringnorm_subset_std,99.4375,97.8125
8,texture_subset_std,100.0,99.75
9,yeast_subset_std,73.527349,65.782123


In [12]:
print(results.to_latex(index=False, float_format='%.1f', column_format='lcc'))

\begin{tabular}{lcc}
\toprule
          Data.Base &  Train.Clean &  Test.Clean \\
\midrule
 abalone\_subset\_std &         79.9 &        76.5 \\
     australian\_std &         91.5 &        81.9 \\
       banknote\_std &        100.0 &       100.0 \\
   breastcancer\_std &         99.3 &        95.0 \\
            cmc\_std &         79.9 &        77.5 \\
   htru2\_subset\_std &         94.8 &        92.6 \\
 phoneme\_subset\_std &         89.7 &        85.6 \\
ringnorm\_subset\_std &         99.4 &        97.8 \\
 texture\_subset\_std &        100.0 &        99.8 \\
   yeast\_subset\_std &         73.5 &        65.8 \\
\bottomrule
\end{tabular}



In [13]:
RATES = [0.1, 0.2, 0.3]

tables = []
for workspace in workspace_names:
    for att in attacks:
        df_ = pd.read_csv(os.path.join(PATH_ROOT.parent, workspace, 'results', 'real', f'real_{att}_db.csv'))
        for rate in RATES:
            subset_ =  df_[df_['Rate'] == rate][['Data.Base', 'Train.Clean', 'Test.Clean', 'Train.Poison', 'Test.Poison']]
            n_ = subset_.shape[0]

            data = {
                'Dataset': subset_['Data.Base'],
                'Poisoning Rate': np.array([rate] * n_),
                'Attack': np.array([att] * n_),
                'Train': (subset_['Train.Clean'] - subset_['Train.Poison']) * 100,
                'Test': (subset_['Test.Clean'] - subset_['Test.Poison']) * 100,
            }
            tables.append(pd.DataFrame(data))
results = pd.concat(tables, ignore_index=True)


In [14]:
table = results.groupby(['Dataset', 'Poisoning Rate', 'Attack']).mean().reset_index()
table.to_csv(os.path.join(path_output, 'table_real_dif.csv'), index=False)

In [15]:
tab_pivot = table[table['Poisoning Rate'] == 0.2].pivot(index='Dataset', columns=['Attack'], values=['Test']).round(1)
tab_pivot.columns = [c[-1] for c in tab_pivot.columns.to_flat_index()]
tab_pivot = tab_pivot.reset_index()
tab_pivot = tab_pivot[['Dataset', 'rand_svm', 'poison_svm', 'alfa_svm', 'falfa_nn']]
tab_pivot.columns = ['Dataset', 'Rand', 'PoisSVM', 'ALFA', 'FALFA']
tab_pivot['Dataset'] = tab_pivot['Dataset'].map({
    'abalone_subset_std': 'Abalone',
    'australian_std': 'Australian',
    'banknote_std': 'Banknote',
    'breastcancer_std': 'Breastcancer',
    'cmc_std': 'CMC',
    'htru2_subset_std': 'HTRU2',
    'phoneme_subset_std': 'Phoneme',
    'ringnorm_subset_std': 'Ringnorm',
    'texture_subset_std': 'Texture',
    'yeast_subset_std': 'Yeast'
})


In [16]:
tab_pivot

Unnamed: 0,Dataset,Rand,PoisSVM,ALFA,FALFA
0,Abalone,1.7,1.8,20.5,17.0
1,Australian,-0.0,7.4,28.4,20.0
2,Banknote,2.4,2.0,21.7,21.8
3,Breastcancer,3.3,7.0,19.5,20.0
4,CMC,0.5,15.3,11.1,16.3
5,HTRU2,1.6,1.6,19.7,21.9
6,Phoneme,7.8,2.5,14.1,20.4
7,Ringnorm,0.8,3.4,26.2,20.1
8,Texture,2.1,1.8,22.3,24.5
9,Yeast,1.9,4.1,17.1,12.2


In [17]:
print(tab_pivot.to_latex(index=False, float_format='%.1f', multicolumn=True))

\begin{tabular}{lrrrr}
\toprule
     Dataset &  Rand &  PoisSVM &  ALFA &  FALFA \\
\midrule
     Abalone &   1.7 &      1.8 &  20.5 &   17.0 \\
  Australian &  -0.0 &      7.4 &  28.4 &   20.0 \\
    Banknote &   2.4 &      2.0 &  21.7 &   21.8 \\
Breastcancer &   3.3 &      7.0 &  19.5 &   20.0 \\
         CMC &   0.5 &     15.3 &  11.1 &   16.3 \\
       HTRU2 &   1.6 &      1.6 &  19.7 &   21.9 \\
     Phoneme &   7.8 &      2.5 &  14.1 &   20.4 \\
    Ringnorm &   0.8 &      3.4 &  26.2 &   20.1 \\
     Texture &   2.1 &      1.8 &  22.3 &   24.5 \\
       Yeast &   1.9 &      4.1 &  17.1 &   12.2 \\
\bottomrule
\end{tabular}

