In [1]:
import os
from glob import glob
from pathlib import Path

import numpy as np
import pandas as pd


In [2]:
# Workspace root: the parent of the current working directory.
PATH_ROOT = Path.cwd().parent
print(PATH_ROOT)

# Folder (kept as a plain string) that receives the summary CSV files.
PATH_OUTPUT = str(PATH_ROOT / 'results_plot')
print(PATH_OUTPUT)


/home/lukec/workspace/diva_01
/home/lukec/workspace/diva_01/results_plot


In [3]:
# Attack identifiers; each one is part of a `real_<attack>_db.csv` file name.
attacks = [
    'falfa_nn',
    'alfa_svm',
    'rand_svm',
    'poison_svm',
]
# Workspace folder names diva_01 ... diva_05.
workspace_names = [f'diva_{run:02d}' for run in range(1, 6)]
print(workspace_names)


['diva_01', 'diva_02', 'diva_03', 'diva_04', 'diva_05']


In [4]:
def load_dataframes(workspace):
    """Stack the per-attack result tables of one workspace into a single frame.

    For every entry of the module-level ``attacks`` list the file
    ``<PATH_ROOT.parent>/<workspace>/results/real/real_<attack>_db.csv`` is
    read and tagged with an ``Attack`` column holding the attack name.

    Parameters
    ----------
    workspace : str
        Name of the workspace directory located next to ``PATH_ROOT``.

    Returns
    -------
    pandas.DataFrame
        All attack tables concatenated under a fresh 0..n-1 index, with an
        extra float column ``Prediction`` initialised to 0.0.
    """
    dir_real = os.path.join(PATH_ROOT.parent, workspace, 'results', 'real')
    frames = [
        pd.read_csv(os.path.join(dir_real, f'real_{att}_db.csv')).assign(Attack=att)
        for att in attacks
    ]
    stacked = pd.concat(frames, ignore_index=True)
    # Placeholder that is overwritten once the regressor has been evaluated.
    stacked['Prediction'] = 0.
    return stacked


In [5]:
# One combined result table per workspace, keyed by workspace name.
df_dict = {name: load_dataframes(name) for name in workspace_names}


In [6]:
# Ridge regularisation strength used for each workspace.
# NOTE(review): how these values were selected is not shown in this notebook.
alphas = dict(
    diva_01=0.0034,
    diva_02=0.036,
    diva_03=0.0284,
    diva_04=0.0109,
    diva_05=0.0119,
)


In [7]:
# Preview the combined table of workspace diva_01 (all attacks stacked;
# the `Prediction` column still holds its 0.0 placeholder at this point).
df_dict['diva_01']


Unnamed: 0,Data,F1,F1 SD,F1v,F2,F3,F4,N1,N2,N2 SD,...,Path.Poison,Path.Test,Rate,Train.Clean,Test.Clean,Train.Poison,Test.Poison,Data.Base,Attack,Prediction
0,breastcancer_std_falfa_nn_0.00.csv,0.712464,0.215349,0.066235,6.264744e-11,0.485714,0.000000,0.074725,0.366983,0.074135,...,/home/lukec/workspace/label_flip_revised_new/d...,/home/lukec/workspace/label_flip_revised_new/d...,0.00,0.986813,0.982456,0.986813,0.982456,breastcancer_std,falfa_nn,0.0
1,breastcancer_std_falfa_nn_0.05.csv,0.859098,0.101038,0.133480,1.440449e-06,0.696703,0.000000,0.103297,0.382469,0.070786,...,/home/lukec/workspace/label_flip_revised_new/d...,/home/lukec/workspace/label_flip_revised_new/d...,0.05,0.986813,0.982456,0.982418,0.956140,breastcancer_std,falfa_nn,0.0
2,breastcancer_std_falfa_nn_0.10.csv,0.844907,0.108064,0.240749,2.910036e-09,0.753846,0.316484,0.191209,0.409176,0.076152,...,/home/lukec/workspace/label_flip_revised_new/d...,/home/lukec/workspace/label_flip_revised_new/d...,0.10,0.986813,0.982456,0.964835,0.868421,breastcancer_std,falfa_nn,0.0
3,breastcancer_std_falfa_nn_0.15.csv,0.948572,0.034941,0.258380,2.631059e-06,0.712088,0.048352,0.153846,0.396442,0.077667,...,/home/lukec/workspace/label_flip_revised_new/d...,/home/lukec/workspace/label_flip_revised_new/d...,0.15,0.986813,0.982456,0.984615,0.824561,breastcancer_std,falfa_nn,0.0
4,breastcancer_std_falfa_nn_0.20.csv,0.919837,0.052507,0.311281,6.912120e-10,0.723077,0.270330,0.217582,0.415749,0.075189,...,/home/lukec/workspace/label_flip_revised_new/d...,/home/lukec/workspace/label_flip_revised_new/d...,0.20,0.986813,0.982456,0.971429,0.728070,breastcancer_std,falfa_nn,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
335,yeast_subset_std_poison_svm_0.10.csv,0.990786,0.020445,0.768290,2.744995e-04,0.988294,0.936455,0.625418,0.478590,0.139140,...,/home/lukec/workspace/label_flip_revised_new/d...,/home/lukec/workspace/label_flip_revised_new/d...,0.10,0.726508,0.648045,0.730769,0.687151,yeast_subset_std,poison_svm,0.0
336,yeast_subset_std_poison_svm_0.15.csv,0.991886,0.017572,0.800329,2.960327e-04,0.988800,0.956800,0.627200,0.491407,0.154971,...,/home/lukec/workspace/label_flip_revised_new/d...,/home/lukec/workspace/label_flip_revised_new/d...,0.15,0.726508,0.648045,0.729600,0.670391,yeast_subset_std,poison_svm,0.0
337,yeast_subset_std_poison_svm_0.20.csv,0.996799,0.005893,0.913371,5.020123e-04,0.992331,0.969325,0.630368,0.495467,0.153496,...,/home/lukec/workspace/label_flip_revised_new/d...,/home/lukec/workspace/label_flip_revised_new/d...,0.20,0.726508,0.648045,0.728528,0.670391,yeast_subset_std,poison_svm,0.0
338,yeast_subset_std_poison_svm_0.25.csv,0.994281,0.010280,0.822829,5.493845e-04,0.994118,0.975000,0.638235,0.501441,0.162395,...,/home/lukec/workspace/label_flip_revised_new/d...,/home/lukec/workspace/label_flip_revised_new/d...,0.25,0.726508,0.648045,0.722059,0.675978,yeast_subset_std,poison_svm,0.0


In [8]:
# Positional index of the "Prediction" column, looked up on the first workspace.
# NOTE(review): assumes every workspace table has the same column layout.
_first_columns = df_dict[workspace_names[0]].columns
IDX_PRED = _first_columns.get_indexer(['Prediction'])
print(IDX_PRED)


[39]


In [9]:
# The falfa_nn table under PATH_ROOT serves as the reference for the list of
# base dataset names.
path_sample = os.path.join(PATH_ROOT, 'results', 'real', 'real_falfa_nn_db.csv')
df_sample = pd.read_csv(path_sample)
print(df_sample.shape, '\n')

# Sorted, de-duplicated base dataset names, printed one per line.
datanames = sorted(df_sample['Data.Base'].unique())
print(',\n'.join(datanames), end='\n\n')


(90, 38) 

abalone_subset_std,
australian_std,
banknote_std,
breastcancer_std,
cmc_std,
htru2_subset_std,
phoneme_subset_std,
ringnorm_subset_std,
texture_subset_std,
yeast_subset_std



In [10]:
# Columns fed to the regressor as input features.
cols_cm = [
    'F1', 'F1 SD', 'F1v', 'F2', 'F3', 'F4',
    'N1', 'N2', 'N2 SD', 'N3', 'N3 SD', 'N4', 'N4 SD',
    'T1', 'T1 SD', 'LSC',
    'L1', 'L2', 'L3',
    'T2', 'T3', 'T4',
    'C1', 'C2',
    'Density', 'ClsCoef', 'Hubs', 'Hubs SD',
]
# Column the regressor is trained to predict.
col_y = 'Test.Poison'

print(len(cols_cm))


28


In [11]:
from sklearn import linear_model
from sklearn.metrics import mean_squared_error


In [12]:
# Leave-one-dataset-out evaluation, run independently for every workspace.
# For each held-out dataset a Ridge regressor is fitted on the `cols_cm`
# features of all *other* datasets, using only the rows of the attacks in
# `train_attacks`, and then predicts `col_y` for the held-out dataset under
# every attack. Predictions are written into the `Prediction` column of the
# workspace frame stored in `df_dict` (the frame is modified in place).
train_attacks = ['falfa_nn', 'rand_svm']

for wname in workspace_names:
    df_ = df_dict[wname]
    alpha_ = alphas[wname]
    for dname in datanames:
        is_heldout = df_['Data.Base'] == dname

        # Anything other than the test dataset; one block of rows per
        # training attack, stacked in the order given by `train_attacks`.
        df_train = pd.concat(
            [df_[~is_heldout & (df_['Attack'] == att)] for att in train_attacks],
            ignore_index=True,
        )

        regressor = linear_model.Ridge(alpha=alpha_)
        regressor.fit(df_train[cols_cm], df_train[col_y])

        for att in attacks:
            is_test = is_heldout & (df_['Attack'] == att)
            pred = regressor.predict(df_.loc[is_test, cols_cm])

            # Apply clipping: cap predictions at 1.0.
            # NOTE(review): only the upper bound is enforced; values below 0
            # pass through unchanged -- confirm this is intended.
            pred = np.minimum(pred, 1.0)

            # Label-based assignment through the boolean mask: correct for any
            # index, unlike passing index labels to the positional `.iloc`.
            df_.loc[is_test, 'Prediction'] = pred


In [13]:
# Attack identifier -> label used in the exported tables.
ATTACK_DICT = dict(
    falfa_nn='FALFA',
    alfa_svm='ALFA',
    rand_svm='SLN',
    poison_svm='PoisSVM',
)

# Base dataset identifier -> label used in the exported tables.
DATA_DICT = dict(
    abalone_subset_std='Abalone',
    australian_std='Australian',
    banknote_std='Banknote',
    breastcancer_std='Breastcancer',
    cmc_std='CMC',
    htru2_subset_std='HTRU2',
    phoneme_subset_std='Phoneme',
    ringnorm_subset_std='Ringnorm',
    texture_subset_std='Texture',
    yeast_subset_std='Yeast',
)


In [14]:
# Summarise the prediction error per (dataset, attack) pair.
# For each pair the RMSE between `col_y` and `Prediction` is computed once per
# workspace; the table keeps the mean ('RMSE') and the population standard
# deviation ('STD', numpy's default ddof=0) of those per-workspace values.
data_rmse = {
    'Dataset': [],
    'Attack': [],
    'RMSE': [],
    'STD': [],
}
for dname in datanames:
    for att in attacks:
        RMSEs = []
        for wname in workspace_names:
            df_ = df_dict[wname]
            # Build the row selection once and reuse it for both columns.
            selected = (df_['Attack'] == att) & (df_['Data.Base'] == dname)
            y_true = df_.loc[selected, col_y]
            pred = df_.loc[selected, 'Prediction']
            RMSEs.append(np.sqrt(mean_squared_error(y_true, pred)))
        data_rmse['Dataset'].append(DATA_DICT[dname])
        data_rmse['Attack'].append(ATTACK_DICT[att])
        data_rmse['RMSE'].append(np.mean(RMSEs))
        data_rmse['STD'].append(np.std(RMSEs))
df_rmse = pd.DataFrame(data_rmse)

# Make sure the output folder exists so the write cannot fail on a fresh checkout.
os.makedirs(PATH_OUTPUT, exist_ok=True)
path_rmse = os.path.join(PATH_OUTPUT, 'real_rmse_std.csv')
df_rmse.to_csv(path_rmse, index=False)
print(f'Save to: {path_rmse}')

df_rmse


Save to: /home/lukec/workspace/diva_01/results_plot/real_rmse_std.csv


Unnamed: 0,Dataset,Attack,RMSE,STD
0,Abalone,FALFA,0.082573,0.009516
1,Abalone,ALFA,0.114757,0.054557
2,Abalone,SLN,0.071744,0.021556
3,Abalone,PoisSVM,0.058351,0.022669
4,Australian,FALFA,0.108992,0.031864
5,Australian,ALFA,0.114566,0.035253
6,Australian,SLN,0.071057,0.030616
7,Australian,PoisSVM,0.064454,0.02153
8,Banknote,FALFA,0.146025,0.051968
9,Banknote,ALFA,0.089204,0.043116


In [15]:
# Extra summary row: per-attack standard deviation of the 'RMSE' column across
# datasets. The numeric column is selected *before* aggregating so the text
# column 'Dataset' never reaches the reduction (pandas >= 2.0 raises a
# TypeError instead of silently dropping it).
df_std = (
    df_rmse.groupby('Attack')[['RMSE']]
    .std()
    .transpose()
    .assign(Dataset='STD')
)

# Dataset x attack table of the 'STD' column, followed by the summary row.
df_rmse_pivot = df_rmse.pivot(index='Dataset', columns='Attack', values='STD').reset_index()
df_rmse_std = pd.concat([df_rmse_pivot, df_std], ignore_index=True)
df_rmse_std = df_rmse_std[['Dataset', 'SLN', 'PoisSVM', 'ALFA', 'FALFA']]
df_rmse_std = df_rmse_std.rename({'Dataset': 'Dataset/Attack'}, axis=1)
df_rmse_std


Attack,Dataset/Attack,SLN,PoisSVM,ALFA,FALFA
0,Abalone,0.021556,0.022669,0.054557,0.009516
1,Australian,0.030616,0.02153,0.035253,0.031864
2,Banknote,0.044021,0.030623,0.043116,0.051968
3,Breastcancer,0.028582,0.023865,0.040603,0.012504
4,CMC,0.020738,0.080752,0.111036,0.05562
5,HTRU2,0.012656,0.013425,0.031609,0.014335
6,Phoneme,0.027366,0.022441,0.018949,0.032646
7,Ringnorm,0.068402,0.053851,0.044347,0.021081
8,Texture,0.030095,0.040833,0.026562,0.023945
9,Yeast,0.034811,0.040458,0.046826,0.024061


In [16]:
# Write the pivoted standard-deviation table to PATH_OUTPUT (row index omitted)
# and report the destination path.
path_results = os.path.join(PATH_OUTPUT, 'real_rmse_pivot_std.csv')
df_rmse_std.to_csv(path_results, index=False)
print(f'Save to: {path_results}')

Save to: /home/lukec/workspace/diva_01/results_plot/real_rmse_pivot_std.csv


In [17]:
# Extra summary row: per-attack mean of the 'RMSE' column across datasets.
# The numeric column is selected *before* aggregating so the text column
# 'Dataset' never reaches the reduction (pandas >= 2.0 raises a TypeError
# instead of silently dropping it).
df_mean = (
    df_rmse.groupby('Attack')[['RMSE']]
    .mean()
    .transpose()
    .assign(Dataset='Mean')
)

# Dataset x attack table of the 'RMSE' column, followed by the summary row.
df_rmse_pivot = df_rmse.pivot(index='Dataset', columns='Attack', values='RMSE').reset_index()
df_rmse_full = pd.concat([df_rmse_pivot, df_mean], ignore_index=True)
df_rmse_full = df_rmse_full[['Dataset', 'SLN', 'PoisSVM', 'ALFA', 'FALFA']]
df_rmse_full = df_rmse_full.rename({'Dataset': 'Dataset/Attack'}, axis=1)
df_rmse_full

Attack,Dataset/Attack,SLN,PoisSVM,ALFA,FALFA
0,Abalone,0.071744,0.058351,0.114757,0.082573
1,Australian,0.071057,0.064454,0.114566,0.108992
2,Banknote,0.125771,0.174844,0.089204,0.146025
3,Breastcancer,0.06765,0.065132,0.117239,0.113384
4,CMC,0.101471,0.250192,0.250426,0.208706
5,HTRU2,0.055077,0.061504,0.105428,0.094382
6,Phoneme,0.086685,0.066644,0.0846,0.093598
7,Ringnorm,0.191611,0.14871,0.139373,0.13446
8,Texture,0.090455,0.099153,0.066767,0.072702
9,Yeast,0.083917,0.091567,0.18559,0.12442


In [18]:
# Write the pivoted mean-RMSE table to PATH_OUTPUT (row index omitted)
# and report the destination path.
path_results = os.path.join(PATH_OUTPUT, 'real_rmse_pivot_mean.csv')
df_rmse_full.to_csv(path_results, index=False)
print(f'Save to: {path_results}')

Save to: /home/lukec/workspace/diva_01/results_plot/real_rmse_pivot_mean.csv
