In [None]:
import os
from pathlib import Path
from glob import glob

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from label_flip_revised.utils import create_dir

In [None]:
# Project root: the parent of the directory the notebook is executed from.
# Path.cwd() already yields an absolute path.
PATH_ROOT = Path.cwd().parent
print(PATH_ROOT)

In [None]:
# Output directory (kept as a plain string) for the CSVs and figures
# written by the cells below.
path_output = str(PATH_ROOT / 'results_plot')
print(path_output)
# NOTE(review): create_dir is a project helper; presumably it creates the
# directory when it does not exist yet -- confirm in label_flip_revised.utils.
create_dir(path_output)

In [None]:
# Read 'real_falfa_nn_db.csv' from this workspace's results folder, then
# report its shape and preview the first rows.
path_falfa_01 = os.path.join(PATH_ROOT, 'results', 'real', 'real_falfa_nn_db.csv')
df_falfa_01 = pd.read_csv(path_falfa_01)
print(df_falfa_01.shape)
df_falfa_01.head()

In [None]:
# Display the column labels of the loaded results table.
df_falfa_01.columns

In [None]:
# Distinct 'Data.Base' values, sorted; the plotting cell pairs them
# positionally with `titlenames`.
unique_bases = df_falfa_01['Data.Base'].unique()
datanames = np.sort(unique_bases)
print(datanames)

In [None]:
# Workspace directory names 'diva_01' .. 'diva_05' (zero-padded, 1-based).
N_WORKSPACES = 5
workspace_names = [f'diva_{idx:02d}' for idx in range(1, N_WORKSPACES + 1)]
workspace_names

In [None]:
# Identifiers substituted into the 'real_<name>_db.csv' result file names
# when the per-workspace scores are aggregated.
dbnames = [
    'falfa_nn',
    'alfa_svm',
    'poison_svm',
    'rand_svm',
]
dbnames


In [None]:
# NOTE: score_dict is initialised here but never filled by this cell.
score_dict = {}
# Key columns plus the accuracy columns that get averaged.
score_columns = ['Data.Base', 'Rate', 'Train.Clean', 'Test.Clean', 'Train.Poison', 'Test.Poison']
for dname in dbnames:
    # One frame per workspace; the workspaces are siblings of PATH_ROOT.
    dfs = [
        pd.read_csv(
            os.path.join(PATH_ROOT.parent, wname, 'results', 'real', f'real_{dname}_db.csv')
        )[score_columns]
        for wname in workspace_names
    ]
    stacked = pd.concat(dfs, ignore_index=True)
    # Average every score over the workspaces for each (dataset, rate) pair.
    df = stacked.groupby(['Data.Base', 'Rate']).mean()
    df.to_csv(os.path.join(path_output, f'real_score_mean_{dname}.csv'))

In [None]:
# Line colour per plotted result set. Insertion order matters: draw_subplot
# pairs the frames it receives with these entries positionally.
colors = dict(
    falfa_nn='#d32f2f',
    rand_svm='#1976d2',
    poison_svm='#388e3c',
)
# Subplot titles, matched positionally with the sorted `datanames`.
titlenames = [
    'Abalone', 'Australian', 'Banknote', 'Breastcancer', 'CMC',
    'HTRU2', 'Phoneme', 'Ringnorm', 'Texture', 'Yeast',
]

In [None]:
def draw_subplot(dfs, dataname, ax, colors=colors, fontsize=None):
    """Draw train/test accuracy-vs-rate curves for one dataset on ``ax``.

    Parameters
    ----------
    dfs : sequence of pandas.DataFrame
        One frame per result set, each providing the columns 'Rate',
        'Train.Poison' and 'Test.Poison'. All three are multiplied by 100
        before plotting (presumably fractions turned into percentages).
    dataname : str
        Text used as the subplot title.
    ax : matplotlib.axes.Axes
        Axes to draw on.
    colors : dict
        Colours to use; frames are paired with the dict's values in
        iteration order, so the order must match ``dfs``.
    fontsize : int or float, optional
        Base font size. When omitted, the notebook-level ``FONTSIZE`` is
        looked up at call time (it must be defined before the first call).
    """
    if fontsize is None:
        fontsize = FONTSIZE

    # Scale only the columns that are plotted; multiplying the whole frame
    # would also "multiply" any string column such as 'Data.Base'.
    plot_cols = ['Rate', 'Train.Poison', 'Test.Poison']
    for df, color in zip(dfs, colors.values()):
        scaled = df[plot_cols] * 100
        # Dashed line: training accuracy; solid line: test accuracy.
        scaled.plot(x='Rate', y='Train.Poison', ax=ax, style='--', marker='.', color=color, alpha=0.9)
        scaled.plot(x='Rate', y='Test.Poison', ax=ax, marker='.', color=color, alpha=0.9)

    ax.set(xlim=[-1, 41], ylim=[45, 105])
    ax.set_title(dataname, fontsize=fontsize + 1)
    ax.set_xlabel('Poisoning Rate (%)', fontsize=fontsize + 1)
    ax.set_ylabel('Accuracy (%)', fontsize=fontsize + 1)
    ax.set_xticks(np.linspace(0, 40, num=5, endpoint=True))
    ax.set_yticks(np.linspace(50, 100, num=6, endpoint=True))
    # Text kwargs on set_xticks/set_yticks only apply together with explicit
    # labels, so the tick label size is set through tick_params instead.
    ax.tick_params(axis='both', labelsize=fontsize - 1)

    # pandas adds a per-axes legend; drop it (if any) so the caller can build
    # a single figure-level legend. Line labels remain on the artists.
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()

In [None]:
# Load the per-result-set mean scores from the CSV files in `path_output`
# (file names follow 'real_score_mean_<name>.csv').
df_01, df_02, df_03 = (
    pd.read_csv(os.path.join(path_output, f'real_score_mean_{name}.csv'))
    for name in ('falfa_nn', 'rand_svm', 'poison_svm')
)

df_01

In [None]:
FONTSIZE = 13
FIGSIZE = (16, 5)

plt.rcParams["font.size"] = FONTSIZE
fig, axs = plt.subplots(2, 5, sharey=True, sharex=True, figsize=FIGSIZE)
# Flatten the 2x5 grid row by row so panels fill left-to-right, top-to-bottom.
_axs = list(axs.ravel())

# Datasets, titles and panels are matched purely by position; a length
# mismatch would silently drop or mislabel panels, so fail loudly instead.
if not (len(datanames) == len(titlenames) == len(_axs)):
    raise ValueError(
        f'Expected equal counts, got {len(datanames)} datasets, '
        f'{len(titlenames)} titles and {len(_axs)} axes.'
    )

for dataname, title, ax in zip(datanames, titlenames, _axs):
    # Order must match the insertion order of `colors` used by draw_subplot.
    dfs = [
        df_01[df_01['Data.Base'] == dataname],
        df_02[df_02['Data.Base'] == dataname],
        df_03[df_03['Data.Base'] == dataname],
    ]
    draw_subplot(dfs, title, ax)

plt.yticks(np.arange(50, 101, 10))
# Every panel draws the same six series in the same order, so the handles of
# the first panel are enough for the shared legend (one handle per label).
lines, _ = _axs[0].get_legend_handles_labels()
labels = [
    'FALFA-Train',
    'FALFA-Test',
    'SLN-Train',
    'SLN-Test',
    'PoisSVM-Train',
    'PoisSVM-Test',
]
# Place a single legend row above the subplot grid, stretched to full width.
fig.legend(lines, labels, bbox_to_anchor=(0, 1, 1, 0), loc="lower left", mode="expand", ncol=6)
plt.tight_layout(pad=0.8)
# NOTE(review): 'flfa_acc.pdf' looks like a typo for 'falfa_acc.pdf'; the name
# is kept unchanged so anything consuming the file keeps working -- confirm.
path_fig_out = os.path.join(path_output, 'flfa_acc.pdf')
plt.savefig(path_fig_out, dpi=300, bbox_inches='tight')
print(f'Save plot to {path_fig_out}')