In [None]:
import matplotlib.pyplot as plt
import os
import pandas as pd
import glob
import ast
import numpy as np
import re
import seaborn as sns

%load_ext autoreload
%autoreload 2

In [None]:
# Experiment inspected by the preview cell further down (parse_experiment(df, target)).
# Switch experiments by swapping which of the two assignments is commented out.
target = "gerrig"
# target = "delatorre"

In [None]:
# Root directory that is searched for per-run result CSV files.
LOGDIR = '../outputs/'

assert os.path.isdir(LOGDIR), f"Directory {LOGDIR} does not exist"

# glob.glob is called without recursive=True, so each '**' below behaves like a
# plain '*' and matches exactly one directory level.
glob_path = os.path.join(LOGDIR, '**/adversarial/**/**/**/**/*.csv')

# Sorted so the row order of the combined frame (and of all_results.csv) does
# not depend on filesystem enumeration order.
glob_list = sorted(glob.glob(glob_path))

if not glob_list:
    # pd.concat([]) would otherwise fail with "No objects to concatenate".
    raise ValueError(f"No result CSV files found matching {glob_path}")

# Expected layout:
#   .../<experiment>_experiment/adversarial/<attack>/<iter>/<model>/<any>/results.csv
result_path_re = re.compile(
    r'/(?P<experiment>[^/]+)_experiment/adversarial/(?P<attack>[^/]+)/(?P<iter>[^/]+)/(?P<model>[^/]+)/(?:[^/]+)/results\.csv$'
)

dfs = []

for dataframe_path in glob_list:
    # Normalise OS-specific separators so the regex only has to handle '/'.
    dataframe_path = dataframe_path.replace(os.sep, "/")

    match = result_path_re.search(dataframe_path)
    if match is None:
        raise ValueError(f"Could not parse experiment, attack, iter, model from {dataframe_path}")

    # groupdict() holds exactly the experiment/attack/iter/model values, in that
    # order. Using it avoids binding a notebook-global named `iter`, which would
    # shadow the builtin for every later cell.
    dfs.append(pd.read_csv(dataframe_path).assign(**match.groupdict()))

# ignore_index gives every row a unique label instead of repeating each file's
# own 0..n-1 index.
df = pd.concat(dfs, ignore_index=True)
df.to_csv('all_results.csv', index=False)

In [None]:
def parse_experiment(_df : pd.DataFrame, experiment_name : str) -> pd.DataFrame:
    """Aggregate the parsed response scores of one experiment per attack.

    Rows whose ``experiment`` column equals ``experiment_name`` are selected and
    their ``response`` strings are parsed with ``ast.literal_eval`` into one
    column per response key. A per-row ``mean_score`` (mean over the keys of the
    first parsed response) is added. Scores are averaged within each
    (experiment_name, version, attack, iter) group and then summarised as mean
    and standard deviation across ``iter`` for each
    (experiment_name, version, attack).

    Args:
        _df: Combined results with the columns ``experiment``, ``response``,
            ``experiment_name``, ``version``, ``attack`` and ``iter``. The
            frame is not modified.
        experiment_name: Value of the ``experiment`` column to keep.

    Returns:
        A frame with two-level columns: the three grouping columns followed by
        ``(key, 'mean')`` and ``(key, 'std')`` for every response key and for
        ``mean_score``.

    Raises:
        IndexError: If no row belongs to ``experiment_name``.
    """
    # reset_index: the caller's frame may carry repeated index labels (e.g. after
    # concatenating several files); a fresh RangeIndex keeps the column-wise
    # concat below unambiguous.
    selected = (
        _df[_df['experiment'] == experiment_name]
        .drop(columns=['experiment'])
        .reset_index(drop=True)
    )
    if selected.empty:
        # IndexError matches the exception type this situation has always
        # produced (via .iloc[0]); only the message is made explicit.
        raise IndexError(f"No rows found for experiment {experiment_name!r}")

    parsed = selected['response'].apply(ast.literal_eval)
    # The score columns are defined by the keys of the first response.
    keys = list(parsed.iloc[0].keys())

    # Build all response columns in one go instead of one pd.Series per row.
    response_df = pd.DataFrame(parsed.tolist(), index=selected.index)
    scores = pd.concat([selected.drop(columns=['response']), response_df], axis=1)

    scores['mean_score'] = scores[keys].mean(axis=1)
    value_columns = keys + ['mean_score']

    # First collapse the rows of each iteration, then summarise across iterations.
    per_iter = (
        scores
        .groupby(['experiment_name', 'version', 'attack', 'iter'])[value_columns]
        .mean()
        .reset_index()
    )
    summary = (
        per_iter
        .groupby(['experiment_name', 'version', 'attack'])[value_columns]
        .agg(['mean', 'std'])
        .reset_index()
    )

    return summary

In [None]:
# Write one aggregated summary CSV per distinct value of the 'experiment' column.
for experiment in df['experiment'].unique():
    out = parse_experiment(df, experiment)
    # Relative path: the file is created in the current working directory.
    out.to_csv(f'{experiment}_results.csv', index=False)

In [None]:
# Aggregate the experiment selected by `target` and preview the first rows.
out = parse_experiment(df, target)
out.head()

In [None]:
# KEY_TO_PLOT = 'mean_score'

# def plot_experiment(_df : pd.DataFrame, experiment_name : str):
    
#     _df['group'] = _df['experiment_name'] + ' ' + _df['version']
    
#     groups = _df['group'].unique()
#     attacks = _df['attack'].unique()
#     fig, axes = plt.subplots(nrows=len(groups), ncols=1, figsize=(10, 5 * len(groups)))
#     if len(groups) == 1:
#         axes = [axes]
#     for ax, group in zip(axes, groups):
#         group_df = _df[_df['group'] == group]
#         index = np.arange(len(attacks))
#         bar_width = 0.35
#         colors = plt.cm.tab20(np.linspace(0, 1, len(attacks)))
#         for i, attack in enumerate(attacks):
#             bar = ax.bar(index[i] * bar_width, group_df[group_df['attack'] == attack][KEY_TO_PLOT]['mean'], bar_width, 
#                  yerr=group_df[group_df['attack'] == attack][KEY_TO_PLOT]['std'], label=attack, color=colors[i])
#         control_mean = group_df[group_df['attack'] == 'control'][KEY_TO_PLOT]['mean'].values[0]
#         ax.axhline(y=control_mean, color='black', linestyle='--', label='Control')
#         ax.set_title(f'Group: {group}')
#         ax.set_xlabel('Attack')
#         ax.set_ylabel('Mean Response')
#         ax.set_xticks(index * bar_width)
#         ax.set_xticklabels(attacks)
#         ax.tick_params(axis='x', rotation=45)
#         ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

#     plt.tight_layout()
#     plt.show()

# plot_experiment(out, target)

In [None]:
# def get_difference_from_control(_df : pd.DataFrame):
#     control_mean, control_std = _df[_df['attack'] == 'control'][KEY_TO_PLOT].values[0]
#     other_attacks = _df[_df['attack'] != 'control']
#     other_attacks['diff'] = other_attacks[KEY_TO_PLOT]['mean'] - control_mean
#     other_attacks['diff_std'] = np.sqrt(other_attacks[KEY_TO_PLOT]['std']**2 + control_std**2)
#     return other_attacks
    
# def plot_difference_from_control(_df : pd.DataFrame):
#     groups = _df['group'].unique()
#     attacks = _df['attack'].unique()
#     fig, axes = plt.subplots(nrows=len(groups), ncols=1, figsize=(10, 5 * len(groups)))
#     if len(groups) == 1:
#         axes = [axes]
#     for ax, group in zip(axes, groups):
#         group_df = _df[_df['group'] == group]
#         index = np.arange(len(attacks))
#         bar_width = 0.35
#         colors = plt.cm.tab20(np.linspace(0, 1, len(attacks)))
#         for i, attack in enumerate(attacks):
#             bar = ax.bar(index[i] * bar_width, group_df[group_df['attack'] == attack]['diff'], bar_width, 
#                  yerr=group_df[group_df['attack'] == attack]['diff_std'], label=attack, color=colors[i])
#         ax.axhline(y=0, color='black', linestyle='--', label='Control')
#         ax.set_title(f'Group: {group}')
#         ax.set_xlabel('Attack')
#         ax.set_ylabel('Mean Response')
#         ax.set_xticks(index * bar_width)
#         ax.set_xticklabels(attacks)
#         ax.tick_params(axis='x', rotation=45)
#         ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

# diff = get_difference_from_control(out)
# plot_difference_from_control(diff)
# plt.tight_layout(pad=3.0)
# plt.show()

In [None]:
# attack_summaries = diff.groupby(['attack'])['diff'].agg(['mean', 'std'])
# plt.bar(attack_summaries.index, attack_summaries['mean'], yerr=attack_summaries['std'])
# plt.xticks(rotation=45)
# plt.show()

In [None]:
def get_combined_change(results=None, experiments=("gerrig", "delatorre")):
    """Collect, per attack, how far its scores sit from the control baseline.

    Each experiment is aggregated with ``parse_experiment``. For every
    non-control attack row, the control's ``('mean_score', 'mean')`` and
    ``('mean_score', 'std')`` are subtracted from the row's own values, and the
    differences are pooled per attack across all experiments.

    Args:
        results: Combined results frame to aggregate. Defaults to the
            notebook-global ``df``.
        experiments: Experiment names to pool, processed in the given order.

    Returns:
        A tuple ``(mean_score_changes, mean_std_changes)`` of dicts that map an
        attack name to a list with one difference per aggregated row of that
        attack.

    Raises:
        IndexError: If an experiment has no row whose attack is ``'control'``.
    """
    if results is None:
        results = df  # notebook-global combined frame

    mean_score_changes = {}
    mean_std_changes = {}

    for experiment_name in experiments:
        summary = parse_experiment(results, experiment_name)

        is_control = summary['attack'] == 'control'
        if not is_control.any():
            # IndexError matches the type previously raised by `.values[0]` on
            # an empty selection; only the message is made explicit.
            raise IndexError(
                f"Experiment {experiment_name!r} has no 'control' rows to compare against"
            )

        # NOTE(review): only the FIRST control row is used as the baseline for
        # every row of this experiment. If the summary holds one control row per
        # (experiment_name, version), a per-group baseline may be intended;
        # confirm before relying on these differences.
        control_mean_score = summary.loc[is_control, ('mean_score', 'mean')].values[0]
        control_mean_std = summary.loc[is_control, ('mean_score', 'std')].values[0]

        for attack in summary['attack'].unique():
            if attack == "control":
                continue

            attack_rows = summary['attack'] == attack

            score_change = summary.loc[attack_rows, ('mean_score', 'mean')] - control_mean_score
            mean_score_changes.setdefault(attack, []).extend(score_change.tolist())

            std_change = summary.loc[attack_rows, ('mean_score', 'std')] - control_mean_std
            mean_std_changes.setdefault(attack, []).extend(std_change.tolist())

    return mean_score_changes, mean_std_changes

# mean_score_changes feeds the box plot in the next cell; mean_std_changes is
# not read by any other visible cell.
mean_score_changes, mean_std_changes = get_combined_change()

In [None]:
# Flatten {attack: [score changes]} into long form: one row per (attack, change).
data = [
    (attack, score)
    for attack, scores in mean_score_changes.items()
    for score in scores
]

diff_df = pd.DataFrame(data, columns=['Attack Type', 'Score Change'])

# Fix the category order once and hand it to seaborn explicitly, so the boxes
# and the median labels below are guaranteed to use the same order.
attack_order = list(diff_df['Attack Type'].unique())

# One Viridis colour per attack; max(..., 1) keeps palette[0] valid on empty data.
palette = sns.color_palette("viridis", n_colors=max(len(attack_order), 1))

# Horizontal box plot: attacks on the Y axis, score change on the X axis.
fig, ax = plt.subplots(figsize=(12, 6))
sns.boxplot(
    data=diff_df,
    y='Attack Type',
    x='Score Change',
    order=attack_order,
    palette=palette,
    # Outliers are drawn as diamonds in the first palette colour.
    flierprops=dict(marker='D', markersize=5, markerfacecolor=palette[0]),
    ax=ax,
)

# Reference line: zero difference from control.
ax.axvline(x=0, color='black', linestyle='dotted', linewidth=1)

# Median per attack, aligned to the plotted order. A default groupby sorts
# alphabetically, which need not match the order in which the boxes are drawn.
medians = (
    diff_df.groupby('Attack Type', sort=False)['Score Change']
    .median()
    .reindex(attack_order)
)
horizontal_offset = diff_df['Score Change'].median() * 0.05  # Offset from median for display

# Box i is drawn at y = i for the i-th entry of attack_order.
for position, attack in enumerate(attack_order):
    median = medians.loc[attack]
    ax.text(median + horizontal_offset, position, f"{median:.3f}",
            horizontalalignment='center', size='x-small', color='w', weight='semibold')

ax.set_title('Gerrig and Delatorre Rating Change by Attack')
ax.set_xlabel('Difference from Control')
ax.set_ylabel('Attack')
ax.grid(axis='x', linestyle='--', alpha=0.6)

plt.show()