In [None]:
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from scipy import stats
from scipy.stats import gaussian_kde

In [None]:
# Load the `metric_score` column of the "top" result files written for the
# `shapleyvalue` method, one CSV per k in `all_qfeatures`.
#
# Produces (as notebook globals):
#   df_reaults_all : long-format frame with columns ['corruption', 'syn_10percent'],
#                    where 'corruption' holds the k of the file a row came from and
#                    'syn_10percent' holds that file's `metric_score` values.
#   skewness       : list with one `stats.skew` result per k (biased estimator).
#   kurtosis       : list with one `stats.kurtosis` result per k (biased estimator).

# k grid 0.05, 0.15, ..., 0.95. Rounded to two decimals because each value is
# interpolated into a directory name below.
all_qfeatures = np.round(np.arange(0.05, 1.05, 0.10), 2).tolist()

required_columns = [
    "metric_score",
]
# Not used in this cell; kept so the global is still defined for later cells.
methods = [
    'integrated_gradients',
    'deeplift',
    'deepliftshap',
    'gradshap',
    'kernelshap',
    'shapleyvalue'
]

# Loop-invariant root directory of the per-k result folders.
results_path = "interpretability_results/results_k_feature"

skewness = []
kurtosis = []
# Per-k frames are collected here and concatenated once after the loop.
# Concatenating onto an initially empty DataFrame inside the loop is quadratic,
# emits a FutureWarning on recent pandas, and makes the resulting dtypes depend
# on how the installed pandas treats empty inputs.
frames = []

for k in all_qfeatures:
    # To load the `results_interp__bottom.csv` files instead, change the file
    # name in the path below.
    df_tmp = pd.read_csv(
        os.path.join(results_path, f"interpretability_shapleyvalue__{k}/results_interp__top.csv"),
        index_col=0
    )

    tmp = (
        df_tmp[required_columns]
        .reset_index(drop=True)
        .rename(columns={'metric_score': 'syn_10percent'})
    )
    # Statistics are taken on the one-column frame, before the k column is added.
    skewness.append(stats.skew(tmp, bias=True))
    kurtosis.append(stats.kurtosis(tmp, bias=True))

    frames.append(tmp.assign(corruption=k))

# Single concatenation; column order matches the original layout.
df_reaults_all = pd.concat(frames, axis=0, ignore_index=True)[['corruption', 'syn_10percent']]

In [None]:
# Rebuild the k grid (0.05, 0.15, ..., 0.95, rounded to two decimals), add 1.0
# as a final entry, and leave the list as the cell's last expression so the
# notebook displays it.
all_qfeatures = np.round(np.arange(0.05, 1.05, 0.10), 2).tolist() + [1.0]
all_qfeatures

In [None]:
# Ridge-line figure: one panel per attribution method (2 x 3 grid). Each panel
# stacks one Gaussian-KDE curve of `metric_score` per k in `all_qfeatures`,
# read from that method's "top" result files. The figure is saved as
# `k-percentile.pdf`.
#
# Also produces (as notebook globals):
#   skewness_top / kurtosis_top : one array per method, holding the biased
#                                 skewness / kurtosis of `metric_score` per k.
#   df_reaults_all              : the long-format frame of the LAST method only
#                                 (columns ['corruption', 'syn_10percent']).

# k grid 0.05, 0.15, ..., 0.95. Rounded to two decimals because each value is
# interpolated into a directory name below.
all_qfeatures = np.round(np.arange(0.05, 1.05, 0.10), 2).tolist()

required_columns = [
    "metric_score",
]
methods = [
    'integrated_gradients',
    'deeplift',
    'deepliftshap',
    'gradshap',
    'kernelshap',
    'shapleyvalue'
]
# Panel titles, index-aligned with `methods`.
method_titles = [
    'Integrated Gradients',
    'DeepLIFT',
    'DeepSHAP',
    'GradientSHAP',
    'KernelSHAP',
    'Shapley Value Sampling'
]

# Loop-invariant locations.
results_path = "interpretability_results/results_k_feature"
output_dir = "interpretability_results/visualization_results"

# Ridges are stacked bottom-up from the largest k (baseline 0) to the smallest.
# The same list supplies the y tick labels, so curves and labels cannot drift
# apart if the k grid changes.
ridge_order = all_qfeatures[::-1]

fig = plt.figure(figsize=(8, 8))
ax = None  # the first panel has no earlier axes to share its y-axis with

skewness_top = []
kurtosis_top = []

for n, method in enumerate(methods):
    skewness_synthetic = []
    kurtosis_synthetic = []
    # Per-k frames are concatenated once after the inner loop. Concatenating
    # onto an initially empty DataFrame is quadratic, warns on recent pandas,
    # and makes the resulting dtypes depend on the installed pandas version.
    frames = []

    for k in all_qfeatures:
        df_tmp = pd.read_csv(
            os.path.join(results_path, f"interpretability_{method}__{k}/results_interp__top.csv"),
            index_col=0
        )
        tmp = (
            df_tmp[required_columns]
            .reset_index(drop=True)
            .rename(columns={'metric_score': 'syn_10percent'})
        )
        # Statistics are taken on the one-column frame, before the k column is added.
        skewness_synthetic.append(stats.skew(tmp, bias=True))
        kurtosis_synthetic.append(stats.kurtosis(tmp, bias=True))

        frames.append(tmp.assign(corruption=k))

    df_reaults_all = pd.concat(frames, axis=0, ignore_index=True)[['corruption', 'syn_10percent']]

    # All panels share one y-axis so the ridge baselines line up across methods.
    ax = plt.subplot(2, 3, n + 1, frameon=False, sharey=ax)
    for i, kq in enumerate(ridge_order):
        scores = df_reaults_all.loc[df_reaults_all['corruption'] == kq, 'syn_10percent'].to_numpy()
        kde = gaussian_kde(scores)
        # Evaluate the density only over the observed range of this k's scores.
        x = np.linspace(scores.min(), scores.max(), 1000)
        density = kde.pdf(x)  # evaluated once, reused for both outline and fill
        # Each ridge is offset vertically by its index i.
        ax.plot(x, density + i, color="k", zorder=i, linewidth=1)
        ax.fill_between(x, density + i - 0.02, i, color="#A7C0DE", zorder=-i-10, alpha=0.5)

    ax.yaxis.set_tick_params(tick1On=False)
    ax.set_xlim(-1, 1)
    ax.set_ylim(0, 15)
    ax.axvline(0.0, ls="--", lw=0.75, color="black", ymax=0.8)
    ax.set_xlabel("$\\tilde{\\mathcal{S}}(\\overline{\\mathbf{X}})$")
    # The title is placed below the axes (negative y), acting as a caption.
    ax.set_title(f"{method_titles[n]}", ha='center', x=0.5, y=-0.20)

    if n % 3 == 0:
        # Left-most panel of each row: label every ridge with its k.
        ax.yaxis.set_tick_params(labelleft=True)
        ax.set_yticks(np.arange(len(ridge_order)))
        ax.set_yticklabels(ridge_order)
        ax.set_ylabel("k-percentile", ha='center', x=0.5, y=0.33)
        for tick in ax.yaxis.get_major_ticks():
            tick.label1.set_fontsize(10)
            tick.label1.set_verticalalignment("center")
    else:
        ax.yaxis.set_tick_params(labelleft=False)

    # Each per-k statistic has shape (1,); squeeze to one flat array per method.
    skewness_top.append(np.squeeze(skewness_synthetic))
    kurtosis_top.append(np.squeeze(kurtosis_synthetic))

plt.tight_layout(w_pad=0.4, h_pad=0.2)
# Create the output directory if needed so a fresh checkout can run this cell.
os.makedirs(output_dir, exist_ok=True)
plt.savefig(os.path.join(output_dir, "k-percentile.pdf"), bbox_inches='tight')

In [None]:
# Grid of histograms (2 rows x 5 columns, one panel per k) of `metric_score`
# from the `deepliftshap` "top" result files, split by `class_name`.
# The figure is saved as `syntheticDisPerClass.png`.

# k grid 0.05, 0.15, ..., 0.95. Rounded to two decimals because each value is
# interpolated into a directory name below.
all_qfeatures = np.round(np.arange(0.05, 1.05, 0.10), 2).tolist()
nrows = 2
ncols = 5
output_dir = "interpretability_results/visualization_results"

fig, ax = plt.subplots(nrows=nrows, ncols=ncols, figsize=(20, 8), dpi=300)
for r in range(nrows):
    for c in range(ncols):
        panel = ax[r][c]
        # Row-major position in the grid selects the k for this panel.
        k = all_qfeatures[r*ncols+c]
        pd_path = f"interpretability_results/results_k_feature/interpretability_deepliftshap__{k}/results_interp__top.csv"
        df_metric = pd.read_csv(pd_path, index_col=0)
        sns.histplot(data=df_metric, x='metric_score', hue='class_name', kde=False,
                     bins=30, edgecolor=None, shrink=0.9, palette=['#67adb7', '#e3716e'], ax=panel)
        panel.set_title(f'k={k}')
        panel.set_xlabel('Normalized score drop')
        panel.set_ylabel('Number of samples')
        if r == nrows - 1 and c == ncols - 1:
            # Only the last panel keeps a legend; it is rebuilt with custom
            # labels and anchored outside the axes.
            legend = panel.get_legend()
            # `legend_handles` exists on Matplotlib >= 3.7; earlier releases
            # expose the same list as `legendHandles`.
            handles = getattr(legend, "legend_handles", None)
            if handles is None:
                handles = legend.legendHandles
            # NOTE(review): the labels are paired with the handles by position,
            # so this assumes seaborn orders the two `class_name` levels as
            # (tau < 60, tau >= 60) -- confirm against the data.
            panel.legend(handles, [r'$\tau < 60$', r'$\tau \geq 60$'], title='Classes', bbox_to_anchor=(1.35, 1.3))
        else:
            panel.get_legend().remove()

plt.tight_layout()
# Create the output directory if needed so a fresh checkout can run this cell.
os.makedirs(output_dir, exist_ok=True)
plt.savefig("interpretability_results/visualization_results/syntheticDisPerClass.png", dpi=300, bbox_inches='tight')