In [8]:
import numpy as np
import pandas as pd
import torch
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
from IPython import display
import random
import scipy.stats
from sklearn import metrics
from sklearn.metrics import roc_curve
import os
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import dendrogram, linkage, fcluster## Fidelity Analysis
from sklearn.cluster import KMeans

import timm
import torchvision
from torchvision.transforms.functional import gaussian_blur
from torch.utils.data import Subset
import torch.nn.functional as F
import matplotlib.pyplot as plt

# CIFAR-10

In [148]:
data='CIFAR10'
num_experiments=5
n_rows=5000
sigma_list=[0.1,0.25]
k_list=[50,100, 1000]
m=10
epochs=30
epsilon=0.0
clipping_mode='nonDP'
model='vit_small_patch16_224'
explanation_type = 'sl'

df_gap_nondp = pd.DataFrame()

for sigma in sigma_list:
    for k in k_list:
        gap_dir = f'perturbation_gap_pred_test/{data}_sigma={sigma}_k={k}_m={m}'
        df_path = f'{gap_dir}/mode={clipping_mode}_nrows={n_rows}_type={explanation_type}_epochs={epochs}.csv'
        if not os.path.exists(df_path):
            continue
        df_one_setting = pd.read_csv(df_path)
#             df_one_setting['expl_type'] = explanation_type
        df_gap_nondp = pd.concat([df_gap_nondp, df_one_setting], ignore_index=True)

df_gap_nondp['epsilon'] = np.inf
df_gap_nondp_grouped = df_gap_nondp.groupby(['m', 'k', 'sigma'])
df_gap_nondp_grouped[['gap']].agg(['mean', 'std'])

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,gap,gap
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std
m,k,sigma,Unnamed: 3_level_2,Unnamed: 4_level_2
10.0,50.0,0.1,0.023692,0.004005
10.0,50.0,0.25,0.082453,0.016574
10.0,100.0,0.1,0.023449,0.004925
10.0,100.0,0.25,0.082711,0.017934
10.0,1000.0,0.1,0.022582,0.004872
10.0,1000.0,0.25,0.078223,0.015414


In [149]:
clipping_mode='BK-MixOpt'

epsilons = [0.5, 1.0, 2.0, 8.0]

df_gap_dp = pd.DataFrame()
for sigma in sigma_list:
    for k in k_list:
        for eps in epsilons:
            gap_dir = f'perturbation_gap_pred_test/{data}_sigma={sigma}_k={k}_m={m}'
            df_dir = f'{gap_dir}/mode={clipping_mode}_nrows={n_rows}_type={explanation_type}_epsilon={eps}_epochs={epochs}.csv'
            if not os.path.exists(df_dir):
                continue
            df_one_eps = pd.read_csv(df_dir)
#                 df_one_eps['expl_type'] = explanation_type
            df_gap_dp = pd.concat([df_gap_dp, df_one_eps], ignore_index=True)
    
df_gap_dp_grouped = df_gap_dp.groupby(['k', 'sigma', 'epsilon'])
df_gap_dp_grouped[['gap']].agg(['mean', 'std'])

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,gap,gap
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std
k,sigma,epsilon,Unnamed: 3_level_2,Unnamed: 4_level_2
50.0,0.1,0.5,0.065437,0.005568
50.0,0.1,1.0,0.067329,0.012328
50.0,0.1,2.0,0.059255,0.0046
50.0,0.1,8.0,0.065145,0.007052
50.0,0.25,0.5,0.245269,0.044353
50.0,0.25,1.0,0.250014,0.054672
50.0,0.25,2.0,0.232455,0.035113
50.0,0.25,8.0,0.227056,0.033146
100.0,0.1,0.5,0.065845,0.006255
100.0,0.1,1.0,0.067249,0.012769


In [150]:
df_gap_combined = pd.concat([df_gap_nondp, df_gap_dp], ignore_index=True)
# df_gap_combined = df_gap_combined.loc
df_gap_combined_grouped = df_gap_combined.groupby(['k', 'sigma', 'epsilon'])
latexify = df_gap_combined_grouped[['gap']].agg(['mean', 'std'])
latexify

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,gap,gap
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std
k,sigma,epsilon,Unnamed: 3_level_2,Unnamed: 4_level_2
50.0,0.1,0.5,0.065437,0.005568
50.0,0.1,1.0,0.067329,0.012328
50.0,0.1,2.0,0.059255,0.0046
50.0,0.1,8.0,0.065145,0.007052
50.0,0.1,inf,0.023692,0.004005
50.0,0.25,0.5,0.245269,0.044353
50.0,0.25,1.0,0.250014,0.054672
50.0,0.25,2.0,0.232455,0.035113
50.0,0.25,8.0,0.227056,0.033146
50.0,0.25,inf,0.082453,0.016574


In [151]:
print(latexify.to_latex(float_format="{:.3f}".format))

\begin{tabular}{lllrr}
\toprule
 &  &  & \multicolumn{2}{r}{gap} \\
 &  &  & mean & std \\
k & sigma & epsilon &  &  \\
\midrule
\multirow[t]{10}{*}{50.000000} & \multirow[t]{5}{*}{0.100000} & 0.500000 & 0.065 & 0.006 \\
 &  & 1.000000 & 0.067 & 0.012 \\
 &  & 2.000000 & 0.059 & 0.005 \\
 &  & 8.000000 & 0.065 & 0.007 \\
 &  & inf & 0.024 & 0.004 \\
\cline{2-5}
 & \multirow[t]{5}{*}{0.250000} & 0.500000 & 0.245 & 0.044 \\
 &  & 1.000000 & 0.250 & 0.055 \\
 &  & 2.000000 & 0.232 & 0.035 \\
 &  & 8.000000 & 0.227 & 0.033 \\
 &  & inf & 0.082 & 0.017 \\
\cline{1-5} \cline{2-5}
\multirow[t]{10}{*}{100.000000} & \multirow[t]{5}{*}{0.100000} & 0.500000 & 0.066 & 0.006 \\
 &  & 1.000000 & 0.067 & 0.013 \\
 &  & 2.000000 & 0.060 & 0.004 \\
 &  & 8.000000 & 0.065 & 0.007 \\
 &  & inf & 0.023 & 0.005 \\
\cline{2-5}
 & \multirow[t]{5}{*}{0.250000} & 0.500000 & 0.245 & 0.044 \\
 &  & 1.000000 & 0.248 & 0.055 \\
 &  & 2.000000 & 0.231 & 0.034 \\
 &  & 8.000000 & 0.228 & 0.033 \\
 &  & inf & 0.083 &

# SVHN

In [133]:
data='SVHN'
num_experiments=5
n_rows=5000
sigma_list=[0.1,0.25]
k_list=[50,100,1000]
m=10
epochs=50
epsilon=0.0
clipping_mode='nonDP'
model='vit_small_patch16_224'
explanation_type='sl'

df_gap_nondp = pd.DataFrame()

for sigma in sigma_list:
    for k in k_list:
        gap_dir = f'perturbation_gap_test/{data}_sigma={sigma}_k={k}_m={m}'
        df_path = f'{gap_dir}/mode={clipping_mode}_nrows={n_rows}_type={explanation_type}_epochs={epochs}.csv'
        if not os.path.exists(df_path):
            print(df_path)
            continue
        df_one_setting = pd.read_csv(df_path)
        df_gap_nondp = pd.concat([df_gap_nondp, df_one_setting], ignore_index=True)

df_gap_nondp['epsilon'] = np.inf
df_gap_nondp_grouped = df_gap_nondp.groupby(['m', 'k', 'sigma'])
df_gap_nondp_grouped[['gap']].agg(['mean', 'std'])

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,gap,gap
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std
m,k,sigma,Unnamed: 3_level_2,Unnamed: 4_level_2
10.0,50.0,0.1,0.062493,0.011066
10.0,50.0,0.25,0.16958,0.029045
10.0,100.0,0.1,0.061709,0.011461
10.0,100.0,0.25,0.168471,0.029466
10.0,1000.0,0.1,0.053734,0.010093
10.0,1000.0,0.25,0.149644,0.024569


In [134]:
clipping_mode='BK-MixOpt'

epsilons = [0.5, 1.0, 2.0, 8.0]

df_gap_dp = pd.DataFrame()
for sigma in sigma_list:
    for k in k_list:
        for eps in epsilons:
            gap_dir = f'perturbation_gap_test/{data}_sigma={sigma}_k={k}_m={m}'
            df_dir = f'{gap_dir}/mode={clipping_mode}_nrows={n_rows}_type={explanation_type}_epsilon={eps}_epochs={epochs}.csv'
            if not os.path.exists(df_dir):
                continue
            df_one_eps = pd.read_csv(df_dir)
            df_gap_dp = pd.concat([df_gap_dp, df_one_eps], ignore_index=True)
    
df_gap_dp_grouped = df_gap_dp.groupby(['k', 'sigma', 'epsilon'])
df_gap_dp_grouped[['gap']].agg(['mean', 'std'])

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,gap,gap
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std
k,sigma,epsilon,Unnamed: 3_level_2,Unnamed: 4_level_2
50.0,0.1,0.5,0.770728,0.133268
50.0,0.1,1.0,0.733784,0.106286
50.0,0.1,2.0,0.756063,0.106511
50.0,0.1,8.0,0.749714,0.129884
50.0,0.25,0.5,1.407484,0.096482
50.0,0.25,1.0,1.470687,0.128673
50.0,0.25,2.0,1.473971,0.150654
50.0,0.25,8.0,1.488659,0.140441
100.0,0.1,0.5,0.77084,0.13365
100.0,0.1,1.0,0.732568,0.104052


In [135]:
df_gap_combined = pd.concat([df_gap_nondp, df_gap_dp], ignore_index=True)
# df_gap_combined = df_gap_combined.loc
df_gap_combined_grouped = df_gap_combined.groupby(['k', 'sigma', 'epsilon'])
latexify_svhn = df_gap_combined_grouped[['gap']].agg(['mean', 'std'])
latexify_svhn

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,gap,gap
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std
k,sigma,epsilon,Unnamed: 3_level_2,Unnamed: 4_level_2
50.0,0.1,0.5,0.770728,0.133268
50.0,0.1,1.0,0.733784,0.106286
50.0,0.1,2.0,0.756063,0.106511
50.0,0.1,8.0,0.749714,0.129884
50.0,0.1,inf,0.062493,0.011066
50.0,0.25,0.5,1.407484,0.096482
50.0,0.25,1.0,1.470687,0.128673
50.0,0.25,2.0,1.473971,0.150654
50.0,0.25,8.0,1.488659,0.140441
50.0,0.25,inf,0.16958,0.029045


In [136]:
print(latexify_svhn.to_latex(float_format="{:.3f}".format))

\begin{tabular}{lllrr}
\toprule
 &  &  & \multicolumn{2}{r}{gap} \\
 &  &  & mean & std \\
k & sigma & epsilon &  &  \\
\midrule
\multirow[t]{10}{*}{50.000000} & \multirow[t]{5}{*}{0.100000} & 0.500000 & 0.771 & 0.133 \\
 &  & 1.000000 & 0.734 & 0.106 \\
 &  & 2.000000 & 0.756 & 0.107 \\
 &  & 8.000000 & 0.750 & 0.130 \\
 &  & inf & 0.062 & 0.011 \\
\cline{2-5}
 & \multirow[t]{5}{*}{0.250000} & 0.500000 & 1.407 & 0.096 \\
 &  & 1.000000 & 1.471 & 0.129 \\
 &  & 2.000000 & 1.474 & 0.151 \\
 &  & 8.000000 & 1.489 & 0.140 \\
 &  & inf & 0.170 & 0.029 \\
\cline{1-5} \cline{2-5}
\multirow[t]{10}{*}{100.000000} & \multirow[t]{5}{*}{0.100000} & 0.500000 & 0.771 & 0.134 \\
 &  & 1.000000 & 0.733 & 0.104 \\
 &  & 2.000000 & 0.753 & 0.106 \\
 &  & 8.000000 & 0.747 & 0.128 \\
 &  & inf & 0.062 & 0.011 \\
\cline{2-5}
 & \multirow[t]{5}{*}{0.250000} & 0.500000 & 1.407 & 0.096 \\
 &  & 1.000000 & 1.470 & 0.128 \\
 &  & 2.000000 & 1.473 & 0.150 \\
 &  & 8.000000 & 1.488 & 0.141 \\
 &  & inf & 0.168 &

# CIFAR100

In [145]:
data='CIFAR100'
num_experiments=5
n_rows=5000
sigma_list=[0.1,0.25]
k_list=[50,100, 1000]
m=10
epochs=9
epsilon=0.0
clipping_mode='nonDP'
model='beit_base_patch16_224.in22k_ft_in22k_in1k'
explanation_type='sl'

df_gap_nondp = pd.DataFrame()

for sigma in sigma_list:
    for k in k_list:
        gap_dir = f'perturbation_gap/{data}_sigma={sigma}_k={k}_m={m}'
        df_path = f'{gap_dir}/mode={clipping_mode}_nrows={n_rows}_type={explanation_type}_epochs={epochs}.csv'
        if not os.path.exists(df_path):
#             print('oops')
            continue
        df_one_setting = pd.read_csv(df_path)
        df_gap_nondp = pd.concat([df_gap_nondp, df_one_setting], ignore_index=True)

df_gap_nondp['epsilon'] = np.inf
df_gap_nondp_grouped = df_gap_nondp.groupby(['m', 'k', 'sigma'])
df_gap_nondp_grouped[['gap']].agg(['mean', 'std'])

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,gap,gap
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std
m,k,sigma,Unnamed: 3_level_2,Unnamed: 4_level_2
10.0,50.0,0.1,0.070382,0.003926
10.0,50.0,0.25,0.276722,0.036081
10.0,100.0,0.1,0.069428,0.003769
10.0,100.0,0.25,0.275301,0.035744


In [146]:
clipping_mode='BK-MixOpt'

epsilons = [0.5, 1.0, 2.0, 8.0]

df_gap_dp = pd.DataFrame()
for sigma in sigma_list:
    for k in k_list:
        for eps in epsilons:
            gap_dir = f'perturbation_gap/{data}_sigma={sigma}_k={k}_m={m}'
            df_dir = f'{gap_dir}/mode={clipping_mode}_nrows={n_rows}_type={explanation_type}_epsilon={eps}_epochs={epochs}.csv'
            if not os.path.exists(df_dir):
                continue
            df_one_eps = pd.read_csv(df_dir)
            df_gap_dp = pd.concat([df_gap_dp, df_one_eps], ignore_index=True)
    
df_gap_dp_grouped = df_gap_dp.groupby(['k', 'sigma', 'epsilon'])
df_gap_dp_grouped[['gap']].agg(['mean', 'std'])

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,gap,gap
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std
k,sigma,epsilon,Unnamed: 3_level_2,Unnamed: 4_level_2
50.0,0.1,0.5,0.13102,0.006569
50.0,0.1,1.0,0.208004,0.013746
50.0,0.1,2.0,0.319919,0.011432
50.0,0.1,8.0,0.404755,0.017115
50.0,0.25,0.5,0.213961,0.007014
50.0,0.25,1.0,0.356175,0.014012
50.0,0.25,2.0,0.595072,0.017195
50.0,0.25,8.0,0.848497,0.036878
100.0,0.1,0.5,0.130827,0.006542
100.0,0.1,1.0,0.207983,0.01374


In [147]:
df_gap_combined = pd.concat([df_gap_nondp, df_gap_dp], ignore_index=True)
# df_gap_combined = df_gap_combined.loc
df_gap_combined_grouped = df_gap_combined.groupby(['k', 'sigma', 'epsilon'])
latexify_cif100 = df_gap_combined_grouped[['gap']].agg(['mean', 'std'])
latexify_cif100

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,gap,gap
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std
k,sigma,epsilon,Unnamed: 3_level_2,Unnamed: 4_level_2
50.0,0.1,0.5,0.13102,0.006569
50.0,0.1,1.0,0.208004,0.013746
50.0,0.1,2.0,0.319919,0.011432
50.0,0.1,8.0,0.404755,0.017115
50.0,0.1,inf,0.070382,0.003926
50.0,0.25,0.5,0.213961,0.007014
50.0,0.25,1.0,0.356175,0.014012
50.0,0.25,2.0,0.595072,0.017195
50.0,0.25,8.0,0.848497,0.036878
50.0,0.25,inf,0.276722,0.036081


In [144]:
print(latexify_cif100.to_latex(float_format="{:.3f}".format))

\begin{tabular}{lllrr}
\toprule
 &  &  & \multicolumn{2}{r}{gap} \\
 &  &  & mean & std \\
k & sigma & epsilon &  &  \\
\midrule
\multirow[t]{10}{*}{50.000000} & \multirow[t]{5}{*}{0.100000} & 0.500000 & 0.131 & 0.007 \\
 &  & 1.000000 & 0.208 & 0.014 \\
 &  & 2.000000 & 0.320 & 0.011 \\
 &  & 8.000000 & 0.404 & 0.017 \\
 &  & inf & 0.071 & 0.004 \\
\cline{2-5}
 & \multirow[t]{5}{*}{0.250000} & 0.500000 & 0.214 & 0.007 \\
 &  & 1.000000 & 0.356 & 0.014 \\
 &  & 2.000000 & 0.595 & 0.017 \\
 &  & 8.000000 & 0.848 & 0.038 \\
 &  & inf & 0.277 & 0.036 \\
\cline{1-5} \cline{2-5}
\multirow[t]{10}{*}{100.000000} & \multirow[t]{5}{*}{0.100000} & 0.500000 & 0.131 & 0.007 \\
 &  & 1.000000 & 0.208 & 0.014 \\
 &  & 2.000000 & 0.319 & 0.012 \\
 &  & 8.000000 & 0.404 & 0.017 \\
 &  & inf & 0.070 & 0.004 \\
\cline{2-5}
 & \multirow[t]{5}{*}{0.250000} & 0.500000 & 0.214 & 0.007 \\
 &  & 1.000000 & 0.356 & 0.014 \\
 &  & 2.000000 & 0.595 & 0.017 \\
 &  & 8.000000 & 0.848 & 0.037 \\
 &  & inf & 0.277 &

## Fidelity Analysis - Hierarchical

In [12]:
data='CIFAR10'
num_experiments=5
n_rows=10000
channel=1
epochs=30
epsilon=0.0
clipping_mode='nonDP'
model='vit_small_patch16_224'
explanation_type='ig'
clustering_method='hierarchical'
fid_method='all_clusters'

df_nondp = pd.read_csv(f'fidelity/fid_{data}_{clustering_method}_method={fid_method}/mode={clipping_mode}_nrows={n_rows}_type={explanation_type}_epochs={epochs}_channel={channel}.csv')
    
df_nondp_grouped = df_nondp.groupby(['max_distance'])
df_nondp_grouped[['consistency', 'num_clusters']].agg(['mean', 'std'])

Unnamed: 0_level_0,consistency,consistency,num_clusters,num_clusters
Unnamed: 0_level_1,mean,std,mean,std
max_distance,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
0.3,0.188596,0.065455,2.2,0.83666
0.4,0.23584,0.034403,11.0,4.062019
0.5,0.232262,0.035899,30.8,11.562872
0.6,0.215622,0.020133,54.6,10.014989
0.7,0.206074,0.020136,83.4,15.453155


In [13]:
clipping_mode='BK-MixOpt'

epsilons = [0.5, 1.0, 2.0, 8.0]

df_dp = pd.DataFrame()
for eps in epsilons:
    df_one_eps = pd.read_csv(
        f'fidelity/fid_{data}_{clustering_method}_method={fid_method}/mode={clipping_mode}_nrows={n_rows}_type={explanation_type}_epsilon={eps}_epochs={epochs}_channel={channel}.csv')

    df_dp = pd.concat([df_dp, df_one_eps], ignore_index=True)
df_dp_grouped = df_dp.groupby(['max_distance', 'epsilon'])
print('Consistency Data, DP')
df_dp_grouped[['consistency', 'num_clusters']].agg(['mean', 'std'])

Consistency Data, DP


Unnamed: 0_level_0,Unnamed: 1_level_0,consistency,consistency,num_clusters,num_clusters
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std
max_distance,epsilon,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
1.5,0.5,0.375661,0.156409,2.4,1.140175
1.5,1.0,0.420606,0.19386,2.2,1.643168
1.5,2.0,0.324518,0.200772,1.4,0.547723
1.5,8.0,0.254359,0.113154,1.8,0.83666
2.0,0.5,0.326589,0.093082,12.0,4.84768
2.0,1.0,0.291613,0.068553,9.8,3.271085
2.0,2.0,0.272771,0.033485,8.0,1.870829
2.0,8.0,0.280474,0.051869,11.4,2.880972
2.5,0.5,0.257817,0.036265,33.4,7.300685
2.5,1.0,0.248362,0.037188,30.8,6.496153


## Fidelity Analysis - K-Means

In [16]:
data='CIFAR10'
num_experiments=5
n_rows=10000
channel=1
epochs=30
epsilon=0.0
clipping_mode='nonDP'
model='vit_small_patch16_224'
explanation_type='ig'
clustering_method='kmeans'
fid_method='max'

df_nondp = pd.read_csv(f'fidelity/fid_{data}_{clustering_method}_method={fid_method}/mode={clipping_mode}_nrows={n_rows}_type={explanation_type}_epochs={epochs}_channel={channel}.csv')
    
df_nondp_grouped = df_nondp.groupby(['k'])
df_nondp_grouped[['consistency', 'num_clusters']].agg(['mean', 'std'])

Unnamed: 0_level_0,consistency,consistency,num_clusters,num_clusters
Unnamed: 0_level_1,mean,std,mean,std
k,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
25.0,0.107753,0.008183,3.2,2.774887
50.0,0.125218,0.007672,3.8,1.788854
100.0,0.122129,0.010654,4.2,1.48324
150.0,0.127879,0.005073,4.6,1.140175
200.0,0.12456,0.008919,5.8,2.774887
250.0,0.124768,0.013068,5.6,2.19089


In [17]:
clipping_mode='BK-MixOpt'

epsilons = [0.5, 1.0, 2.0, 8.0]

df_dp = pd.DataFrame()
for eps in epsilons:
    df_one_eps = pd.read_csv(
        f'fidelity/fid_{data}_{clustering_method}_method={fid_method}/mode={clipping_mode}_nrows={n_rows}_type={explanation_type}_epsilon={eps}_epochs={epochs}_channel={channel}.csv')

    df_dp = pd.concat([df_dp, df_one_eps], ignore_index=True)
df_dp_grouped = df_dp.groupby(['k', 'epsilon'])
print('Consistency Data, DP')
df_dp_grouped[['consistency', 'num_clusters']].agg(['mean', 'std'])

Consistency Data, DP


Unnamed: 0_level_0,Unnamed: 1_level_0,consistency,consistency,num_clusters,num_clusters
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std
k,epsilon,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
25.0,0.5,0.134484,0.025108,3.8,1.788854
25.0,1.0,0.122771,0.011132,3.0,1.414214
25.0,2.0,0.11424,0.009952,2.2,0.83666
25.0,8.0,0.113569,0.012186,3.0,1.414214
50.0,0.5,0.13976,0.011722,4.2,1.643168
50.0,1.0,0.140354,0.018084,4.8,0.83666
50.0,2.0,0.114613,0.011699,3.4,1.81659
50.0,8.0,0.122,0.010789,4.2,1.788854
100.0,0.5,0.142444,0.014243,4.2,1.788854
100.0,1.0,0.131847,0.020391,3.8,1.30384


## Fidelity Generation with k-means

In [8]:
# NEW CODE, TRY IT OUT
num_experiments=5
epochs=[5,10,30]
epochs_nondp = [5,10,30]
model="vit_small_patch16_224"
clipping_mode_dp='BK-MixOpt'
clipping_mode_nondp='nonDP'
num_samples=20000
cifar_data='CIFAR10'
expl_type='sl'
ep=10 # 10 epochs (for now)
# n_rows=100
n_rows=5000
channel=1

df_fid = pd.DataFrame(columns=['experiment_no', 'k', 'consistency', 'num_clusters'])
for experiment_no in tqdm(range(5)):
    exp_no = str(experiment_no)
    df_path = f'lira/attributions_{cifar_data}_{num_samples}/model={model}_mode={clipping_mode_nondp}_type={expl_type}_nsamples=20_epochs={ep}/channel{channel}/{exp_no}.csv'
    # randomly sample only 1000 examples to read in
    skip_rows = np.random.choice(np.arange(0, 20000), size=19000, replace=False)
    print(skip_rows)
    df_channel1 = pd.read_csv(df_path,
#                               skiprows=skip_rows,
#                               header=None
                              nrows=n_rows
                             )
    indices_path = f'lira/indices_{cifar_data}_{num_samples}/model={model}_mode={clipping_mode_nondp}_epochs={ep}/{exp_no}.csv'
    preds_path = f'lira/preds_{cifar_data}_{num_samples}/model={model}_mode={clipping_mode_nondp}_type={expl_type}_nsamples=20_epochs={ep}/{exp_no}.csv'
    df_preds = pd.read_csv(preds_path,
#                            skiprows=skip_rows,
#                            header=None
                           nrows=n_rows
                          )
    df_indices = pd.read_csv(indices_path,
#                              skiprows=skip_rows,
#                              header=None
                             nrows=n_rows
                            )
    k_list = [5, 10, 50, 100]
    for k in tqdm(k_list):
        kmeans = KMeans(n_clusters=k)
        kmeans.fit(df_channel1)
#         linkage_matrix = linkage(df_channel1, method='complete')
#         clusters = fcluster(linkage_matrix, dist, criterion='distance')
        clusters = kmeans.labels_
        df_preds[str(k)] = clusters
        consistency=0.0
        total_in_clusters=0.0
        large_cluster_count = 0
        for c in np.unique(clusters):
            df_preds_subset = df_preds.loc[df_preds[str(k)] == c]
#             mode_label = df_preds_subset[exp_no].mode().iloc[0]
#             mode_label_count = len(df_preds_subset.loc[df_preds_subset[exp_no] == mode_label])
            total_cluster_size = len(df_preds_subset)
            if total_cluster_size <= 1.0:
                continue
            cluster_counts = df_preds_subset[exp_no].value_counts().tolist()
            for cluster_count in cluster_counts:
                consistency += cluster_count * (cluster_count - 1) / (total_cluster_size - 1)
            total_in_clusters += total_cluster_size
#             if total_cluster_size != 1.0:
#                 print(total_cluster_size, mode_label_count / total_cluster_size)
#                 consistency+=mode_label_count
#                 total_in_clusters+=total_cluster_size
#                 large_cluster_count += 1
        if total_in_clusters > 2.0:
            row = {
                'experiment_no': experiment_no,
                'k': k,
                'consistency': consistency / total_in_clusters,
                'num_clusters': large_cluster_count
            }
            print(f'consistency for exp {experiment_no}, k {k}, num_clusters {large_cluster_count}: {consistency / total_in_clusters}')
            df_fid.loc[len(df_fid)] = list(row.values())
            
# df_fid.to_csv(f'fidelity/fid_kmeans_{clipping_mode_nondp}_{cifar_data}_{n_rows}_type={expl_type}_epochs={ep}_channel={channel}.csv')
df_fid.head(60)

KeyboardInterrupt: 

In [None]:
num_experiments=5
epochs=[5,10,30]
epochs_nondp = [5,10,30]
model="vit_small_patch16_224"
clipping_mode_dp='BK-MixOpt'
# clipping_mode_nondp='nonDP'
num_samples=20000
cifar_data='CIFAR10'
expl_type='ixg'
ep=10 # 10 epochs (for now)
epsilons=[0.5, 1.0, 2.0, 8.0]
# n_rows=100
n_rows=5000
channel=1

df_fid = pd.DataFrame(columns=['experiment_no', 'k', 'consistency', 'num_clusters', 'epsilon'])

for eps in epsilons:
    for experiment_no in tqdm(range(5)):
        exp_no = str(experiment_no)
        df_path = f'lira/attributions_{cifar_data}_{num_samples}/model={model}_mode={clipping_mode_dp}_eps={eps}_type={expl_type}_nsamples=20_epochs={ep}/channel{channel}/{exp_no}.csv'
        # randomly sample only 1000 examples to read in
        skip_rows = np.random.choice(np.arange(0, 20000), size=19000, replace=False)
#         print(skip_rows)
        df_channel1 = pd.read_csv(df_path,
    #                               skiprows=skip_rows,
    #                               header=None
                                  nrows=n_rows
                                 )
        indices_path = f'lira/indices_{cifar_data}_{num_samples}/model={model}_mode={clipping_mode_dp}_eps={eps}_epochs={ep}/{exp_no}.csv'
        preds_path = f'lira/preds_{cifar_data}_{num_samples}/model={model}_mode={clipping_mode_dp}_eps={eps}_type={expl_type}_nsamples=20_epochs={ep}/{exp_no}.csv'
        df_preds = pd.read_csv(preds_path,
    #                            skiprows=skip_rows,
    #                            header=None
                               nrows=n_rows
                              )
        df_indices = pd.read_csv(indices_path,
    #                              skiprows=skip_rows,
    #                              header=None
                                 nrows=n_rows
                                )
        k_list = [5, 10, 50, 100]
        for k in tqdm(k_list):
            kmeans = KMeans(n_clusters=k)
            kmeans.fit(df_channel1)
    #         linkage_matrix = linkage(df_channel1, method='complete')
    #         clusters = fcluster(linkage_matrix, dist, criterion='distance')
            clusters = kmeans.labels_
            df_preds[str(k)] = clusters
            consistency=0.0
            total_in_clusters=0.0
            large_cluster_count = 0
            for c in np.unique(clusters):
                df_preds_subset = df_preds.loc[df_preds[str(k)] == c]
                mode_label = df_preds_subset[exp_no].mode().iloc[0]
                mode_label_count = len(df_preds_subset.loc[df_preds_subset[exp_no] == mode_label])
                total_cluster_size = len(df_preds_subset)
                if total_cluster_size != 1.0:
                    print(total_cluster_size, mode_label_count / total_cluster_size)
                    consistency+=mode_label_count
                    total_in_clusters+=total_cluster_size
                    large_cluster_count += 1
            if total_in_clusters > 2.0:
                row = {
                    'experiment_no': experiment_no,
                    'k': k,
                    'consistency': consistency / total_in_clusters,
                    'num_clusters': large_cluster_count,
                    'epsilon': eps,
                }
                print(f'consistency for exp {experiment_no}, k {k}, eps {eps}, num_clusters {large_cluster_count}: {consistency / total_in_clusters}')
                df_fid.loc[len(df_fid)] = list(row.values())

df_fid.to_csv(f'fidelity/fid_kmeans_{clipping_mode_dp}_{cifar_data}_{n_rows}_type={expl_type}_epochs={ep}_channel={channel}.csv')
df_fid.head(60)

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  super()._check_params_vs_input(X, default_n_init=10)


4996 0.1088871096877502
consistency for exp 0, k 5, eps 0.5, num_clusters 1: 0.1088871096877502


  super()._check_params_vs_input(X, default_n_init=10)


4991 0.1087958324984973
consistency for exp 0, k 10, eps 0.5, num_clusters 1: 0.1087958324984973


  super()._check_params_vs_input(X, default_n_init=10)


2 1.0
4 0.5
4945 0.10940343781597574
3 0.3333333333333333
consistency for exp 0, k 50, eps 0.5, num_clusters 4: 0.1102139685102947


  super()._check_params_vs_input(X, default_n_init=10)


2 0.5
2 0.5
2 0.5
41 0.1951219512195122
2 0.5
25 0.24
4833 0.10883509207531554
consistency for exp 0, k 100, eps 0.5, num_clusters 7: 0.11086203382922356


  0%|          | 0/4 [00:00<?, ?it/s]

  super()._check_params_vs_input(X, default_n_init=10)


5 0.4
4762 0.11129777404451911
231 0.19913419913419914
consistency for exp 1, k 5, eps 0.5, num_clusters 3: 0.11564625850340136


  super()._check_params_vs_input(X, default_n_init=10)


4990 0.1094188376753507
2 1.0
consistency for exp 1, k 10, eps 0.5, num_clusters 2: 0.10977564102564102


  super()._check_params_vs_input(X, default_n_init=10)


4951 0.10927085437285397
consistency for exp 1, k 50, eps 0.5, num_clusters 1: 0.10927085437285397


  super()._check_params_vs_input(X, default_n_init=10)


6 0.5
4889 0.10922479034567396
2 0.5
2 0.5
3 0.3333333333333333
3 0.6666666666666666
2 0.5
consistency for exp 1, k 100, eps 0.5, num_clusters 7: 0.11065824332586101


  0%|          | 0/4 [00:00<?, ?it/s]

  super()._check_params_vs_input(X, default_n_init=10)


2 0.5
4995 0.10750750750750751
consistency for exp 2, k 5, eps 0.5, num_clusters 2: 0.10766459875925555


  super()._check_params_vs_input(X, default_n_init=10)


4991 0.10719294730514926
consistency for exp 2, k 10, eps 0.5, num_clusters 1: 0.10719294730514926


  super()._check_params_vs_input(X, default_n_init=10)


4951 0.10664512219753586
consistency for exp 2, k 50, eps 0.5, num_clusters 1: 0.10664512219753586


  super()._check_params_vs_input(X, default_n_init=10)


4900 0.1063265306122449
2 0.5
consistency for exp 2, k 100, eps 0.5, num_clusters 2: 0.10648714810281518


  0%|          | 0/4 [00:00<?, ?it/s]

  super()._check_params_vs_input(X, default_n_init=10)


4996 0.10548438751000801
consistency for exp 3, k 5, eps 0.5, num_clusters 1: 0.10548438751000801


  super()._check_params_vs_input(X, default_n_init=10)


4991 0.10559006211180125
consistency for exp 3, k 10, eps 0.5, num_clusters 1: 0.10559006211180125


  super()._check_params_vs_input(X, default_n_init=10)


2 0.5
4950 0.10565656565656566
consistency for exp 3, k 50, eps 0.5, num_clusters 2: 0.10581583198707593


  super()._check_params_vs_input(X, default_n_init=10)


4 0.75
3 0.3333333333333333
5 0.2
4481 0.10734211113590716
410 0.15121951219512195
3 0.3333333333333333
consistency for exp 3, k 100, eps 0.5, num_clusters 6: 0.11190379127598858


  0%|          | 0/4 [00:00<?, ?it/s]

  super()._check_params_vs_input(X, default_n_init=10)


4996 0.10608486789431545
consistency for exp 4, k 5, eps 0.5, num_clusters 1: 0.10608486789431545


  super()._check_params_vs_input(X, default_n_init=10)


8 0.25
4984 0.10634028892455859
consistency for exp 4, k 10, eps 0.5, num_clusters 2: 0.10657051282051282


  super()._check_params_vs_input(X, default_n_init=10)


47 0.2127659574468085
4905 0.1056065239551478
consistency for exp 4, k 50, eps 0.5, num_clusters 2: 0.10662358642972536


  super()._check_params_vs_input(X, default_n_init=10)


15 0.3333333333333333
2 0.5
27 0.2962962962962963
23 0.34782608695652173
922 0.11822125813449023
2 1.0
3916 0.11006128702757917
consistency for exp 4, k 100, eps 0.5, num_clusters 7: 0.11493784389647442


  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  super()._check_params_vs_input(X, default_n_init=10)


4996 0.10548438751000801
consistency for exp 0, k 5, eps 1.0, num_clusters 1: 0.10548438751000801


  super()._check_params_vs_input(X, default_n_init=10)


17 0.29411764705882354
4975 0.10532663316582914
consistency for exp 0, k 10, eps 1.0, num_clusters 2: 0.10596955128205128


  super()._check_params_vs_input(X, default_n_init=10)


4951 0.10583720460513028
consistency for exp 0, k 50, eps 1.0, num_clusters 1: 0.10583720460513028


  super()._check_params_vs_input(X, default_n_init=10)


3 0.3333333333333333
4899 0.10573586446213513
consistency for exp 0, k 100, eps 1.0, num_clusters 2: 0.105875152998776


  0%|          | 0/4 [00:00<?, ?it/s]

  super()._check_params_vs_input(X, default_n_init=10)


4996 0.10648518815052041
consistency for exp 1, k 5, eps 1.0, num_clusters 1: 0.10648518815052041


  super()._check_params_vs_input(X, default_n_init=10)


4991 0.10659186535764376
consistency for exp 1, k 10, eps 1.0, num_clusters 1: 0.10659186535764376


  super()._check_params_vs_input(X, default_n_init=10)


2 0.5
66 0.22727272727272727
193 0.15025906735751296
4693 0.10739399105050075
consistency for exp 1, k 50, eps 1.0, num_clusters 4: 0.11081953976584578


  super()._check_params_vs_input(X, default_n_init=10)


2 0.5
4900 0.1063265306122449
consistency for exp 1, k 100, eps 1.0, num_clusters 2: 0.10648714810281518


  0%|          | 0/4 [00:00<?, ?it/s]

  super()._check_params_vs_input(X, default_n_init=10)


4996 0.10468374699759808
consistency for exp 2, k 5, eps 1.0, num_clusters 1: 0.10468374699759808


  super()._check_params_vs_input(X, default_n_init=10)


4991 0.10458825886595872
consistency for exp 2, k 10, eps 1.0, num_clusters 1: 0.10458825886595872


  super()._check_params_vs_input(X, default_n_init=10)


133 0.21052631578947367
4540 0.10770925110132158
279 0.16487455197132617
2 0.5
consistency for exp 2, k 50, eps 1.0, num_clusters 4: 0.11384739604360113


  super()._check_params_vs_input(X, default_n_init=10)


4899 0.10491937130026537
2 1.0
2 0.5
consistency for exp 2, k 100, eps 1.0, num_clusters 3: 0.1054456455231491


  0%|          | 0/4 [00:00<?, ?it/s]

  super()._check_params_vs_input(X, default_n_init=10)


2 0.5
4995 0.1043043043043043
consistency for exp 3, k 5, eps 1.0, num_clusters 2: 0.10446267760656394


  super()._check_params_vs_input(X, default_n_init=10)


4991 0.10438789821679022
consistency for exp 3, k 10, eps 1.0, num_clusters 1: 0.10438789821679022


  super()._check_params_vs_input(X, default_n_init=10)


4 0.25
2 0.5
21 0.2857142857142857
4927 0.10432311751572965
consistency for exp 3, k 50, eps 1.0, num_clusters 4: 0.10536939846588615


  super()._check_params_vs_input(X, default_n_init=10)


48 0.25
4561 0.10830958123218593
294 0.17687074829931973
consistency for exp 3, k 100, eps 1.0, num_clusters 3: 0.11380787273098103


  0%|          | 0/4 [00:00<?, ?it/s]

  super()._check_params_vs_input(X, default_n_init=10)


4996 0.10488390712570056
consistency for exp 4, k 5, eps 1.0, num_clusters 1: 0.10488390712570056


  super()._check_params_vs_input(X, default_n_init=10)


11 0.18181818181818182
4981 0.10499899618550491
consistency for exp 4, k 10, eps 1.0, num_clusters 2: 0.10516826923076923


  super()._check_params_vs_input(X, default_n_init=10)


64 0.171875
209 0.18181818181818182
4680 0.10662393162393162
consistency for exp 4, k 50, eps 1.0, num_clusters 3: 0.11064001615182717


  super()._check_params_vs_input(X, default_n_init=10)


22 0.22727272727272727
8 0.25
50 0.2
4824 0.10634328358208955
consistency for exp 4, k 100, eps 1.0, num_clusters 4: 0.10807504078303426


  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  super()._check_params_vs_input(X, default_n_init=10)


16 0.25
8 0.375
4974 0.10494571773220748
consistency for exp 0, k 5, eps 2.0, num_clusters 3: 0.1058423369347739


  super()._check_params_vs_input(X, default_n_init=10)


22 0.22727272727272727
4970 0.10462776659959759
consistency for exp 0, k 10, eps 2.0, num_clusters 2: 0.10516826923076923


  super()._check_params_vs_input(X, default_n_init=10)


160 0.1875
4792 0.10580133555926544
consistency for exp 0, k 50, eps 2.0, num_clusters 2: 0.10844103392568659


  super()._check_params_vs_input(X, default_n_init=10)


6 0.3333333333333333
4647 0.10845706907682376
250 0.164
consistency for exp 0, k 100, eps 2.0, num_clusters 3: 0.11156434835814807


  0%|          | 0/4 [00:00<?, ?it/s]

  super()._check_params_vs_input(X, default_n_init=10)


4996 0.10568454763811048
consistency for exp 1, k 5, eps 2.0, num_clusters 1: 0.10568454763811048


  super()._check_params_vs_input(X, default_n_init=10)


7 0.42857142857142855
4985 0.10531594784353059
consistency for exp 1, k 10, eps 2.0, num_clusters 2: 0.10576923076923077


  super()._check_params_vs_input(X, default_n_init=10)


5 0.4
4947 0.10592278148372751
consistency for exp 1, k 50, eps 2.0, num_clusters 2: 0.10621970920840064


  super()._check_params_vs_input(X, default_n_init=10)


4901 0.10589675576412977
consistency for exp 1, k 100, eps 2.0, num_clusters 1: 0.10589675576412977


  0%|          | 0/4 [00:00<?, ?it/s]

  super()._check_params_vs_input(X, default_n_init=10)


10 0.4
7 0.42857142857142855
4981 0.1045974703874724
consistency for exp 2, k 5, eps 2.0, num_clusters 3: 0.1056422569027611


  super()._check_params_vs_input(X, default_n_init=10)


2 0.5
4990 0.10501002004008016
consistency for exp 2, k 10, eps 2.0, num_clusters 2: 0.10516826923076923


  super()._check_params_vs_input(X, default_n_init=10)


2 0.5
5 0.4
4946 0.10473109583501819
consistency for exp 2, k 50, eps 2.0, num_clusters 3: 0.10518877448011306


  super()._check_params_vs_input(X, default_n_init=10)


## Fidelity generation with hierarchical clustering

In [4]:
num_experiments=5
epochs=[5,10,30]
epochs_nondp = [5,10,30]
model="vit_small_patch16_224"
clipping_mode_dp='BK-MixOpt'
clipping_mode_nondp='nonDP'
num_samples=20000
cifar_data='CIFAR10'
expl_type='ixg'
ep=10 # 10 epochs (for now)
# n_rows=100
n_rows=5000
channel=1

df_fid = pd.DataFrame(columns=['experiment_no', 'max_distance', 'consistency', 'num_clusters'])
for experiment_no in tqdm(range(5)):
    exp_no = str(experiment_no)
    df_path = f'lira/attributions_{cifar_data}_{num_samples}/model={model}_mode={clipping_mode_nondp}_type={expl_type}_nsamples=20_epochs={ep}/channel{channel}/{exp_no}.csv'
    # randomly sample only 1000 examples to read in
    skip_rows = np.random.choice(np.arange(0, 20000), size=19000, replace=False)
    print(skip_rows)
    df_channel1 = pd.read_csv(df_path,
#                               skiprows=skip_rows,
#                               header=None
                              nrows=n_rows
                             )
    indices_path = f'lira/indices_{cifar_data}_{num_samples}/model={model}_mode={clipping_mode_nondp}_epochs={ep}/{exp_no}.csv'
    preds_path = f'lira/preds_{cifar_data}_{num_samples}/model={model}_mode={clipping_mode_nondp}_type={expl_type}_nsamples=20_epochs={ep}/{exp_no}.csv'
    df_preds = pd.read_csv(preds_path,
#                            skiprows=skip_rows,
#                            header=None
                           nrows=n_rows
                          )
    df_indices = pd.read_csv(indices_path,
#                              skiprows=skip_rows,
#                              header=None
                             nrows=n_rows
                            )
    max_distances = [0.1, 0.2, 0.3, 0.4]
    for dist in tqdm(max_distances):
        linkage_matrix = linkage(df_channel1, method='complete')
        clusters = fcluster(linkage_matrix, dist, criterion='distance')
        df_preds[str(dist)] = clusters
        consistency=0.0
        total_in_clusters=0.0
        large_cluster_count = 0
        for c in np.unique(clusters):
            df_preds_subset = df_preds.loc[df_preds[str(dist)] == c]
            mode_label = df_preds_subset[exp_no].mode().iloc[0]
            mode_label_count = len(df_preds_subset.loc[df_preds_subset[exp_no] == mode_label])
            total_cluster_size = len(df_preds_subset)
            if total_cluster_size != 1.0:
                print(total_cluster_size, mode_label_count / total_cluster_size)
                consistency+=mode_label_count
                total_in_clusters+=total_cluster_size
                large_cluster_count += 1
        if total_in_clusters > 2.0:
            row = {
                'experiment_no': experiment_no,
                'max_distance': dist,
                'consistency': consistency / total_in_clusters,
                'num_clusters': large_cluster_count
            }
            print(f'consistency for exp {experiment_no}, dist {dist}, num_clusters {large_cluster_count}: {consistency / total_in_clusters}')
            df_fid.loc[len(df_fid)] = list(row.values())
            
df_fid.to_csv(f'fidelity/fid_hierarchical_{clipping_mode_nondp}_{cifar_data}_{n_rows}_type={expl_type}_epochs={ep}_channel={channel}.csv')
df_fid.head(60)

  0%|          | 0/5 [00:00<?, ?it/s]

[17069  8229 13765 ...  7719  7226  2651]


  0%|          | 0/4 [00:00<?, ?it/s]

2 0.5
23 0.30434782608695654
2 0.5
2 0.5
3 0.3333333333333333
2 1.0
2 0.5
4 0.5
190 0.3473684210526316
2 1.0
2 0.5
2 0.5
2 0.5
2 0.5
2 0.5
18 0.3888888888888889
2 0.5
consistency for exp 0, dist 0.2, num_clusters 17: 0.3702290076335878
2 0.5
2 1.0
4 0.25
2 0.5
59 0.1694915254237288
2 0.5
2 0.5
2 0.5
2 0.5
3 0.6666666666666666
6 0.5
19 0.2631578947368421
5 0.4
4 0.75
5 0.2
19 0.3157894736842105
45 0.26666666666666666
904 0.23119469026548672
2 1.0
2 0.5
2 1.0
2 0.5
3 0.6666666666666666
3 0.6666666666666666
2 0.5
2 1.0
2 0.5
2 0.5
2 0.5
3 0.3333333333333333
2 0.5
4 0.5
2 0.5
6 0.5
2 0.5
10 0.3
2 0.5
3 0.6666666666666666
4 0.5
2 0.5
3 0.6666666666666666
2 0.5
3 0.3333333333333333
2 1.0
3 0.6666666666666666
consistency for exp 0, dist 0.3, num_clusters 45: 0.2611683848797251
2 0.5
2 1.0
2 1.0
2 0.5
3 0.6666666666666666
2 1.0
11 0.18181818181818182
4 0.5
3 0.3333333333333333
2 0.5
2 0.5
2 0.5
3 0.3333333333333333
2 1.0
2 0.5
9 0.3333333333333333
9 0.3333333333333333
4 0.5
4 0.25
17 0.1764705

  0%|          | 0/4 [00:00<?, ?it/s]

2 1.0
2 0.5
2 0.5
2 1.0
2 0.5
2 1.0
9 0.4444444444444444
4 0.25
2 0.5
24 0.20833333333333334
2 0.5
3 0.6666666666666666
133 0.2556390977443609
6 0.5
2 0.5
6 0.3333333333333333
2 0.5
2 1.0
consistency for exp 1, dist 0.2, num_clusters 18: 0.3188405797101449
2 1.0
2 1.0
2 1.0
2 0.5
2 1.0
3 0.6666666666666666
2 1.0
2 0.5
2 1.0
3 0.6666666666666666
2 0.5
3 0.3333333333333333
2 0.5
2 0.5
26 0.23076923076923078
5 0.6
2 1.0
680 0.19705882352941176
6 0.3333333333333333
3 0.3333333333333333
2 0.5
3 0.3333333333333333
2 0.5
2 0.5
2 0.5
2 0.5
2 1.0
3 0.3333333333333333
9 0.2222222222222222
3 0.3333333333333333
2 0.5
3 0.3333333333333333
10 0.3
5 0.4
5 0.4
2 0.5
9 0.2222222222222222
3 0.6666666666666666
4 0.5
8 0.375
2 1.0
3 0.6666666666666666
2 0.5
3 0.3333333333333333
2 0.5
2 1.0
2 1.0
2 1.0
47 0.19148936170212766
2 1.0
10 0.3
2 0.5
consistency for exp 1, dist 0.3, num_clusters 52: 0.2508214676889376
3 0.3333333333333333
2 0.5
2 0.5
7 0.5714285714285714
5 0.4
4 0.25
2 0.5
2 0.5
2 0.5
2 0.5
2 0.5

  0%|          | 0/4 [00:00<?, ?it/s]

3 1.0
consistency for exp 2, dist 0.1, num_clusters 1: 1.0
2 0.5
2 0.5
3 1.0
14 0.35714285714285715
20 0.3
2 0.5
3 0.6666666666666666
222 0.31981981981981983
2 0.5
4 0.75
4 0.5
2 0.5
3 0.3333333333333333
2 0.5
29 0.27586206896551724
2 1.0
consistency for exp 2, dist 0.2, num_clusters 16: 0.3449367088607595
2 0.5
2 0.5
2 0.5
2 1.0
2 1.0
2 1.0
2 1.0
2 0.5
8 0.375
2 0.5
5 0.4
7 0.42857142857142855
4 0.25
7 0.2857142857142857
1025 0.20097560975609757
3 0.3333333333333333
81 0.2345679012345679
4 0.25
6 0.3333333333333333
2 0.5
22 0.22727272727272727
2 0.5
2 1.0
2 1.0
3 0.3333333333333333
2 1.0
3 0.6666666666666666
3 0.3333333333333333
5 0.2
3 0.6666666666666666
3 1.0
2 0.5
2 0.5
14 0.21428571428571427
10 0.2
3 0.3333333333333333
2 1.0
2 0.5
2 1.0
2 1.0
3 0.6666666666666666
4 0.75
consistency for exp 2, dist 0.3, num_clusters 42: 0.23380726698262244
2 1.0
2 1.0
2 1.0
2 0.5
2 0.5
2 1.0
2 0.5
6 0.3333333333333333
2 1.0
2 0.5
2 0.5
2 0.5
2 1.0
2 0.5
26 0.2692307692307692
3 0.3333333333333333
2 

  0%|          | 0/4 [00:00<?, ?it/s]

2 1.0
3 0.3333333333333333
2 0.5
4 0.5
3 0.6666666666666666
2 1.0
2 1.0
3 0.6666666666666666
2 0.5
7 0.42857142857142855
3 0.6666666666666666
2 1.0
2 0.5
2 0.5
243 0.32510288065843623
2 0.5
15 0.26666666666666666
14 0.2857142857142857
34 0.2647058823529412
2 1.0
2 1.0
2 1.0
2 1.0
2 0.5
consistency for exp 3, dist 0.2, num_clusters 24: 0.3641456582633053
2 0.5
2 0.5
6 0.3333333333333333
6 0.16666666666666666
3 0.3333333333333333
397 0.17632241813602015
6 0.5
3 0.6666666666666666
4 0.25
2 0.5
2 0.5
4 0.25
2 0.5
4 0.25
16 0.1875
2 0.5
2 0.5
2 0.5
4 0.5
3 0.6666666666666666
2 0.5
2 0.5
2 1.0
3 0.6666666666666666
2 0.5
2 0.5
10 0.3
2 1.0
2 0.5
2 0.5
3 0.3333333333333333
2 1.0
2 0.5
2 0.5
2 0.5
2 0.5
2 1.0
2 0.5
2 1.0
2 0.5
2 0.5
4 0.5
4 0.5
7 0.5714285714285714
6 0.8333333333333334
2 0.5
2 1.0
4 0.75
2 0.5
5 1.0
2 0.5
3 0.3333333333333333
3 0.6666666666666666
3 0.3333333333333333
2 1.0
3 0.6666666666666666
709 0.2143864598025388
3 0.3333333333333333
4 0.5
12 0.3333333333333333
consistency f

  0%|          | 0/4 [00:00<?, ?it/s]

28 0.25
2 0.5
consistency for exp 4, dist 0.2, num_clusters 2: 0.26666666666666666
2 0.5
2 0.5
2 0.5
2 0.5
2 1.0
3 0.3333333333333333
2 0.5
3 0.3333333333333333
3 0.6666666666666666
3 0.6666666666666666
3 0.3333333333333333
219 0.2237442922374429
2 1.0
2 0.5
4 0.5
3 0.3333333333333333
7 0.2857142857142857
6 0.3333333333333333
6 0.3333333333333333
2 0.5
2 0.5
2 0.5
2 0.5
30 0.2
24 0.3333333333333333
3 0.3333333333333333
3 0.3333333333333333
2 0.5
3 1.0
3 0.6666666666666666
3 0.6666666666666666
2 1.0
2 1.0
2 0.5
2 1.0
2 0.5
2 0.5
consistency for exp 4, dist 0.3, num_clusters 37: 0.30517711171662126
4 0.75
2 1.0
2 0.5
2 0.5
2 0.5
2 0.5
2 1.0
2 0.5
11 0.36363636363636365
7 0.7142857142857143
2 0.5
2 0.5
2 1.0
2 0.5
2 0.5
2 0.5
2 1.0
3 0.3333333333333333
2 0.5
2 0.5
2 0.5
2 0.5
7 0.42857142857142855
3 0.3333333333333333
3 0.3333333333333333
3 0.3333333333333333
2 0.5
2 0.5
640 0.16875
5 0.6
156 0.17307692307692307
3 0.6666666666666666
2 0.5
2 0.5
2 0.5
2 0.5
2 0.5
4 0.5
8 0.5
2 0.5
2 1.0
4 

Unnamed: 0,experiment_no,max_distance,consistency,num_clusters
0,0.0,0.2,0.370229,17.0
1,0.0,0.3,0.261168,45.0
2,0.0,0.4,0.206064,60.0
3,1.0,0.2,0.318841,18.0
4,1.0,0.3,0.250821,52.0
5,1.0,0.4,0.188364,67.0
6,2.0,0.1,1.0,1.0
7,2.0,0.2,0.344937,16.0
8,2.0,0.3,0.233807,42.0
9,2.0,0.4,0.183318,47.0


In [None]:
num_experiments=5
epochs=[5,10,30]
epochs_nondp = [5,10,30]
model="vit_small_patch16_224"
clipping_mode_dp='BK-MixOpt'
# clipping_mode_nondp='nonDP'
num_samples=20000
cifar_data='CIFAR10'
expl_type='ixg'
ep=10 # 10 epochs (for now)
epsilons=[0.5, 1.0, 2.0, 8.0]
n_rows=5000
channel=1

df_fid = pd.DataFrame(columns=['experiment_no', 'max_distance', 'consistency', 'num_clusters', 'epsilon'])

for eps in epsilons:
    for experiment_no in tqdm(range(5)):
        exp_no = str(experiment_no)
        df_path = f'lira/attributions_{cifar_data}_{num_samples}/model={model}_mode={clipping_mode_dp}_eps={eps}_type={expl_type}_nsamples=20_epochs={ep}/channel{channel}/{exp_no}.csv'
        # randomly sample only 1000 examples to read in
        skip_rows = np.random.choice(np.arange(0, 20000), size=19000, replace=False)
#         print(skip_rows)
        df_channel1 = pd.read_csv(df_path,
    #                               skiprows=skip_rows,
    #                               header=None
                                  nrows=n_rows
                                 )
        indices_path = f'lira/indices_{cifar_data}_{num_samples}/model={model}_mode={clipping_mode_dp}_eps={eps}_epochs={ep}/{exp_no}.csv'
        preds_path = f'lira/preds_{cifar_data}_{num_samples}/model={model}_mode={clipping_mode_dp}_eps={eps}_type={expl_type}_nsamples=20_epochs={ep}/{exp_no}.csv'
        df_preds = pd.read_csv(preds_path,
    #                            skiprows=skip_rows,
    #                            header=None
                               nrows=n_rows
                              )
        df_indices = pd.read_csv(indices_path,
    #                              skiprows=skip_rows,
    #                              header=None
                                 nrows=n_rows
                                )
        max_distances = [1.0, 1.5, 2.0, 2.5]
        for dist in tqdm(max_distances):
            linkage_matrix = linkage(df_channel1, method='complete')
            clusters = fcluster(linkage_matrix, dist, criterion='distance')
            df_preds[str(dist)] = clusters
            consistency=0.0
            total_in_clusters=0.0
            large_cluster_count = 0
            for c in np.unique(clusters):
                df_preds_subset = df_preds.loc[df_preds[str(dist)] == c]
                mode_label = df_preds_subset[exp_no].mode().iloc[0]
                mode_label_count = len(df_preds_subset.loc[df_preds_subset[exp_no] == mode_label])
                total_cluster_size = len(df_preds_subset)
                if total_cluster_size != 1.0:
                    print(total_cluster_size, mode_label_count / total_cluster_size)
                    consistency+=mode_label_count
                    total_in_clusters+=total_cluster_size
                    large_cluster_count += 1
            if total_in_clusters > 2.0:
                row = {
                    'experiment_no': experiment_no,
                    'max_distance': dist,
                    'consistency': consistency / total_in_clusters,
                    'num_clusters': large_cluster_count,
                    'epsilon': eps,
                }
                print(f'consistency for exp {experiment_no}, dist {dist}, eps {eps}, num_clusters {large_cluster_count}: {consistency / total_in_clusters}')
                df_fid.loc[len(df_fid)] = list(row.values())

df_fid.to_csv(f'fidelity/fid_hierarchical_{clipping_mode_dp}_{cifar_data}_{n_rows}_type={expl_type}_epochs={ep}_channel={channel}.csv')
df_fid.head(60)