# CNN Clustering Significance Stability over Multiple Trainings

### Before Running on Perceptron

Open `src/train_nn.py`, go to line 46, and change the path `./models/` to `/scratch/<your username>/models`.

Do not forget to undo this change!

DO NOT COMMIT THIS CHANGE TO GITHUB!!!

In [1]:
%load_ext autoreload
%autoreload 1

In [2]:
import sys
sys.path.append('..')

import random
import subprocess
from multiprocessing import Pool

import numpy as np
import matplotlib.pylab as plt
import pandas as pd
from tqdm.notebook import tqdm, trange
from IPython import display

from src.visualization import draw_mlp_clustering_report, run_double_spectral_cluster, run_spectral_cluster
from src.utils import get_weights_paths, build_clustering_results, cohen_d_stats
from src.experiment_tagging import get_model_path, MODEL_TAG_LOOKUP
# from src.spectral_cluster_model import SHUFFLE_METHODS
# Shuffle methods analysed in this notebook, listed locally instead of using the
# commented-out import above. Each name is passed as `shuffle_method` to
# `run_spectral_cluster` and becomes a top-level key of `results`.
SHUFFLE_METHODS = ['layer', 'layer_nonzero']

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [3]:
# Number of trainings per model tag (also the number of trained models analysed per tag).
N_TRAINED_MODELS = 5

# we are doing 30 experiments (=6 models x 5 times)
# we do 40 shuffles
N_SHUFFLES = 40

# Number of GPUs the training commands are distributed over.
N_GPUS = 1

# Training command for every model tag, run from the repository root.
TAGS_COMMANDS = {
    'CNN:MNIST': 'python -m src.train_nn with cnn_config dataset_name=mnist',
    'CNN:FASHION': 'python -m src.train_nn with cnn_config dataset_name=fashion',
    'CNN:CIFAR10': 'python -m src.train_nn with cnn_config dataset_name=cifar10',
    'CNN:MNIST+DROPOUT': 'python -m src.train_nn with cnn_config dataset_name=mnist with_dropout=True',
    'CNN:FASHION+DROPOUT': 'python -m src.train_nn with cnn_config dataset_name=fashion with_dropout=True',
    'CNN:CIFAR10+DROPOUT': 'python -m src.train_nn with cnn_config dataset_name=cifar10 with_dropout=True'
}

_DATASET_ARG_PREFIX = 'dataset_name='


def _dataset_name_from_command(command):
    """Return the value of the `dataset_name=<value>` argument of `command`.

    The argument is located by its name rather than by its position in the
    command, so reordering or adding arguments cannot silently yield a wrong tag.

    Raises:
        ValueError: if `command` has no `dataset_name=` argument.
    """
    for token in command.split():
        if token.startswith(_DATASET_ARG_PREFIX):
            return token[len(_DATASET_ARG_PREFIX):]
    raise ValueError(f'no {_DATASET_ARG_PREFIX}<name> argument in command: {command!r}')


# Dataset name of every entry of TAGS_COMMANDS, in the same order.
DATASETS_TAGS = [_dataset_name_from_command(command) for command in TAGS_COMMANDS.values()]

In [4]:
if False:  # training is disabled; switch to True to (re)train every model
    def train_model_multiply(command, n_models_per_command, gpu_id=None):
        """Run the shell `command` from the parent directory `n_models_per_command` times.

        When `gpu_id` is given it is exported to the child process as
        CUDA_VISIBLE_DEVICES; with `gpu_id=None` the variable is left as inherited.
        """
        import os  # local import so this cell does not depend on the import cell

        # Extend the current environment instead of replacing it: passing a
        # one-entry dict as `env` would strip PATH, HOME, etc. from the child shell.
        child_env = dict(os.environ)
        if gpu_id is not None:
            child_env['CUDA_VISIBLE_DEVICES'] = str(gpu_id)

        actual_command = f'cd .. && {command}' #' > log.log 2>&1'
        for _ in range(n_models_per_command):
            print(actual_command)
            subprocess.run(actual_command, shell=True, env=child_env)


    def train_models(commands, n_models_per_command, gpu_id=None):
        """Train `n_models_per_command` models for each command, one command after another."""
        for command in commands:
            train_model_multiply(command, n_models_per_command, gpu_id)

    commands = list(TAGS_COMMANDS.values())
    random.shuffle(commands)

    # Every GPU must receive the same number of commands.
    assert len(commands) % N_GPUS == 0
    n_commands_per_gpu = len(commands) // N_GPUS

    with Pool(N_GPUS) as p:

        # One worker per GPU; each worker gets a contiguous slice of the shuffled commands.
        p.starmap(train_models,
                 [(commands[gpu_id*n_commands_per_gpu : (gpu_id+1)*n_commands_per_gpu], N_TRAINED_MODELS, gpu_id)
                 for gpu_id in range(N_GPUS)])

cd .. && python -m src.train_nn with cnn_config dataset_name=mnist with_dropout=True
cd .. && python -m src.train_nn with cnn_config dataset_name=mnist with_dropout=True
cd .. && python -m src.train_nn with cnn_config dataset_name=mnist with_dropout=True
cd .. && python -m src.train_nn with cnn_config dataset_name=mnist with_dropout=True
cd .. && python -m src.train_nn with cnn_config dataset_name=mnist with_dropout=True
cd .. && python -m src.train_nn with cnn_config dataset_name=cifar10
cd .. && python -m src.train_nn with cnn_config dataset_name=cifar10
cd .. && python -m src.train_nn with cnn_config dataset_name=cifar10
cd .. && python -m src.train_nn with cnn_config dataset_name=cifar10
cd .. && python -m src.train_nn with cnn_config dataset_name=cifar10
cd .. && python -m src.train_nn with cnn_config dataset_name=mnist
cd .. && python -m src.train_nn with cnn_config dataset_name=mnist
cd .. && python -m src.train_nn with cnn_config dataset_name=mnist
cd .. && python -m src.train_

In [6]:
# results[shuffle_method][model_tag] is a list holding one `run_spectral_cluster`
# output per analysed trained model.
results = {}

for shuffle_method in SHUFFLE_METHODS:
    results_of_method = {}
    results[shuffle_method] = results_of_method

    # TAGS_COMMANDS and DATASETS_TAGS are walked in lockstep; the outer progress
    # bar advances once per model tag.
    for model_tag, dataset_tag in zip(TAGS_COMMANDS, tqdm(DATASETS_TAGS)):
        model_paths = get_model_path(model_tag, filter_='all')
        weights_filename = f'{dataset_tag}-cnn-pruned-weights.pckl'

        # Only the last N_TRAINED_MODELS paths are analysed.
        results_of_method[model_tag] = [
            run_spectral_cluster(path / weights_filename,
                                 n_samples=N_SHUFFLES,
                                 shuffle_method=shuffle_method)
            for path in tqdm(model_paths[-N_TRAINED_MODELS:])
        ]

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=6.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))





HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=6.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))





In [7]:
def build_all_models_per_shuffle_method_df(single_shuffle_method_results, n_shuffles=None):
    """Collect the metrics of every trained model into one DataFrame.

    Args:
        single_shuffle_method_results: mapping from model tag to a sequence of
            `(anything, metrics)` pairs; only `metrics` is used, and it must
            provide a 'percentile' entry.
        n_shuffles: number of shuffles the percentile is based on. Defaults to
            the notebook-level `N_SHUFFLES`.

    Returns:
        A DataFrame with one row per trained model (index 0..n-1), the metrics as
        columns, a 'model' column holding the model tag, and a boolean 'is_sig'
        column that is True where 'percentile' equals `1 / (n_shuffles + 1)`.
        NOTE(review): presumably that is the smallest attainable percentile -
        confirm against `run_spectral_cluster`.
    """
    if n_shuffles is None:
        n_shuffles = N_SHUFFLES

    model_dfs = [
        pd.DataFrame([metrics for _, metrics in model_results]).assign(model=model_tag)
        for model_tag, model_results in single_shuffle_method_results.items()
    ]

    df = pd.concat(model_dfs, ignore_index=True)

    # Percentiles are floats, so compare with a tolerance rather than with `==`.
    df['is_sig'] = np.isclose(df['percentile'], 1 / (n_shuffles + 1))

    return df

# One metrics table per shuffle method, built in the order ('layer', 'layer_nonzero').
layer_df, layer_nonzero_df = (
    build_all_models_per_shuffle_method_df(results[method_name])
    for method_name in ('layer', 'layer_nonzero')
)

In [8]:
# Preview the first rows of the metrics table built from results['layer'].
layer_df.head()

Unnamed: 0,ncut,ave_in_out,n_samples,mean,stdev,z_score,percentile,train_acc,train_loss,test_acc,test_loss,model,is_sig
0,0.486664,3.609615,40,0.330076,0.038982,4.016928,1.0,0.99995,0.000733,0.9901,0.039492,CNN:MNIST,False
1,0.315247,5.844222,40,0.331046,0.035524,-0.444726,0.195122,0.999967,0.000744,0.9899,0.039181,CNN:MNIST,False
2,0.501487,3.488143,40,0.336751,0.037949,4.340922,1.0,0.999867,0.00094,0.9877,0.044666,CNN:MNIST,False
3,0.319131,5.767016,40,0.338664,0.031092,-0.628243,0.146341,1.0,0.000629,0.9888,0.045396,CNN:MNIST,False
4,0.491144,3.572124,40,0.339505,0.023668,6.406966,1.0,0.99995,0.000703,0.989,0.042868,CNN:MNIST,False


In [9]:
# Preview the first rows of the metrics table built from results['layer_nonzero'].
layer_nonzero_df.head()

Unnamed: 0,ncut,ave_in_out,n_samples,mean,stdev,z_score,percentile,train_acc,train_loss,test_acc,test_loss,model,is_sig
0,0.486664,3.609615,40,0.4941,0.012158,-0.611626,0.243902,0.99995,0.000733,0.9901,0.039492,CNN:MNIST,False
1,0.315247,5.844222,40,0.334218,0.079026,-0.240056,0.04878,0.999967,0.000744,0.9899,0.039181,CNN:MNIST,False
2,0.501487,3.488143,40,0.497428,0.070506,0.057558,0.560976,0.999867,0.00094,0.9877,0.044666,CNN:MNIST,False
3,0.319131,5.767016,40,0.318392,0.021595,0.034227,0.95122,1.0,0.000629,0.9888,0.045396,CNN:MNIST,False
4,0.491144,3.572124,40,0.493979,0.01591,-0.178166,0.414634,0.99995,0.000703,0.989,0.042868,CNN:MNIST,False


In [10]:
# Columns describing the trained model itself; they do not depend on the shuffle method.
MODEL_DATA_COLUMNS = ['model',
                      'train_acc', 'train_loss', 'test_acc', 'test_loss',
                      'ncut', 'ave_in_out',
                      'n_samples']

# Columns describing the comparison against the shuffled networks; kept once per method.
SHUFFLING_DATA_COLUMNS = ['mean', 'percentile', 'stdev', 'z_score', 'is_sig']

# The merges below align rows purely by index, so both tables must describe the
# same models in the same order. Checked once: nothing below mutates either table.
assert layer_df[MODEL_DATA_COLUMNS].equals(layer_nonzero_df[MODEL_DATA_COLUMNS]), \
    'layer_df and layer_nonzero_df do not describe the same models'

# Shuffle statistics of both methods side by side, suffixed with the method name.
two_shuffling_method_df = pd.merge(layer_df[SHUFFLING_DATA_COLUMNS],
                                     layer_nonzero_df[SHUFFLING_DATA_COLUMNS],
                                     left_index=True, right_index=True,
                                     suffixes=('_layer', '_layer_nonzero'))

# Model columns (taken from layer_df) followed by the shuffle statistics of both methods.
df = pd.merge(layer_df[MODEL_DATA_COLUMNS], two_shuffling_method_df,
              left_index=True, right_index=True,)


def _cohen_d_between_shuffle_methods(row):
    """Apply `cohen_d_stats` to the two shuffle distributions of one model row.

    Each distribution is passed as (mean, stdev, n_samples); both methods share
    the row's single 'n_samples' value.
    """
    return cohen_d_stats(row['mean_layer'], row['stdev_layer'], row['n_samples'],
                         row['mean_layer_nonzero'], row['stdev_layer_nonzero'], row['n_samples'])


df['cohen_d'] = df.apply(_cohen_d_between_shuffle_methods, axis=1)

df

Unnamed: 0,model,train_acc,train_loss,test_acc,test_loss,ncut,ave_in_out,n_samples,mean_layer,percentile_layer,stdev_layer,z_score_layer,is_sig_layer,mean_layer_nonzero,percentile_layer_nonzero,stdev_layer_nonzero,z_score_layer_nonzero,is_sig_layer_nonzero,cohen_d
0,CNN:MNIST,0.99995,0.000733,0.9901,0.039492,0.486664,3.609615,40,0.330076,1.0,0.038982,4.016928,False,0.4941,0.243902,0.012158,-0.611626,False,0.332024
1,CNN:MNIST,0.999967,0.000744,0.9899,0.039181,0.315247,5.844222,40,0.331046,0.195122,0.035524,-0.444726,False,0.334218,0.04878,0.079026,-0.240056,False,0.159385
2,CNN:MNIST,0.999867,0.00094,0.9877,0.044666,0.501487,3.488143,40,0.336751,1.0,0.037949,4.340922,False,0.497428,0.560976,0.070506,0.057558,False,0.166434
3,CNN:MNIST,1.0,0.000629,0.9888,0.045396,0.319131,5.767016,40,0.338664,0.146341,0.031092,-0.628243,False,0.318392,0.95122,0.021595,0.034227,False,0.305234
4,CNN:MNIST,0.99995,0.000703,0.989,0.042868,0.491144,3.572124,40,0.339505,1.0,0.023668,6.406966,False,0.493979,0.414634,0.01591,-0.178166,False,0.328154
5,CNN:FASHION,0.99085,0.032584,0.9193,0.335111,0.490673,3.576031,40,0.30199,1.0,0.017349,10.875814,False,0.490627,0.585366,0.002221,0.02069,False,0.483302
6,CNN:CIFAR10,0.80548,0.570816,0.6183,1.227513,0.45865,3.860626,40,0.333917,0.97561,0.054087,2.30617,False,0.472424,0.439024,0.047159,-0.29209,False,0.187902
7,CNN:CIFAR10,0.79386,0.60253,0.618,1.175385,0.523723,3.318811,40,0.440204,0.707317,0.108956,0.766536,False,0.519404,0.268293,0.031938,0.135244,False,0.259662
8,CNN:CIFAR10,0.81548,0.544781,0.6217,1.219707,0.470916,3.747046,40,0.43713,0.585366,0.154304,0.218953,False,0.410612,0.97561,0.017777,3.39214,False,0.293978
9,CNN:CIFAR10,0.82166,0.528239,0.6225,1.209161,0.504985,3.460512,40,0.306165,1.0,0.023737,8.376005,False,0.48603,0.634146,0.030281,0.625974,False,0.228896


In [11]:
# df = pd.read_csv('../results/cnn-clustering-stability-two-shuffling-methods-all-samples.csv', index_col=0)

In [12]:
df_grpby_model = df.groupby('model')

# Column names have the form '<stat>_<shuffle_method>'; the statistic varies
# slowest, so the columns of both methods for one statistic are adjacent.
TWO_SHUFFLE_STATS_COLUMNS = []
for stat in ['mean', 'stdev', 'z_score', 'percentile', 'is_sig']:
    for shuffle_method in SHUFFLE_METHODS:
        TWO_SHUFFLE_STATS_COLUMNS.append(f'{stat}_{shuffle_method}')

# Per model tag: mean and std over the trained models of ncut and every shuffle statistic ...
mean_std_per_model = df_grpby_model[['ncut'] + TWO_SHUFFLE_STATS_COLUMNS].agg(['mean', 'std'])
# ... the number of trained models ...
n_models_per_model = df_grpby_model.size().rename('n_models')
# ... and the mean train/test accuracy.
mean_acc_per_model = df_grpby_model[['train_acc', 'test_acc']].agg('mean')

statistics_df = pd.concat([mean_std_per_model, n_models_per_model, mean_acc_per_model],
                          axis=1)

In [13]:
# Display the per-model-tag summary: mean/std of ncut and of the shuffle
# statistics, the number of trained models, and the mean train/test accuracy.
statistics_df

Unnamed: 0_level_0,"(ncut, mean)","(ncut, std)","(mean_layer, mean)","(mean_layer, std)","(mean_layer_nonzero, mean)","(mean_layer_nonzero, std)","(stdev_layer, mean)","(stdev_layer, std)","(stdev_layer_nonzero, mean)","(stdev_layer_nonzero, std)",...,"(percentile_layer, std)","(percentile_layer_nonzero, mean)","(percentile_layer_nonzero, std)","(is_sig_layer, mean)","(is_sig_layer, std)","(is_sig_layer_nonzero, mean)","(is_sig_layer_nonzero, std)",n_models,train_acc,test_acc
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
CNN:CIFAR10,0.635806,0.328032,0.396956,0.071866,0.628368,0.351602,0.077264,0.05331,0.073886,0.094708,...,0.194358,0.556098,0.26796,0.0,0.0,0.0,0.0,5,0.80566,0.62048
CNN:CIFAR10+DROPOUT,0.311777,0.093464,0.273218,0.041664,0.325572,0.09535,0.025436,0.022269,0.020757,0.019053,...,0.406301,0.458537,0.44821,0.0,0.0,0.0,0.0,5,0.507384,0.58398
CNN:FASHION,0.490673,,0.30199,,0.490627,,0.017349,,0.002221,,...,,0.585366,,0.0,,0.0,,1,0.99085,0.9193
CNN:FASHION+DROPOUT,0.292595,0.004229,0.302949,0.003882,0.354858,0.099046,0.022093,0.00323,0.075312,0.067135,...,0.027809,0.107317,0.145934,0.0,0.0,0.4,0.547723,5,0.85993,0.9015
CNN:MNIST,0.422735,0.096509,0.335208,0.004372,0.427623,0.09267,0.033443,0.006253,0.039839,0.032202,...,0.454536,0.443902,0.342073,0.0,0.0,0.0,0.0,5,0.999947,0.9891
CNN:MNIST+DROPOUT,0.292566,0.032941,0.312917,0.004185,0.300924,0.020703,0.032124,0.008799,0.01615,0.019671,...,0.060731,0.263415,0.163978,0.2,0.447214,0.0,0.0,5,0.967953,0.98834


In [14]:
# TODO: ask for a code review
# std = np.sqrt(np.mean(x**2) - np.mean(x)**2)

def calc_overall_stdev(row, suffix=''):
    """Pool the per-model shuffle distributions of one group into a single SD.

    Args:
        row: the rows of one group (e.g. one model tag); must provide the columns
            `mean{suffix}` and `stdev{suffix}`, one value per trained model.
        suffix: column-name suffix selecting the shuffle method, e.g. '_layer'.

    Returns:
        The standard deviation of the equally weighted mixture of the per-model
        distributions: sqrt(mean(stdev_i**2 + (mean_i - overall_mean)**2)).
        NOTE(review): this treats every `stdev` as a population (ddof=0)
        standard deviation - confirm how `run_spectral_cluster` computes it.
    """
    means = np.asarray(row[f'mean{suffix}'], dtype=float)
    stdevs = np.asarray(row[f'stdev{suffix}'], dtype=float)

    # Algebraically equal to mean(stdev**2 + mean**2) - mean(mean)**2, but the
    # centred form sums only non-negative terms: it cannot produce a slightly
    # negative radicand (and hence NaN) through floating-point cancellation.
    deviations_from_overall_mean = means - np.mean(means)
    return np.sqrt(np.mean(stdevs**2 + deviations_from_overall_mean**2))

In [15]:
SHUFFLE_METHOD_SUFFIX = ['_layer', '_layer_nonzero']

# Pooled standard deviation per model tag, one column per shuffle method.
for suffix in SHUFFLE_METHOD_SUFFIX:
    overall_std_column = f'overall_std{suffix}'
    statistics_df[overall_std_column] = df_grpby_model.apply(calc_overall_stdev, suffix)

# A model tag is flagged stable only when the mean of its `is_sig` flags is 1,
# i.e. every one of its trained models is significant.
for suffix in SHUFFLE_METHOD_SUFFIX:
    fraction_significant = statistics_df[(f'is_sig{suffix}', 'mean')]
    statistics_df[f'is_stable{suffix}'] = fraction_significant.eq(1)

# Order the rows the way the model tags are declared in TAGS_COMMANDS.
statistics_df = statistics_df.loc[list(TAGS_COMMANDS)]

In [16]:
# Display the summary table, now including the `overall_std_*` and
# `is_stable_*` columns and ordered by TAGS_COMMANDS.
statistics_df

Unnamed: 0_level_0,"(ncut, mean)","(ncut, std)","(mean_layer, mean)","(mean_layer, std)","(mean_layer_nonzero, mean)","(mean_layer_nonzero, std)","(stdev_layer, mean)","(stdev_layer, std)","(stdev_layer_nonzero, mean)","(stdev_layer_nonzero, std)",...,"(is_sig_layer, std)","(is_sig_layer_nonzero, mean)","(is_sig_layer_nonzero, std)",n_models,train_acc,test_acc,overall_std_layer,overall_std_layer_nonzero,is_stable_layer,is_stable_layer_nonzero
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
CNN:MNIST,0.422735,0.096509,0.335208,0.004372,0.427623,0.09267,0.033443,0.006253,0.039839,0.032202,...,0.0,0.0,0.0,5,0.999947,0.9891,0.034132,0.096368,False,False
CNN:FASHION,0.490673,,0.30199,,0.490627,,0.017349,,0.002221,,...,,0.0,,1,0.99085,0.9193,0.017349,0.002221,False,False
CNN:CIFAR10,0.635806,0.328032,0.396956,0.071866,0.628368,0.351602,0.077264,0.05331,0.073886,0.094708,...,0.0,0.0,0.0,5,0.80566,0.62048,0.111243,0.333967,False,False
CNN:MNIST+DROPOUT,0.292566,0.032941,0.312917,0.004185,0.300924,0.020703,0.032124,0.008799,0.01615,0.019671,...,0.447214,0.0,0.0,5,0.967953,0.98834,0.033285,0.030221,False,False
CNN:FASHION+DROPOUT,0.292595,0.004229,0.302949,0.003882,0.354858,0.099046,0.022093,0.00323,0.075312,0.067135,...,0.0,0.4,0.547723,5,0.85993,0.9015,0.02255,0.130865,False,False
CNN:CIFAR10+DROPOUT,0.311777,0.093464,0.273218,0.041664,0.325572,0.09535,0.025436,0.022269,0.020757,0.019053,...,0.0,0.0,0.0,5,0.507384,0.58398,0.04932,0.089413,False,False


In [17]:
# Write the per-model table and the per-tag summary to the results directory.
for frame, csv_path in [
    (df, '../results/cnn-clustering-stability-two-shuffling-methods-all-samples.csv'),
    (statistics_df, '../results/cnn-clustering-stability-two-shuffling-methods-statistics.csv'),
]:
    frame.to_csv(csv_path)

### Model Stability Statistics

In [18]:
# Display the per-model-tag summary table (the same frame that was written to CSV).
statistics_df

Unnamed: 0_level_0,"(ncut, mean)","(ncut, std)","(mean_layer, mean)","(mean_layer, std)","(mean_layer_nonzero, mean)","(mean_layer_nonzero, std)","(stdev_layer, mean)","(stdev_layer, std)","(stdev_layer_nonzero, mean)","(stdev_layer_nonzero, std)",...,"(is_sig_layer, std)","(is_sig_layer_nonzero, mean)","(is_sig_layer_nonzero, std)",n_models,train_acc,test_acc,overall_std_layer,overall_std_layer_nonzero,is_stable_layer,is_stable_layer_nonzero
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
CNN:MNIST,0.422735,0.096509,0.335208,0.004372,0.427623,0.09267,0.033443,0.006253,0.039839,0.032202,...,0.0,0.0,0.0,5,0.999947,0.9891,0.034132,0.096368,False,False
CNN:FASHION,0.490673,,0.30199,,0.490627,,0.017349,,0.002221,,...,,0.0,,1,0.99085,0.9193,0.017349,0.002221,False,False
CNN:CIFAR10,0.635806,0.328032,0.396956,0.071866,0.628368,0.351602,0.077264,0.05331,0.073886,0.094708,...,0.0,0.0,0.0,5,0.80566,0.62048,0.111243,0.333967,False,False
CNN:MNIST+DROPOUT,0.292566,0.032941,0.312917,0.004185,0.300924,0.020703,0.032124,0.008799,0.01615,0.019671,...,0.447214,0.0,0.0,5,0.967953,0.98834,0.033285,0.030221,False,False
CNN:FASHION+DROPOUT,0.292595,0.004229,0.302949,0.003882,0.354858,0.099046,0.022093,0.00323,0.075312,0.067135,...,0.0,0.4,0.547723,5,0.85993,0.9015,0.02255,0.130865,False,False
CNN:CIFAR10+DROPOUT,0.311777,0.093464,0.273218,0.041664,0.325572,0.09535,0.025436,0.022269,0.020757,0.019053,...,0.0,0.0,0.0,5,0.507384,0.58398,0.04932,0.089413,False,False


### Appendix - All Samples

In [19]:
# Display the full per-trained-model table: one row per trained model, with the
# statistics of both shuffle methods side by side.
df

Unnamed: 0,model,train_acc,train_loss,test_acc,test_loss,ncut,ave_in_out,n_samples,mean_layer,percentile_layer,stdev_layer,z_score_layer,is_sig_layer,mean_layer_nonzero,percentile_layer_nonzero,stdev_layer_nonzero,z_score_layer_nonzero,is_sig_layer_nonzero,cohen_d
0,CNN:MNIST,0.99995,0.000733,0.9901,0.039492,0.486664,3.609615,40,0.330076,1.0,0.038982,4.016928,False,0.4941,0.243902,0.012158,-0.611626,False,0.332024
1,CNN:MNIST,0.999967,0.000744,0.9899,0.039181,0.315247,5.844222,40,0.331046,0.195122,0.035524,-0.444726,False,0.334218,0.04878,0.079026,-0.240056,False,0.159385
2,CNN:MNIST,0.999867,0.00094,0.9877,0.044666,0.501487,3.488143,40,0.336751,1.0,0.037949,4.340922,False,0.497428,0.560976,0.070506,0.057558,False,0.166434
3,CNN:MNIST,1.0,0.000629,0.9888,0.045396,0.319131,5.767016,40,0.338664,0.146341,0.031092,-0.628243,False,0.318392,0.95122,0.021595,0.034227,False,0.305234
4,CNN:MNIST,0.99995,0.000703,0.989,0.042868,0.491144,3.572124,40,0.339505,1.0,0.023668,6.406966,False,0.493979,0.414634,0.01591,-0.178166,False,0.328154
5,CNN:FASHION,0.99085,0.032584,0.9193,0.335111,0.490673,3.576031,40,0.30199,1.0,0.017349,10.875814,False,0.490627,0.585366,0.002221,0.02069,False,0.483302
6,CNN:CIFAR10,0.80548,0.570816,0.6183,1.227513,0.45865,3.860626,40,0.333917,0.97561,0.054087,2.30617,False,0.472424,0.439024,0.047159,-0.29209,False,0.187902
7,CNN:CIFAR10,0.79386,0.60253,0.618,1.175385,0.523723,3.318811,40,0.440204,0.707317,0.108956,0.766536,False,0.519404,0.268293,0.031938,0.135244,False,0.259662
8,CNN:CIFAR10,0.81548,0.544781,0.6217,1.219707,0.470916,3.747046,40,0.43713,0.585366,0.154304,0.218953,False,0.410612,0.97561,0.017777,3.39214,False,0.293978
9,CNN:CIFAR10,0.82166,0.528239,0.6225,1.209161,0.504985,3.460512,40,0.306165,1.0,0.023737,8.376005,False,0.48603,0.634146,0.030281,0.625974,False,0.228896


### Unpruned results without shuffling
#### TODO: Refactor
- [ ] Combine with the unpruned table

In [None]:
# unpruned_results[model_tag] is a list holding one `run_spectral_cluster`
# output per analysed trained model; no shuffling is done for the unpruned weights.
unpruned_results = {}

for (model_tag, _), dataset_tag in zip(TAGS_COMMANDS.items(), tqdm(DATASETS_TAGS)):
    model_paths = get_model_path(model_tag, filter_='all')
    # These are CNN models, so the weights file carries the `cnn` infix, like the
    # `-cnn-pruned-weights.pckl` files used for the pruned analysis (was `mlp`).
    # NOTE(review): confirm the file name against what src/train_nn writes.
    unpruned_results[model_tag] = [run_spectral_cluster(path / f'{dataset_tag}-cnn-unpruned-weights.pckl',
                                               with_shuffle=False)
            for path in tqdm(model_paths[-N_TRAINED_MODELS:])]

# One DataFrame per model tag, tagged with the model name and then stacked.
unpruned_model_dfs = []

for model_tag, model_results in unpruned_results.items():
    # Each result is a pair; only its second element (the metrics) is tabulated.
    _, metrics  = zip(*model_results)
    unpruned_model_dfs.append(pd.DataFrame(metrics).assign(model=model_tag))

unpruned_df = pd.concat(unpruned_model_dfs, ignore_index=True)

unpruned_df_grpby_model = unpruned_df.groupby('model')


# Per model tag: mean/std of ncut, number of trained models, mean train/test accuracy.
unpruned_statistics_df = pd.concat([(unpruned_df_grpby_model[['ncut']]
                             .agg(['mean', 'std'])),
                            unpruned_df_grpby_model.size().rename('n_models'),
                            (unpruned_df_grpby_model[['train_acc', 'test_acc']]
                             .agg('mean'))],
                           axis=1)

# Order the rows the way the model tags are declared in TAGS_COMMANDS.
unpruned_statistics_df = unpruned_statistics_df.loc[TAGS_COMMANDS.keys(), :]

unpruned_statistics_df

In [None]:
# Write the unpruned per-model table and its per-tag summary to the results directory.
for frame, csv_path in [
    (unpruned_df, '../results/cnn-clustering-stability-all-samples-unpruned.csv'),
    (unpruned_statistics_df, '../results/cnn-clustering-stability-statistics-unpruned.csv'),
]:
    frame.to_csv(csv_path)