# MLP Clustering Significance Stability over Multiple Trainings

### Before Running on Perceptron

Go to line 46 of `src/train_nn.py` and change the path `./models/` to `/scratch/<your username>/models`

Do not forget to undo this change!

DO NOT COMMIT THIS CHANGE TO GITHUB!!!

In [1]:
%load_ext autoreload
%autoreload 1

In [2]:
import sys
sys.path.append('..')

import random
import subprocess
from multiprocessing import Pool

import numpy as np
import matplotlib.pylab as plt
import pandas as pd
from tqdm import tqdm, trange
from IPython import display

from src.visualization import draw_mlp_clustering_report, run_double_spectral_cluster, run_spectral_cluster
from src.utils import get_weights_paths, build_clustering_results, cohen_d_stats
from src.experiment_tagging import get_model_path, MODEL_TAG_LOOKUP
# from src.spectral_cluster_model import SHUFFLE_METHODS
SHUFFLE_METHODS = ['layer']#, 'layer_nonzero']

In [3]:
# Directory passed to get_model_path as `model_base_path` when loading the trained models.
MODEL_BASE_PATH = '/scratch/shlomi/models/'

# Number of models trained per command, and the number of most recent
# model paths per tag that are clustered.
N_TRAINED_MODELS = 5

# NOTE(review): the reasoning below does not match the values currently set
# in this cell (N_TRAINED_MODELS = 5, N_SHUFFLES = 50) - confirm whether it
# is still the intended rationale.
# We are doing 320 experiments (= 32 models x 10 times),
# and we want the expected number of false positives to be <= 1,
# so we can set n_shuffles to 320, and then p-values < 1/401
# (NOTE(review): the significance check in this notebook compares the
# percentile to 1 / (N_SHUFFLES + 1), which would be 1/321 here - confirm).
# If we make it too large, say 1000 shuffles...
# ... we might reduce the statistical power
# (given H1, the probability of correctly rejecting H0)!
# Number of shuffles per model, passed to run_spectral_cluster as `n_samples`.
N_SHUFFLES = 50

# Number of clusters, passed to run_spectral_cluster as `n_clusters`.
N_CLUSTERS = 12

# Size of the worker pool used for training; worker i is given gpu_id i.
N_GPUS = 4

TAGS_COMMANDS = {
#    'MNIST': 'python -m src.train_nn with mlp_config dataset_name=mnist',
#    'CIFAR10': 'python -m src.train_nn with mlp_config dataset_name=cifar10 pruning_epochs=40',
     'LINE': 'python -m src.train_nn with mlp_config dataset_name=line',
#    'FASHION': 'python -m src.train_nn with mlp_config dataset_name=fashion',
#    'MNIST+DROPOUT': 'python -m src.train_nn with mlp_config dataset_name=mnist with_dropout=True',
#    'CIFAR10+DROPOUT': 'python -m src.train_nn with mlp_config dataset_name=cifar10 epochs=100 pruning_epochs=40 with_dropout=True dropout_rate=0.2',
     'LINE+DROPOUT': 'python -m src.train_nn with mlp_config dataset_name=line with_dropout=True',
#    'FASHION+DROPOUT': 'python -m src.train_nn with mlp_config dataset_name=fashion with_dropout=True',
     'LINE-MNIST': 'python -m src.train_nn with mlp_config dataset_name=line-mnist',
#    'LINE-CIFAR10': 'python -m src.train_nn with mlp_config dataset_name=line-cifar10 epochs=30 pruning_epochs=40',
#    'MNIST-CIFAR10': 'python -m src.train_nn with mlp_config dataset_name=mnist-cifar10 epochs=30 pruning_epochs=40',
     'LINE-MNIST-SEPARATED': 'python -m src.train_nn with mlp_config dataset_name=line-mnist-separated',
#    'LINE-CIFAR10-SEPARATED': 'python -m src.train_nn with mlp_config dataset_name=line-cifar10-separated epochs=30 pruning_epochs=40',
#    'MNIST-CIFAR10-SEPARATED': 'python -m src.train_nn with mlp_config dataset_name=mnist-cifar10-separated epochs=30 pruning_epochs=40',
     'LINE-MNIST+DROPOUT': 'python -m src.train_nn with mlp_config dataset_name=line-mnist with_dropout=True',
#    'LINE-CIFAR10+DROPOUT': 'python -m src.train_nn with mlp_config dataset_name=line-cifar10 epochs=30 pruning_epochs=40 with_dropout=True dropout_rate=0.2',
#    'MNIST-CIFAR10+DROPOUT': 'python -m src.train_nn with mlp_config dataset_name=mnist-cifar10 epochs=30 pruning_epochs=40 with_dropout=True dropout_rate=0.2',
     'LINE-MNIST-SEPARATED+DROPOUT': 'python -m src.train_nn with mlp_config dataset_name=line-mnist-separated with_dropout=True',
#    'LINE-CIFAR10-SEPARATED+DROPOUT': 'python -m src.train_nn with mlp_config dataset_name=line-cifar10-separated epochs=30 pruning_epochs=40 with_dropout=True dropout_rate=0.2',
#    'MNIST-CIFAR10-SEPARATED+DROPOUT': 'python -m src.train_nn with mlp_config dataset_name=mnist-cifar10-separated epochs=30 pruning_epochs=40 with_dropout=True dropout_rate=0.2',
#    'RANDOM': 'python -m src.train_nn with mlp_config dataset_name=random',
#    'RANDOM+DROPOUT': 'python -m src.train_nn with mlp_config dataset_name=random with_dropout=True',
#    'MNIST-x1.5-EPOCHS': 'python -m src.train_nn with mlp_config dataset_name=mnist epochs=30',
#    'MNIST-x1.5-EPOCHS+DROPOUT':'python -m src.train_nn with mlp_config dataset_name=mnist epochs=30 with_dropout=True',
#    'MNIST-x2-EPOCHS':'python -m src.train_nn with mlp_config dataset_name=mnist epochs=40',
#    'MNIST-x2-EPOCHS+DROPOUT':'python -m src.train_nn with mlp_config dataset_name=mnist epochs=40 with_dropout=True',
#    'MNIST-x10-EPOCHS': 'python -m src.train_nn with mlp_config dataset_name=mnist epochs=200',
#    'MNIST-x10-EPOCHS+DROPOUT': 'python -m src.train_nn with mlp_config dataset_name=mnist epochs=200 with_dropout=True',
#    'RANDOM-x50-EPOCHS': 'python -m src.train_nn with mlp_config dataset_name=random epochs=1000',
#    'RANDOM-x50-EPOCHS+DROPOUT': 'python -m src.train_nn with mlp_config dataset_name=random epochs=1000 with_dropout=True',
#    'RANDOM-OVERFITTING': 'python -m src.train_nn with mlp_config dataset_name=random epochs=100 pruning_epochs=100 shuffle=False n_train=3000',
#    'RANDOM-OVERFITTING+DROPOUT': 'python -m src.train_nn with mlp_config dataset_name=random epochs=100 pruning_epochs=100 shuffle=False n_train=3000 with_dropout=True'
}
# Dataset tag of every enabled command, in TAGS_COMMANDS order.
# The tag is read from the command's `dataset_name=<tag>` argument by name,
# not by position, so reordering or adding arguments cannot silently
# produce a wrong tag.
DATASETS_TAGS = [
    next(argument.split('=', 1)[1]
         for argument in command.split()
         if argument.startswith('dataset_name='))
    for command in TAGS_COMMANDS.values()
]

In [4]:
if False:
    def train_model_multiply(command, n_models_per_command, gpu_id=None):
        """Run `command` from the parent directory `n_models_per_command` times.

        When `gpu_id` is given, the child process sees only that GPU through
        CUDA_VISIBLE_DEVICES; when it is None the variable is left as inherited.
        """
        import os

        # Extend the current environment instead of replacing it, so the
        # child process keeps PATH and the rest of the environment.
        env = dict(os.environ)
        if gpu_id is not None:
            env['CUDA_VISIBLE_DEVICES'] = str(gpu_id)

        actual_command = f'cd .. && {command}'#' > /dev/null 2>&1'
        for _ in range(n_models_per_command):
            print(actual_command)
            subprocess.run(actual_command, shell=True, env=env)


    def train_models(commands, n_models_per_command, gpu_id=None):
        """Train every command in `commands` sequentially on the same GPU."""
        for command in commands:
            train_model_multiply(command, n_models_per_command, gpu_id)

    commands = list(TAGS_COMMANDS.values())
    # Shuffle so that no single GPU systematically gets the same commands.
    random.shuffle(commands)

    with Pool(N_GPUS) as p:
        # Strided slices spread the commands over the GPUs as evenly as
        # possible, even when their number is not a multiple of N_GPUS.
        p.starmap(train_models,
                  [(commands[gpu_id::N_GPUS], N_TRAINED_MODELS, gpu_id)
                   for gpu_id in range(N_GPUS)])

In [5]:
# results[shuffle_method][model_tag] holds one run_spectral_cluster result
# per trained model of that tag.
results = {}

for shuffle_method in SHUFFLE_METHODS:
    results[shuffle_method] = {}

    # tqdm wraps the whole zip: when it is the second argument of zip, zip
    # stops on the exhausted first argument and the bar is left one step short.
    for model_tag, dataset_tag in tqdm(zip(TAGS_COMMANDS, DATASETS_TAGS),
                                       total=len(DATASETS_TAGS)):
        model_paths = get_model_path(model_tag, filter_='all', model_base_path=MODEL_BASE_PATH)

        # Only the N_TRAINED_MODELS last paths of each tag are clustered.
        results[shuffle_method][model_tag] = [
            run_spectral_cluster(path / f'{dataset_tag}-mlp-pruned-weights.pckl',
                                 n_samples=N_SHUFFLES,
                                 n_clusters=N_CLUSTERS,
                                 shuffle_method=shuffle_method)
            for path in tqdm(model_paths[-N_TRAINED_MODELS:])]

  0%|          | 0/6 [00:00<?, ?it/s]




  0%|          | 0/5 [00:00<?, ?it/s]

[A




 20%|██        | 1/5 [08:46<35:07, 526.83s/it]

[A




 40%|████      | 2/5 [16:47<25:38, 512.88s/it]

[A




 60%|██████    | 3/5 [25:56<17:27, 523.87s/it]

[A




 80%|████████  | 4/5 [36:02<09:08, 548.41s/it]

[A




100%|██████████| 5/5 [42:15<00:00, 495.69s/it]

[A

100%|██████████| 5/5 [42:15<00:00, 507.01s/it]


 17%|█▋        | 1/6 [42:15<3:31:15, 2535.05s/it]




  0%|          | 0/5 [00:00<?, ?it/s]

[A




 20%|██        | 1/5 [09:31<38:07, 571.81s/it]

[A




 40%|████      | 2/5 [20:18<29:42, 594.19s/it]

[A




 60%|██████    | 3/5 [30:14<19:49, 594.74s/it]

[A




 80%|████████  | 4/5 [38:33<09:26, 566.19s/it]

[A




100%|██████████| 5/5 [47:59<00:00, 566.04s/it]

[A

100%|██████████| 5/5 [47:59<00:00, 575.91s/it]


 33%|███▎      | 2/6 [1:30:14<2:55:53, 2638.40s/it]




  0%|          | 0/5 [00:00<?, ?it/s]

[A




 20%|██        | 1/5 [09:02<36:10, 542.62s/it]

[A




 40%|████      | 2/5 [20:05<28:56, 578.82s/it]

[A




 60%|██████    | 3/5 [29:44<19:17, 578.79s/it]

[A




 80%|████████  | 4/5 [38:53<09:29, 569.72s/it]

[A




100%|██████████| 5/5 [47:24<00:00, 552.22s/it]

[A

100%|██████████| 5/5 [47:24<00:00, 568.92s/it]


 50%|█████     | 3/6 [2:17:39<2:15:00, 2700.26s/it]




  0%|          | 0/5 [00:00<?, ?it/s]

[A




 20%|██        | 1/5 [07:16<29:04, 436.19s/it]

[A




 40%|████      | 2/5 [15:50<22:58, 459.49s/it]

[A




 60%|██████    | 3/5 [21:33<14:09, 424.64s/it]

[A




 80%|████████  | 4/5 [28:17<06:58, 418.46s/it]

[A




100%|██████████| 5/5 [36:50<00:00, 446.96s/it]

[A

100%|██████████| 5/5 [36:50<00:00, 442.18s/it]


 67%|██████▋   | 4/6 [2:54:30<1:25:06, 2553.47s/it]




  0%|          | 0/5 [00:00<?, ?it/s]

[A




 20%|██        | 1/5 [08:45<35:02, 525.61s/it]

[A




 40%|████      | 2/5 [15:58<24:53, 497.75s/it]

[A




 60%|██████    | 3/5 [25:34<17:22, 521.28s/it]

[A




 80%|████████  | 4/5 [35:49<09:09, 549.47s/it]

[A




100%|██████████| 5/5 [45:31<00:00, 559.24s/it]

[A

100%|██████████| 5/5 [45:31<00:00, 546.37s/it]


 83%|████████▎ | 5/6 [3:40:02<43:26, 2606.99s/it]  




  0%|          | 0/5 [00:00<?, ?it/s]

[A




 20%|██        | 1/5 [07:33<30:13, 453.35s/it]

[A




 40%|████      | 2/5 [14:46<22:22, 447.35s/it]

[A




 60%|██████    | 3/5 [21:54<14:42, 441.42s/it]

[A




 80%|████████  | 4/5 [29:11<07:20, 440.22s/it]

[A




100%|██████████| 5/5 [36:11<00:00, 434.03s/it]

[A

100%|██████████| 5/5 [36:11<00:00, 434.27s/it]


 83%|████████▎ | 5/6 [4:16:13<51:14, 3074.68s/it]




In [6]:
def build_all_models_per_shuffle_method_df(single_shuffle_method_results):
    """Stack the clustering metrics of all model tags into one DataFrame.

    `single_shuffle_method_results` maps a model tag to a sequence of
    2-tuples whose second item is the metrics record of one trained model.
    The returned frame has one row per trained model, a `model` column with
    the tag, and an `is_sig` column that is True where `percentile` is
    (close to) 1 / (N_SHUFFLES + 1).
    """
    per_tag_frames = []
    for tag, tag_results in single_shuffle_method_results.items():
        # Keep only the metrics (second item) of every result pair.
        _, tag_metrics = zip(*tag_results)
        tag_frame = pd.DataFrame(tag_metrics)
        per_tag_frames.append(tag_frame.assign(model=tag))

    all_models = pd.concat(per_tag_frames, ignore_index=True)

    significance_percentile = 1 / (N_SHUFFLES + 1)
    all_models['is_sig'] = np.isclose(all_models['percentile'],
                                      significance_percentile)
    return all_models

layer_df = build_all_models_per_shuffle_method_df(results['layer'])
# layer_nonzero_df = build_all_models_per_shuffle_method_df(results['layer_nonzero'])

In [7]:
layer_df.head()

Unnamed: 0,ave_in_out,mean,n_samples,ncut,percentile,stdev,test_acc,test_loss,train_acc,train_loss,z_score,model,is_sig
0,0.327202,7.644785,50,7.253364,0.019608,0.108187,1.0,2.6e-05,1.0,1.1e-05,-3.617994,LINE,True
1,0.313899,7.783066,50,7.37192,0.019608,0.10699,1.0,3.6e-05,1.0,2.2e-05,-3.84283,LINE,True
2,0.315323,7.667933,50,7.359045,0.019608,0.12237,1.0,3.9e-05,1.0,2.7e-05,-2.524223,LINE,True
3,0.298867,7.708244,50,7.510642,0.019608,0.096152,1.0,4.8e-05,1.0,2.2e-05,-2.0551,LINE,True
4,0.320602,7.91062,50,7.311702,0.019608,0.082062,1.0,0.000181,1.0,0.000171,-7.298357,LINE,True


In [8]:
# layer_nonzero_df.head()

In [9]:
"""
MODEL_DATA_COLUMNS = ['model',
                      'train_acc', 'train_loss', 'test_acc', 'test_loss',
                      'ncut', 'ave_in_out',
                      'n_samples']

assert layer_df[MODEL_DATA_COLUMNS].equals(layer_nonzero_df[MODEL_DATA_COLUMNS])

SHUFFLING_DATA_COLUMNS = ['mean', 'percentile', 'stdev', 'z_score', 'is_sig']

two_shuffling_method_df = pd.merge(layer_df[SHUFFLING_DATA_COLUMNS],
                                     layer_nonzero_df[SHUFFLING_DATA_COLUMNS],
                                     left_index=True, right_index=True,
                                     suffixes=('_layer', '_layer_nonzero'))

assert layer_df[MODEL_DATA_COLUMNS].equals(layer_nonzero_df[MODEL_DATA_COLUMNS])

df = pd.merge(layer_df[MODEL_DATA_COLUMNS], two_shuffling_method_df,
              left_index=True, right_index=True,)
              
df['cohen_d'] = df.apply(lambda r: cohen_d_stats(r['mean_layer'], r['stdev_layer'], r['n_samples'],
                                 r['mean_layer_nonzero'], r['stdev_layer_nonzero'], r['n_samples']),
                                axis=1)
"""

"\nMODEL_DATA_COLUMNS = ['model',\n                      'train_acc', 'train_loss', 'test_acc', 'test_loss',\n                      'ncut', 'ave_in_out',\n                      'n_samples']\n\nassert layer_df[MODEL_DATA_COLUMNS].equals(layer_nonzero_df[MODEL_DATA_COLUMNS])\n\nSHUFFLING_DATA_COLUMNS = ['mean', 'percentile', 'stdev', 'z_score', 'is_sig']\n\ntwo_shuffling_method_df = pd.merge(layer_df[SHUFFLING_DATA_COLUMNS],\n                                     layer_nonzero_df[SHUFFLING_DATA_COLUMNS],\n                                     left_index=True, right_index=True,\n                                     suffixes=('_layer', '_layer_nonzero'))\n\nassert layer_df[MODEL_DATA_COLUMNS].equals(layer_nonzero_df[MODEL_DATA_COLUMNS])\n\ndf = pd.merge(layer_df[MODEL_DATA_COLUMNS], two_shuffling_method_df,\n              left_index=True, right_index=True,)\n              \ndf['cohen_d'] = df.apply(lambda r: cohen_d_stats(r['mean_layer'], r['stdev_layer'], r['n_samples'],\n                  

In [10]:
df = layer_df


df

Unnamed: 0,ave_in_out,mean,n_samples,ncut,percentile,stdev,test_acc,test_loss,train_acc,train_loss,z_score,model,is_sig
0,0.327202,7.644785,50,7.253364,0.019608,0.108187,1.0,2.6e-05,1.0,1.1e-05,-3.617994,LINE,True
1,0.313899,7.783066,50,7.37192,0.019608,0.10699,1.0,3.6e-05,1.0,2.2e-05,-3.84283,LINE,True
2,0.315323,7.667933,50,7.359045,0.019608,0.12237,1.0,3.9e-05,1.0,2.7e-05,-2.524223,LINE,True
3,0.298867,7.708244,50,7.510642,0.019608,0.096152,1.0,4.8e-05,1.0,2.2e-05,-2.0551,LINE,True
4,0.320602,7.91062,50,7.311702,0.019608,0.082062,1.0,0.000181,1.0,0.000171,-7.298357,LINE,True
5,0.397428,8.892256,50,6.685774,0.019608,0.044247,0.2952,2.996638,0.903183,0.253735,-49.867484,LINE+DROPOUT,True
6,0.358717,8.891767,50,6.98717,0.019608,0.038094,0.2939,5.397707,0.920683,0.206457,-49.997865,LINE+DROPOUT,True
7,0.372457,8.885253,50,6.877133,0.019608,0.032116,0.3032,5.312357,0.924333,0.204587,-62.527534,LINE+DROPOUT,True
8,0.354684,8.88573,50,7.020136,0.019608,0.032859,0.2939,4.98537,0.91095,0.230818,-56.774956,LINE+DROPOUT,True
9,0.397984,8.876844,50,6.681631,0.019608,0.038527,0.2937,4.942878,0.902517,0.250102,-56.978968,LINE+DROPOUT,True


In [11]:
# df = pd.read_csv('../results/mlp-clustering-stability-two-shuffling-methods-all-samples.csv', index_col=0)

In [12]:
# One group per model tag; every group holds the rows of its trained models.
df_grpby_model = df.groupby('model')

# TWO_SHUFFLE_STATS_COLUMNS = [f'{stat}_{shuffle_method}'
#                              for stat in ['mean', 'stdev', 'z_score', 'percentile', 'is_sig']
#                             for shuffle_method in SHUFFLE_METHODS]

TWO_SHUFFLE_STATS_COLUMNS = ['mean', 'stdev', 'z_score', 'percentile', 'is_sig']

# Per-tag summary: mean and std of the clustering statistics, the number of
# trained models, and the mean train/test accuracy.
statistics_df = pd.concat(
    [
        df_grpby_model[['ncut', *TWO_SHUFFLE_STATS_COLUMNS]].agg(['mean', 'std']),
        df_grpby_model.size().rename('n_models'),
        df_grpby_model[['train_acc', 'test_acc']].mean(),
    ],
    axis=1,
)

In [13]:
statistics_df

Unnamed: 0_level_0,"(ncut, mean)","(ncut, std)","(mean, mean)","(mean, std)","(stdev, mean)","(stdev, std)","(z_score, mean)","(z_score, std)","(percentile, mean)","(percentile, std)","(is_sig, mean)","(is_sig, std)",n_models,train_acc,test_acc
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
LINE,7.361335,0.095565,7.74293,0.10746,0.103152,0.015027,-3.867701,2.056868,0.019608,0.0,1.0,0.0,5,1.0,1.0
LINE+DROPOUT,6.850369,0.161102,8.88637,0.006248,0.037168,0.004922,-55.229361,5.357975,0.019608,0.0,1.0,0.0,5,0.912333,0.29598
LINE-MNIST,7.954501,0.095978,8.99305,0.16632,0.052454,0.018418,-22.169842,9.220927,0.019608,0.0,1.0,0.0,5,0.999983,0.99078
LINE-MNIST+DROPOUT,6.932611,0.093737,8.700609,0.046496,0.050636,0.009425,-35.881974,6.811205,0.019608,0.0,1.0,0.0,5,0.87285,0.63693
LINE-MNIST-SEPARATED,8.261181,0.19495,8.497317,0.148798,0.064825,0.017637,-3.823383,2.171242,0.027451,0.01074,0.6,0.547723,5,1.0,0.993773
LINE-MNIST-SEPARATED+DROPOUT,7.779054,0.071594,9.036508,0.018402,0.034218,0.00661,-37.907284,7.861859,0.019608,0.0,1.0,0.0,5,0.983902,0.893529


In [14]:
# TODO: ask for a code review
# std = np.sqrt(np.mean(x**2) - np.mean(x)**2)

def calc_overall_stdev(row, suffix=''):
    """Combine per-model means and stdevs into a single standard deviation.

    Reads the `mean{suffix}` and `stdev{suffix}` entries of `row` and returns
    sqrt(mean(stdev**2 + mean**2) - mean(mean)**2), following the identity
    std = sqrt(mean(x**2) - mean(x)**2).
    """
    means = row[f'mean{suffix}']
    stdevs = row[f'stdev{suffix}']

    # stdev**2 + mean**2 is each model's mean of squares; averaging it over
    # the models gives the overall mean of squares.
    mean_of_squares = np.mean(stdevs**2 + means**2)
    square_of_mean = np.mean(means)**2

    return np.sqrt(mean_of_squares - square_of_mean)

In [15]:
# Column-name suffixes of the shuffle methods to summarize; the empty suffix
# selects the un-suffixed columns of the single-method table.
SHUFFLE_METHOD_SUFFIX = [''] # ['_layer', '_layer_nonzero']
# Per model tag: apply calc_overall_stdev to the group's rows, one column per suffix.
for suffix in SHUFFLE_METHOD_SUFFIX:
    statistics_df[f'overall_std{suffix}'] = df_grpby_model.apply(calc_overall_stdev, suffix)

# A tag is "stable" when the mean of `is_sig` over its trained models is 1,
# i.e. every one of them was significant.
for suffix in SHUFFLE_METHOD_SUFFIX:
    statistics_df[f'is_stable{suffix}'] = (statistics_df[(f'is_sig{suffix}', 'mean')] == 1)

# Reorder the rows to follow the order of the tags in TAGS_COMMANDS.
statistics_df = statistics_df.loc[TAGS_COMMANDS.keys(), :]

In [16]:
statistics_df

Unnamed: 0_level_0,"(ncut, mean)","(ncut, std)","(mean, mean)","(mean, std)","(stdev, mean)","(stdev, std)","(z_score, mean)","(z_score, std)","(percentile, mean)","(percentile, std)","(is_sig, mean)","(is_sig, std)",n_models,train_acc,test_acc,overall_std,is_stable
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
LINE,7.361335,0.095565,7.74293,0.10746,0.103152,0.015027,-3.867701,2.056868,0.019608,0.0,1.0,0.0,5,1.0,1.0,0.141631,True
LINE+DROPOUT,6.850369,0.161102,8.88637,0.006248,0.037168,0.004922,-55.229361,5.357975,0.019608,0.0,1.0,0.0,5,0.912333,0.29598,0.037843,True
LINE-MNIST,7.954501,0.095978,8.99305,0.16632,0.052454,0.018418,-22.169842,9.220927,0.019608,0.0,1.0,0.0,5,0.999983,0.99078,0.158596,True
LINE-MNIST-SEPARATED,8.261181,0.19495,8.497317,0.148798,0.064825,0.017637,-3.823383,2.171242,0.027451,0.01074,0.6,0.547723,5,1.0,0.993773,0.148875,False
LINE-MNIST+DROPOUT,6.932611,0.093737,8.700609,0.046496,0.050636,0.009425,-35.881974,6.811205,0.019608,0.0,1.0,0.0,5,0.87285,0.63693,0.066066,True
LINE-MNIST-SEPARATED+DROPOUT,7.779054,0.071594,9.036508,0.018402,0.034218,0.00661,-37.907284,7.861859,0.019608,0.0,1.0,0.0,5,0.983902,0.893529,0.038429,True


In [17]:
# Save the per-model samples and the per-tag statistics; the file names
# carry the number of clusters (k) and the number of shuffles (n).
# NOTE(review): the names still say "two-shuffling-methods" although
# SHUFFLE_METHODS currently lists only 'layer' - confirm the naming.
df.to_csv(f'../results/mlp-clustering-stability-two-shuffling-methods-all-samples-k{N_CLUSTERS}-n{N_SHUFFLES}.csv')
statistics_df.to_csv(f'../results/mlp-clustering-stability-two-shuffling-methods-statistics-k{N_CLUSTERS}-n{N_SHUFFLES}.csv')

### Model Stability Statistics

In [18]:
statistics_df

Unnamed: 0_level_0,"(ncut, mean)","(ncut, std)","(mean, mean)","(mean, std)","(stdev, mean)","(stdev, std)","(z_score, mean)","(z_score, std)","(percentile, mean)","(percentile, std)","(is_sig, mean)","(is_sig, std)",n_models,train_acc,test_acc,overall_std,is_stable
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
LINE,7.361335,0.095565,7.74293,0.10746,0.103152,0.015027,-3.867701,2.056868,0.019608,0.0,1.0,0.0,5,1.0,1.0,0.141631,True
LINE+DROPOUT,6.850369,0.161102,8.88637,0.006248,0.037168,0.004922,-55.229361,5.357975,0.019608,0.0,1.0,0.0,5,0.912333,0.29598,0.037843,True
LINE-MNIST,7.954501,0.095978,8.99305,0.16632,0.052454,0.018418,-22.169842,9.220927,0.019608,0.0,1.0,0.0,5,0.999983,0.99078,0.158596,True
LINE-MNIST-SEPARATED,8.261181,0.19495,8.497317,0.148798,0.064825,0.017637,-3.823383,2.171242,0.027451,0.01074,0.6,0.547723,5,1.0,0.993773,0.148875,False
LINE-MNIST+DROPOUT,6.932611,0.093737,8.700609,0.046496,0.050636,0.009425,-35.881974,6.811205,0.019608,0.0,1.0,0.0,5,0.87285,0.63693,0.066066,True
LINE-MNIST-SEPARATED+DROPOUT,7.779054,0.071594,9.036508,0.018402,0.034218,0.00661,-37.907284,7.861859,0.019608,0.0,1.0,0.0,5,0.983902,0.893529,0.038429,True


### Appendix - All Samples

In [19]:
df

Unnamed: 0,ave_in_out,mean,n_samples,ncut,percentile,stdev,test_acc,test_loss,train_acc,train_loss,z_score,model,is_sig
0,0.327202,7.644785,50,7.253364,0.019608,0.108187,1.0,2.6e-05,1.0,1.1e-05,-3.617994,LINE,True
1,0.313899,7.783066,50,7.37192,0.019608,0.10699,1.0,3.6e-05,1.0,2.2e-05,-3.84283,LINE,True
2,0.315323,7.667933,50,7.359045,0.019608,0.12237,1.0,3.9e-05,1.0,2.7e-05,-2.524223,LINE,True
3,0.298867,7.708244,50,7.510642,0.019608,0.096152,1.0,4.8e-05,1.0,2.2e-05,-2.0551,LINE,True
4,0.320602,7.91062,50,7.311702,0.019608,0.082062,1.0,0.000181,1.0,0.000171,-7.298357,LINE,True
5,0.397428,8.892256,50,6.685774,0.019608,0.044247,0.2952,2.996638,0.903183,0.253735,-49.867484,LINE+DROPOUT,True
6,0.358717,8.891767,50,6.98717,0.019608,0.038094,0.2939,5.397707,0.920683,0.206457,-49.997865,LINE+DROPOUT,True
7,0.372457,8.885253,50,6.877133,0.019608,0.032116,0.3032,5.312357,0.924333,0.204587,-62.527534,LINE+DROPOUT,True
8,0.354684,8.88573,50,7.020136,0.019608,0.032859,0.2939,4.98537,0.91095,0.230818,-56.774956,LINE+DROPOUT,True
9,0.397984,8.876844,50,6.681631,0.019608,0.038527,0.2937,4.942878,0.902517,0.250102,-56.978968,LINE+DROPOUT,True


### Unpruned results without shuffling
#### TODO: Refactor
- [ ] Combine with the unpruned table

# Cluster the pruned weights of every model tag without the shuffling test.
# NOTE(review): unlike the shuffled run earlier in this notebook,
# get_model_path is called here without model_base_path=MODEL_BASE_PATH -
# confirm that the default location is the intended one.
pruned_results = {}

# tqdm wraps the whole zip: when it is the second argument of zip, zip stops
# on the exhausted first argument and the bar is left one step short.
for model_tag, dataset_tag in tqdm(zip(TAGS_COMMANDS, DATASETS_TAGS),
                                   total=len(DATASETS_TAGS)):
    model_paths = get_model_path(model_tag, filter_='all')
    # Only the N_TRAINED_MODELS last paths of each tag are clustered.
    pruned_results[model_tag] = [
        run_spectral_cluster(path / f'{dataset_tag}-mlp-pruned-weights.pckl',
                             with_shuffle=False)
        for path in tqdm(model_paths[-N_TRAINED_MODELS:])]

# One frame per model tag, holding the metrics of its trained models.
pruned_model_dfs = []

for model_tag, model_results in pruned_results.items():
    # Keep only the metrics (second item) of every result pair.
    _, metrics = zip(*model_results)
    pruned_model_dfs.append(pd.DataFrame(metrics).assign(model=model_tag))

pruned_df = pd.concat(pruned_model_dfs, ignore_index=True)

pruned_df_grpby_model = pruned_df.groupby('model')

# Per-tag summary: mean and std of ncut, the number of trained models, and
# the mean train/test accuracy.
pruned_statistics_df = pd.concat([(pruned_df_grpby_model[['ncut']]
                             .agg(['mean', 'std'])),
                            pruned_df_grpby_model.size().rename('n_models'),
                            (pruned_df_grpby_model[['train_acc', 'test_acc']]
                             .agg('mean'))],
                           axis=1)

# Reorder the rows to follow the order of the tags in TAGS_COMMANDS.
pruned_statistics_df = pruned_statistics_df.loc[TAGS_COMMANDS.keys(), :]

pruned_statistics_df