In [1]:
# Factors of Clusterability Analysis: MLPs

In [1]:
import sys
sys.path.append('..')
import pandas as pd
from tqdm import tqdm
from src.visualization import run_spectral_cluster
from src.experiment_tagging import get_model_path
from src.utils import get_weights_paths

Using TensorFlow backend.


In [2]:
# --- Experiment configuration -------------------------------------------------
# The first three values are forwarded unchanged to run_spectral_cluster below.
n_clust = 12     # n_clusters argument
n_samples = 50   # n_samples argument (presumably the number of random samples
                 # behind the mean/stdev/z_score columns -- TODO confirm)
n_workers = 10   # n_workers argument
n_reps = 10      # trained models analysed per tag

model_tags = ('SMALL-MNIST', 'SMALL-MNIST+CLUSTERABILITY-GRADIENT')
# Human-readable label written to the 'Network' column for each tag.
tag_to_net = {'SMALL-MNIST': 'Control', 'SMALL-MNIST+CLUSTERABILITY-GRADIENT': 'Cluster Gradient'}

# Keep only the last n_reps paths returned for each tag
# (presumably the most recent runs -- confirm the ordering of get_model_path).
model_paths = {tag: get_model_path(tag, filter_='all')[-n_reps:] for tag in model_tags}

# Fail fast, and say which tag is short on trained models: the slice above
# silently returns fewer than n_reps entries when not enough runs exist.
assert all(len(mps) == n_reps for mps in model_paths.values()), (
    'expected {} model paths per tag, found {}'.format(
        n_reps, {tag: len(mps) for tag, mps in model_paths.items()}))

# Filled by the clustering loop: results[tag][rep] -> output of run_spectral_cluster.
clustering_results = {}
clustering_results_pruned = {}

In [3]:
# Spectral-cluster every replicate of every tag twice: once on the weights
# stored under key True (kept in clustering_results) and once on those under
# key False (kept in clustering_results_pruned).
# NOTE(review): the key looks like an "is unpruned" flag -- confirm against
# src.utils.get_weights_paths.
for tag, paths in tqdm(model_paths.items()):

    clustering_results[tag] = {}
    clustering_results_pruned[tag] = {}
    destinations = ((True, clustering_results[tag]),
                    (False, clustering_results_pruned[tag]))

    for rep in range(n_reps):
        weight_paths = get_weights_paths(paths[rep])

        # Same order as before: the True-key weights are clustered first.
        for weights_key, per_rep_store in destinations:
            per_rep_store[rep] = run_spectral_cluster(
                weight_paths[weights_key],
                n_clusters=n_clust,
                n_samples=n_samples,
                n_workers=n_workers,
                eigen_solver='arpack',
            )

# Flatten both nested result dicts into one row per (variant, tag, replicate).
# Rows from clustering_results_pruned get ', Pruning' appended to their label.
all_results = []
variants = (('', clustering_results), (', Pruning', clustering_results_pruned))

for label_suffix, res in variants:
    for tag, per_rep in res.items():

        # Both values depend only on the tag, so compute them once per tag.
        network = tag_to_net[tag] + label_suffix
        network_type = 'cnn' if 'CNN' in tag else 'mlp'

        for rep_output in per_rep.values():
            # Each stored entry unpacks into (labels, metrics); only the
            # metrics are exported.
            _, metrics = rep_output
            row = {'model': tag,
                   'network_type': network_type,
                   'Network': network,
                   'Dataset': 'Small MNIST'}
            row.update(metrics)
            all_results.append(pd.Series(row))

# One Series per row, as before, so column order and dtype inference are unchanged.
result_df = pd.DataFrame(all_results)
savepath = '../results/clustering_factors_clust_grad.csv'
result_df.to_csv(savepath)
result_df

100%|██████████| 2/2 [01:32<00:00, 46.35s/it]


Unnamed: 0,model,network_type,Network,Dataset,ncut,ave_in_out,n_samples,mean,stdev,z_score,percentile,train_acc,train_loss,test_acc,test_loss
0,SMALL-MNIST,mlp,Control,Small MNIST,9.958109,0.102524,50,10.020713,0.030648,-2.042638,0.019608,0.9552,0.141094,0.9601,0.131384
1,SMALL-MNIST,mlp,Control,Small MNIST,10.027099,0.098378,50,10.042874,0.023404,-0.674033,0.235294,0.954017,0.145775,0.959,0.135999
2,SMALL-MNIST,mlp,Control,Small MNIST,9.951248,0.102939,50,10.03339,0.033278,-2.468386,0.019608,0.955483,0.143672,0.9547,0.143884
3,SMALL-MNIST,mlp,Control,Small MNIST,10.006982,0.099581,50,10.03729,0.033493,-0.904903,0.176471,0.951917,0.149067,0.9556,0.140822
4,SMALL-MNIST,mlp,Control,Small MNIST,10.056823,0.09661,50,10.032909,0.036827,0.649354,0.72549,0.95665,0.138705,0.959,0.139256
5,SMALL-MNIST,mlp,Control,Small MNIST,10.018129,0.098914,50,10.007045,0.032518,0.340835,0.666667,0.957633,0.135327,0.9584,0.134037
6,SMALL-MNIST,mlp,Control,Small MNIST,9.986409,0.100817,50,10.014049,0.028718,-0.962487,0.196078,0.956367,0.136459,0.9573,0.139452
7,SMALL-MNIST,mlp,Control,Small MNIST,10.063253,0.096229,50,10.06411,0.032107,-0.026701,0.509804,0.956633,0.138069,0.9586,0.136479
8,SMALL-MNIST,mlp,Control,Small MNIST,10.086946,0.094828,50,10.035312,0.028965,1.782675,0.980392,0.95405,0.14721,0.9526,0.145845
9,SMALL-MNIST,mlp,Control,Small MNIST,10.044387,0.097349,50,10.059346,0.023852,-0.627171,0.235294,0.956067,0.139477,0.9589,0.137138
