In [1]:
import numpy as np
import torch
import sys
import os
from torchvision import models
from mftma.manifold_analysis_correlation import manifold_analysis_corr
from mftma.utils.make_manifold_data import make_manifold_data
from mftma.utils.activation_extractor import extractor
from mftma.utils.analyze_pytorch import analyze
import getpass
import argparse
from neural_manifold_utils import CFAR100_fake_dataset_mftma , save_dict
from datetime import datetime
print('__cuda available ',torch.cuda.is_available())
print('__Python VERSION:', sys.version)
print('__CUDNN VERSION:', torch.backends.cudnn.version())
print('__Number CUDA Devices:', torch.cuda.device_count())
import torch
from torchvision import datasets, transforms, models

__cuda available  False
__Python VERSION: 3.6.10 (default, Jun  9 2020, 18:36:16) 
[GCC 8.3.0]
__CUDNN VERSION: 7605
__Number CUDA Devices: 0


In [2]:
user=getpass.getuser()
print(user)
if user=='eghbalhosseini':
    save_dir='/Users/eghbalhosseini/MyData/neural_manifolds/network_training_on_synthetic/'
    data_dir='/Users/eghbalhosseini/MyData/neural_manifolds/synthetic_datasets/'
elif user=='ehoseini':
    save_dir='/om/user/ehoseini/MyData/neural_manifolds/network_training_on_synthetic/'
    data_dir='/om/user/ehoseini/MyData/neural_manifolds/synthetic_datasets/'
datafile='synth_partition_nobj_50000_nclass_50_nfeat_3072_beta_0.01_sigma_1.50_norm_1.mat'

ehoseini


In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dataset = CFAR100_fake_dataset_mftma(data_dir=os.path.join(data_dir, datafile))

<HDF5 dataset "data": shape (100000,), type "<f8"> is not a matlab type
<HDF5 dataset "ir": shape (100000,), type "<u8"> is not a matlab type
<HDF5 dataset "jc": shape (50051,), type "<u8"> is not a matlab type
data type not supported: graph, uint32


In [8]:
sampled_classes = 10
examples_per_class = 10
data = make_manifold_data(dataset, sampled_classes, examples_per_class, seed=0)

In [9]:
model_save_path=save_dir+'VGG16_synthdata_'+dataset.structure+'_nclass_'+str(int(dataset.n_class))+'_n_exm_'+str(int(dataset.exm_per_class))
model = models.vgg16(num_classes=dataset.n_class)

In [10]:
model.load_state_dict(torch.load(model_save_path, map_location=device))
model = model.to(device)
model = model.eval()
activations = extractor(model, data, layer_types=['Conv2d', 'Linear'])
list(activations.keys())

['layer_0_Input',
 'layer_1_Conv2d',
 'layer_3_Conv2d',
 'layer_6_Conv2d',
 'layer_8_Conv2d',
 'layer_11_Conv2d',
 'layer_13_Conv2d',
 'layer_15_Conv2d',
 'layer_18_Conv2d',
 'layer_20_Conv2d',
 'layer_22_Conv2d',
 'layer_25_Conv2d',
 'layer_27_Conv2d',
 'layer_29_Conv2d',
 'layer_33_Linear',
 'layer_36_Linear',
 'layer_39_Linear']

In [11]:
for layer, data, in activations.items():
    X = [d.reshape(d.shape[0], -1).T for d in data]
    # Get the number of features in the flattened data
    N = X[0].shape[0]
    # If N is greater than 5000, do the random projection to 5000 features
    if N > 5000:
        print("Projecting {}".format(layer))
        M = np.random.randn(5000, N)
        M /= np.sqrt(np.sum(M * M, axis=1, keepdims=True))
        X = [np.matmul(M, d) for d in X]
    activations[layer] = X


Projecting layer_1_Conv2d
Projecting layer_3_Conv2d
Projecting layer_6_Conv2d
Projecting layer_8_Conv2d
Projecting layer_11_Conv2d
Projecting layer_13_Conv2d
Projecting layer_15_Conv2d
Projecting layer_18_Conv2d
Projecting layer_20_Conv2d
Projecting layer_22_Conv2d


In [12]:
capacities = []
radii = []
dimensions = []
correlations = []
for k, X, in activations.items():
    # Analyze each layer's activations
    a, r, d, r0, K = manifold_analysis_corr(X, 0, 300, n_reps=1)
    # Compute the mean values
    a = 1 / np.mean(1 / a)
    r = np.mean(r)
    d = np.mean(d)
    print("{} capacity: {:4f}, radius {:4f}, dimension {:4f}, correlation {:4f}".format(k, a, r, d, r0))

    # Store for later
    capacities.append(a)
    radii.append(r)
    dimensions.append(d)
    correlations.append(r0)
names = list(activations.keys())
names = [n.split('_')[1] + ' ' + n.split('_')[2] for n in names]
# save the results:
current_time = datetime.now().strftime('%b%d_%H-%M-%S')
results_file = os.path.join(save_dir,'mftma_'+model_save_path+'_'+current_time)
data_ = {'capacities': capacities,
             'radii': radii,
             'dimensions': dimensions,
             'correlations': correlations,
             'names': names,
             'analyze_exm_per_class': examples_per_class,
             'analyze_n_class': sampled_classes
             }

layer_0_Input capacity: 0.354178, radius 0.954466, dimension 4.889111, correlation 0.111076
layer_1_Conv2d capacity: 0.347111, radius 0.973281, dimension 4.838531, correlation 0.111006
layer_3_Conv2d capacity: 0.324501, radius 1.035510, dimension 5.094076, correlation 0.111028
layer_6_Conv2d capacity: 0.318713, radius 1.027056, dimension 5.057843, correlation 0.111617
layer_8_Conv2d capacity: 0.313656, radius 1.045578, dimension 5.108380, correlation 0.110979
layer_11_Conv2d capacity: 0.330684, radius 1.010406, dimension 4.885698, correlation 0.127790
layer_13_Conv2d capacity: 0.349242, radius 0.999653, dimension 4.779131, correlation 0.148460
layer_15_Conv2d capacity: 0.334839, radius 1.010647, dimension 4.863199, correlation 0.160911
layer_18_Conv2d capacity: 0.353067, radius 0.997954, dimension 4.670523, correlation 0.170372
layer_20_Conv2d capacity: 0.382376, radius 0.944428, dimension 4.367276, correlation 0.150131
layer_22_Conv2d capacity: 0.436850, radius 0.887231, dimension 3.9

In [13]:
result_save_path=save_dir+'mftma_VGG16_synthdata_'+train_dataset.structure+'_nclass_'+str(int(train_dataset.n_class))+'_n_exm_'+str(int(train_dataset.exm_per_class))+'_'+current_time
save_dict(data_, result_save_path)

NameError: name 'train_dataset' is not defined

In [None]:
result_save_path=save_dir+'mftma_VGG16_synthdata_'+train_dataset.structure+'_nclass_'+str(int(train_dataset.n_class))+'_n_exm_'+str(int(train_dataset.exm_per_class))+'_'+current_time


In [None]:
result_save_path

In [None]:
mean = (0.5070751592371323, 0.48654887331495095, 0.4409178433670343)
std = (0.2673342858792401, 0.2564384629170883, 0.27615047132568404)

transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

train_dataset = datasets.CIFAR100('../data', train=True, download=True,
                   transform=transform_train)
test_dataset = datasets.CIFAR100('../data', train=False, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize(mean, std)
                   ]))

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)