In [19]:
import os
import numpy as np
import torch
import itertools
import matplotlib.pyplot as plt
%matplotlib inline
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import math

from tqdm import tqdm
import time

In [20]:
# Corollary 2.4 in Mohammadi 2014 - for 1d
def alpha_estimator_one(m, X):
    """Estimate the tail index alpha of scalar samples from block sums.

    The samples are cut into ``n = N // m`` consecutive blocks of ``m``
    values (a trailing remainder is dropped) and every block is summed.
    With ``Y`` the block sums and ``X`` the samples that were kept, the
    estimate is::

        1 / alpha = (mean(log|Y|) - mean(log|X|)) / log(m)

    The notebook attributes this estimator to Corollary 2.4 of
    Mohammadi 2014.

    Parameters
    ----------
    m : int
        Block size. Must be at least 2 because the estimate divides by
        ``log(m)``.
    X : array-like
        The samples. The input is read as one flat sequence, so ``(N,)``
        arrays, ``(N, 1)`` columns and plain Python lists all work.

    Returns
    -------
    float
        The estimated alpha.

    Raises
    ------
    ValueError
        If ``m < 2`` or if fewer than ``m`` samples are supplied (no
        complete block can be formed).
    """
    if m < 2:
        raise ValueError("block size m must be at least 2, got {}".format(m))

    samples = np.asarray(X).reshape(-1)
    n = samples.size // m  # number of complete blocks
    if n < 1:
        raise ValueError(
            "need at least m={} samples, got {}".format(m, samples.size))

    samples = samples[:n * m]
    block_sums = samples.reshape(n, m).sum(axis=1)

    # Added before taking logs so that exact zeros do not produce -inf.
    eps = np.spacing(1)

    Y_log_norm = np.log(np.abs(block_sums) + eps).mean()
    X_log_norm = np.log(np.abs(samples) + eps).mean()
    diff = (Y_log_norm - X_log_norm) / math.log(m)
    return 1 / diff

In [21]:
# Corollary 2.4 in Mohammadi 2014 - for multi-d
def alpha_estimator_multi(m, X):
    """Estimate the tail index alpha of vector-valued samples from block sums.

    The rows of ``X`` are cut into ``n = N // m`` consecutive blocks of
    ``m`` rows (a trailing remainder is dropped) and every block is summed.
    With ``Y`` the block sums and ``X`` the rows that were kept, the
    estimate is::

        1 / alpha = (mean(log||Y||) - mean(log||X||)) / log(m)

    where ``||.||`` is the norm taken along each row. The notebook
    attributes this estimator to Corollary 2.4 of Mohammadi 2014.

    Parameters
    ----------
    m : int
        Block size. Must be at least 2 because the estimate divides by
        ``log(m)``.
    X : torch.Tensor or array-like, shape (N, d)
        One sample per row. NumPy arrays and nested lists are converted
        with ``torch.as_tensor``; integer data is promoted to the default
        floating dtype so that the norm can be computed.

    Returns
    -------
    float
        The estimated alpha.

    Raises
    ------
    ValueError
        If ``m < 2``, if ``X`` is not 2-dimensional, or if it has fewer
        than ``m`` rows.
    """
    if m < 2:
        raise ValueError("block size m must be at least 2, got {}".format(m))

    samples = torch.as_tensor(X)
    if samples.dim() != 2:
        raise ValueError(
            "X must be an N by d matrix, got {} dimension(s)".format(samples.dim()))
    if not samples.is_floating_point():
        samples = samples.to(torch.get_default_dtype())

    n = samples.size(0) // m  # number of complete blocks
    if n < 1:
        raise ValueError(
            "need at least m={} rows, got {}".format(m, samples.size(0)))

    samples = samples[:n * m, :]
    block_sums = samples.reshape(n, m, -1).sum(dim=1)

    # Added before taking logs so that all-zero rows do not produce -inf.
    eps = float(np.spacing(1))

    Y_log_norm = torch.log(block_sums.norm(dim=1) + eps).mean()
    X_log_norm = torch.log(samples.norm(dim=1) + eps).mean()
    diff = (Y_log_norm - X_log_norm) / math.log(m)
    return 1 / diff.item()

In [22]:
'''VGG11/13/16/19 in Pytorch.'''
import torch
import torch.nn as nn


# Layer layouts consumed by VGG._make_layers, keyed by architecture name.
# An integer adds a 3x3 convolution with that many output channels (followed
# by batch norm and ReLU); 'M' adds a 2x2 max-pooling layer with stride 2.
cfg = {
    'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}


class VGG(nn.Module):
    """VGG-style network: a convolutional feature stack plus one linear layer.

    ``vgg_name`` selects a layout from the module-level ``cfg`` table. The
    classifier maps 512 flattened features to 10 outputs, so the feature
    stack has to end with 512 values per sample (e.g. a 32x32 input after
    the five poolings of the layouts in ``cfg``).
    """

    def __init__(self, vgg_name):
        super().__init__()
        layout = cfg[vgg_name]
        self.features = self._make_layers(layout)
        self.classifier = nn.Linear(512, 10)

    def forward(self, x):
        """Run the feature stack, flatten per sample, then classify."""
        feature_maps = self.features(x)
        batch_size = feature_maps.size(0)
        return self.classifier(feature_maps.view(batch_size, -1))

    def _make_layers(self, cfg):
        """Turn a layout list into an ``nn.Sequential``.

        Starting from 3 input channels, each integer entry appends
        Conv2d(3x3, padding 1) -> BatchNorm2d -> ReLU(inplace) and each
        'M' entry appends a 2x2 max pool. A 1x1 average pool is appended
        at the very end.
        """
        modules = []
        channels_in = 3
        for entry in cfg:
            if entry != 'M':
                modules.extend([
                    nn.Conv2d(channels_in, entry, kernel_size=3, padding=1),
                    nn.BatchNorm2d(entry),
                    nn.ReLU(inplace=True),
                ])
                channels_in = entry
                continue
            modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
        modules.append(nn.AvgPool2d(kernel_size=1, stride=1))
        return nn.Sequential(*modules)


def test():
    """Smoke check: feed a random batch of two 3x32x32 inputs to a VGG11
    and print the size of the output."""
    model = VGG('VGG11')
    batch = torch.randn(2, 3, 32, 32)
    print(model(batch).size())

# test()

In [23]:
# Build a fresh VGG11 and print the shape of every parameter in the order
# net.parameters() yields them. The listing shows at which positions the
# weight tensors sit, which compute_alphas_centralized relies on when it
# keeps every 4th parameter.
net = VGG("VGG11")
for ix, p in enumerate(net.parameters()):
    print(p.shape)

torch.Size([64, 3, 3, 3])
torch.Size([64])
torch.Size([64])
torch.Size([64])
torch.Size([128, 64, 3, 3])
torch.Size([128])
torch.Size([128])
torch.Size([128])
torch.Size([256, 128, 3, 3])
torch.Size([256])
torch.Size([256])
torch.Size([256])
torch.Size([256, 256, 3, 3])
torch.Size([256])
torch.Size([256])
torch.Size([256])
torch.Size([512, 256, 3, 3])
torch.Size([512])
torch.Size([512])
torch.Size([512])
torch.Size([512, 512, 3, 3])
torch.Size([512])
torch.Size([512])
torch.Size([512])
torch.Size([512, 512, 3, 3])
torch.Size([512])
torch.Size([512])
torch.Size([512])
torch.Size([512, 512, 3, 3])
torch.Size([512])
torch.Size([512])
torch.Size([512])
torch.Size([10, 512])
torch.Size([10])


In [24]:
def _average_layer_weights(model_dir, num_nets, depth):
    """Average the tracked weight tensors over the checkpoints in ``model_dir``.

    Reads ``model1.pth`` ... ``model<num_nets>.pth`` and returns a list of
    ``depth`` arrays; slots that no checkpoint filled stay ``None``.
    """
    scale = float(num_nets)
    averaged = [None] * depth
    for net_idx in range(num_nets):
        checkpoint = model_dir + 'model{}'.format(net_idx + 1) + '.pth'
        # NOTE(security): torch.load unpickles the file, so only trusted
        # checkpoints must be loaded. The loaded object has to expose
        # .parameters(), i.e. the file presumably stores a whole pickled
        # model whose class must be available when loading -- verify.
        net = torch.load(checkpoint, map_location='cpu')
        for ix, p in enumerate(net.parameters()):
            # Only every 4th parameter is kept. For the VGG11 listed in this
            # notebook the parameters come in groups of four (conv weight,
            # conv bias, batch-norm weight, batch-norm bias), so indices
            # 0, 4, ..., 28 are the conv kernels and index 32 is the
            # classifier weight.
            if ix % 4 != 0:
                continue
            slot = ix // 4
            if slot >= depth:
                raise IndexError(
                    "checkpoint {!r} has more tracked layers than depth={}".format(
                        checkpoint, depth))
            contribution = p.detach().numpy() / scale
            if averaged[slot] is None:
                averaged[slot] = contribution
            else:
                averaged[slot] += contribution
    return averaged


def _centered_rows(weights):
    """Subtract the mean over the first axis; flatten 4-d weights to 2-d.

    A 4-d array of shape (a, b, c, d) becomes (a * b, c * d); arrays of
    any other rank keep their shape.
    """
    centered = weights - np.mean(weights, axis=0)[np.newaxis, ...]
    if centered.ndim == 4:
        centered = centered.reshape(centered.shape[0] * centered.shape[1], -1)
    return centered


def compute_alphas_centralized(etas, PATH, depth, num_nets=None):
    """Estimate a per-layer tail index from checkpoint-averaged weights.

    For every position ``ei`` in ``etas`` the checkpoints
    ``PATH + 'LR<ei>/model<k>.pth'`` (k = 1..num_nets) are loaded, the
    tracked weight tensors are averaged over the checkpoints, each averaged
    tensor is centred and reshaped to rows, and ``alpha_estimator_multi``
    is evaluated on it for block sizes 2, 5 and 10. The median of the
    three estimates is stored.

    Parameters
    ----------
    etas : sequence
        One entry per ``LR<index>/`` folder. Only the positions are used to
        build the folder names; the values themselves are not read.
    PATH : str
        Root folder, including the trailing separator.
    depth : int
        Number of tracked layers per network (columns of the result).
    num_nets : int, optional
        Number of checkpoints per folder. When omitted, the notebook-level
        variable ``num_nets`` is used, which is how earlier versions of
        this function obtained it.

    Returns
    -------
    numpy.ndarray, shape (len(etas), depth)
        ``result[ei, i]`` is the median alpha estimate for layer ``i`` in
        folder ``LR<ei>``.

    Raises
    ------
    NameError
        If ``num_nets`` is neither passed nor defined at notebook level.
    ValueError
        If ``num_nets < 1`` or a layer slot below ``depth`` was never filled.
    IndexError
        If a checkpoint has more tracked layers than ``depth``.
    """
    if num_nets is None:
        # Backward compatibility: existing calls pass three arguments and
        # rely on a global `num_nets` being defined before the call.
        try:
            num_nets = globals()['num_nets']
        except KeyError:
            raise NameError(
                "num_nets was not passed and no notebook-level "
                "`num_nets` is defined")
    if num_nets < 1:
        raise ValueError("num_nets must be at least 1, got {}".format(num_nets))

    # Pre-filled with -1; every entry is overwritten below.
    alphas_multi = np.zeros((len(etas), depth)) - 1

    for ei, _eta in tqdm(enumerate(etas), total=len(etas)):
        tmp_path = PATH + 'LR{}/'.format(ei)
        print(tmp_path)

        averaged = _average_layer_weights(tmp_path, num_nets, depth)

        for i, layer_weights in enumerate(averaged):
            if layer_weights is None:
                raise ValueError(
                    "no weights found for layer {} in {!r}; depth={} is larger "
                    "than the number of tracked layers".format(i, tmp_path, depth))
            rows = torch.from_numpy(_centered_rows(layer_weights))
            # Median over several block sizes to reduce the dependence on m.
            alphas_multi[ei, i] = np.median(
                [alpha_estimator_multi(mm, rows) for mm in (2, 5, 10)])

    return alphas_multi


In [25]:
# Root folder of the saved checkpoints; compute_alphas_centralized appends
# 'LR<index>/model<k>.pth' to it.
PATH = './VGG-CIFAR10-unif-umut/'
# Presumably the learning rates behind the LR<index>/ folders -- only the
# positions in this list are used when the folder names are built.
lr_list = [0.005, 0.01, 0.015, 0.02]

# Number of tracked layers per network (columns of the result array).
depth = 9
# Number of model<k>.pth checkpoints per folder; compute_alphas_centralized
# reads this notebook-level variable.
num_nets = 100
nets = []  # not used by the call below
alphas_mc_cent = compute_alphas_centralized(lr_list, PATH, depth)

0it [00:00, ?it/s]

./VGG-CIFAR10-unif-umut/LR0/


1it [02:21, 141.21s/it]

./VGG-CIFAR10-unif-umut/LR1/


2it [04:38, 139.14s/it]

./VGG-CIFAR10-unif-umut/LR2/


3it [07:01, 140.75s/it]

./VGG-CIFAR10-unif-umut/LR3/


4it [09:27, 141.80s/it]


In [26]:
# Estimated alphas: one row per entry of lr_list, one column per tracked layer.
alphas_mc_cent

array([[1.72248149, 1.86960766, 1.98689968, 1.97568236, 2.01579974,
        2.00915695, 2.02251512, 2.01675313, 2.20403633],
       [1.64685283, 1.77267212, 1.9501445 , 1.95695181, 1.99508851,
        1.99279813, 1.99549975, 1.98474122, 2.21496663],
       [1.57055818, 1.71379685, 1.95433893, 1.94206612, 1.98588086,
        1.98768144, 1.98113687, 1.93398328, 2.27443019],
       [1.51607948, 1.71635943, 1.92387462, 1.94022873, 1.96415099,
        1.96731349, 1.97351592, 1.90611496, 2.14515329]])

In [27]:
# Median over the layer axis: one value per entry of lr_list.
np.median(alphas_mc_cent, axis=1)

array([2.00915695, 1.98474122, 1.95433893, 1.94022873])

In [28]:
# Mean over the layer axis: one value per entry of lr_list.
np.mean(alphas_mc_cent, axis=1)

array([1.98032583, 1.94552394, 1.92709697, 1.89475454])

In [29]:
# Smallest per-layer estimate for each entry of lr_list.
np.min(alphas_mc_cent, axis=1)

array([1.72248149, 1.64685283, 1.57055818, 1.51607948])