#### Imports

In [1]:
import numpy as np
import matplotlib.pyplot as plt

import torch, os
from torchvision import datasets, transforms
from torch import optim, nn

from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules
# before each cell executes, so edits to local .py files are picked up.
%load_ext autoreload
%autoreload 2

#### Load data and create loaders

In [3]:
# Preprocessing pipeline: convert each image to a tensor, then apply
# Normalize with mean (0.5,) and std (0.5,).
preprocessing_steps = [
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
]
transform = transforms.Compose(preprocessing_steps)

# Download (if necessary) the MNIST training split and serve it in
# shuffled mini-batches of 64 samples.
trainset = datasets.MNIST(
    root='~/.pytorch/MNIST_data/',
    download=True,
    train=True,
    transform=transform,
)
trainloader = torch.utils.data.DataLoader(dataset=trainset, batch_size=64, shuffle=True)

In [69]:
# Specify architecture and trial; `tag` names the output directory of this run
arch = 'dense'
trial = 4
tag = f'simple_mnist_{arch}_trial_{trial}'

# Create one sub-folder per artefact type under <cwd>/<tag>/ (no error if present)
sub_folders = ['Figures', 'Computed_Values', 'Weights']
for folder in sub_folders:
    path = os.path.join(os.getcwd(), tag + '/' + folder)
    os.makedirs(path, exist_ok=True)

#### Architecture

In [70]:
if arch == 'dense':
    class Network(nn.Module):
        """Fully connected classifier: 784 inputs, five hidden layers of 100
        units with ReLU, and a 10-unit output followed by LogSoftmax.

        The attribute names fc1..fc6 determine the state_dict keys
        ('fc1.weight', ...) that the analysis cells read back, so they must
        not be renamed.
        """

        def __init__(self):
            super().__init__()
            # Hidden layers: 784 -> 100 -> 100 -> 100 -> 100 -> 100
            self.fc1 = nn.Linear(784, 100)
            self.fc2 = nn.Linear(100, 100)
            self.fc3 = nn.Linear(100, 100)
            self.fc4 = nn.Linear(100, 100)
            self.fc5 = nn.Linear(100, 100)
            # Output layer, 10 units - one for each digit
            self.fc6 = nn.Linear(100, 10)

            self.relu = nn.ReLU()
            self.logsoftmax = nn.LogSoftmax(dim=1)

        def forward(self, x):
            ''' Forward pass; returns the LogSoftmax (over dim 1) of the output layer. '''
            for hidden in (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5):
                x = self.relu(hidden(x))
            return self.logsoftmax(self.fc6(x))

elif arch == 'conv':
    class Network(nn.Module):
        """Small convolutional classifier: two 3x3 convolutions with 32
        output channels (padding 1) and ReLU, then a linear layer from
        32*28*28 features to 10 outputs followed by LogSoftmax.
        """

        def __init__(self):
            super().__init__()
            # padding = kernel_size // 2 keeps the spatial size unchanged
            self.conv1 = nn.Conv2d(1, 32, kernel_size = 3, padding = 3//2)
            self.conv2 = nn.Conv2d(32, 32, kernel_size = 3, padding = 3//2)
            # Flattened conv output (32 channels x 28 x 28) -> 10 classes
            self.linear = nn.Linear(32*28*28*1, 10)

            self.relu = nn.ReLU()
            self.logsoftmax = nn.LogSoftmax(dim=1)

        def forward(self, x):
            ''' Forward pass; returns the LogSoftmax (over dim 1) of the linear layer. '''
            x = self.relu(self.conv1(x))
            x = self.relu(self.conv2(x))
            # Flatten everything except the batch dimension
            x = x.view(x.shape[0], -1)
            return self.logsoftmax(self.linear(x))

else:
    # Fail here with a clear message instead of a NameError on `Network()` below
    raise ValueError(f"Unknown architecture {arch!r}; expected 'dense' or 'conv'")

model = Network()
model

Network(
  (fc1): Linear(in_features=784, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=100, bias=True)
  (fc3): Linear(in_features=100, out_features=100, bias=True)
  (fc4): Linear(in_features=100, out_features=100, bias=True)
  (fc5): Linear(in_features=100, out_features=100, bias=True)
  (fc6): Linear(in_features=100, out_features=10, bias=True)
  (relu): ReLU()
  (logsoftmax): LogSoftmax(dim=1)
)

In [71]:
# Query CUDA availability once; `use_cuda` is consistently a bool.
use_cuda = torch.cuda.is_available()
print('CUDA is available' if use_cuda else 'CUDA is not available')

# First GPU when CUDA is available, otherwise the CPU.
device = torch.device("cuda:0" if use_cuda else "cpu")

CUDA is available


In [72]:
# Plain SGD over all model parameters with learning rate 0.01
optimizer = optim.SGD(params=model.parameters(), lr=0.01)
# Negative log-likelihood loss; the network's last layer is LogSoftmax
criterion = nn.NLLLoss()

In [73]:
# Move the network and the loss module to the selected device.
# The last expression is what the notebook displays as the cell output.
model.to(device)
criterion.to(device)

NLLLoss()

In [74]:
# Number of training epochs; also the number of checkpoints written and later analysed
epochs = 20

In [None]:
# Train for `epochs` passes over the training set and write one checkpoint
# (the model's state_dict) per epoch under <tag>/Weights/.
for e in range(epochs):
    running_loss = 0
    for images, labels in trainloader:
        # Flatten MNIST images into a 784 long vector
        if arch == 'dense':
            images = images.view(images.shape[0], -1)

        # Move the batch to the model's device once
        images, labels = images.to(device), labels.to(device)

        # Standard training step: reset gradients, forward, backward, update
        optimizer.zero_grad()
        output = model(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Report the mean batch loss of this epoch and checkpoint the weights
    # (epochs are numbered from 1 in the file names).
    print(f"Training loss: {running_loss/len(trainloader)}")
    torch.save(model.state_dict(), tag+'/Weights/ep_'+str(e+1)+'.pt')

#### Load weights and compute NP, EP (layer-wise)

In [75]:
from fc_layer_neural_persistence import layer_neural_persistence


def _edges_to_sorted_array(edges):
    """Convert an edge list into an (n_edges, 3) float array sorted by weight.

    Each edge is read as ``edge[0]``, ``edge[1]`` and ``edge[2]["weight"]``
    (presumably networkx-style ``(u, v, data)`` triples - confirm against
    ``layer_neural_persistence``).

    Columns of the result: first endpoint, second endpoint shifted by
    ``edges[-1][0] + 1``, and weight. Rows are ordered by descending weight.
    NOTE(review): the shift looks like it re-indexes the second endpoint to
    start at zero, assuming the last edge carries the largest first-endpoint
    index - confirm.
    """
    rows = np.zeros((len(edges), 3), dtype=float)
    if len(edges) > 0:
        # Loop-invariant offset, computed once per layer
        offset = edges[-1][0] + 1
        for r, edge in enumerate(edges):
            rows[r, 0] = edge[0]
            rows[r, 1] = edge[1] - offset
            rows[r, 2] = edge[2]["weight"]
    # argsort is ascending on the weight column; flipud makes it descending
    return np.flipud(rows[np.argsort(rows[:, -1])])


if arch == 'conv':
    nLayers = 3
elif arch == 'dense':
    nLayers = 6
else:
    raise ValueError(f"Unknown architecture {arch!r}; expected 'dense' or 'conv'")

nBins = 10

# NPs[epoch, layer]: first value returned by layer_neural_persistence
NPs = np.zeros((epochs, nLayers))
# edge_vals[(epoch, layer)]: sorted edge array (dense architecture only)
edge_vals = {}
# edge_temp[('Dense k', epoch)]: raw edge list returned for layer k
edge_temp = {}

# Per-layer weight histograms, one row per epoch.
# NOTE: these are only filled for the conv architecture; for the dense
# architecture they remain all zeros.
w1_hist = np.zeros((epochs, nBins))
w2_hist = np.zeros((epochs, nBins))
w3_hist = np.zeros((epochs, nBins))
if arch == 'dense':
    w4_hist = np.zeros((epochs, nBins))
    w5_hist = np.zeros((epochs, nBins))
    w6_hist = np.zeros((epochs, nBins))

for ii in tqdm(range(epochs)):

    # `m_inter` is the saved state_dict of epoch ii+1. map_location='cpu'
    # lets checkpoints written on a GPU load on a CPU-only machine and
    # guarantees the tensors can be converted to numpy directly.
    m_inter = torch.load(tag+'/Weights/ep_'+str(ii+1)+'.pt', map_location='cpu')

    if arch == 'conv':
        w1 = m_inter['conv1.weight'].numpy()
        w2 = m_inter['conv2.weight'].numpy()
        w3 = m_inter['linear.weight'].numpy().T

        # NOTE(review): this branch passes ('NP', None, True) while the dense
        # branch passes (None, True) - confirm both match the signature of
        # layer_neural_persistence.
        NPs[ii,0], _ = layer_neural_persistence(w1,'NP',None,True, in_out_sizes = ((30,30),(28,28)))
        NPs[ii,1], _ = layer_neural_persistence(w2,'NP',None,True, in_out_sizes = ((30,30),(28,28)))
        NPs[ii,2], _ = layer_neural_persistence(w3,'NP',None,True)

        w1_hist[ii], w1_bin_edges = np.histogram(w1, bins=nBins, density=True)
        w2_hist[ii], w2_bin_edges = np.histogram(w2, bins=nBins, density=True)
        w3_hist[ii], w3_bin_edges = np.histogram(w3, bins=nBins, density=True)

    elif arch == 'dense':

        # Transposed weight matrices of fc1..fc6; the individual names are
        # kept for any cell that inspects them afterwards.
        dense_weights = [m_inter[f'fc{k}.weight'].numpy().T for k in range(1, nLayers + 1)]
        w1, w2, w3, w4, w5, w6 = dense_weights

        for k, w in enumerate(dense_weights):
            NPs[ii, k], edges = layer_neural_persistence(w, None, True)
            edge_temp[(f'Dense {k+1}', ii)] = edges
            edge_vals[(ii, k)] = _edges_to_sorted_array(edges)

    # EP computation is currently disabled (note that a later cell plots `EPs`):
    # EPs[ii,0], _ = layer_neural_persistence(w1,'EP',True)
    # EPs[ii,1], _ = layer_neural_persistence(w2,'EP',True)
    # EPs[ii,2], _ = layer_neural_persistence(w3,'EP',True)

np.save(tag+'/Computed_Values/NPs', NPs)
# edge_vals is a dict, so np.save stores it as a pickled object array;
# reading it back requires np.load(..., allow_pickle=True).
np.save(tag+'/Computed_Values/edge_vals', edge_vals)

100%|██████████| 20/20 [00:47<00:00,  2.38s/it]


In [39]:
# np.sort(edge_vals[(10,1)],axis=-1)

In [41]:
# edge_vals[0,0] = edge_vals[0,0][np.argsort(edge_vals[0,0][:, -1])]

In [47]:
# Inspect the stored edge array for key (1, 1)
edge_vals[(1, 1)]

KeyError: (-1, -1)

In [None]:
# TODO: one-shot magnitude pruning of the dense network to a given compression ratio
# TODO: get the accuracy
# TODO: apply a mask on the weights w.r.t. NP pruning

## Compute averages and standard deviations of the NPs

In [10]:
# Architecture whose saved results are aggregated below (used to build the file paths)
arch = 'dense'

In [11]:
# load all data
NP1 = np.load('simple_mnist_'+arch+'_trial_0/Computed_Values/NPs.npy')
NP2 = np.load('simple_mnist_'+arch+'_trial_1/Computed_Values/NPs.npy')
NP3 = np.load('simple_mnist_'+arch+'_trial_2/Computed_Values/NPs.npy')
NP4 = np.load('simple_mnist_'+arch+'_trial_3/Computed_Values/NPs.npy')
NP5 = np.load('simple_mnist_'+arch+'_trial_4/Computed_Values/NPs.npy')

In [None]:
# Stack the per-trial arrays into one (n_trials, n_epochs, n_layers) array.
# np.stack raises a ValueError if the trials do not share the same shape.
all_NP = np.stack((NP1, NP2, NP3, NP4, NP5))

# Take the layer count from the data rather than hard-coding it per
# architecture, so it always agrees with what the compute cell saved.
nLayers = all_NP.shape[-1]

# Per-layer mean and standard deviation across trials, one value per epoch
layer_mu  = [all_NP[:, :, n].mean(axis = 0) for n in range(nLayers)]
layer_std = [all_NP[:, :, n].std(axis = 0) for n in range(nLayers)]

In [None]:
import itertools

# Endless iterator over matplotlib marker codes, so each plotted line gets the next one
marker = itertools.cycle(',+.o*^')

In [None]:
# Plot!
# Left panel: per-layer mean NP across trials with a +/- one std band.
# Right panel: per-trial NP summed over layers, with a band given by the
# std across layers.

if arch == 'dense':
    plot_labels = ['Layer '] * nLayers
else:
    # All layers but the last are labelled as filters, the last as dense
    plot_labels = ['Filter '] * (nLayers - 1) + ['Dense ']

# Take the sizes from the data instead of hard-coding 5 trials / 20 epochs,
# and compute the per-trial aggregates once rather than in every iteration.
n_trials, n_epochs = all_NP.shape[:2]
epoch_axis = np.arange(n_epochs)
total_NP = np.sum(all_NP, axis = 2)
spread_NP = np.std(all_NP, axis = 2)

plt.figure(figsize=(12,6))

plt.subplot(1,2,1)
for n in range(nLayers):
    plt.plot(layer_mu[n], label=plot_labels[n]+str(n+1), marker = next(marker))
    plt.fill_between(epoch_axis, layer_mu[n]+layer_std[n], layer_mu[n]-layer_std[n],
                     alpha=0.2, antialiased=True)

plt.legend()
plt.xticks([0,5,10,15,20])
plt.xlabel('Training Epochs')
plt.ylabel('Normalized Neural Persistence')

plt.subplot(1,2,2)
for n in range(n_trials):
    plt.plot(total_NP[n], label='Trial '+str(n+1), marker = next(marker))
    plt.fill_between(epoch_axis, total_NP[n]+spread_NP[n], total_NP[n]-spread_NP[n],
                     alpha=0.2, antialiased=True)

plt.xticks([0,5,10,15,20])
plt.legend()
plt.xlabel('Training Epochs')
plt.ylabel('Total Normalized Neural Persistence')

plt.savefig('NP_Plots_'+arch, transparent = True, bbox_inches = 'tight', pad_inches = 0.01)

In [None]:
# Plot the EP curves of the first three layers and their sum.
# `EPs` is only available once the EP computation (currently commented out
# in the compute cell) has been enabled and run, so guard against a
# NameError when the notebook is executed top to bottom.
if 'EPs' not in globals():
    print('EPs has not been computed; enable the EP computation before plotting.')
else:
    ff1 = plt.figure(figsize=(10,8))

    # One panel per layer (columns 0..2 of EPs)
    for k in range(3):
        plt.subplot(2, 2, k + 1)
        plt.plot(EPs[:,k],label='EP - L'+str(k+1))
        plt.legend()

    # Fourth panel: sum over the last axis of EPs
    plt.subplot(224)
    plt.plot(np.sum(EPs,axis=-1),label='EP - sum')
    plt.legend()

    plt.show()