In [None]:
cd MNIST/AlexNet

In [6]:
import numpy as np
import sys, os, random
import matplotlib.pyplot as plt
import pickle, gzip
from tqdm import tqdm,tqdm_notebook
import torch
import torchvision
from torch import nn
from torch.autograd import Variable
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SubsetRandomSampler

In [7]:
# Mini-batch size shared by every DataLoader in this notebook.
batch_size = 128
# Single seed reused for the Python, NumPy and PyTorch random number generators.
seed_num = 81

# Seed every RNG the notebook touches so that a rerun draws the same numbers.
random.seed(seed_num)
np.random.seed(seed_num)
torch.manual_seed(seed_num)
torch.cuda.manual_seed(seed_num)

# cuDNN settings for reproducibility: benchmark mode autotunes kernels and may
# pick different algorithms between runs, so it is switched off while the
# deterministic flag is switched on.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [8]:
# Training pipeline: random rotation (RandomRotation(35)) and random horizontal
# flip, then tensor conversion and normalisation with mean 0.1307 / std 0.3081.
# NOTE(review): a horizontal flip is presumably not label-preserving for digits,
# and the validation loader below reads from the same `train_data`, so it sees
# these random augmentations too -- confirm this is intended.
train_transform = transforms.Compose(
    [
        transforms.RandomRotation(35),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.1307], [0.3081]),
    ]
)

# Evaluation pipeline: tensor conversion and the same normalisation, no augmentation.
test_transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize([0.1307], [0.3081]),
    ]
)

# MNIST train / test splits, downloaded into the current working directory.
train_data = datasets.MNIST(
    os.getcwd(), train=True, download=True, transform=train_transform
)
test_data = datasets.MNIST(
    os.getcwd(), train=False, download=True, transform=test_transform
)

In [9]:
# Shuffle the training indices with a fixed seed, then use the first 50000
# shuffled indices for training and all remaining ones for validation.
split_index = 50000
np.random.seed(seed_num)
len_trainset = len(train_data)
index_list = list(range(len_trainset))
np.random.shuffle(index_list)
train_indices = index_list[:split_index]
valid_indices = index_list[split_index:]

# Samplers that draw (in random order) only from their own index subset.
np.random.seed(seed_num)
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(valid_indices)

torch.manual_seed(seed_num)

# Training and validation loaders both read `train_data`; the samplers keep
# their samples disjoint. The test loader reads `test_data` in shuffled order.
train_iterator = DataLoader(
    train_data, batch_size=batch_size, sampler=train_sampler
)
val_iterator = DataLoader(
    train_data, batch_size=batch_size, sampler=valid_sampler
)
test_iterator = DataLoader(
    test_data, batch_size=batch_size, shuffle=True
)

In [10]:
# AlexNet Model

class AlexNet(nn.Module):
    """Small AlexNet-style CNN for single-channel images.

    The network is a convolutional feature extractor (`features`) followed by
    a three-layer fully connected head (`classifier`). The first linear layer
    expects 256 feature maps of size 6x6, which is what a 1x28x28 input yields
    after the unpadded first convolution and the two 2x2 max-pools.

    Args:
        output_dim: number of output logits produced by the last linear layer.
    """

    def __init__(self, output_dim):
        super().__init__()

        # Conv2d positional arguments: in_channels, out_channels, kernel_size[, stride]
        conv_stack = [
            nn.Conv2d(1, 32, 3, 1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 64, 3, padding=1),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 128, 3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 256, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        self.features = nn.Sequential(*conv_stack)

        head = [
            nn.Dropout(0.1),
            nn.Linear(256 * 6 * 6, 256),
            nn.ReLU(inplace=True),
            nn.Linear(256, 256),
            nn.ReLU(inplace=True),
            nn.Linear(256, output_dim),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Return the logits for a batch `x` of images."""
        feature_maps = self.features(x)
        # Flatten everything except the batch dimension before the linear head.
        flat = feature_maps.view(feature_maps.size(0), -1)
        return self.classifier(flat)

In [11]:
# NOTE(review): `unit` is not read by the later visible cells (the dead-neuron
# pass is called with unit = 256 explicitly) -- confirm whether it is still needed.
unit = 128

# Re-seed so the freshly built network is initialised identically on every run.
torch.manual_seed(seed_num)

# Build a 10-class network and print its layer summary.
model = AlexNet(10)
print("Model:\n", model)

Model:
 AlexNet(
  (features): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): ReLU(inplace=True)
    (6): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): ReLU(inplace=True)
    (9): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (10): ReLU(inplace=True)
    (11): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU(inplace=True)
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Dropout(p=0.1, inplace=False)
    (1): Linear(in_features=9216

In [12]:
# Names of every learnable tensor of the model, in registration order.
layer_name = [param_name for param_name, _ in model.named_parameters()]
print(layer_name)

['features.0.weight', 'features.0.bias', 'features.1.weight', 'features.1.bias', 'features.3.weight', 'features.3.bias', 'features.6.weight', 'features.6.bias', 'features.7.weight', 'features.7.bias', 'features.9.weight', 'features.9.bias', 'features.11.weight', 'features.11.bias', 'classifier.1.weight', 'classifier.1.bias', 'classifier.3.weight', 'classifier.3.bias', 'classifier.5.weight', 'classifier.5.bias']


In [13]:
# Restore the ternary model object from disk and move it to the GPU.
# NOTE(review): torch.load unpickles a complete Python object here, so only
# load checkpoint files that come from a trusted source.
model = torch.load('AlexNet_mnist_Quant.pt').cuda()
print("Loading weights done !")

# Count the non-zero entries (the +w / -w ternary weights) of every learnable
# tensor, printing the per-tensor count and accumulating the grand total.
totalParams = 0
quant_state = model.state_dict()
for i in layer_name:
    nonzero = (quant_state[i] != 0).sum()
    print(i, nonzero)
    totalParams += nonzero

print("Total Parameters:", totalParams, '\n')

Loading weights done !
features.0.weight tensor(135, device='cuda:0')
features.0.bias tensor(22, device='cuda:0')
features.1.weight tensor(24, device='cuda:0')
features.1.bias tensor(13, device='cuda:0')
features.3.weight tensor(452, device='cuda:0')
features.3.bias tensor(32, device='cuda:0')
features.6.weight tensor(492, device='cuda:0')
features.6.bias tensor(35, device='cuda:0')
features.7.weight tensor(94, device='cuda:0')
features.7.bias tensor(69, device='cuda:0')
features.9.weight tensor(502, device='cuda:0')
features.9.bias tensor(26, device='cuda:0')
features.11.weight tensor(366, device='cuda:0')
features.11.bias tensor(19, device='cuda:0')
classifier.1.weight tensor(1559, device='cuda:0')
classifier.1.bias tensor(36, device='cuda:0')
classifier.3.weight tensor(287, device='cuda:0')
classifier.3.bias tensor(47, device='cuda:0')
classifier.5.weight tensor(222, device='cuda:0')
classifier.5.bias tensor(6, device='cuda:0')
Total Parameters: tensor(4438, device='cuda:0') 



In [14]:
# Model's performance on test set
#
# Inference is run one mini-batch at a time instead of one image at a time,
# on whatever device the loaded model already lives on, so the cell no longer
# depends on a CUDA-only branch to define its inputs.
# NOTE(review): predictions should match per-image evaluation up to
# floating-point differences between batch sizes -- confirm the reported
# accuracy is unchanged.

correct_count, all_count = 0, 0
model.eval()
eval_device = next(model.parameters()).device

with torch.no_grad():
    for images, labels in test_iterator:
        images = images.to(eval_device, dtype=torch.float32)
        labels = labels.to(eval_device)

        # Predicted class = index of the largest logit along the class dimension.
        pred_labels = model(images).argmax(dim=1)

        correct_count += (pred_labels == labels).sum().item()
        all_count += labels.size(0)

print("Number Of Images Tested =", all_count)
print("\nModel Test Accuracy =", (correct_count/(all_count)))


Number Of Images Tested = 10000

Model Test Accuracy = 0.881


In [15]:
# Duplicate architecture of AlexNet Model

class AlexNet1(nn.Module):
    """AlexNet copy that counts zero ReLU outputs of its two hidden FC layers.

    The convolutional part is identical to `AlexNet.features`; the fully
    connected head is unrolled into `fc1`, `fc3` and `fc5` so the activations
    after each hidden ReLU can be inspected during `forward`.

    Args:
        output_dim: number of output logits produced by `fc5`.
        dn_info: dict mapping str(unit index) -> count; incremented for every
            sample whose activation after the `fc1` ReLU is exactly zero.
        dn_info1: same as `dn_info`, for the activation after the `fc3` ReLU.

    Both dicts are mutated in place on every forward pass and must already
    contain a key for each unit that can be zero (a missing key raises
    KeyError).
    """

    def __init__(self, output_dim, dn_info, dn_info1):
        super().__init__()

        self.dn_info = dn_info       # zero-activation counts after fc1 + ReLU
        self.dn_info1 = dn_info1     # zero-activation counts after fc3 + ReLU

        # Conv2d positional arguments: in_channels, out_channels, kernel_size[, stride]
        self.features = nn.Sequential(
            nn.Conv2d(1, 32, 3, 1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace = True),
            nn.Conv2d(32, 64, 3, padding = 1),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.ReLU(inplace = True),
            nn.Conv2d(64, 128, 3, padding = 1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace = True),
            nn.Conv2d(128, 256, 3, padding = 1),
            nn.ReLU(inplace = True),
            nn.Conv2d(256, 256, 3, padding = 1),
            nn.ReLU(inplace = True),
            nn.MaxPool2d(kernel_size=2, stride=2),)

        self.relu = nn.ReLU(inplace = True)
        self.dp = nn.Dropout(0.1)

        # Fully connected 1
        self.fc1 = nn.Linear(256 * 6 * 6, 256, bias= True)

        # Fully connected 2
        self.fc3 = nn.Linear(256, 256,bias= True)

        # Fully connected 3
        self.fc5 = nn.Linear(256,output_dim, bias= True)

    @staticmethod
    def _record_dead(activations, counter):
        """Add, per unit, the number of samples in the batch whose activation is 0.

        `activations` is a 2-D (batch, units) tensor. The per-unit count is
        computed in one tensor reduction and transferred to the host once,
        rather than one `.item()` call per zero entry.
        """
        zero_counts = (activations == 0.).sum(dim=0).tolist()
        for unit_idx, n_zero in enumerate(zero_counts):
            if n_zero:
                counter[str(unit_idx)] += n_zero

    def forward(self, x):
        """Return the logits for batch `x`, updating both counters on the way."""
        x = self.features(x)
        # Flatten everything except the batch dimension.
        h = x.view(x.size(0), -1)
        h = self.dp(h)

        x = self.relu(self.fc1(h))
        self._record_dead(x, self.dn_info)

        x = self.relu(self.fc3(x))
        self._record_dead(x, self.dn_info1)

        x = self.fc5(x)

        return x


In [16]:
# State-dict entry names of the instrumented network, taken from a throwaway
# instance built with empty counters (no forward pass is run on it).
keys = list(AlexNet1(10, {}, {}).state_dict())
print(keys)

['features.0.weight', 'features.0.bias', 'features.1.weight', 'features.1.bias', 'features.1.running_mean', 'features.1.running_var', 'features.1.num_batches_tracked', 'features.3.weight', 'features.3.bias', 'features.6.weight', 'features.6.bias', 'features.7.weight', 'features.7.bias', 'features.7.running_mean', 'features.7.running_var', 'features.7.num_batches_tracked', 'features.9.weight', 'features.9.bias', 'features.11.weight', 'features.11.bias', 'fc1.weight', 'fc1.bias', 'fc3.weight', 'fc3.bias', 'fc5.weight', 'fc5.bias']


In [17]:
from collections import OrderedDict

# Re-key the loaded model's tensors so AlexNet1 can load them:
#   * names that AlexNet1 already has are copied under the same name;
#   * "classifier.<N>.<suffix>" becomes "fc<N>.<suffix>"
#     (the slice [11:] drops the 11-character prefix "classifier.");
#   * anything else is skipped.
weights_q = OrderedDict()
quantised = model.state_dict()

for n, source_tensor in quantised.items():
    if n in keys:
        target_name = n
    elif n.startswith("classifier"):
        target_name = 'fc' + n[11:]
    else:
        continue

    weights_q[target_name] = source_tensor.clone()
    weights_q[target_name].requires_grad = False

state_dict = weights_q

In [18]:
# Show the re-keyed entry names, in insertion order, to confirm that every
# classifier.* tensor was renamed to its fc* counterpart.
print(state_dict.keys())

odict_keys(['features.0.bias', 'features.0.weight', 'features.1.bias', 'features.1.weight', 'features.1.running_mean', 'features.1.running_var', 'features.1.num_batches_tracked', 'features.3.bias', 'features.3.weight', 'features.6.bias', 'features.6.weight', 'features.7.bias', 'features.7.weight', 'features.7.running_mean', 'features.7.running_var', 'features.7.num_batches_tracked', 'features.9.bias', 'features.9.weight', 'features.11.bias', 'features.11.weight', 'fc1.bias', 'fc1.weight', 'fc3.bias', 'fc3.weight', 'fc5.bias', 'fc5.weight'])


In [19]:
def getDeadN_info(dn_info, dn_info1, unit, state_dict, ds, nameOftheSet):
    """Run AlexNet1 over a data loader, filling zero-activation counters.

    Args:
        dn_info: dict filled in place; after the call, dn_info[str(i)] is the
            number of samples for which unit i was zero after the fc1 ReLU.
        dn_info1: same as `dn_info`, for the activations after the fc3 ReLU.
        unit: number of units to track; keys "0" .. str(unit - 1) are reset to 0.
        state_dict: weights loaded into a freshly built AlexNet1(10, ...).
        ds: iterable yielding (images, labels) mini-batches.
        nameOftheSet: label used in the printed accuracy line.

    Returns:
        None. The sample count and accuracy are printed; the counters are the
        real output.

    Inference runs one mini-batch at a time and on the CPU when CUDA is not
    available.
    NOTE(review): results should match per-image evaluation up to
    floating-point differences between batch sizes -- confirm the reported
    numbers are unchanged.
    """
    # Start every tracked unit at zero so counts never carry over between calls.
    for i in range(unit):
        dn_info[str(i)] = 0
        dn_info1[str(i)] = 0

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model1 = AlexNet1(10, dn_info, dn_info1).to(device)
    model1.load_state_dict(state_dict)
    model1.eval()

    correct_count, all_count = 0, 0

    with torch.no_grad():
        for images, labels in ds:
            images = images.to(device, dtype=torch.float32)
            labels = labels.to(device)

            # The forward pass also updates dn_info / dn_info1 for the whole batch.
            pred_labels = model1(images).argmax(dim=1)

            correct_count += (pred_labels == labels).sum().item()
            all_count += labels.size(0)

    print("Number Of Images =", all_count)
    print(f"Model {nameOftheSet} Accuracy =", (correct_count/(all_count)))

In [20]:
# Empty counters, one pair per data split. The un-suffixed dict receives the
# counts taken after the fc1 ReLU, the "1"-suffixed dict those after the fc3 ReLU.
dn_info_train, dn_info_val, dn_info_test = {}, {}, {}
dn_info_train1, dn_info_val1, dn_info_test1 = {}, {}, {}

# Each call resets its counters, runs the split through the network and prints
# the split's accuracy.
getDeadN_info(
    dn_info=dn_info_train,
    dn_info1=dn_info_train1,
    unit=256,
    state_dict=state_dict,
    ds=train_iterator,
    nameOftheSet="Training",
)
getDeadN_info(
    dn_info=dn_info_val,
    dn_info1=dn_info_val1,
    unit=256,
    state_dict=state_dict,
    ds=val_iterator,
    nameOftheSet="Validation",
)
getDeadN_info(
    dn_info=dn_info_test,
    dn_info1=dn_info_test1,
    unit=256,
    state_dict=state_dict,
    ds=test_iterator,
    nameOftheSet="Test",
)

Number Of Images = 50000
Model Training Accuracy = 0.84964
Number Of Images = 10000
Model Validation Accuracy = 0.8434
Number Of Images = 10000
Model Test Accuracy = 0.881


In [21]:
# Largest zero-activation count of any single unit, per split, for the
# activations after the fc1 ReLU ...
max_dn_train, max_dn_val, max_dn_test = (
    max(counter.values())
    for counter in (dn_info_train, dn_info_val, dn_info_test)
)

# ... and for the activations after the fc3 ReLU.
max_dn_train1, max_dn_val1, max_dn_test1 = (
    max(counter.values())
    for counter in (dn_info_train1, dn_info_val1, dn_info_test1)
)

# Displayed as the cell result.
max_dn_train, max_dn_val, max_dn_test, max_dn_train1, max_dn_val1, max_dn_test1

(50000, 10000, 10000, 50000, 10000, 10000)

In [22]:
# A unit is dead on the test set only if its activation was zero for every
# test image, i.e. its count equals the number of test images. Comparing with
# the largest observed count instead would label the most-often-silent units
# as dead even when no unit is silent on all images.
n_test_images = len(test_data)

# Dead units after the fc1 ReLU (keys are unit indices stored as strings).
dead_n_idx = [
    unit_key
    for unit_key, zero_count in dn_info_test.items()
    if zero_count == n_test_images
]

# Dead units after the fc3 ReLU.
dead_n_idx1 = [
    unit_key
    for unit_key, zero_count in dn_info_test1.items()
    if zero_count == n_test_images
]

In [23]:
# NOTE(review): the printed labels look off by one layer -- dead_n_idx comes
# from the counter filled after the fc1 ReLU and dead_n_idx1 from the one
# filled after the fc3 ReLU; confirm the intended wording.
print("Penultimate Layer :",len(dead_n_idx)) # Number of dead units among the fc1 ReLU outputs (inputs to fc3)
print("Output Layer :",len(dead_n_idx1)) # Number of dead units among the fc3 ReLU outputs (inputs to fc5)

Penultimate Layer : 193
Output Layer : 147


In [24]:
print(dead_n_idx) # Indices (stored as strings) of the dead units after the fc1 ReLU

['0', '1', '2', '3', '4', '6', '7', '9', '10', '11', '12', '13', '14', '15', '17', '18', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '32', '33', '35', '37', '40', '41', '42', '43', '44', '46', '47', '48', '49', '50', '52', '53', '55', '56', '57', '58', '59', '61', '62', '64', '65', '66', '69', '70', '71', '73', '75', '76', '78', '79', '80', '81', '82', '83', '84', '85', '86', '88', '89', '90', '92', '93', '95', '96', '97', '98', '99', '100', '101', '103', '104', '107', '108', '109', '110', '111', '112', '113', '114', '115', '117', '118', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '132', '133', '136', '137', '139', '140', '143', '144', '145', '146', '147', '148', '149', '150', '151', '157', '158', '159', '160', '161', '162', '163', '165', '166', '167', '168', '169', '171', '172', '173', '175', '176', '177', '179', '181', '182', '184', '185', '186', '187', '189', '192', '195', '196', '197', '198', '199', '200', '201', '203', '204', '205', 

In [25]:
print(dead_n_idx1) # Indices (stored as strings) of the dead units after the fc3 ReLU

['0', '5', '6', '10', '12', '17', '19', '22', '24', '26', '29', '32', '33', '34', '36', '37', '38', '39', '41', '42', '43', '48', '49', '50', '51', '53', '54', '57', '59', '62', '66', '67', '68', '69', '70', '72', '73', '74', '75', '76', '78', '79', '80', '81', '82', '83', '87', '91', '92', '94', '95', '96', '98', '100', '101', '103', '104', '106', '107', '110', '111', '115', '116', '117', '118', '121', '123', '126', '128', '129', '132', '134', '135', '137', '139', '141', '144', '145', '147', '149', '150', '151', '152', '153', '158', '159', '164', '166', '168', '170', '171', '172', '173', '174', '175', '176', '178', '179', '180', '181', '182', '186', '187', '189', '191', '194', '195', '197', '199', '200', '201', '202', '204', '206', '207', '209', '212', '213', '216', '217', '218', '219', '221', '222', '223', '224', '225', '226', '229', '233', '235', '236', '237', '238', '239', '240', '241', '243', '244', '245', '246', '248', '249', '251', '252', '253', '255']


In [26]:
# Alias, not a copy: state_dict1 and state_dict refer to the same OrderedDict,
# so a change made through one name is visible through the other.
state_dict1 = state_dict

In [27]:
# Distinct values of every weight/bias tensor (the ternary levels per layer).
# Entries whose name contains "mean", "var" or "batches" are skipped.

layer_distinct_weights = {}
skipped_markers = ("mean", "var", 'batches')

for i in keys:
    if any(marker in i for marker in skipped_markers):
        continue

    layer_tensor = state_dict[i]
    imd = torch.unique(layer_tensor)
    print(i+ ' hidden layer dimension', layer_tensor.shape)
    print("Unique values of weight in "+ i+ " the hidden layer : ", imd)
    # Stored as a plain Python list of floats.
    layer_distinct_weights[i] = imd.cpu().numpy().tolist()
    print()

features.0.weight hidden layer dimension torch.Size([32, 1, 3, 3])
Unique values of weight in features.0.weight the hidden layer :  tensor([-0.9837,  0.0000,  0.9837], device='cuda:0')

features.0.bias hidden layer dimension torch.Size([32])
Unique values of weight in features.0.bias the hidden layer :  tensor([-0.9982,  0.0000,  0.9985], device='cuda:0')

features.1.weight hidden layer dimension torch.Size([32])
Unique values of weight in features.1.weight the hidden layer :  tensor([0.0000, 1.0144], device='cuda:0')

features.1.bias hidden layer dimension torch.Size([32])
Unique values of weight in features.1.bias the hidden layer :  tensor([-1.0126,  0.0000,  0.9783], device='cuda:0')

features.3.weight hidden layer dimension torch.Size([64, 32, 3, 3])
Unique values of weight in features.3.weight the hidden layer :  tensor([-1.0157,  0.0000,  1.0089], device='cuda:0')

features.3.bias hidden layer dimension torch.Size([64])
Unique values of weight in features.3.bias the hidden layer

In [28]:
# Shape of every tensor in the re-keyed state dict.
for entry_name in keys:
    print(entry_name, state_dict[entry_name].shape)

features.0.weight torch.Size([32, 1, 3, 3])
features.0.bias torch.Size([32])
features.1.weight torch.Size([32])
features.1.bias torch.Size([32])
features.1.running_mean torch.Size([32])
features.1.running_var torch.Size([32])
features.1.num_batches_tracked torch.Size([])
features.3.weight torch.Size([64, 32, 3, 3])
features.3.bias torch.Size([64])
features.6.weight torch.Size([128, 64, 3, 3])
features.6.bias torch.Size([128])
features.7.weight torch.Size([128])
features.7.bias torch.Size([128])
features.7.running_mean torch.Size([128])
features.7.running_var torch.Size([128])
features.7.num_batches_tracked torch.Size([])
features.9.weight torch.Size([256, 128, 3, 3])
features.9.bias torch.Size([256])
features.11.weight torch.Size([256, 256, 3, 3])
features.11.bias torch.Size([256])
fc1.weight torch.Size([256, 9216])
fc1.bias torch.Size([256])
fc3.weight torch.Size([256, 256])
fc3.bias torch.Size([256])
fc5.weight torch.Size([10, 256])
fc5.bias torch.Size([10])


In [29]:
# Count the transitions on weights that leave a dead unit: for each listed
# layer, look at the weight column of every dead input unit and count its
# non-zero entries, with two transitions per non-zero weight.
total_trans = 0
layers_name = ['fc3.weight', 'fc5.weight']

# Dead input units of each layer in `layers_name`, in the same order.
n_idx = [dead_n_idx, dead_n_idx1]

for ix, l in enumerate(layers_name):
    print(l)
    z = state_dict1[l]

    # Show the layer's distinct weight values; this works for any number of
    # distinct values instead of assuming exactly two or three.
    print(*layer_distinct_weights[l])

    for idx in n_idx[ix]:
        # Indices are stored as decimal strings; int() parses them without
        # evaluating arbitrary expressions the way eval() would.
        unit_index = int(idx)

        if 'bias' in l:
            imd = z[unit_index]
        else:
            # Column = all weights fed by input unit `unit_index`.
            imd = z[:, unit_index]

        # Every non-zero weight is a +w or -w entry. Counting non-zeros avoids
        # exact float comparisons and cannot mistake 0 for a weight level.
        trans = int((imd != 0).sum())
        total_trans += trans * 2

print("Total dead transitions for the layer :", total_trans)

fc3.weight
-0.9861384630203247 0.0 0.9487199783325195
fc5.weight
-1.048563003540039 0.0 0.9514178037643433
Total dead transitions for the layer : 0


In [None]:
# Fault coverage obtained from the main file: 8410/8689

In [30]:
# Net fault coverage: the dead-transition total computed above (0) is added to
# the numerator of the 8410/8689 figure taken from the main file.
(8410 + 0)/ 8689

0.9678904361836805