In [None]:
cd MNIST/ANN-2

In [4]:
import numpy as np
import sys, os, random
import matplotlib.pyplot as plt
import pickle, gzip
from tqdm import tqdm,tqdm_notebook
import torch
import torchvision
from torch import nn
from torch.autograd import Variable
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SubsetRandomSampler

In [5]:
batch_size = 128
seed_num = 81

# For reproducibility when you run the file with .py: seed every RNG in use
torch.manual_seed(seed_num)
torch.cuda.manual_seed(seed_num)
np.random.seed(seed_num)
random.seed(seed_num)

# cuDNN autotuning (benchmark=True) may select different kernels from run to run,
# which works against the deterministic setting below, so it is switched off.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [6]:
# Data Augmentation: only the training pipeline gets the random rotation / flip;
# both pipelines convert to tensors and normalise with the same mean / std.
train_transform = transforms.Compose([
    transforms.RandomRotation(30),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.1307,],[0.3081,]),
])
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.1307,],[0.3081,]),
])

# MNIST train and test datasets, downloaded into the current working directory
train_data = datasets.MNIST(
    os.getcwd(), train=True, download=True, transform=train_transform
)
test_data = datasets.MNIST(
    os.getcwd(), train=False, download=True, transform=test_transform
)

In [7]:
# Split the shuffled training-set indices into training and validation indices:
# the first 50,000 go to training and the remainder to validation
# (a fixed split point, not an exact 80:20 ratio).
np.random.seed(seed_num)
len_trainset = len(train_data)
index_list = list(range(len_trainset))
np.random.shuffle(index_list)
split_index = 50000
train_indices, valid_indices =  index_list[:split_index], index_list[split_index:]

# Creating Samplers for training and validation set using the indices
np.random.seed(seed_num)
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(valid_indices)

# Re-seed torch before the loaders are built
torch.manual_seed(seed_num)

# NOTE: val_iterator reads from train_data, so validation images go through
# train_transform (including its random augmentations) as well.
train_iterator = DataLoader(train_data, batch_size=batch_size, sampler=train_sampler)
val_iterator = DataLoader(train_data, batch_size=batch_size, sampler=valid_sampler)
test_iterator = DataLoader(test_data, batch_size=batch_size, shuffle=True)

In [8]:
# 2-Layers ANN model

class MLP(nn.Module):
    """Fully connected network: 784 inputs -> `units` hidden (ReLU) -> 10 outputs."""

    def __init__(self, units):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear1 = nn.Linear(784, units, bias=True)
        self.linear2 = nn.Linear(units, 10, bias=True)

    def forward(self, X):
        """Flatten X, apply the ReLU hidden layer and return the 10 output scores."""
        hidden = F.relu(self.linear1(self.flatten(X)))
        return self.linear2(hidden)

In [9]:
# Seed torch first so the freshly built network starts from the same random weights
torch.manual_seed(seed_num)
unit = 128

# Summary
model = MLP(unit)
model = model.cuda()
print("Model:\n", model)

Model:
 MLP(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear1): Linear(in_features=784, out_features=128, bias=True)
  (linear2): Linear(in_features=128, out_features=10, bias=True)
)


In [10]:
# Names of the model's parameter tensors, in the order named_parameters() yields them
layer_name = [param_name for param_name, _param in model.named_parameters()]
layer_name

['linear1.weight', 'linear1.bias', 'linear2.weight', 'linear2.bias']

In [11]:
import joblib

# Loading the weights of ternary model
# NOTE(review): joblib.load unpickles the file - only load checkpoints from a trusted source.
k = joblib.load('Model_1_ANN.pkl')

print(model.load_state_dict(k))
print("Loading weights done !")

# Total number of ternary weights (+w, -w): count the non-zero entries per tensor
loaded_weights = model.state_dict()
totalParams = 0
for param_name in layer_name:
    nonzero_count = (loaded_weights[param_name] != 0).sum()
    print(param_name, nonzero_count)
    totalParams += nonzero_count

print("Total Parameters:",totalParams, '\n')

<All keys matched successfully>
Loading weights done !
linear1.weight tensor(1343, device='cuda:0')
linear1.bias tensor(74, device='cuda:0')
linear2.weight tensor(256, device='cuda:0')
linear2.bias tensor(10, device='cuda:0')
Total Parameters: tensor(1683, device='cuda:0') 



In [12]:
# The model was already created with .cuda() when it was instantiated, so this
# only re-asserts that its parameters live on the GPU.
model = model.cuda() # move the model to gpu

In [13]:
# Model's performance on test set
# Each batch is scored with a single forward pass (instead of one image at a
# time) and inputs are moved to whatever device the model lives on, so `img` /
# `lab` can no longer be left undefined when CUDA is unavailable.

correct_count, all_count = 0, 0
model.eval()
device = next(model.parameters()).device

with torch.no_grad():
    for images, labels in test_iterator:
        images, labels = images.to(device), labels.to(device)
        pred_labels = model(images).argmax(dim=1)  # highest-scoring class per image
        correct_count += (pred_labels == labels).sum().item()
        all_count += labels.size(0)

print("Number Of Images Tested =", all_count)
print("\nModel Test Accuracy =", (correct_count/all_count))

Number Of Images Tested = 10000

Model Test Accuracy = 0.7065


In [14]:
# For each parameter tensor, list the distinct values it contains
state_dict = model.state_dict()

layer_distinct_weights = {}

for param_name in layer_name:
    param_tensor = state_dict[param_name]
    distinct_values = torch.unique(param_tensor)
    print(param_name+ ' hidden layer dimension', param_tensor.shape)
    print("Unique values of weight in "+ param_name+ " th hidden layer : ", distinct_values)
    # Keep a plain Python list of the distinct values for later lookups
    layer_distinct_weights[param_name] = distinct_values.cpu().numpy().tolist()
    print()

linear1.weight hidden layer dimension torch.Size([128, 784])
Unique values of weight in linear1.weight th hidden layer :  tensor([-1.0265,  0.0000,  0.9719], device='cuda:0')

linear1.bias hidden layer dimension torch.Size([128])
Unique values of weight in linear1.bias th hidden layer :  tensor([-0.9846,  0.0000,  0.9911], device='cuda:0')

linear2.weight hidden layer dimension torch.Size([10, 128])
Unique values of weight in linear2.weight th hidden layer :  tensor([-1.0300,  0.0000,  0.9739], device='cuda:0')

linear2.bias hidden layer dimension torch.Size([10])
Unique values of weight in linear2.bias th hidden layer :  tensor([-0.9850,  1.0149], device='cuda:0')



In [15]:
# Parameter names in the order the state dict stores them
keys = [param_name for param_name in state_dict]
print(keys)

['linear1.weight', 'linear1.bias', 'linear2.weight', 'linear2.bias']


In [16]:
# Duplicate architecture of the model

class MLP1(nn.Module):
    """Same layers as MLP, but every forward pass also tallies inactive hidden neurons.

    Args:
        units: number of hidden neurons.
        dn_info: dict updated in place by forward(); dn_info[str(j)] is incremented
            once for every input row whose hidden activation j is exactly 0 after
            the ReLU. A key must already exist for each neuron that gets counted
            (a missing key raises KeyError).
    """

    def __init__(self, units, dn_info):
        super(MLP1, self).__init__()
        self.dn_info = dn_info       # Dead Neuron info
        self.flatten = nn.Flatten()
        self.linear1 = nn.Linear(784,units, bias=True)
        self.linear2 = nn.Linear(units,10,bias=True)

    def forward(self,X):
        """Return the 10 output scores for X and update self.dn_info as a side effect."""
        X = self.flatten(X)
        X = F.relu(self.linear1(X))

        ####################################
        # Storing dead neurons indices
        # Count the zero activations per neuron in one tensor reduction and
        # transfer a single list, instead of looping over every zero entry
        # and calling .item() on each index.
        zero_counts = (X == 0).sum(dim=0).tolist()
        for neuron, count in enumerate(zero_counts):
            if count:
                self.dn_info[str(neuron)] += count
        #####################################

        X = self.linear2(X)
        return X

In [17]:
def getDeadN_info(dn_info, unit, state_dict, ds, nameOftheSet):
    """Replay data through an MLP1 copy of the model and tally zero hidden activations.

    Args:
        dn_info: dict updated in place. Keys "0" .. str(unit - 1) are reset to 0,
            then MLP1.forward increments a key once per image on which that
            hidden neuron outputs exactly 0.
        unit: number of hidden neurons used to build the MLP1 copy.
        state_dict: weights loaded into the MLP1 copy.
        ds: iterable of data loaders, each yielding (images, labels) batches.
        nameOftheSet: label inserted into the printed accuracy line.

    Prints the number of images processed and the accuracy over them.
    Returns None.
    """
    for i in range(unit):
        dn_info[str(i)] = 0

    # Use the GPU when present, otherwise the CPU; the per-image version left
    # `img` / `lab` undefined when CUDA was unavailable.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model1 = MLP1(unit, dn_info).to(device)
    model1.load_state_dict(state_dict)
    model1.eval()

    correct_count, all_count = 0, 0

    with torch.no_grad():
        for ds_e in ds:
            for images, labels in ds_e:
                # One forward pass per batch; MLP1 counts the zero activations
                # of every row in the batch.
                images = images.to(device=device, dtype=torch.float32)
                labels = labels.to(device)
                pred_labels = model1(images).argmax(dim=1)
                correct_count += (pred_labels == labels).sum().item()
                all_count += labels.size(0)

    print("Number Of Images =", all_count)
    print(f"Model {nameOftheSet} Accuracy =", (correct_count/(all_count)))

In [18]:
# Per-neuron zero-activation counters; getDeadN_info fills them in place
dn_info_train = {}
dn_info_test = {}

# NOTE: the "Training" run iterates the train, validation AND test loaders, so
# its image count and accuracy cover all three, not only the training split.
getDeadN_info(dn_info= dn_info_train, unit = unit, state_dict = state_dict, ds= [train_iterator, val_iterator, test_iterator], nameOftheSet = "Training")
getDeadN_info(dn_info= dn_info_test, unit = unit, state_dict = state_dict, ds= [test_iterator], nameOftheSet = "Test")

Number Of Images = 70000
Model Training Accuracy = 0.6572571428571429
Number Of Images = 10000
Model Test Accuracy = 0.7065


In [19]:
# Largest zero-activation count reached by any hidden neuron in each run
max_dn_train = max(dn_info_train.values())
max_dn_test = max(dn_info_test.values())
max_dn_train, max_dn_test

(70000, 10000)

In [20]:
# A neuron is dead only if it output zero for EVERY image replayed in the
# "Training" run. Compare against the number of images that run processed
# (train + validation + test loaders) rather than against the largest count
# observed, which would flag the least-active neurons even when none of them
# is zero on all images.
# Keep this loader list in sync with the `ds` argument used to fill dn_info_train.
n_images_seen = sum(
    len(loader.sampler) for loader in (train_iterator, val_iterator, test_iterator)
)

# Indices stay strings because they are the keys of dn_info_train
dead_n_idx = [
    neuron for neuron, zero_count in dn_info_train.items()
    if zero_count == n_images_seen
]

In [21]:
print("FC Layer :",len(dead_n_idx)) # Number of hidden neurons collected in dead_n_idx

FC Layer : 16


In [22]:
print(dead_n_idx) # Indices (stored as strings) of the neurons flagged as dead

['28', '34', '46', '49', '52', '53', '72', '96', '98', '101', '112', '118', '123', '125', '126', '127']


In [23]:
# Alias, not a copy: state_dict1 refers to the same dict object as state_dict
state_dict1 = state_dict

In [24]:
# Display the distinct values recorded for each parameter tensor
layer_distinct_weights

{'linear1.bias': [-0.9845763444900513, 0.0, 0.9911096692085266],
 'linear1.weight': [-1.0265190601348877, 0.0, 0.97193843126297],
 'linear2.bias': [-0.9849514365196228, 1.0149472951889038],
 'linear2.weight': [-1.0300489664077759, 0.0, 0.9739477634429932]}

In [25]:
# Count the transitions attached to weights that leave the dead hidden neurons.
# Column j of linear2.weight holds the outgoing weights of hidden neuron j;
# every non-zero weight in a dead neuron's column is counted as 2 transitions.
total_trans = 0
layers_name = ['linear2.weight']

for l in layers_name:
    print(l)
    z = state_dict1[l]

    for idx in dead_n_idx:
        # int() rather than eval(): the indices are plain digit strings
        imd = z[:, int(idx)]
        # Count the non-zero entries directly instead of matching against the
        # unpacked unique values; the former 2-value branch would have counted
        # zeros whenever 0 was one of the two distinct values.
        trans = (imd != 0).sum().item()
        total_trans += trans * 2

print("Total dead transitions for the output layer :", total_trans)
    
    

linear2.weight
Total dead transitions for the output layer : 60


In [None]:
# Fault coverage obtained from the main file: 3143 / 3366

In [None]:
# Adding the dead transitions counted in total_trans to the numerator gives the
# net fault coverage. total_trans is used instead of a hard-coded 60 so the
# figure follows the computed count.
covered_main, total_faults = 3143, 3366  # fault coverage reported by the main file
(covered_main + total_trans) / total_faults

0.9515745692216281