In [None]:
import pickle as cPickle, gzip, numpy
import numpy as np
import torch
import torch.utils.data as td
import torch.nn.functional as F
import time, random
import matplotlib.pyplot as plt
from typing import Tuple
from torch import nn
from torch.utils.data.dataset import TensorDataset
from torchvision import datasets, transforms

# COLAB _SPECIFIC CODE

# from google.colab import drive
# drive.mount('/content/drive')
# try:
#   from fxpmath import Fxp
# except:
#   !pip install fxpmath
from fxpmath import Fxp

# Setting Random Seed
manualSeed = 5000
device = 'cuda' if torch.cuda.is_available() else 'cpu'
def set_seed():
  if device == 'cuda':
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.enabled = False 
  torch.manual_seed(manualSeed)
  np.random.seed(manualSeed)
  random.seed(manualSeed)

sparsity = 0

## Dataset loading

In [3]:
# Load the dataset
def data_processing(cnn):
  # f = gzip.open('/content/drive/MyDrive/Sem 2 (Spring 2022)/EEE 598/Homeworks/HW2/mnist.pkl.gz', 'rb')
  f = gzip.open('mnist.pkl.gz', 'rb')
  train_set, valid_set, test_set = cPickle.load(f, encoding='latin1') 
  f.close()
  # Converting dataset from numpy to Tensor
  train_t = torch.from_numpy(train_set[0])
  valid_t = torch.from_numpy(valid_set[0])
  test_t  = torch.from_numpy(test_set[0])
  # Converting labels to Tensor
  train_label= torch.from_numpy(train_set[1])
  valid_label= torch.from_numpy(valid_set[1])
  test_label = torch.from_numpy(test_set[1])
  if cnn:
    # resize and normalize
    train_t = train_t.reshape((train_t.shape[0], 1, 28, 28))
    valid_t = valid_t.reshape((valid_t.shape[0], 1, 28, 28))
    test_t = test_t.reshape((test_t.shape[0], 1, 28, 28))
    input_shape = (1, 28, 28)
  # Wrapping it
  trainset = TensorDataset(train_t, train_label)
  validset = TensorDataset(valid_t, valid_label)
  testset = TensorDataset(test_t, test_label)

  return trainset, validset, testset

def loss_curve(train_loss, val_loss, y_range, name):
  plt.figure(figsize=(10,5))
  plt.xlabel("Epochs")
  plt.ylabel("Loss")
  plt.title(name)
  plt.plot(train_loss, label="Training Loss")
  plt.plot(val_loss, label="Validation Loss")
  plt.legend()
  plt.savefig(name+".png", bbox_inches='tight', pad_inches=0.1)
  plt.show()


## Accuracy Calculator

In [4]:
# Accuracy Calculator
def acc_calc(pred, actual):
  best = pred.argmax(1)
  comp = best.eq(actual.view_as(best)).float().sum()
  return comp


## Evaluation

In [5]:
# Function for validation and testing
def eval_loop(data_loader, model, loss_fn):
  size = len(data_loader.dataset)
  loss, acc, loss_val = 0, 0, 0
  model.eval()
  torch.no_grad() 
  for batch, (X, y) in enumerate(data_loader):
    if torch.cuda.is_available():
      X = X.cuda()
      y = y.cuda()
    
    target = model(X)
    loss = loss_fn(target, y).item()
    # Accuracy
    acc = acc+acc_calc(nn.Softmax(dim=1)(target), y)
    loss_val = loss_val+loss

  return loss_val/size, (acc/size)


In [6]:
# Primary function for execution of the neural networks
def nn_run(nn_model, trainset, validset, testset, batch, learn_rate, epoch_max, mom, model, decay):
  
  # Loading datasets
  train_loader = td.DataLoader(trainset, batch_size=batch, shuffle=True, worker_init_fn=np.random.seed(manualSeed),num_workers=0,pin_memory=True)
  valid_loader = td.DataLoader(validset, batch_size=batch, shuffle=True, worker_init_fn=np.random.seed(manualSeed),num_workers=0,pin_memory=True)
  test_loader  = td.DataLoader(testset, batch_size=batch, shuffle=True, worker_init_fn=np.random.seed(manualSeed),num_workers=0,pin_memory=True)

  # Loading neural network model to device
  loss_fn = nn.CrossEntropyLoss()
  optimizer = torch.optim.SGD(nn_model.parameters(), lr=learn_rate, momentum=mom, weight_decay=decay)
  # optimizer = torch.optim.SGD(nn_model.parameters(), lr=learn_rate, weight_decay=decay)
  scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=60, gamma=0.1)

  print(f'\n=============={model}==============')
  print(f'Trained model loaded.')
  timer = time.perf_counter()
  valid_loss = 1
  valid_loss_list = []
  train_loss_list = []

  #=========TESTING=========
  print(f'\nTesting using {device} device.')
  timer = time.perf_counter()
  test_loss, test_acc = eval_loop(test_loader, nn_model, loss_fn)
  timer = round(time.perf_counter()-timer, 2)

  print('--------Test complete.--------')
  print(f'Model       : {model}')
  print(f'Device      : {device}')
  print(f'Loss        : {test_loss:>7f}')
  print(f'Accuracy    : {np.round(test_acc.cpu().detach().numpy()*100, 3)}%') if device == 'cuda' else print(f'Accuracy    : {np.round(test_acc.detach().numpy()*100, 3)}%')
  print(f'Runtime     : {timer}s')
  print(f'Dataset size: {len(test_loader.dataset)}')
  print('------------------------------\n')

  # Returning test accuracy to main function
  return np.round(test_acc.cpu().detach().numpy()*100, 3) if device == 'cuda' else np.round(test_acc.detach().numpy()*100, 3)


## Neural Network Definition

In [7]:
# Defining the convolutional neural network (CNN)
class CNN(nn.Module):
  def __init__(self):
    super(CNN, self).__init__()
    first_layer_features = 16
    second_layer_features = 32
    wbit = 4
    abit = 4
    self.cnn_layers = nn.Sequential(
      # Defining a 2D convolution layer
      QConv2d(1, first_layer_features, kernel_size=3, stride=1, padding=1, wbit=wbit, abit=abit, scaling_next=1, layer=1, sparsity_fraction=0.333),
      nn.ReLU(inplace=True),
      nn.MaxPool2d(kernel_size=2, stride=2),
      # Defining another 2D convolution layer
      QConv2d(first_layer_features, second_layer_features, kernel_size=3, stride=1, padding=1, wbit=wbit, abit=abit, scaling_next=1, layer=2, sparsity_fraction=0.5),
      nn.ReLU(inplace=True),
      nn.MaxPool2d(kernel_size=2, stride=2),
    )

    self.linear_layers = nn.Sequential(
      QLinear(second_layer_features * 7 * 7, 10)
    )

    # Defining the forward pass    
  def forward(self, x):
    x = self.cnn_layers(x)
    x = x.view(x.size(0), -1)
    x = self.linear_layers(x)
    return x

## Quantized Convolution

In [8]:
# Step 0. Define your quantization backward node (function)
import logging    # first of all import the module
logging.basicConfig(filename='std.log', filemode='w')

class QSTE(torch.autograd.Function):
  @staticmethod
  def forward(ctx, x, scale):
    # quantization - dequantization
    x = x / scale
    x = torch.round(x)

    xdeq = x * scale
    return xdeq

  @staticmethod
  def backward(ctx, grad_output):
    return grad_output, None


# Step 1. Define your convolutional layer
class QConv2d(nn.Conv2d):
  def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=False, padding_mode='zeros', device=None, dtype=None, wbit=4, abit=4, scaling_next=1, layer=1, sparsity_fraction=0):
      super().__init__(in_channels, out_channels, kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=bias, padding_mode=padding_mode, device=device, dtype=dtype)

      self.register_buffer('mask', torch.ones(self.weight.size()))
      self.register_buffer('wint', torch.ones(self.weight.size()))
      self.register_buffer('scale', torch.tensor(1.))      
      self.wbit = wbit
      self.abit = abit
      self.scaling_next = scaling_next
      self.layer = layer
      self.sparsity_fraction = sparsity_fraction
      global sparsity

  def wquant(self):
    """
    Perform the weight quantization
    """
    # step 1: quantization boundary: 
    self.alpha_w = 2*self.weight.abs().mean()
    wc = self.weight.clamp(-self.alpha_w, self.alpha_w)
    scaling_factor = (self.alpha_w) / (2**(self.wbit-1)-1)
    # step 2: quantization
    wc = wc / scaling_factor
    wq = torch.round(wc)
    #wq = QSTE.apply(wc, scaling_factor)
    return wq, scaling_factor
  
  def xquant(self, x):
    """
    Perform the wight quantization
    """
    # step 1: quantization boundary: 
    self.alpha_w_upper = 6
    self.alpha_w_lower = 0
    xc = x.clamp(self.alpha_w_lower, self.alpha_w_upper)
    scaling_factor = (self.alpha_w_upper - self.alpha_w_lower) / (2**self.abit-1)
    # step 2: quantization
    xc = xc / scaling_factor
    xq = torch.round(xc)
    #xq = QSTE.apply(xc, scaling_factor)
    return xq, scaling_factor

  def forward(self, x):
    # step 1: Quantize your weights
    wq, Sw1 = self.wquant()
    # map the integer weights to the buffer
    self.wint.data = wq.mul(self.mask)
    # step 2: Quantize your input x
    if wq.size(1) == 1:
      xq, Sx1 = self.xquant(x)
    else:
      xq = x
      Sx1 = 6 / (2**self.abit-1)
    M = Sw1 * Sx1 / Sx1
    # use Fxp to do conversion of M 
    M_scalar = Fxp(M.detach().cpu().numpy(), signed=True, n_word=16, n_frac=12)
    Mint = M_scalar.base_repr(10)
    Mint = int(Mint)
    self.scale = torch.tensor(Mint)
    # self.wint = wq.mul(self.mask)
    Y = F.conv2d(xq, wq.mul(self.mask), self.bias, self.stride, self.padding, self.dilation, self.groups)
    Y = Y.mul(Mint).mul(2**(-12)).round()
    return Y


## Quantized Linear

In [9]:
# Step 1. Define your convolutional layer
class QLinear(nn.Linear):
  def __init__(self, in_channels, out_channels, bias=False, wbit=4, abit=4):
      super().__init__(in_channels, out_channels, bias=bias)

      #self.register_buffer('mask', torch.ones(self.weight.size()))
      self.register_buffer('wint', torch.ones(self.weight.size()))
      self.register_buffer('scale', torch.tensor(1.))      
      self.wbit = wbit
      self.abit = abit
      #global sparsity

  
  def wquant(self):
    """
    Perform the weight quantization
    """
    # step 1: quantization boundary: 
    self.alpha_w = 2*self.weight.abs().mean()
    wc = self.weight.clamp(-self.alpha_w, self.alpha_w)
    scaling_factor = (self.alpha_w) / (2**(self.wbit-1)-1)
    # step 2: quantization
    wc = wc / scaling_factor
    wq = torch.round(wc)
    #wq = QSTE.apply(wc, scaling_factor)
    return wq, scaling_factor
  
  def xquant(self, x):
    """
    Perform the wight quantization
    """
    # step 1: quantization boundary: 
    self.alpha_w_upper = 6
    self.alpha_w_lower = 0
    xc = x.clamp(self.alpha_w_lower, self.alpha_w_upper)
    scaling_factor = (self.alpha_w_upper - self.alpha_w_lower) / (2**self.abit-1)
    # step 2: quantization
    xc = xc / scaling_factor
    xq = torch.round(xc)
    #xq = QSTE.apply(xc, scaling_factor)
    return xq, scaling_factor

  def forward(self, x):
    # step 1: Quantize your weights
    wq, Sw1 = self.wquant()
    # step 2: Quantize your input x
    xq = x
    Sx1 = 6 / (2**self.abit-1)

    Y = F.linear(xq, wq, self.bias)
    M = Sw1 * Sx1
    M_scalar = Fxp(M.detach().cpu().numpy(), signed=True, n_word=16, n_frac=12)
    Mint = M_scalar.base_repr(10)
    Mint = int(Mint)
    self.scale = torch.tensor(Mint)


    Y = Y.mul(Mint).mul(2**(-12)).round()
    return Y


## Main function

In [None]:
# Main Function
import os
if __name__ == '__main__':
  batch = 128
  epoch = 100
  learn_rate = 0.005
  momentum = 0.9
  # Obtaining dataset
  trainset, validset, testset = data_processing(cnn=True)

  #========= Convolutional Neural Network =========
  set_seed()
  ckpt = torch.load('trained_network.pth.tar')
  print(os.path.abspath('trained_network.pth.tar'))
  model = CNN()

  curr_state_dict = model.state_dict()
  curr_state_dict.update(ckpt)
  model.load_state_dict(curr_state_dict)
  if torch.cuda.is_available():
    model = model.cuda()
  nn_run(model, trainset, validset, testset, batch, learn_rate, epoch, momentum, "CNN", decay=1e-4)
  

In [11]:
state_dict = model.state_dict()
print(state_dict.keys())
for k, v in state_dict.items():
  if 'wint' in k:
    if device == 'cuda':
      wint = v.cpu().numpy()
    else:
      wint = v.numpy()
    np.save(f"{k}_wint.npy", wint)
  elif 'scale' in k:
    scale_factor = v.numpy()
    np.save(f"{k}_scale.npy", scale_factor)


odict_keys(['cnn_layers.0.weight', 'cnn_layers.0.mask', 'cnn_layers.0.wint', 'cnn_layers.0.scale', 'cnn_layers.3.weight', 'cnn_layers.3.mask', 'cnn_layers.3.wint', 'cnn_layers.3.scale', 'linear_layers.0.weight', 'linear_layers.0.wint', 'linear_layers.0.scale'])


In [12]:
wnp = np.load("cnn_layers.0.wint_wint.npy")
f = open("cnn_layers.0.wint.txt", 'w')
for x in wnp:
  if(np.any(x)):
    f.write(np.array2string(x))
    f.write("\n")
f.close()

wnp = np.load("cnn_layers.3.wint_wint.npy")
f = open("cnn_layers.3.wint.txt", 'w')
for x in wnp:
  if(np.any(x)):
    f.write(np.array2string(x))
    f.write("\n")
f.close()

wnp = np.load("linear_layers.0.wint_wint.npy")
f = open("linear_layers.0.wint.txt", 'w')
for x in wnp:
  if(np.any(x)):
    f.write(np.array2string(x))
    f.write("\n")
f.close()

f = open("scaling_factors.txt", 'w')
wnp = np.load("cnn_layers.0.scale_scale.npy", allow_pickle=True)
f.write("\nScaling factor conv layer 0: ")
f.write(np.array2string(wnp))
wnp = np.load("cnn_layers.3.scale_scale.npy", allow_pickle=True)
f.write("\nScaling factor conv layer 3: ")
f.write(np.array2string(wnp))
wnp = np.load("linear_layers.0.scale_scale.npy", allow_pickle=True)
f.write("\nScaling factor linear layer 0: ")
f.write(np.array2string(wnp))
f.close()



In [13]:
def xquant_temp(x):
  """
  Perform the weight quantization
  """
  # step 1: quantization boundary: 
  alpha_w_upper = 6
  alpha_w_lower = 0
  xc = x.clamp(alpha_w_lower, alpha_w_upper)
  scaling_factor = (alpha_w_upper - alpha_w_lower) / (2**4-1)
  # step 2: quantization
  xc = xc / scaling_factor
  xq = torch.round(xc)
  #xq = QSTE.apply(xc, scaling_factor)
  return xq

from torch.functional import Tensor
test_loader  = td.DataLoader(testset, batch_size=1, shuffle=True, worker_init_fn=np.random.seed(manualSeed),num_workers=0,pin_memory=True)
len(test_loader.dataset)
f = open("input_fmap.txt", 'w')
for batch, (X, y) in enumerate(test_loader):
  X = X .resize(28,28)
  X = xquant_temp(X)
  X = X.numpy()
  for pixel in X:
    f.write(np.array2string(pixel))
  break
f.close()



# DEBUG CODES BELOW

In [14]:
# TESTING
for n, m in model.named_modules():
  if isinstance(m, QConv2d):
    wint = m.wint
    s = wint[wint.eq(0.)].numel()/wint.numel()
    print("Sparsity of the layer {}= {:.3f}".format(n, s))

Sparsity of the layer cnn_layers.0= 0.104
Sparsity of the layer cnn_layers.3= 0.516


In [15]:
state_dict = model.state_dict()
print(state_dict.keys())
for k, v in state_dict.items():
  if 'wint' in k:
    wint = v
    s = wint[wint.eq(0.)].numel()/wint.numel()
    print("Sparsity of the layer {}= {:.3f}".format(k, s))

odict_keys(['cnn_layers.0.weight', 'cnn_layers.0.mask', 'cnn_layers.0.wint', 'cnn_layers.0.scale', 'cnn_layers.3.weight', 'cnn_layers.3.mask', 'cnn_layers.3.wint', 'cnn_layers.3.scale', 'linear_layers.0.weight', 'linear_layers.0.wint', 'linear_layers.0.scale'])
Sparsity of the layer cnn_layers.0.wint= 0.104
Sparsity of the layer cnn_layers.3.wint= 0.516
Sparsity of the layer linear_layers.0.wint= 0.000


In [16]:
wnp = np.load("cnn_layers.3.wint_wint.npy")
print(wnp.shape)

(32, 16, 3, 3)


energy calc = power * time 
is time just cycles or period