<a href="https://colab.research.google.com/github/chekfung/cross_layer_final_project/blob/main/training_quantization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Source Code

## Imports Needed Throughout the Project

In [106]:
# All Imports 
import sys
import numpy as np
import copy
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable
from google.colab import files

# argument parser
import easydict

## Get and Download Datasets

In [107]:
# FashionMNIST train and test splits (download=True is passed for both);
# every image goes through ToTensor().
train_set, test_set = (
    dsets.FashionMNIST(
        root='./data/FashionMNIST',
        train=is_train_split,
        download=True,
        transform=transforms.Compose([transforms.ToTensor()]),
    )
    for is_train_split in (True, False)
)

## Helper Code: Conv/BatchNorm Fusion and Random Seeds

In [108]:
import copy

def fuse_conv_bn(model,out_model):
  """Fold each BatchNorm2d of ``model`` into the conv layers of ``out_model``.

  For every ``nn.BatchNorm2d`` met while walking ``model.modules()``, the
  module visited immediately before it must be the ``nn.Conv2d`` it
  normalises. With ``scale = gamma / sqrt(eps + running_var)`` per output
  channel, the fused parameters are

      weight' = weight * scale
      bias'   = (bias - running_mean) * scale + beta

  The first BatchNorm found is written to ``out_model.conv1``; every later one
  is written to ``out_model.conv2`` (same targets as the original code).

  Args:
    model: source network holding the Conv2d/BatchNorm2d pairs (read only).
    out_model: network whose ``conv1`` / ``conv2`` parameters are overwritten
      in place.

  Raises:
    ValueError: if a BatchNorm2d is not directly preceded by a Conv2d.
  """
  previous = None  # module visited immediately before the current one
  fused = 0        # number of BatchNorm layers folded so far

  with torch.no_grad():
    for layer in model.modules():
      if isinstance(layer, nn.BatchNorm2d):
        # Folding is only valid when the BN acts directly on a conv output.
        if not isinstance(previous, nn.Conv2d):
          raise ValueError(
              "BatchNorm2d must directly follow a Conv2d to be fused, got "
              + type(previous).__name__)

        target = out_model.conv1 if fused == 0 else out_model.conv2

        # affine=False leaves weight/bias as None: treat as gamma=1, beta=0.
        gamma = layer.weight if layer.weight is not None else torch.ones_like(layer.running_var)
        beta = layer.bias if layer.bias is not None else torch.zeros_like(layer.running_mean)
        scale = gamma / torch.sqrt(layer.eps + layer.running_var)

        # One scale per output channel, broadcast over (in, kH, kW).
        target.weight.copy_(previous.weight * scale.view(-1, 1, 1, 1))

        # A conv built with bias=False contributes a zero bias.
        conv_bias = previous.bias if previous.bias is not None else torch.zeros_like(layer.running_mean)
        fused_bias = (conv_bias - layer.running_mean) * scale + beta
        if target.bias is None:
          target.bias = nn.Parameter(fused_bias.clone())
        else:
          target.bias.copy_(fused_bias)

        fused += 1

      previous = layer


In [109]:
def set_random_seeds(random_seed=0):
    """Seed the torch, NumPy and Python RNGs and request deterministic cuDNN.

    Args:
        random_seed: integer seed passed to all three generators.
    """
    # The notebook never imports `random` at the top level, so the original
    # call raised NameError; importing here keeps the cell self-contained.
    import random

    torch.manual_seed(random_seed)
    # Turn off cuDNN auto-tuning and ask for deterministic kernels.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(random_seed)
    random.seed(random_seed)

## FP32 Model 

In [110]:
class MyConvNet_FP32(nn.Module):
    """Floating-point baseline: two conv/BN/ReLU/max-pool stages and a linear head."""

    def __init__(self, args):
        # `args` is accepted but not read by this constructor.
        super().__init__()

        # Stage 1: 1 -> 16 channels
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(num_features=16)
        self.act1 = nn.ReLU(inplace=True)
        self.pool1 = nn.MaxPool2d(kernel_size=2)

        # Stage 2: 16 -> 32 channels
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(num_features=32)
        self.act2 = nn.ReLU(inplace=True)
        self.pool2 = nn.MaxPool2d(kernel_size=2)

        # Classifier: 7*7*32 flattened features -> 10 outputs
        self.lin2 = nn.Linear(in_features=7 * 7 * 32, out_features=10)

    def forward(self, x):
        """Run both conv stages, flatten per sample, and apply the linear layer."""
        stage1 = self.pool1(self.act1(self.bn1(self.conv1(x))))
        stage2 = self.pool2(self.act2(self.bn2(self.conv2(stage1))))

        # Keep the batch dimension, collapse everything else.
        flat = stage2.view(stage2.size(0), -1)
        return self.lin2(flat)
  
# model = MyConvNet(args)
# model = model.cuda()

In [111]:
def copy_models(model,out_model):
  """Deep-copy the conv, activation, pooling and linear layers onto ``out_model``.

  Only the seven attributes listed below are copied, in that order; the
  BatchNorm attributes (bn1/bn2) are not touched.
  """
  copied_attrs = ("conv1", "act1", "pool1", "conv2", "act2", "pool2", "lin2")
  for attr_name in copied_attrs:
    setattr(out_model, attr_name, copy.deepcopy(getattr(model, attr_name)))

## Quantization Helper Code

In [112]:
def simple_quantize_val(val, scale_factor, min_val, max_val):
  """Snap ``val`` to a uniform grid of spacing ``scale_factor``.

  ``val / scale_factor`` is rounded to an integer level, the level is clamped
  to ``[min_val, max_val]`` and then scaled back.

  The original compared the tensor with a Python ``if``, which only works for
  single-element tensors and returned a non-tensor value whenever clamping
  happened. ``torch.clamp`` accepts tensors of any shape and always returns a
  tensor.

  Args:
    val: tensor (any shape, including 0-d) to quantize.
    scale_factor: grid spacing.
    min_val, max_val: lowest / highest allowed integer level (scalars).

  Returns:
    Tensor shaped like ``val`` holding the quantized values.
  """
  levels = torch.clamp(torch.round(val / scale_factor), min=float(min_val), max=float(max_val))
  return levels * scale_factor

def fixed_point_quantize_val(val, num_bits, fractional_bits):
  """Quantize ``val`` to signed fixed point with ``fractional_bits`` fraction bits.

  One of the ``num_bits`` is the sign bit, so there are
  ``num_bits - fractional_bits - 1`` integer bits. Values are rounded to the
  nearest multiple of ``2**-fractional_bits`` and clamped to
  ``[-2**integer_bits, 2**integer_bits - 2**-fractional_bits]``. The upper
  bound equals the original ``(2**I - 1) + (2**F - 1) * step`` expression.

  Uses ``torch.clamp`` so tensors of any shape work and the result is always
  a tensor; float powers avoid NumPy's error on negative integer exponents.

  Args:
    val: tensor (any shape) to quantize.
    num_bits: total word width, sign bit included.
    fractional_bits: number of bits after the binary point.

  Returns:
    Tensor shaped like ``val`` with the quantized values.
  """
  integer_bits = num_bits - fractional_bits - 1  # subtract one for the sign bit
  step = 2.0 ** (-fractional_bits)
  largest_number = 2.0 ** integer_bits - step
  smallest_number = -(2.0 ** integer_bits)

  snapped = torch.round(val / step) * step
  return torch.clamp(snapped, min=smallest_number, max=largest_number)

# Variant of fixed_point_quantize_val that takes the precomputed step size and
# range limits instead of recomputing them from the bit widths on each call.
def fixed_point_quantize_faster(val, smallest_step_size, largest_number, smallest_number):
  """Round ``val`` to multiples of ``smallest_step_size`` and clamp to the range.

  The original used Python ``if`` comparisons, which only work on
  single-element tensors; ``torch.clamp`` handles any shape and always
  returns a tensor.

  Args:
    val: tensor (any shape) to quantize.
    smallest_step_size: grid spacing.
    largest_number: upper clamp bound (note: comes before the lower bound).
    smallest_number: lower clamp bound.

  Returns:
    Tensor shaped like ``val`` with the quantized values.
  """
  step = float(smallest_step_size)

  # Pseudo-quantization: snap to the grid but keep a floating-point dtype.
  snapped = torch.round(val / step) * step

  return torch.clamp(snapped, min=float(smallest_number), max=float(largest_number))

# Fixed-point quantization of a tensor (train_model uses this for 1-D gradients).
def quantize_tensor_1d(tens, step_size, largest_num, smallest_num):
  """Return a quantized copy of ``tens``.

  Every element is rounded to the nearest multiple of ``step_size`` and
  clamped to ``[smallest_num, largest_num]``.

  The original looped over elements in Python and always allocated the
  result with ``.cuda()``. This version is vectorised and keeps the result
  on the device of ``tens``, so it also runs without a GPU. Any rank is
  accepted; the ``_1d`` suffix is kept for existing callers.

  Args:
    tens: input tensor; it is not modified.
    step_size: grid spacing.
    largest_num: upper clamp bound.
    smallest_num: lower clamp bound.

  Returns:
    New tensor with the same shape and device as ``tens``.
  """
  step = float(step_size)
  snapped = torch.round(tens / step) * step
  return torch.clamp(snapped, min=float(smallest_num), max=float(largest_num))

# Fixed-point quantization for 2-D tensors (flattened activations, linear outputs, 2-D gradients).
def quantize_tensor_2d(tens, step_size, largest_num, smallest_num):
  """Return a quantized copy of ``tens``.

  Every element is rounded to the nearest multiple of ``step_size`` and
  clamped to ``[smallest_num, largest_num]``.

  The original looped over elements in Python and always allocated the
  result with ``.cuda()``. This version is vectorised and keeps the result
  on the device of ``tens``, so it also runs without a GPU. Any rank is
  accepted; the ``_2d`` suffix is kept for existing callers.

  Args:
    tens: input tensor; it is not modified.
    step_size: grid spacing.
    largest_num: upper clamp bound.
    smallest_num: lower clamp bound.

  Returns:
    New tensor with the same shape and device as ``tens``.
  """
  step = float(step_size)
  snapped = torch.round(tens / step) * step
  return torch.clamp(snapped, min=float(smallest_num), max=float(largest_num))

# Fixed-point quantization for 4-D tensors (conv activations and conv-weight gradients).
def quantize_tensor_4d(tens, step_size, largest_num, smallest_num):
  """Return a quantized copy of ``tens``.

  Every element is rounded to the nearest multiple of ``step_size`` and
  clamped to ``[smallest_num, largest_num]``.

  The original visited each element through four nested Python loops and
  always allocated the result with ``.cuda()``. This version is vectorised
  and keeps the result on the device of ``tens``, so it also runs without a
  GPU. Any rank is accepted; the ``_4d`` suffix is kept for existing callers.

  Args:
    tens: input tensor; it is not modified.
    step_size: grid spacing.
    largest_num: upper clamp bound.
    smallest_num: lower clamp bound.

  Returns:
    New tensor with the same shape and device as ``tens``.
  """
  step = float(step_size)
  snapped = torch.round(tens / step) * step
  return torch.clamp(snapped, min=float(smallest_num), max=float(largest_num))

In [113]:
import sys
import numpy as np
import copy
import matplotlib.pyplot as plt

def simple_quantize_val(val, scale_factor, min_val, max_val):
  """Snap ``val`` to a uniform grid of spacing ``scale_factor``.

  ``val / scale_factor`` is rounded to an integer level, the level is clamped
  to ``[min_val, max_val]`` and then scaled back.

  The original compared the tensor with a Python ``if``, which only works for
  single-element tensors and returned a non-tensor value whenever clamping
  happened. ``torch.clamp`` accepts tensors of any shape and always returns a
  tensor.

  Args:
    val: tensor (any shape, including 0-d) to quantize.
    scale_factor: grid spacing.
    min_val, max_val: lowest / highest allowed integer level (scalars).

  Returns:
    Tensor shaped like ``val`` holding the quantized values.
  """
  levels = torch.clamp(torch.round(val / scale_factor), min=float(min_val), max=float(max_val))
  return levels * scale_factor

def fixed_point_quantize_val(val, num_bits, fractional_bits):
  """Quantize ``val`` to signed fixed point with ``fractional_bits`` fraction bits.

  One of the ``num_bits`` is the sign bit, so there are
  ``num_bits - fractional_bits - 1`` integer bits. Values are rounded to the
  nearest multiple of ``2**-fractional_bits`` and clamped to
  ``[-2**integer_bits, 2**integer_bits - 2**-fractional_bits]``. The upper
  bound equals the original ``(2**I - 1) + (2**F - 1) * step`` expression.

  Uses ``torch.clamp`` so tensors of any shape work and the result is always
  a tensor; float powers avoid NumPy's error on negative integer exponents.

  Args:
    val: tensor (any shape) to quantize.
    num_bits: total word width, sign bit included.
    fractional_bits: number of bits after the binary point.

  Returns:
    Tensor shaped like ``val`` with the quantized values.
  """
  integer_bits = num_bits - fractional_bits - 1  # subtract one for the sign bit
  step = 2.0 ** (-fractional_bits)
  largest_number = 2.0 ** integer_bits - step
  smallest_number = -(2.0 ** integer_bits)

  snapped = torch.round(val / step) * step
  return torch.clamp(snapped, min=smallest_number, max=largest_number)


# Gets the global max and min over every `weight` tensor in the model
def get_min_max_weight_val(model):
  """Return ``(global_max, global_min)`` over all module weights in ``model``.

  Every module yielded by ``model.modules()`` that exposes a non-empty tensor
  ``weight`` attribute contributes. The original read ``layer.weight`` on
  every module that was not a ReLU/MaxPool2d, so any other weightless module
  raised AttributeError; such modules are now skipped. The first module (the
  model itself) is no longer skipped, so a bare layer passed as ``model`` is
  included too.

  Args:
    model: an ``nn.Module``.

  Returns:
    Tuple ``(global_max, global_min)`` of 0-d tensors detached from autograd.
    If no weights are found the initial ``(-inf, inf)`` floats are returned.
  """
  global_max = -np.inf
  global_min = np.inf

  for layer in model.modules():
    weight = getattr(layer, "weight", None)
    if not isinstance(weight, torch.Tensor) or weight.numel() == 0:
      continue

    values = weight.detach()
    local_max = values.max()
    local_min = values.min()

    if local_max > global_max:
      global_max = local_max

    if local_min < global_min:
      global_min = local_min

  return global_max, global_min

# Quantizes Conv2d / BatchNorm2d / Linear weights in place, either on a uniform
# integer grid spanning the model's weight range or in signed fixed point.
def quantize_model(model,num_bits, fixed_point_bool, num_fractional_bits):
  """Quantize the ``weight`` of every Conv2d, BatchNorm2d and Linear in place.

  Only ``weight`` tensors are changed; biases and BatchNorm running statistics
  are left alone, as in the original.

  * ``fixed_point_bool`` true: round to multiples of ``2**-num_fractional_bits``
    and clamp to the signed range with ``num_bits - num_fractional_bits - 1``
    integer bits.
  * ``fixed_point_bool`` false: ``scale = (max - min) / (2**num_bits - 1)`` from
    the global weight range, round ``w / scale``, clamp to the signed
    ``num_bits`` integer range and multiply back by ``scale``.

  Changes from the original: whole tensors are quantized at once instead of
  element by element (this runs after every optimizer step), modules without
  a weight no longer raise AttributeError, the unused ``zero_point`` is gone,
  and the global weight range is only computed when the integer path needs
  it. The model itself is no longer skipped, so a bare layer passed as
  ``model`` is quantized too.

  Args:
    model: ``nn.Module`` whose layers are modified in place.
    num_bits: total bit width (sign bit included).
    fixed_point_bool: selects fixed-point (true) or integer-grid (false) mode.
    num_fractional_bits: fraction bits, used in fixed-point mode only.

  Returns:
    0, always (kept for existing callers).
  """
  if fixed_point_bool:
    integer_bits = num_bits - num_fractional_bits - 1  # one bit is the sign
    step = 2.0 ** (-num_fractional_bits)
    lowest = -(2.0 ** integer_bits)
    highest = 2.0 ** integer_bits - step

    def _quantize(weight):
      return torch.clamp(torch.round(weight / step) * step, min=lowest, max=highest)
  else:
    max_val, min_val = get_min_max_weight_val(model)

    # Signed integer levels.
    quantize_range = 2 ** num_bits - 1
    quantize_min_val = -(2 ** (num_bits - 1))
    quantize_max_val = 2 ** (num_bits - 1) - 1
    scale_factor = (max_val - min_val) / quantize_range

    def _quantize(weight):
      levels = torch.clamp(torch.round(weight / scale_factor), min=quantize_min_val, max=quantize_max_val)
      return levels * scale_factor

  with torch.no_grad():
    for layer in model.modules():
      if isinstance(layer, (nn.Conv2d, nn.BatchNorm2d, nn.Linear)) and layer.weight is not None:
        layer.weight.copy_(_quantize(layer.weight))

  return 0

In [114]:
def train_model(model, criterion, optimizer, train_loader, quantize=False, stop_after_first_report=True):
    """Train ``model``, optionally quantizing gradients and weights each step.

    Reads these notebook globals: ``num_epochs``, ``args`` (``args.L1norm``),
    and ``train_set`` / ``batch_size`` (progress message only).

    Per batch: forward pass, loss (plus the mean absolute value of each
    ``weight`` parameter, averaged over those parameters, when ``args.L1norm``
    is set), backward pass, optional gradient quantization, optimizer step,
    then weight quantization for fixed-point models.

    Changes from the original:
      * Batches go to the device of the model's parameters instead of
        unconditionally to CUDA.
      * Weight quantization runs only when the model defines ``fp_bits``. The
        original read ``model.fp_bits`` unconditionally, so an FP32 model
        raised AttributeError after its first optimizer step.
      * Parameters whose ``.grad`` is None are skipped when quantizing.
      * The early ``return`` after the first progress report is now controlled
        by ``stop_after_first_report``.

    Args:
        model: network to train; for ``quantize=True`` it must expose
            ``smallest_step_size``, ``largest_num_representable`` and
            ``smallest_num_representable``.
        criterion: loss callable ``criterion(outputs, labels)``.
        optimizer: optimizer over ``model``'s parameters.
        train_loader: iterable of ``(images, labels)`` batches.
        quantize: when true, quantize every gradient before the optimizer step.
        stop_after_first_report: when true (default, the original behaviour),
            return right after the first progress line, i.e. after 600 batches.
            Pass False to run all ``num_epochs`` epochs.

    Returns:
        None.
    """
    print("---Training started")

    device = next(model.parameters()).device
    # Only the fixed-point model carries the bit-width attributes.
    is_fixed_point_model = hasattr(model, "fp_bits")

    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(train_loader):
            images = images.to(device)
            labels = labels.to(device)

            # Forward + Backward + Optimize
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Optional L1 penalty on every parameter whose dotted name has a 'weight' part
            if args.L1norm:
                weight_params = [param for name, param in model.named_parameters()
                                 if 'weight' in name.split('.')]
                if len(weight_params) > 0:
                    l1_total = sum(param.abs().mean() for param in weight_params)
                    loss = loss + l1_total / len(weight_params)

            loss.backward()

            if quantize:
                for name, param in model.named_parameters():
                    gradient = param.grad
                    if gradient is None:
                        # Parameter took no part in this backward pass.
                        continue

                    rank = len(gradient.shape)
                    if rank == 4:
                        quantizer = quantize_tensor_4d
                    elif rank == 2:
                        quantizer = quantize_tensor_2d
                    elif rank == 1:
                        quantizer = quantize_tensor_1d
                    else:
                        print("Shape doesn't match 4, 2, or 1: ",gradient.shape)
                        continue

                    param.grad = quantizer(gradient, model.smallest_step_size, model.largest_num_representable, model.smallest_num_representable)

            optimizer.step()

            # Re-quantize the weights after the update (fixed-point models only).
            if is_fixed_point_model:
                fixed_point_bool = True
                quantize_model(model, model.fp_bits, fixed_point_bool, model.fractional_bits)

            if (i + 1) % 600 == 0:
                print('Epoch: [% d/% d], Step: [% d/% d], Loss: %.4f'
                        % (epoch + 1, num_epochs, i + 1,
                        len(train_set) // batch_size, loss.item()))
                if stop_after_first_report:
                    return


# Gets accuracy (in percent) and mean loss over the whole loader
def get_acc(model, criterion, test_loader):
    """Evaluate ``model`` on every batch of ``test_loader``.

    Changes from the original:
      * The ``break`` after the first batch is removed, so the whole loader is
        scored rather than one batch.
      * The loss is averaged over all samples instead of being the last
        batch's value. This weights each batch loss by its size, which assumes
        ``criterion`` returns a per-batch mean (TODO confirm for non-default
        reductions). For a single batch the result is unchanged.
      * Runs under ``torch.no_grad()`` with the model in eval mode, and
        restores the previous train/eval mode afterwards.
      * Batches go to the device of the model's parameters instead of
        unconditionally to CUDA.

    Args:
        model: network to evaluate.
        criterion: loss callable ``criterion(outputs, labels)``.
        test_loader: iterable of ``(images, labels)`` batches.

    Returns:
        Tuple ``(accuracy_percent, mean_loss)`` of 0-d tensors.

    Raises:
        ValueError: if ``test_loader`` yields no samples.
    """
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    correct = 0
    total = 0
    loss_sum = 0.0

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for images, labels in test_loader:
                if device is not None:
                    images = images.to(device)
                    labels = labels.to(device)

                outputs = model(images)
                n_samples = labels.size(0)
                loss_sum = loss_sum + criterion(outputs, labels) * n_samples

                _, predicted = torch.max(outputs, 1)
                total += n_samples
                correct += (predicted == labels).sum()
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)

    if total == 0:
        raise ValueError("test_loader yielded no samples")

    return ((100 * correct / total), loss_sum / total)

## Quantization Model

In [115]:
# Assume that we always 
class MyConvNet_FIXED_POINT(nn.Module):
    """Same layer stack as the FP32 net, with every layer output fixed-point quantized.

    The word format is 1 sign bit, ``num_bits - 1 - num_fractional_bits``
    integer bits and ``num_fractional_bits`` fraction bits. The step size and
    range limits are computed once here and passed to ``quantize_tensor_4d`` /
    ``quantize_tensor_2d`` after each layer in ``forward``.

    Args:
        args: accepted but not read by this constructor.
        num_bits: total word width, sign bit included.
        num_fractional_bits: number of bits after the binary point.
    """

    def __init__(self, args, num_bits, num_fractional_bits):
        super(MyConvNet_FIXED_POINT, self).__init__()

        # Fixed Point Parameters
        self.fp_bits = num_bits
        self.sign_bit = 1
        self.integer_bits = (num_bits - 1 - num_fractional_bits)
        self.fractional_bits = num_fractional_bits

        # Fixed Point Computed Values for Quantization
        self.smallest_step_size = 1 / np.power(2, num_fractional_bits)
        self.largest_num_representable = (np.power(2, self.integer_bits) - 1) + ((np.power(2, self.fractional_bits)-1) * self.smallest_step_size)
        self.smallest_num_representable = -1 * np.power(2, self.integer_bits)

        # Layer 1
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1)
        self.bn1   = nn.BatchNorm2d(16)
        self.act1  = nn.ReLU(inplace=True)
        self.pool1 = nn.MaxPool2d(kernel_size=2)

        # Layer 2
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.bn2   = nn.BatchNorm2d(32)
        self.act2  = nn.ReLU(inplace=True)
        self.pool2 = nn.MaxPool2d(kernel_size=2)

        # Layer 3
        self.lin2  = nn.Linear(7*7*32, 10)

    def _q4(self, tensor):
        """Quantize a 4-D activation with this model's fixed-point format."""
        return quantize_tensor_4d(tensor, self.smallest_step_size, self.largest_num_representable, self.smallest_num_representable)

    def _q2(self, tensor):
        """Quantize a 2-D tensor with this model's fixed-point format."""
        return quantize_tensor_2d(tensor, self.smallest_step_size, self.largest_num_representable, self.smallest_num_representable)

    # The forward pass quantizes the output of every layer on every call.
    def forward(self, x):
        """Run the network, quantizing after each conv, BN, ReLU, pool and the linear layer."""
        # Layer 1
        c1q = self._q4(self.conv1(x))
        b1q = self._q4(self.bn1(c1q))
        a1q = self._q4(self.act1(b1q))
        p1q = self._q4(self.pool1(a1q))

        # Layer 2
        c2q = self._q4(self.conv2(p1q))
        b2q = self._q4(self.bn2(c2q))
        a2q = self._q4(self.act2(b2q))
        p2q = self._q4(self.pool2(a2q))

        # Flatten and Layer 3.
        # Fix: flatten the *quantized* pool output. The original flattened the
        # unquantized `p2`, so the last activation quantization was bypassed.
        flt = p2q.view(p2q.size(0), -1)
        out_new = self._q2(self.lin2(flt))
        return out_new

In [116]:
class QuantizedConvNet(nn.Module):
    """Wrap a floating-point model between a QuantStub and a DeQuantStub."""

    def __init__(self, model_fp32):
        super().__init__()
        stubs = torch.quantization
        # Input side: converts tensors from floating point to quantized.
        self.quant = stubs.QuantStub()
        # Output side: converts tensors from quantized back to floating point.
        self.dequant = stubs.DeQuantStub()
        # The wrapped FP32 model.
        self.model_fp32 = model_fp32

    def forward(self, x):
        """Apply quant stub -> wrapped model -> dequant stub."""
        # The stubs mark where the float <-> quantized conversions happen.
        quantized_in = self.quant(x)
        return self.dequant(self.model_fp32(quantized_in))

# Test Code Here


In [117]:
# Experiment configuration, wrapped in an EasyDict so fields read as attributes
args = easydict.EasyDict(dict(
    batch_size=1,
    epochs=1,
    lr=0.001,
    enable_cuda=True,
    L1norm=False,
    simpleNet=True,
    activation="relu",  # relu, tanh, sigmoid
    train_curve=True,
    optimization="SGD",
))

# Hyperparameters for FashionMNIST; the last three mirror fields of `args`
input_size, num_classes = 784, 10
num_epochs, batch_size, learning_rate = args.epochs, args.batch_size, args.lr

# Input pipeline: only the training loader shuffles
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False)

In [None]:
# Build the fixed-point model (8-bit words, 5 fractional bits) and move it to the GPU
num_bits, num_fractional_bits = 8, 5
model = MyConvNet_FIXED_POINT(args, num_bits, num_fractional_bits)
model = model.cuda()
# model = MyConvNet_FP32(args).cuda()

criterion = nn.CrossEntropyLoss().cuda()
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

# Train with the `quantize` flag of train_model switched on
train_model(model, criterion, optimizer, train_loader, quantize=True)

# Evaluate with get_acc and report both metrics
test_acc, test_loss = get_acc(model, criterion, test_loader)
print("Test Accuracy: {}".format(test_acc))
print("Test Loss: {}".format(test_loss))



---Training started


## Preston's Training of Model

In [None]:
# cuda_device = torch.device("cuda:0")
# cpu_device = torch.device("cpu:0")

# # Initial model
# FP_model = MyConvNet_FP32(args).cuda()
# FP_fused_model = MyConvNet_FP32(args)

# # Training stuff
# criterion = nn.CrossEntropyLoss().cuda()
# optimizer = torch.optim.SGD(FP_model.parameters(), lr = learning_rate) 

# train_model(FP_model, criterion, optimizer, train_loader)



In [None]:
# test_acc_FP, test_loss_FP = get_acc(FP_model, criterion, test_loader)
# print("Test Accuracy: {}".format(test_acc_FP))
# print("Test Loss: {}".format(test_loss_FP))


In [None]:

# copy_models(FP_model,FP_fused_model)   
# fuse_conv_bn(FP_model,FP_fused_model)

# quantized_model = QuantizedConvNet(model_fp32=FP_fused_model).to(cpu_device)

# quantization_config = torch.quantization.get_default_qconfig("fbgemm")
# quantized_model.qconfig = quantization_config
# torch.quantization.prepare_qat(quantized_model, inplace=True)

# quantized_model.to(cuda_device)
# train_model(quantized_model, criterion, optimizer, train_loader)
# # quantized_model.to(cpu_device)

# # quantized_model = torch.quantization.convert(quantized_model, inplace=True)



In [None]:


# test_acc, test_loss = get_acc(quantized_model, criterion, test_loader)
# print("Test Accuracy: {}".format(test_acc))
# print("Test Loss: {}".format(test_loss))

