In [None]:
# 本次实验包括后量化的过程，主要在于学习如何对DNN的权重（weight）和激活层（activation）进行量化。

In [None]:
# 引用外部软件包
import torch # pytorch包
import torchvision # torchvision是pytorch官方提供的工具，包含很多常用数据集的封装等
import torchvision.transforms as transforms
import torch.nn as nn # nn 和下面的F均包含了主要的神经网络模块，包括卷积层、全连接层、池化层等等
import torch.nn.functional as F
import torch.optim as optim # 优化器

In [None]:
# Select the compute device. A GPU is strongly recommended; the CPU path is very slow.
if torch.cuda.is_available():
    # torch.cuda.set_device() only selects the current GPU and returns None, so the
    # device handle used by `.to(device)` has to be created separately.
    # The server has two GPUs; set the index to 0 or 1 (run `nvidia-smi` in a
    # terminal to see which card is currently free).
    gpu_index = 0
    torch.cuda.set_device(gpu_index)
    device = torch.device('cuda:{}'.format(gpu_index))
    print('using cuda')
else:
    device = torch.device('cpu')
    print('using cpu')

# Quantization bit widths for activations (A) and weights (W).
bitwidth_A = 8
bitwidth_W = 8

In [None]:
# Load the training and test splits of the CIFAR10 dataset.
# torchvision is PyTorch's companion library; it bundles commonly used datasets
# and preprocessing transforms.
transform = transforms.Compose([
    transforms.ToTensor(),  # convert each image to a tensor
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),  # per-channel normalization
])

# Directory that holds the dataset; it is downloaded there automatically if missing.
dataset_root = '/your/path/to/download/dataset'

# Settings shared by both loaders. batch_size is the number of images fed to the
# network per iteration (try changing it and see what happens).
loader_options = dict(batch_size=4, num_workers=2)

trainset = torchvision.datasets.CIFAR10(
    root=dataset_root, train=True, download=True, transform=transform)
# shuffle controls whether the order of the training images is randomized.
trainloader = torch.utils.data.DataLoader(trainset, shuffle=True, **loader_options)

testset = torchvision.datasets.CIFAR10(
    root=dataset_root, train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, shuffle=False, **loader_options)

In [None]:
# Define a simple CNN that classifies CIFAR images.
# The network provided is similar to LeNet-5, and it has the following architecture:

# **Layer** |     **Type**    | **Input Shape** | **Output Shape** | **Activation**
#   conv1   |  Convolutional  |     3x32x32     |     6x28x28      |      ReLU 
#   pool1   |     Max pool    |     6x28x28     |     6x14x14      |      None                
#   conv2   |  Convolutional  |     6x14x14     |     16x10x10     |      ReLU                
#   pool2   |     Max pool    |     16x10x10    |     16x5x5       |      None                
#   fc1     | Fully-connected |       400       |       120        |      ReLU                
#   fc2     | Fully-connected |       120       |       84         |      ReLU                
#   fc3     | Fully-connected |       84        |       10         |      None                

# None of the layers in the network have a bias associated with them.
# This makes them easier to quantize.
# Towards the end of this assignment, we will add biases to the final layer and quantize it as well.

In [None]:
# Network definition: nn.Conv2d is a convolutional layer, nn.Linear a fully-connected layer.
# forward() spells out the computation applied to an incoming image batch x.
class Net(nn.Module):
    """
    LeNet-5-like CIFAR classifier: two convolutional layers, one max-pooling
    layer (applied after each convolution) and three fully-connected layers.
    None of the layers has a bias.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5, bias=False)
        # The same pooling module runs after each conv, which is why fc1 sees 16 maps of 5x5.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, bias=False)
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120, bias=False)
        self.fc2 = nn.Linear(in_features=120, out_features=84, bias=False)
        self.fc3 = nn.Linear(in_features=84, out_features=10, bias=False)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the forward pass and return the fc3 output (one row of 10 scores per image)."""
        features = self.pool(F.relu(self.conv1(x)))
        features = self.pool(F.relu(self.conv2(features)))
        flat = features.view(-1, 16 * 5 * 5)  # flatten to (batch, 400) for the linear layers
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

# Instantiate the network and move it to the selected device.
net = Net().to(device)
# At this point both the network and the way the datasets are consumed are defined.

In [None]:
from torch.utils.data import DataLoader
# Feed the training dataloader to the network; every iteration reads one mini-batch.
def train(model: nn.Module, dataloader: DataLoader, epochs: int = 2,
          lr: float = 0.001, momentum: float = 0.9, log_interval: int = 2000):
    """
    Train `model` in place with SGD and a cross-entropy loss.

    Parameters:
    model (nn.Module): The network to train; all of its parameters are optimized.
    dataloader (iterable): Yields (inputs, labels) mini-batches.
    epochs (int): Number of full passes over `dataloader` (default 2).
    lr (float): SGD learning rate (default 0.001).
    momentum (float): SGD momentum (default 0.9).
    log_interval (int): Print the mean loss every `log_interval` mini-batches (default 2000).

    Returns:
    None. Progress is printed to stdout.
    """
    criterion = nn.CrossEntropyLoss()  # loss function (cross entropy)
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

    # Move every batch to wherever the model lives instead of relying on a
    # notebook-level `device` variable.
    model_device = next(model.parameters()).device

    for epoch in range(epochs):  # one epoch == one pass over all training images

        running_loss = 0.0
        for i, data in enumerate(dataloader, 0):
            # data is a pair of [inputs, labels]
            inputs, labels = data

            inputs = inputs.to(model_device)
            labels = labels.to(model_device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)            # run the batch through the network
            loss = criterion(outputs, labels)  # cross-entropy between predictions and labels
            loss.backward()                    # compute gradients of the loss
            optimizer.step()                   # update the parameters

            # print statistics
            running_loss += loss.item()  # extract the scalar loss to track its evolution
            if i % log_interval == log_interval - 1:
                print('[%d, %5d] loss: %.3f' %
                    (epoch + 1, i + 1, running_loss / log_interval))
                running_loss = 0.0

    print('Finished Training')

def test(model: nn.Module, dataloader: DataLoader, max_samples=None) -> float:
    correct = 0
    total = 0
    n_inferences = 0

    with torch.no_grad(): # 测试时不用求解梯度，因此可以设置不计算梯度的模式
        for data in dataloader:
            images, labels = data

            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images) # get 1 batch worth of image predictions (i.e. 4 predictions of 10 each)
            other, predicted = torch.max(outputs.data, 1) # other == values, predicted == indicies
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            if max_samples:
                n_inferences += images.shape[0]
                if n_inferences > max_samples:
                    break
    
    return 100 * correct / total

In [None]:
# Run the training: hand the network and the training data iterator to train().
train(net, trainloader)

In [None]:
# Now that the CNN has been trained, let's test it on our test dataset.
# test() returns the accuracy as a percentage.
score = test(net, testloader)
print('Accuracy of the network on the test images: {}%'.format(score))

In [None]:
# Define a convenience function which we use to copy CNN's.
from copy import deepcopy

def copy_model(model: nn.Module) -> nn.Module:
    """
    Return a deep copy of `model` together with the profiling / quantization
    metadata this notebook attaches to it.

    The following optional attributes are re-copied explicitly when present on
    the original: `model.input_activations`, and for every direct Conv2d / Linear
    child `weight.scale`, `activations` and `output_scale`. (The original author
    notes that copy.deepcopy alone does not capture this metadata.)

    Parameters:
    model (nn.Module): The network to duplicate. It is not modified.

    Returns:
    nn.Module: An independent copy of `model`.
    """
    clone = deepcopy(model)

    if hasattr(model, 'input_activations'):
        clone.input_activations = deepcopy(model.input_activations)

    # children() yields the direct sub-modules of both networks in the same order.
    for dst_layer, src_layer in zip(clone.children(), model.children()):
        if not isinstance(dst_layer, (nn.Conv2d, nn.Linear)):
            continue
        if hasattr(src_layer.weight, 'scale'):
            dst_layer.weight.scale = deepcopy(src_layer.weight.scale)
        for attr in ('activations', 'output_scale'):
            if hasattr(src_layer, attr):
                setattr(dst_layer, attr, deepcopy(getattr(src_layer, attr)))

    return clone

In [None]:
# Question 1: Visualize Weights

In [None]:
## Question 1.1:

import matplotlib.pyplot as plt
import numpy as np

# For every convolutional / fully-connected layer: print the weight range and the
# "3-sigma" range, then plot a histogram of the weights.
# named_children() supplies each layer's attribute name, so the labels cannot
# drift out of sync with the model definition.
for layer_name, layer in net.named_children():  # iterate over the direct sub-modules
    if not isinstance(layer, (nn.Conv2d, nn.Linear)):
        continue

    weight = layer.weight.data.cpu().view(-1).numpy()
    abs_weight = np.abs(weight)
    print(layer_name + ": the range is [{:.5f}, {:.5f}]".format(weight.min(), weight.max()))

    # "3-sigma" range (u-3s, u+3s), which covers about 99.73% of the parameters.
    # It is estimated here as a percentile: the magnitude below which all but the
    # largest 0.27% of the weights fall.
    # At least one element must be selected, because index -0 would pick the
    # smallest magnitude instead of the largest.
    sigma3 = max(1, int(np.size(weight)*0.0027))
    sigma3_value = abs_weight[np.argpartition(abs_weight, -sigma3)[-sigma3]]
    print(layer_name + ": the 3-sigma range is [{:.5f}, {:.5f}]".format(max(-sigma3_value, weight.min()), min(sigma3_value,weight.max())))

    plt.hist(weight, bins=50, facecolor="blue", edgecolor="black", alpha=0.7)  # draw the histogram
    plt.xlabel("range")
    plt.ylabel("frequency")
    plt.title(layer_name)
    plt.show()
        
# You can get a flattened vector of the weights of fc1 like this:
#   fc1_weights = net.fc1.weight.data.cpu().view(-1)

In [None]:
## Question 1.2: Explain which range you would prefer to use if you were to quantize each layer's weights and wanted to strike a balance between the range of values that could be expressed, and your precision.

In [None]:
# Question 2: Quantize Weights. First, copy the parameters of the original network into net_q2.

net_q2 = copy_model(net)

In [None]:
## Question 2.1:

from typing import Tuple
# Determine a scale from the weight distribution and quantize with it.
def quantized_weights(weights: torch.Tensor, bitwidth_W: int) -> Tuple[torch.Tensor, float]:
    '''
    Quantize the weights so that all values are integers in the signed
    `bitwidth_W`-bit range, i.e. between -2**(bitwidth_W-1) and 2**(bitwidth_W-1)-1
    (-128 and 127 for 8 bits).
    You may want to use the total range, 3-sigma range, or some other range when
    deciding just what factors to scale the float32 values by.

    Parameters:
    weights (Tensor): The unquantized weights
    bitwidth_W (int): Number of bits of the signed integer format to quantize to

    Returns:
    (Tensor, float): A tuple (quantization result, scale) with the following elements:
                        * The weights in quantized form, where every value is an integer in the range above.
                          The "dtype" will still be "float", but the values themselves should all be integers.
                        * The scaling factor that your weights were multiplied by.
                          This value does not need to be an 8-bit integer.

    Raises:
    ValueError: If `weights` has no elements.
    '''

    # modify the following code to try other range

    W_border = 2**(bitwidth_W-1)
    # The tensor is moved to numpy only to make the percentile computation convenient.
    abs_w = np.abs(weights.data.cpu().view(-1).numpy())
    if abs_w.size == 0:
        raise ValueError("cannot quantize an empty weight tensor")

    # "3-sigma" threshold: the magnitude exceeded by only the largest 0.27% of weights.
    # At least one element must be selected: for tensors with fewer than 371 elements
    # int(size*0.0027) is 0, and index -0 would pick the *smallest* magnitude.
    sigma3 = max(1, int(abs_w.size*0.0027))
    sigma3_value = float(abs_w[np.argpartition(abs_w, -sigma3)[-sigma3]])
    if sigma3_value == 0.0:
        # Nearly all weights are zero: fall back to the full range.
        sigma3_value = float(abs_w.max())
    if sigma3_value == 0.0:
        # All weights are zero; any scale represents them exactly.
        return torch.zeros_like(weights), 1.0

    quan_weight = (weights/sigma3_value*W_border).round()  # round to the nearest integer

    # clamp saturates values that fall outside the representable range.
    return torch.clamp(quan_weight, min=-W_border, max=W_border-1), float(W_border/sigma3_value)



In [None]:
def quantize_layer_weights(model: nn.Module, bitwidth_W: int = 8):
    """
    Quantize, in place, the weights of every direct Conv2d / Linear child of `model`.

    For each such layer the weights are replaced by the integer-valued result of
    `quantized_weights`, the scale that was used is stored in `layer.weight.scale`,
    and the scale is printed.

    Parameters:
    model (nn.Module): The network whose layers are quantized.
    bitwidth_W (int): Bit width of the signed integer weight format (default 8).

    Raises:
    Exception: If the quantized weights of a layer are out of range or not integers.
               The offending layer is left unmodified.
    """
    q_min = -2**(bitwidth_W-1)
    q_max = 2**(bitwidth_W-1)-1

    for layer in model.children():
        if not isinstance(layer, (nn.Conv2d, nn.Linear)):
            continue

        q_layer_data, scale = quantized_weights(layer.weight.data, bitwidth_W)
        # Keep the quantized weights on the device the layer already lives on.
        q_layer_data = q_layer_data.to(layer.weight.device)

        # Validate before touching the layer so a failure does not leave it half-updated.
        if (q_layer_data < q_min).any() or (q_layer_data > q_max).any():
            raise Exception("Quantized weights of {} layer include values out of bounds for an {}-bit signed integer".format(layer.__class__.__name__, bitwidth_W))
        if (q_layer_data != q_layer_data.round()).any():
            raise Exception("Quantized weights of {} layer include non-integer values".format(layer.__class__.__name__))

        layer.weight.data = q_layer_data
        layer.weight.scale = scale
        print(scale)

# Quantize the weights of net_q2 in place; the scale of each layer is printed.
quantize_layer_weights(net_q2, bitwidth_W)


In [None]:
## Question 2.2:

# Record the accuracy change of the network after quantizing its weights.
# If you've done everything correctly, the accuracy change should be negligible.

score = test(net_q2, testloader)
print('Accuracy of the network after quantizing all weights: {}%'.format(score))

In [None]:
# Question 3: Visualize Activations

# Now that we have quantized the weights of the CNN, we must also quantize the activations (inputs and outputs to layers) traveling through it.
# But before doing so, let's analyze what values the activations take when travelling through the network.

# We provide convenience code which will record the values of every pixel of the outputs and inputs travelling through the neural network.
# (This is the initial CNN, where not even the weights had yet been quantized).
# We then profile these values when running on a subset of the training set.


In [None]:
# hook，顾名思义，其作用是把网络推理时的中间变量勾出来。通过hook函数，我们可以统计中间变量的范围。
"""
pytorch中的hook机制:
Pytorch会自动舍弃图计算中间结果,用hook以获取某些变量的中间结果.要及时删除,避免增加运行负载

Tensor级别：
register_hook(hook)->Tensor:为Tensor注册一个backward hook 获取变量梯度
hook遵循格式:hook(grad)->Tensor
nn.Module对象级别:
register_forward_hook(hook)
此时hook可以修改input和output却不影响forward结果,典型场景是提取中间层的输出特征,又不希望修改原有模型定义文件
格式为hook(layer,input,output)
"""
def register_activation_profiling_hooks(model: nn.Module):
    """
    Attach forward hooks that record the activations travelling through `model`.

    `model` must expose the layers `conv1`, `conv2`, `fc1`, `fc2` and `fc3`.
    After registration, every forward pass run while `model.profile_activations`
    is True appends flattened values to these numpy arrays:

    * `model.input_activations`: the input of conv1
    * `model.conv1.activations`: the input of conv2
    * `model.conv2.activations`: the input of fc1
    * `model.fc1.activations`: the input of fc2
    * `model.fc2.activations`: the input of fc3
    * `model.fc3.activations`: the output of fc3

    Set `model.profile_activations = False` to stop recording.

    Returns:
    list: The handles returned by `register_forward_hook`, in registration order.
          Call `.remove()` on them to detach the hooks. Callers may ignore the return value.
    """
    model.input_activations = np.empty(0)
    for layer in (model.conv1, model.conv2, model.fc1, model.fc2, model.fc3):
        layer.activations = np.empty(0)

    model.profile_activations = True

    def _to_flat_numpy(tensor: torch.Tensor) -> np.ndarray:
        # detach() keeps this usable even when the forward pass tracks gradients.
        return tensor.detach().cpu().reshape(-1).numpy()

    # In a forward hook, x is the tuple of positional inputs of the layer (so x[0]
    # is the input tensor) while y is the layer's output itself.
    # Each hook stores what is passed on to the next layer as the previous layer's activations.
    def conv1_activations_hook(layer, x, y):
        if model.profile_activations:
            model.input_activations = np.append(model.input_activations, _to_flat_numpy(x[0]))

    def conv2_activations_hook(layer, x, y):
        if model.profile_activations:
            model.conv1.activations = np.append(model.conv1.activations, _to_flat_numpy(x[0]))

    def fc1_activations_hook(layer, x, y):
        if model.profile_activations:
            model.conv2.activations = np.append(model.conv2.activations, _to_flat_numpy(x[0]))

    def fc2_activations_hook(layer, x, y):
        if model.profile_activations:
            model.fc1.activations = np.append(model.fc1.activations, _to_flat_numpy(x[0]))

    def fc3_activations_hook(layer, x, y):
        if model.profile_activations:
            model.fc2.activations = np.append(model.fc2.activations, _to_flat_numpy(x[0]))
            # Record the whole output batch: y is a tensor, so y[0] would keep
            # only the first sample of every batch.
            model.fc3.activations = np.append(model.fc3.activations, _to_flat_numpy(y))

    return [
        model.conv1.register_forward_hook(conv1_activations_hook),
        model.conv2.register_forward_hook(conv2_activations_hook),
        model.fc1.register_forward_hook(fc1_activations_hook),
        model.fc2.register_forward_hook(fc2_activations_hook),
        model.fc3.register_forward_hook(fc3_activations_hook),
    ]


In [None]:
# Use the unquantized model to profile input and output activations on a subset of the training dataset.

net_q3 = copy_model(net)
register_activation_profiling_hooks(net_q3)  # record the activations of every intermediate layer

# Run through the training dataset again while profiling the input and output activations this time
# We don't actually have to perform gradient descent for this, so we can use the "test" function
test(net_q3, trainloader, max_samples=400)  # profile roughly 400 samples to collect the value ranges of the intermediate activations
net_q3.profile_activations = False

In [None]:
## Question 3.1:

# Plot histograms of the input images and the outputs of every convolutional and fully-connected layer. 
# Record any observations you make about the distribution of the values.
# ADD YOUR CODE HERE 
Activation_border = 2**(bitwidth_A-1)
input_activations = net_q3.input_activations # the input data
conv1_output_activations = net_q3.conv1.activations # recorded output of the first conv layer (captured as the input of conv2)
conv2_output_activations = net_q3.conv2.activations # recorded output of the second conv layer (captured as the input of fc1)
fc1_output_activations = net_q3.fc1.activations # same pattern for the fully-connected layers
fc2_output_activations = net_q3.fc2.activations
fc3_output_activations = net_q3.fc3.activations

# to plot distributions of activations
# Plot histograms of the following variables, and calculate their ranges and 3-sigma ranges:

#   input_activations

#   conv1_output_activations

#   conv2_output_activations

#   fc1_output_activations

#   fc2_output_activations

#   fc3_output_activations



In [None]:
## Question 3.2:

# Additionally, record the range of the values, as well as their 3-sigma range (the difference between $\mu + 3\sigma$ and $\mu - 3\sigma$).
# For which layers is the 3-sigma range larger or smaller than the actual range?
# Then explain which range you would prefer to use if you were to quantize each layer's activations and wanted to strike a balance between the range of values that could be expressed, and your precision.
# Remember that you are plotting the activations *after* activation functions like ReLU have been applied, which means that you should not be worried if you find that your plots are asymmetric.



In [None]:
# Question 4: Quantize Activations

# Complete the `quantize_initial_input` and `quantize_activations` functions which calculate the scaling factors for the initial image which is input to the CNN, and the outputs of each layer, respectively.

# complete the `forward` function for the `NetQuantized` class.
# You will have to add code here to scale the outputs of each layer, and then to clamp the outputs of each layer to integers between -128 and 127 afterwards.


In [None]:
from typing import List

class NetQuantized(nn.Module):
    """
    Simulation of fixed-point inference for a network whose weights are already quantized.

    The constructor expects a network with the layers conv1, pool, conv2, fc1,
    fc2 and fc3, where every conv / linear layer carries `weight.scale` (set by
    quantize_layer_weights) and `activations` (profiled layer outputs), and the
    network itself carries `input_activations` (profiled inputs).
    """

    def __init__(self, net_with_weights_quantized: nn.Module, bitwidth_A: int):
        """
        Parameters:
        net_with_weights_quantized (nn.Module): Weight-quantized, profiled network. It is copied, not modified.
        bitwidth_A (int): Bit width of the signed integer format used for activations.
        """
        super(NetQuantized, self).__init__()

        # Work on a copy of the given network to initialize the quantized network.
        net_init = copy_model(net_with_weights_quantized)

        self.conv1 = net_init.conv1
        self.pool = net_init.pool
        self.conv2 = net_init.conv2
        self.fc1 = net_init.fc1
        self.fc2 = net_init.fc2
        self.fc3 = net_init.fc3
        self.activation_border = 2**(bitwidth_A-1)
        print(self.activation_border)

        # Sanity check run before every layer: the preceding quantization step must
        # have produced integers inside the representable range.
        def pre_hook(l, x):
            x = x[0]
            if (x < -self.activation_border).any() or (x > (self.activation_border-1)).any():
                raise Exception("Input to {} layer is out of bounds for an {}-bit signed integer".format(l.__class__.__name__, bitwidth_A))
            if (x != x.round()).any():
                raise Exception("Input to {} layer has non-integer values".format(l.__class__.__name__))

        quantized_layers = (self.conv1, self.conv2, self.fc1, self.fc2, self.fc3)
        for layer in quantized_layers:
            layer.register_forward_pre_hook(pre_hook)

        # Calculate the scaling factor for the initial input to the CNN
        self.input_activations = net_with_weights_quantized.input_activations
        self.input_scale = NetQuantized.quantize_initial_input(self.input_activations, self.activation_border)

        # Calculate the output scaling factors for all the layers of the CNN.
        # A layer's activation scale is derived from its profiled outputs exactly
        # like the input scale; the activation scale of one layer is the
        # "previous" scale of the next one.
        pre_activation_scale = self.input_scale
        for layer in quantized_layers:
            now_activation_scale = NetQuantized.quantize_initial_input(layer.activations, self.activation_border)
            layer.output_scale = NetQuantized.quantize_activations(
                layer.weight.scale, pre_activation_scale, now_activation_scale)
            pre_activation_scale = now_activation_scale

    # Computes the scale of a set of profiled values (used for the input and for layer outputs).
    @staticmethod
    def quantize_initial_input(pixels: np.ndarray, activation_border:int):
        '''
        Calculate a scaling factor for the images that are input to the first layer of the CNN.

        Parameters:
        pixels (ndarray): The values of all the pixels which were part of the input image during training
        activation_border (int): 2**(bitwidth-1), the magnitude the chosen range is mapped to

        Returns:
        float: A scaling factor that the input should be multiplied by before being fed into the first layer.
               This value does not need to be an 8-bit integer.

        Raises:
        ValueError: If `pixels` is empty (nothing was profiled).
        '''
        pixels = np.asarray(pixels).reshape(-1)
        if pixels.size == 0:
            raise ValueError("no profiled values available to derive a scale from")

        # "3-sigma" value: the value exceeded by only the largest 0.27% of samples.
        # At least one element must be selected, because index -0 would pick the
        # smallest value instead of the largest.
        sigma3 = max(1, int(np.size(pixels)*0.0027))
        sigma3_value = pixels[np.argpartition(pixels,-sigma3)[-sigma3]]
        if sigma3_value <= 0:
            # No usable positive value: fall back to the largest magnitude.
            sigma3_value = np.max(np.abs(pixels))
        if sigma3_value == 0:
            # Every value is zero; any scale represents them exactly.
            return 1.0
        scale = activation_border/sigma3_value  # you can modify the range here
        return float(scale)

    # Requantization factor applied to the output of one layer.
    @staticmethod
    def quantize_activations( n_w: float, pre_activation_scale: float, now_activation_scale: float) -> float:
        '''
        Calculate a scaling factor to multiply the output of a layer by.

        With integer weights (real weights * n_w) and integer inputs (real inputs *
        pre_activation_scale), the raw layer output equals the real output times
        n_w * pre_activation_scale. Multiplying by the returned factor turns that
        into the real output times now_activation_scale.

        Parameters:
        n_w (float): The scale by which the weights of this layer were multiplied as part of the "quantized_weights" function you wrote earlier
        pre_activation_scale (float): The scale of the activations fed into this layer
        now_activation_scale (float): The scale wanted for the activations this layer outputs

        Returns:
        float: A scaling factor that the layer output should be multiplied by before being fed into the next layer.
               This value does not need to be an 8-bit integer.
        '''
        return float(now_activation_scale / (n_w * pre_activation_scale))

    def _requantize(self, x: torch.Tensor, scale: float) -> torch.Tensor:
        # Rescale, round to integers and saturate to the activation range
        # (-128 to 127 for 8 bits).
        return torch.clamp((x*scale).round(), min=-self.activation_border, max=self.activation_border-1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # This simulates fixed-point computation, so the output of every layer is
        # explicitly multiplied by its scaling factor and limited to the representable range.
        #   output_scale: factor that compresses the integers passed between layers
        #                 back to the activation bit width
        #   activation_scale: ratio between the quantized and the real activations of a layer
        #   weight_scale: ratio between the quantized and the real weights
        x = self._requantize(x, self.input_scale)

        x = self.pool(F.relu(self.conv1(x)))
        x = self._requantize(x, self.conv1.output_scale)

        x = self.pool(F.relu(self.conv2(x)))
        x = self._requantize(x, self.conv2.output_scale)

        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = self._requantize(x, self.fc1.output_scale)

        x = F.relu(self.fc2(x))
        x = self._requantize(x, self.fc2.output_scale)

        # The final scores are only rescaled, not rounded or clamped.
        x = self.fc3(x)*self.fc3.output_scale
        return x


In [None]:
# Merge the information from net_q2 and net_q3 together:
# quantized weights (net_q2) plus profiled activations (net_q3).
net_init = copy_model(net_q2)
net_init.input_activations = deepcopy(net_q3.input_activations)
for layer_init, layer_q3 in zip(net_init.children(), net_q3.children()):
    if isinstance(layer_init, nn.Conv2d) or isinstance(layer_init, nn.Linear):
        layer_init.activations = deepcopy(layer_q3.activations)

net_quantized = NetQuantized(net_init, bitwidth_A)


In [None]:
# Finally, record the accuracy of your network after both weights and activations have been quantized.
# If you've done everything right, you should still find almost no accuracy change.

score = test(net_quantized, testloader)
print('Accuracy of the network after quantizing both weights and activations: {}%'.format(score))


In [None]:
# Question 5: Quantize Biases 以下为选做内容

# Let us now update our CNN to include a bias in its final layer, *fc3*.
# We have already included code to create and train a new CNN called `net_with_bias`.

# Consider how a bias affects the equation for an unquantized layer:

# W * In + bias = Out



In [None]:
## Question 5.1:

# Suppose that we again quantized a biased layer with the same scaling factors we used in previous questions: $n_W$, $n_{In}$, and $n_{Out}$.
# What would we scale bias by in this case?
# Write an equation in your lab report to describe the output of the quantized layer with a bias.

# Create a new network with a bias on *fc3*.


In [None]:
class NetWithBias(nn.Module):
    """
    Same architecture as the bias-free CIFAR network, except that the final
    fully-connected layer (fc3) has a bias.
    """

    def __init__(self):
        super().__init__()

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5, bias=False)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, bias=False)
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120, bias=False)
        self.fc2 = nn.Linear(in_features=120, out_features=84, bias=False)
        # Only the last layer has a bias; this is the one difference from the earlier network.
        self.fc3 = nn.Linear(in_features=84, out_features=10, bias=True)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the forward pass and return the fc3 output (one row of 10 scores per image)."""
        features = self.pool(F.relu(self.conv1(x)))
        features = self.pool(F.relu(self.conv2(features)))
        flat = features.view(-1, 16 * 5 * 5)  # flatten to (batch, 400) for the linear layers
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

# Instantiate the network with a biased final layer and move it to the selected device.
net_with_bias = NetWithBias().to(device)

In [None]:
# Train and test the model

In [None]:
# Train the biased network on the training set.
train(net_with_bias, trainloader)

In [None]:
# Accuracy (in percent) of the unquantized network with a bias.
score = test(net_with_bias, testloader)
print('Accuracy of the network (with a bias) on the test images: {}%'.format(score))

In [None]:
# Profile the activations of the biased network on a subset of the training set,
# then switch recording off again.
register_activation_profiling_hooks(net_with_bias)
test(net_with_bias, trainloader, max_samples=400)
net_with_bias.profile_activations = False

In [None]:
# Quantize the weights (but not yet the bias) of a copy of the trained network.
# quantize_layer_weights needs the weight bit width; use the same one as for net_q2.
net_with_bias_with_quantized_weights = copy_model(net_with_bias)
quantize_layer_weights(net_with_bias_with_quantized_weights, bitwidth_W)

score = test(net_with_bias_with_quantized_weights, testloader)
print('Accuracy of the network on the test images after all the weights are quantized but the bias isn\'t: {}%'.format(score))

In [None]:
# Fill in the `quantized_bias` function in the `NetQuantizedWithBias` class.
# This function is meant to quantize the bias on the final layer of the CNN.
# Keep in mind that biases are typically quantized to 32-bits, so your bias values do not all have to be between -128 and 127 (though 32-bits is a bit conservative).

In [None]:
# slightly clearer bias bounds (32b signed integer)
# Smallest and largest value a quantized bias may take.
MIN_32B_SINT = -(2**31) 
MAX_32B_SINT = (2**31) - 1

In [None]:
class NetQuantizedWithBias(NetQuantized):
    """NetQuantized variant for a network whose final layer (fc3) has a bias, which is quantized too."""

    def __init__(self, net_with_weights_quantized: nn.Module, bitwidth_A: int = 8):
        """
        Parameters:
        net_with_weights_quantized (nn.Module): Weight-quantized, profiled network with a bias on fc3.
        bitwidth_A (int): Bit width of the signed integer activation format, forwarded to
            NetQuantized (default 8). Pass the notebook's setting if it differs.

        Raises:
        Exception: If the quantized bias is out of the 32-bit signed range or not integer-valued.
        """
        super(NetQuantizedWithBias, self).__init__(net_with_weights_quantized, bitwidth_A)

        # (weight scale, output scale) of every conv / linear layer before fc3.
        preceding_scales = [(layer.weight.scale, layer.output_scale) for layer in self.children() if isinstance(layer, nn.Conv2d) or isinstance(layer, nn.Linear)][:-1]

        self.fc3.bias.data = NetQuantizedWithBias.quantized_bias(
            self.fc3.bias.data,
            self.fc3.weight.scale,
            self.input_scale,
            preceding_scales
        )

        if (self.fc3.bias.data < MIN_32B_SINT).any() or (self.fc3.bias.data > MAX_32B_SINT).any():
            raise Exception("Bias has values which are out of bounds for an 32-bit signed integer")
        if (self.fc3.bias.data != self.fc3.bias.data.round()).any():
            raise Exception("Bias has non-integer values")
    
    @staticmethod
    def quantized_bias(bias: torch.Tensor, n_w: float, n_initial_input: float, ns: List[Tuple[float, float]]) -> torch.Tensor:
        '''
        Quantize the bias so that all values are integers between MIN_32B_SINT and MAX_32B_SINT.

        The bias is added to the integer product of the quantized weights and the
        quantized input of the layer, so it has to be scaled by n_w times the scale
        of that input. The input scale is the initial input scale multiplied by the
        weight scale and output scale of every preceding layer.

        Parameters:
        bias (Tensor): The floating point values of the bias
        n_w (float): The scale by which the weights of this layer were multiplied
        n_initial_input (float): The scale by which the initial input to the neural network was multiplied
        ns ([(float, float)]): A list of tuples, where each tuple represents the "weight scale" and "output scale" (in that order) for every preceding layer

        Returns:
        Tensor: The bias in quantized form, where every value is an integer between MIN_32B_SINT and MAX_32B_SINT.
                The "dtype" will still be "float", but the values themselves should all be integers.
        '''
        scale = n_w * n_initial_input
        for weight_scale, output_scale in ns:
            scale = scale * weight_scale * output_scale

        # float() so that numpy scalars among the scales cannot affect the tensor arithmetic.
        return torch.clamp((bias * float(scale)).round(), min=MIN_32B_SINT, max=MAX_32B_SINT)




In [None]:
# Build the fully quantized network (weights, activations and the fc3 bias).
net_quantized_with_bias = NetQuantizedWithBias(net_with_bias_with_quantized_weights)


In [None]:
## Question 5.3:

# What is your accuracy before and after quantizing CNN with the bias?
# The accuracy change should ideally be negligible.

In [None]:
# Accuracy (in percent) once the weights, activations and bias are all quantized.
score = test(net_quantized_with_bias, testloader)
print('Accuracy of the network on the test images after all the weights and the bias are quantized: {}%'.format(score))