# Part 1: VGG16 with 4-bit weights and activations

In [18]:
import torch

import torchvision
import torchvision.transforms as transforms

# Include parent dir in path
import sys
from pathlib import Path
parent_dir = str(Path.cwd().parent)
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

from models import *

# NOTE: a `global` declaration at module (cell) level has no effect; kept as in the original.
global best_prec
use_gpu = torch.cuda.is_available()
print('=> Building model...')

# Model configuration: quantized VGG built with 4-bit weights (w_bits) and
# 4-bit activations (a_bits). `VGG` is presumably provided by `from models import *`.
batch_size = 128
model_name = "VGG16_project_part1"
model = VGG(vgg_name=model_name, w_bits=4, a_bits=4)
print(model)

# Per-channel mean/std used to normalize the test images.
normalize = transforms.Normalize(
    mean=[0.491, 0.482, 0.447],
    std=[0.247, 0.243, 0.262],
)

# CIFAR-10 test split (train=False); downloaded to ../data when missing.
test_dataset = torchvision.datasets.CIFAR10(
    root='../data',
    train=False,
    download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ]),
)

# shuffle=False keeps the batch order fixed, so the first batch is the same on every run.
testloader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
)

=> Building model...
VGG_quant(
  (features): Sequential(
    (0): QuantConv2d(
      3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
      (weight_quant): weight_quantize_fn()
    )
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): QuantConv2d(
      64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
      (weight_quant): weight_quantize_fn()
    )
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): QuantConv2d(
      64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
      (weight_quant): weight_quantize_fn()
    )
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): QuantConv2d(
      128, 128, kernel_size=(3, 3), stride

In [19]:
# Select the compute device: CUDA first, then Apple MPS, otherwise CPU.
# `torch.backends.mps.is_available()` is the documented MPS availability check;
# `torch.mps.is_available()` is not present in every PyTorch release.
if torch.cuda.is_available():
    device = 'cuda'
elif torch.backends.mps.is_available():
    device = 'mps'
else:
    device = 'cpu'

PATH = "part1_model_best.pth.tar"
# SECURITY NOTE: torch.load unpickles the file, which can execute arbitrary code.
# Only load checkpoints from a trusted source (consider weights_only=True if the
# checkpoint contents allow it).
checkpoint = torch.load(PATH, map_location=device)
model.load_state_dict(checkpoint['state_dict'])

model.to(device)
model.eval()  # evaluation mode for the accuracy measurement below

correct = 0
num_samples = len(testloader.dataset)

# Count top-1 hits over the whole test set without building an autograd graph.
with torch.no_grad():
    for data, target in testloader:
        data, target = data.to(device), target.to(device)  # move the batch to the selected device
        output = model(data)
        pred = output.argmax(dim=1, keepdim=True)  # index of the highest score per sample
        correct += pred.eq(target.view_as(pred)).sum().item()

test_acc = 100. * correct / num_samples
print('\nTest set: Accuracy: {}/{} ({:.2f}%)\n'.format(
        correct, num_samples,
        test_acc))


Test set: Accuracy: 9225/10000 (92.25%)



In [13]:
class SaveOutput:
    """Callable recorder: every call stores the received `module_in` value in `outputs`."""

    def __init__(self):
        # Begin with an empty record list.
        self.clear()

    def __call__(self, module, module_in):
        """Append `module_in` to `outputs`; `module` is accepted but not used."""
        self.outputs += [module_in]

    def clear(self):
        """Forget all recorded values by rebinding `outputs` to a fresh list."""
        self.outputs = []
        
######### Save inputs from squeezed layer ##########
# Record the inputs of features[27], [28] and [29] during ONE forward pass.
# register_forward_pre_hook returns a handle; the handles are kept so the hooks
# can be removed afterwards. Otherwise every re-run of this cell stacks another
# set of hooks on the model, and any later forward pass keeps appending to
# save_output.outputs.
save_output = SaveOutput()
hooked_layers = (
    model.features[27],  # input to squeezed layer
    model.features[28],  # input to relu layer
    model.features[29],  # input to next layer
)
hook_handles = [layer.register_forward_pre_hook(save_output) for layer in hooked_layers]
# NOTE(review): if features[28] is an in-place ReLU (as the ReLU layers in the
# printed model are), the tensor recorded as its input is overwritten with the
# ReLU result - confirm before relying on save_output.outputs[1].
####################################################

dataiter = iter(testloader)
images, labels = next(dataiter)
images = images.to(device)
try:
    # Inference only: no autograd graph is needed for the captured tensors.
    with torch.no_grad():
        out = model(images)
finally:
    # Always detach the hooks, even if the forward pass raises.
    for handle in hook_handles:
        handle.remove()

In [14]:
# Convert the quantized weights of features[27] back to integer codes by
# dividing out the quantization step.
w_bit = 4
squeezed_layer = model.features[27]
weight_q = squeezed_layer.weight_q
w_alpha = squeezed_layer.weight_quant.wgt_alpha

# Step size: alpha divided by the largest positive level, 2**(w_bit-1)-1.
w_step = w_alpha / (2**(w_bit-1)-1)
weight_int = weight_q / w_step
print(f"Unique values in weight_int: {torch.unique(weight_int)}")

Unique values in weight_int: tensor([-7.0000, -6.0000, -5.0000, -4.0000, -3.0000, -2.0000, -1.0000, -0.0000,
         1.0000,  2.0000,  3.0000,  4.0000,  5.0000,  6.0000,  7.0000],
       device='mps:0', grad_fn=<Unique2Backward0>)


In [15]:
# Quantize the recorded input activation and convert it to integer codes.
# Assumes the first recorded entry is the input tuple of features[27]; the
# trailing [0] selects the tensor inside that tuple.
act_bit = 4
act = save_output.outputs[0][0]
act_alpha = model.features[27].act_alpha

act_quant_fn = act_quantization(act_bit)
act_q = act_quant_fn(act, act_alpha)

# Step size: alpha divided by the largest level, 2**act_bit-1.
act_step = act_alpha / (2**act_bit-1)
act_int = act_q / act_step
print(f"Unique values in act_int: {torch.unique(act_int)}")

Unique values in act_int: tensor([ 0.0000,  1.0000,  2.0000,  3.0000,  4.0000,  5.0000,  6.0000,  7.0000,
         8.0000,  9.0000, 10.0000, 11.0000, 12.0000, 13.0000, 14.0000, 15.0000],
       device='mps:0', grad_fn=<Unique2Backward0>)


In [16]:
# Re-run the squeezed layer as a plain convolution over the integer codes.
# Channel counts and kernel size are taken from weight_int, whose layout is
# (out_channels, in_channels, kernel_h, kernel_w), instead of being hard-coded.
out_channels, in_channels, *kernel_size = weight_int.shape
conv_int = torch.nn.Conv2d(
    in_channels=in_channels,
    out_channels=out_channels,
    kernel_size=tuple(kernel_size),
    padding=1,
)
conv_int.weight = torch.nn.parameter.Parameter(weight_int)
# NOTE(review): a non-None bias would be added in the integer domain and then
# multiplied by both scales below, which is only exact when the layer has no
# bias (the conv layers in the printed model use bias=False) - confirm for features[27].
conv_int.bias = model.features[27].bias
relu = torch.nn.ReLU(inplace=True)

output_int = relu(conv_int(act_int))
# Map the integer result back to real values: multiply by the activation step
# and the weight step.
output_recovered = output_int * (act_alpha / (2**act_bit-1)) * (w_alpha / (2**(w_bit-1)-1))

In [17]:
# Compare the integer-path result with the recorded input of features[29]
# (third recorded entry) using the mean absolute difference.
difference = output_recovered - save_output.outputs[2][0]
error = difference.abs().mean()
# error = torch.norm(difference) # L2 Norm

# Pass when the mean absolute error is below 1e-3 and test accuracy is above 90.
passed = bool(error < 1e-3) and test_acc > 90
print("Part 1.1: PASS" if passed else "Part 1.1: FAIL")
print(f"Error = {error}")
print(f"Test Acc = {test_acc}")

Part 1.1: PASS
Error = 2.8741089863615343e-07
Test Acc = 92.25
