# Module

In [1]:
import argparse
import tqdm
from tqdm import tqdm_notebook as tq
import os, time, math, copy
import numpy as np
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from collections import namedtuple
import matplotlib.pyplot as plt

torch.set_printoptions(precision=8, linewidth=50000)
import warnings
warnings.filterwarnings(action='ignore')

# Print Colors

In [2]:
BLACK	= '\033[30m'
RED		= '\033[31m'
GREEN	= '\033[32m'
YELLOW	= '\033[33m'
BLUE	= '\033[34m'
MAGENTA	= '\033[35m'
CYAN	= '\033[36m'
RESET	= '\033[0m'
SEL		= '\033[7m'

In [3]:
class	fxp:
	def	__init__(self, bIn, iBWF):
		self.iFullBW	= len(bIn)
		self.iIntgBW	= self.iFullBW - iBWF
		self.bSign		= bIn[0]
		self.bIntg		= bIn[:self.iIntgBW]
		self.bFrac		= bIn[self.iIntgBW:]
		self.fFull		= 0
		try:
			for idx, bit in enumerate(bIn):
				if	idx == 0:
					self.fFull = self.fFull + int(bit,2) * -pow(2, self.iIntgBW - 1)
				else:
					self.fFull = self.fFull + int(bit,2) * pow(2, self.iIntgBW - 1 - idx)
		except:
			print(bIn)
		self.dispFull	= RED + self.bIntg + BLUE + self.bFrac + RESET
		return

In [4]:
class	flp2fix:
	def	__init__(self, fIn, iBW, iBWF):
		self.fMin		= - 2 ** (iBW - iBWF - 1)
		self.fMax		= (2 ** (iBW-1) - 1) * (2 ** -iBWF)
		self.fResol		= 2 ** -iBWF
		if fIn < self.fMin or fIn > self.fMax:
			print(f'({fIn}): Out of input range ({self.fMax}/{self.fMin}) during flp -> fix converting ')
		self.iBW		= iBW
		self.iBWI		= iBW - iBWF
		self.iBWF		= iBWF

		self.iFLP2INT	= abs(int(fIn * 2 ** iBWF))
		if fIn < 0:
			self.iFLP2INT = 2 ** (iBW-1) - self.iFLP2INT

		if fIn >= 0:
			self.bFull = bin(self.iFLP2INT)[2:].rjust(iBW, '0')
		else:
			self.bFull = '1'+bin(self.iFLP2INT)[2:].rjust(iBW-1, '0')
			if len(self.bFull) > iBW:
				self.bFull = '0' * iBW

		self.cssFxp		= fxp(self.bFull, self.iBWF)
		self.bSign		= self.cssFxp.bSign
		self.bIntg		= self.cssFxp.bIntg
		self.bFrac		= self.cssFxp.bFrac
		self.fFull		= self.cssFxp.fFull
		return

In [5]:
# def	flp2fixTensor(fIn, iBW, iBWF):
# 	fMin = - 2 ** (iBW - iBWF - 1)
# 	fMax = (2 ** (iBW-1) - 1) * (2 ** -iBWF)
# 	fTensor = fIn * (2 ** iBWF)
# 	fTensor = fTensor.round() * (2 ** -iBWF)
# 	if (fTensor.min() < fMin or fMax < fTensor.max()):
# 		print(f'fMin: {fMin}, fMax: {fMax}, fTensor.Min:{fTensor.min()}, fTensor.Max:{fTensor.max()}')
# 	return fTensor

In [6]:
def	flp2fixTensor(fIn, iBW, iBWF):
	fMin = - 2 ** (iBW - iBWF - 1)
	fMax = (2 ** (iBW-1) - 1) * (2 ** -iBWF)
	fList = []
	for aTensor in fIn.view(-1):
		fList.append(flp2fix(aTensor, iBW, iBWF).fFull)
	return torch.tensor(fList).view(fIn.size())

# User Define Variable

In [7]:
data_path = '~/dataset'

# Parser

In [8]:
parser = argparse.ArgumentParser(description='PyTorch for MNIST dataset')
parser.add_argument('--device', type=str, default='cpu', help='Device')
parser.add_argument('--shuffle', action='store_true', default=False, help='enables data shuffle')
parser.add_argument('--dataset', type=str, default='mnist', help='training dataset')
parser.add_argument('--data_path', type=str, default=data_path, help='path to MNIST')
parser.add_argument('--batch_size', type=int, default=64, help='batch size')
parser.add_argument('--epochs', type=int, default=10, help='number of epochs to train')
parser.add_argument('--lr', type=float, default=0.001, help='learning rate')
parser.add_argument('--optimizer', type=str, default='adam', help='optimizer')
parser.add_argument('--loss_func', type=str, default='cel', help='optimizer')
parser.add_argument('--quant_opt', type=str, default='asym', help='Type of Quantization')
parser.add_argument('--full_bits', type=int, default=16, help='Number of Quantization Bits')
parser.add_argument('--frac_bits', type=int, default=8, help='Number of Quantization Bits')
parser.add_argument('--pretrained', type=bool, default=True, help='Pretrained Model')
parser.add_argument('--act_quant', type=bool, default=False, help='Activation Quantization')
parser.add_argument('--disp', type=bool, default=False, help='Display Model Information')

args = parser.parse_args(args=[])

# Preparing Data

In [9]:
kwargs = {'num_workers': 1, 'pin_memory': True} if args.device == 'cuda' else {}
data_transform = transforms.Compose([transforms.Resize((32,32)),transforms.ToTensor()])
if args.dataset == 'mnist':
	train_loader = torch.utils.data.DataLoader(
		dataset=datasets.MNIST(
			root=args.data_path,
			train=True,
			download=True,
			transform=data_transform
		),
		batch_size=args.batch_size,
		shuffle=args.shuffle,
		**kwargs
	)

	test_loader = torch.utils.data.DataLoader(
		dataset=datasets.MNIST(
			root=args.data_path,
			train=False,
			download=True,
			transform=data_transform
		),
		batch_size=args.batch_size,
		shuffle=args.shuffle,
		**kwargs
	)

# Build Model

In [10]:
#class MLP(nn.Module):
#	def __init__(self):
#		super(MLP, self).__init__()
#		self.flatten = nn.Flatten()
#		self.fc1 = nn.Linear(28*28, 16)
#		self.relu1 = nn.ReLU()
#		self.fc2 = nn.Linear(16, 16)
#		self.relu2 = nn.ReLU()
#		self.fc3 = nn.Linear(16, 10)
		
#	def forward(self, x):
#		x = self.flatten(x)
#		x = self.fc1(x)
#		x = self.relu1(x)
#		x = self.fc2(x)
#		x = self.relu2(x)
#		logits = self.fc3(x)
#		return logits

In [11]:
class LeNet_5(nn.Module):
    def __init__(self):
        super(LeNet_5,self).__init__()
        self.flatten = nn.Flatten()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5, stride=1)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5, stride=1)
        self.conv3 = nn.Conv2d(16, 120, kernel_size=5, stride=1)  # Flatten layer
        self.fc1 = nn.Linear(120, 84)
        self.fc2 = nn.Linear(84, 10)
        self.pool = nn.AvgPool2d(kernel_size=2, stride=2)
        self.act = nn.ReLU()

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.avg_pool2d(x, 2, 2)
        x = F.relu(self.conv2(x))
        x = F.avg_pool2d(x, 2, 2)
        x = F.relu(self.conv3(x))
        x = self.flatten(x)
#        x = x.view(-1, 120) # Flatten layer
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

model = LeNet_5()
print(model)

LeNet_5(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (conv3): Conv2d(16, 120, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=120, out_features=84, bias=True)
  (fc2): Linear(in_features=84, out_features=10, bias=True)
  (pool): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (act): ReLU()
)


In [12]:
def genOptimizer(model, args):
	if args.optimizer == 'sgd':
		optimizer = torch.optim.SGD(model.parameters(), lr=args.lr)
	if args.optimizer == 'adam':
		optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
	return optimizer

In [13]:
def genLossFunc(args):
	if args.loss_func == 'cel':
		loss_func = nn.CrossEntropyLoss()
	return loss_func

In [14]:
def train(train_loader, model, epoch, args):
    model.train()
    loss_func = genLossFunc(args)
    optimizer = genOptimizer(model, args)
    max_batch_index = int(np.floor(len(train_loader.dataset)/args.batch_size)) #batch 번호 ..?
    running_loss,correct = 0, 0
    
    for batch_index, (image, label) in enumerate(tq(train_loader, desc='Train', leave=False)):
        image, label = image.to(args.device), label.to(args.device)
        pred = model(image) #pred = model
        loss = loss_func(pred, label) # model 이용해서 loss 구함)
        running_loss += loss.item()#*image.size(0)
        correct += (pred.argmax(1) == label).type(torch.int).sum().item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    final_loss = running_loss/len(train_loader.dataset)
    correct_rate = 100 * correct / len(train_loader.dataset)
    print(f'Epoch {epoch+1:<3d}: Avg. Loss: {final_loss:.4f}', end = '\t')
    print(f'Accuracy: {correct}/{len(train_loader.dataset)} ({correct_rate:>.1f}%)')
    
    return final_loss,correct_rate

In [15]:
def test(test_loader, model, args):
	model.eval()
	with torch.no_grad():
		loss_func = genLossFunc(args)
		loss, correct = 0, 0
# 		for batch_index, (image, label) in enumerate(tq(test_loader, desc='Test', leave=False)):
		for batch_index, (image, label) in enumerate(test_loader):
			image, label = image.to(args.device), label.to(args.device)
			pred = model(image)
			loss += loss_func(pred, label).item()#*image.size(0)
			correct += (pred.argmax(1) == label).type(torch.int).sum().item()
	loss /= len(test_loader.dataset)
	correct_rate = 100 * correct / len(test_loader.dataset)
	print(f'Accuracy: {correct}/{len(test_loader.dataset)} ({correct_rate:>.1f}%)')

In [16]:
def main(model):
	for epoch in range(args.epochs):
		train(train_loader, model, epoch, args)
		test(test_loader, model, args)
	print("Done!")
	return model

model = main(LeNet_5().to(args.device))

Train:   0%|          | 0/938 [00:00<?, ?it/s]

Epoch 1  : Avg. Loss: 0.0054	Accuracy: 53740/60000 (89.6%)
Accuracy: 9523/10000 (95.2%)


Train:   0%|          | 0/938 [00:00<?, ?it/s]

Epoch 2  : Avg. Loss: 0.0017	Accuracy: 58073/60000 (96.8%)
Accuracy: 9679/10000 (96.8%)


Train:   0%|          | 0/938 [00:00<?, ?it/s]

Epoch 3  : Avg. Loss: 0.0011	Accuracy: 58717/60000 (97.9%)
Accuracy: 9784/10000 (97.8%)


Train:   0%|          | 0/938 [00:00<?, ?it/s]

Epoch 4  : Avg. Loss: 0.0008	Accuracy: 59067/60000 (98.4%)
Accuracy: 9818/10000 (98.2%)


Train:   0%|          | 0/938 [00:00<?, ?it/s]

Epoch 5  : Avg. Loss: 0.0006	Accuracy: 59281/60000 (98.8%)
Accuracy: 9840/10000 (98.4%)


Train:   0%|          | 0/938 [00:00<?, ?it/s]

Epoch 6  : Avg. Loss: 0.0005	Accuracy: 59440/60000 (99.1%)
Accuracy: 9843/10000 (98.4%)


Train:   0%|          | 0/938 [00:00<?, ?it/s]

Epoch 7  : Avg. Loss: 0.0004	Accuracy: 59563/60000 (99.3%)
Accuracy: 9813/10000 (98.1%)


Train:   0%|          | 0/938 [00:00<?, ?it/s]

Epoch 8  : Avg. Loss: 0.0003	Accuracy: 59613/60000 (99.4%)
Accuracy: 9860/10000 (98.6%)


Train:   0%|          | 0/938 [00:00<?, ?it/s]

Epoch 9  : Avg. Loss: 0.0003	Accuracy: 59676/60000 (99.5%)
Accuracy: 9849/10000 (98.5%)


Train:   0%|          | 0/938 [00:00<?, ?it/s]

Epoch 10 : Avg. Loss: 0.0002	Accuracy: 59698/60000 (99.5%)
Accuracy: 9860/10000 (98.6%)
Done!


In [17]:
if args.pretrained:
	model = LeNet_5().to(args.device)
	model.load_state_dict(torch.load('PTQ_LeNet.pth'))
	test(test_loader, model, args)
else:
	model = main(LeNet_5().to(args.device))
	torch.save(model.state_dict(), 'PTQ_LeNet.pth')

Accuracy: 9667/10000 (96.7%)


In [18]:
def model2fix(model, args):
	for name, _ in model.named_parameters():
		exec(f'model.{name}.data = flp2fixTensor(model.{name}.data, {args.full_bits}, {args.frac_bits})')
	return model

In [19]:
# def in2fix(images, full_width=args.full_bits, frac_width=args.frac_bits):
# 	dim_images = images.size()
# 	images = images.view(-1)
# 	for idx_image, image in enumerate(images):
# 		temp_css = flp2fix(image, full_width, frac_width)
# 		images[idx_image] = torch.tensor(temp_css.fFull)
# 		del temp_css
# 	return images.view(dim_images)

In [20]:
def quantFixForward(model, x, args):
    cmodel = copy.deepcopy(model).to(args.device)
    with torch.no_grad():

        act0 = cmodel.pool(cmodel.act(cmodel.conv1(x)))
        act0 = flp2fixTensor(act0, args.full_bits, args.frac_bits)
        
        act1 = cmodel.pool(cmodel.act(cmodel.conv2(act0)))
        act1 = flp2fixTensor(act1, args.full_bits, args.frac_bits)
        
        act2 = cmodel.act(cmodel.conv3(act1))
        act2 = flp2fixTensor(act2, args.full_bits, args.frac_bits)

        act3 = cmodel.flatten(act2)
        act3 = flp2fixTensor(act3, args.full_bits, args.frac_bits)
		
        act4 = cmodel.act(cmodel.fc1(act3))
        act4 = flp2fixTensor(act4, args.full_bits, args.frac_bits)
        
        act5 = cmodel.fc2(act4)
        act5 = flp2fixTensor(act5, args.full_bits, args.frac_bits)
        
    return cmodel, act0, act1, act2, act3, act4, act5

In [21]:
def testQuant(model, test_loader, args):
	qmodel = copy.deepcopy(model).to(args.device)
	qmodel = model2fix(qmodel, args)
	qmodel.eval()

	with torch.no_grad():
		loss_func = genLossFunc(args)
		loss, correct = 0, 0
		for batch_index, (image, label) in enumerate(test_loader):
			image, label = image.to(args.device), label.to(args.device)
			qmodel, act0, act1, act2, act3, act4, act5  = quantFixForward(qmodel, image, args)
			y = act5
			loss += loss_func(y, label).item()#*image.size(0)
			correct += (y.argmax(1) == label).type(torch.int).sum().item()
	correct_rate = 100 * correct / len(test_loader.dataset)
	print(f'Accuracy: {correct}/{len(test_loader.dataset)} ({correct_rate:>.1f}%) Loss: {loss/len(test_loader.dataset):.2f}')
	return qmodel, act0, act1, act2, act3, act4, act5

In [22]:
qmodel, act0, act1, act2, act3, act4, act5 = testQuant(model, test_loader, args)

Accuracy: 9661/10000 (96.6%) Loss: 0.00


In [23]:
def extractParams(model, args):
    for key in model.state_dict().keys():
        layer_name = key.split('.')[0]
        param_type = 'w' if 'weight' in key else 'b'
        for idx, params in enumerate(eval(f'qmodel.{key}.data')):
        #    print(idx)
        #    print(params.dim())
            with open(f'./mif/{layer_name}_{param_type}_{idx}.mif', 'w') as fh:
                if param_type == 'w':
                    if params.dim() == 3 :
                        for idx, dim1 in enumerate(params):
                            for idx, dim2 in enumerate(dim1):
                                for idx, param in enumerate(dim2):
                                    bin_param = flp2fix(param, args.full_bits, args.frac_bits).bFull
                                    fh.write(bin_param + ('\n','')[idx == len(params)-1])
                    elif params.dim() == 1: # fc2 layer
                        for idx, param in enumerate(params):
                            bin_param = flp2fix(param, args.full_bits, args.frac_bits).bFull
                            fh.write(bin_param + ('\n', '')[idx == len(params)-1])
                    
                #    for idx, param in enumerate(params):
                #        bin_param = flp2fix(param, args.full_bits, args.frac_bits).bFull
                #        fh.write(bin_param + ('\n','')[idx == len(params)-1])
                else:
                    bin_param = flp2fix(params, args.full_bits, args.frac_bits).bFull
                    fh.write(bin_param)

In [24]:
def genInputVector(test_loader, args):
	out_path = './vec'
	os.system(f'rm -rf {out_path};mkdir -p {out_path}')
	with open(f'{out_path}/labels.vec', 'w') as fh_labels:
		with open(f'{out_path}/images.vec', 'w') as fh_images:
			for batch_index, (images, labels) in enumerate(test_loader):
				for (image, label) in zip(images, labels):
					bin_label = flp2fix(label, args.full_bits, 0).bFull
					fh_labels.write(bin_label+'\n')
					for pixel in image.view(-1):
						bin_pixel = flp2fix(pixel, args.full_bits, args.frac_bits).bFull
						fh_images.write(bin_pixel+'\n')
                        

In [25]:
#if not args.pretrained:
extractParams(model, args)
genInputVector(test_loader, args)

In [26]:
test_data = datasets.MNIST(	root=args.data_path,
							train=False,
							download=True,
							transform=data_transform)

In [27]:
test_data

Dataset MNIST
    Number of datapoints: 10000
    Root location: /home/jaeeun/dataset
    Split: Test
    StandardTransform
Transform: Compose(
               Resize(size=(32, 32), interpolation=PIL.Image.BILINEAR)
               ToTensor()
           )

In [28]:
test_data[0][0][0].size()

torch.Size([32, 32])

In [29]:
test_data.data.size()

torch.Size([10000, 28, 28])

In [30]:
test_iter = iter(test_loader)

In [31]:
imgs, labs = next(test_iter)

In [32]:
labs[0:32]

tensor([7, 2, 1, 0, 4, 1, 4, 9, 5, 9, 0, 6, 9, 0, 1, 5, 9, 7, 3, 4, 9, 6, 6, 5, 4, 0, 7, 4, 0, 1, 3, 1])

In [33]:
labs.size()

torch.Size([64])

In [34]:
qmodel.fc1.weight[0][0:8]

tensor([ 0.01562500, -0.06640625,  0.03515625, -0.01171875,  0.08203125, -0.12109375, -0.01171875,  0.11328125], grad_fn=<SliceBackward0>)

In [35]:
model.conv1.weight

Parameter containing:
tensor([[[[ 3.45630556e-01,  4.49961007e-01,  5.31258941e-01,  5.59453726e-01,  5.24021506e-01],
          [ 4.85830247e-01,  6.16211116e-01,  7.02184618e-01,  7.14567602e-01,  6.50589764e-01],
          [ 5.66228449e-01,  6.98803782e-01,  7.70675838e-01,  7.58951485e-01,  6.70978010e-01],
          [ 5.50445318e-01,  6.61313534e-01,  7.06644952e-01,  6.71979725e-01,  5.74740529e-01],
          [ 4.43041593e-01,  5.17175198e-01,  5.33528864e-01,  4.87122267e-01,  4.02040064e-01]]],


        [[[ 9.32341293e-02,  2.15957433e-01, -8.39374810e-02,  2.11399645e-01, -4.80696484e-02],
          [ 1.70069352e-01,  3.84582691e-02,  1.41682699e-01,  1.15408339e-01, -1.47190243e-01],
          [-4.23519462e-02,  1.38037458e-01,  2.24406019e-01, -8.58325958e-02, -4.56176996e-02],
          [ 6.56774044e-02,  1.80821389e-01,  1.64696380e-01, -4.18815576e-02,  9.94063169e-02],
          [ 8.67636204e-02, -3.69882733e-02, -1.14126392e-01,  3.44202444e-02, -7.69143831e-03]]],




In [36]:
qmodel.conv1.weight[0][0]

tensor([[0.34375000, 0.44921875, 0.53125000, 0.55859375, 0.52343750],
        [0.48437500, 0.61328125, 0.69921875, 0.71093750, 0.64843750],
        [0.56250000, 0.69531250, 0.76953125, 0.75781250, 0.66796875],
        [0.54687500, 0.66015625, 0.70312500, 0.67187500, 0.57421875],
        [0.44140625, 0.51562500, 0.53125000, 0.48437500, 0.39843750]], grad_fn=<SelectBackward0>)

In [37]:
test_data[0][1]

7

In [38]:
test_data[0][0].size() # 1024개의 elements(32x32) 1차원으로 핀 것

torch.Size([1, 32, 32])

In [39]:
test_data[0][0][0][1]

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [40]:
model.conv1.bias

Parameter containing:
tensor([ 0.01798622,  0.08375905,  0.04467494, -0.02210203,  0.10713725,  0.04762135], requires_grad=True)

In [41]:
qmodel.conv1.weight[0][0][0:5]

tensor([[0.34375000, 0.44921875, 0.53125000, 0.55859375, 0.52343750],
        [0.48437500, 0.61328125, 0.69921875, 0.71093750, 0.64843750],
        [0.56250000, 0.69531250, 0.76953125, 0.75781250, 0.66796875],
        [0.54687500, 0.66015625, 0.70312500, 0.67187500, 0.57421875],
        [0.44140625, 0.51562500, 0.53125000, 0.48437500, 0.39843750]], grad_fn=<SliceBackward0>)

In [42]:
qmodel.conv1.weight[0][0][0]

tensor([0.34375000, 0.44921875, 0.53125000, 0.55859375, 0.52343750], grad_fn=<SelectBackward0>)

In [43]:
qmodel.conv1.weight[0][0][1]

tensor([0.48437500, 0.61328125, 0.69921875, 0.71093750, 0.64843750], grad_fn=<SelectBackward0>)

In [44]:
qmodel.conv1.bias

Parameter containing:
tensor([ 0.01562500,  0.08203125,  0.04296875, -0.01953125,  0.10546875,  0.04687500], requires_grad=True)

In [45]:
def evaluation(train_loader):
    total, correct = 0, 0
    for data in train_loader:
        inputs, labels = data
        inputs, labels = inputs.to(args.device), labels.to(args.device)
        output = model(inputs)
        max_pred, pred = torch.max(output.data, dim=1)
        total += labels.size(0)
        correct += (pred == labels).sum().item()
    return 100 * correct / total

evaluation(train_loader)

96.78333333333333

In [46]:
def evaluation(test_loader):
    total, correct = 0, 0
    for data in test_loader:
        inputs, labels = data
        inputs, labels = inputs.to(args.device), labels.to(args.device)
        output = model(inputs)
        max_pred, pred = torch.max(output.data, dim=1)
        total += labels.size(0)
        correct += (pred == labels).sum().item()
    return 100 * correct / total

evaluation(test_loader)

96.67

In [47]:
for batch_index, (image, label) in enumerate(test_loader):
    image, label = image.to(args.device), label.to(args.device)

In [48]:
image.size()

torch.Size([16, 1, 32, 32])

In [49]:
a_input = image
a_input.size()

torch.Size([16, 1, 32, 32])

# Check

conv

In [50]:
def conv(X, filters,bias, stride=1, pad=0):
    n, c, h, w = X.shape # 1, 1, 32, 32
    n_f, _, filter_h, filter_w = filters.shape
    
    out_h = (h+2*pad-filter_h)//stride + 1
    out_w = (w+2*pad-filter_w)//stride + 1
    # add padding to height and width.
    in_X = F.pad(X,(0,0,0,0,pad,pad,pad,pad),"constant", 0)
    out  = torch.zeros((n, n_f, out_h, out_w))
    
    for i in range(n): # for each image.
        for c in range(n_f): # for each channel.
            for h in range(out_h): # slide the filter vertically.
                h_start = h * stride
                h_end = h_start + filter_h
                for w in range(out_w): # slide the filter horizontally.
                    w_start = w * stride
                    w_end = w_start + filter_w
                    # Element-wise multiplication.
                    out[i, c, h, w] = torch.sum(in_X[i,:,h_start:h_end,w_start:w_end]*filters[c])+bias[c]
    
    return out

ReLU_4D

In [51]:
def ReLU_4D(X):
    n, c, h, w = X.shape
    
    out = torch.zeros(n,c,h,w)
    
    for i in range(n): # for each image
        for ch in range(c): # for each channel
            for o_h in range(h): # for each height
                for o_w in range(w): # for each width
                    x = X[i, ch, o_h, o_w]
                    if x > 0:
                        out[i, ch, o_h, o_w] = x
                    else:
                        out[i, ch, o_h, o_w] = 0
                        
    return out

ReLU_2D

In [52]:
def ReLU_2D(X):
    n, c = X.shape
    
    out = torch.zeros(n, c)
    
    for i in range(n): # for each image
        for ch in range(c): # for each channel
            x = X[i, ch]
            if x > 0:
                out[i, ch] = x
            else:
                out[i, ch] = 0
                
    return out

AvgPool

In [53]:
def AvgPool2d(X,kernel_size,stride,pad=0):
    n, c, h, w = X.shape
    ker_w, ker_h = kernel_size
    
    out_h = (h + 2*pad - ker_h)//stride + 1
    out_w = (w + 2*pad - ker_w)//stride + 1
    
    out = torch.zeros(n,c,out_h,out_w)
    for i in range(n) : #for each image
        for ch in range(c) : #for each channel 
             for h in range(out_h) :
                    h_start = h * stride
                    h_end = h_start + ker_h
                    for w in range(out_w):
                        w_start = w * stride
                        w_end = w_start + ker_w
                        #element average
                        out[i, ch, h, w] = torch.mean(X[i,ch,h_start:h_end,w_start:w_end])
    
    return out

# checking

In [54]:
filters1 = qmodel.conv1.weight
filters2 = qmodel.conv2.weight
filters3 = qmodel.conv3.weight

In [55]:
bias1 = qmodel.conv1.bias
bias2 = qmodel.conv2.bias
bias3 = qmodel.conv3.bias

In [56]:
fc1_filters1 = qmodel.fc1.weight
fc2_filters2 = qmodel.fc2.weight

In [57]:
fc1_bias1 = qmodel.fc1.bias
fc2_bias2 = qmodel.fc2.bias

act0

In [58]:
layer_conv1 = conv(a_input, filters1, bias1, stride=1, pad=0)

In [59]:
layer_relu1 = ReLU_4D(layer_conv1)

In [60]:
layer_avgpool1 = AvgPool2d(layer_relu1, (2,2),2,pad=0)

In [110]:
layer_avgpool1.size()

torch.Size([16, 6, 14, 14])

In [109]:
act0.size()

torch.Size([16, 6, 14, 14])

In [63]:
fix_act0 = torch.zeros(16,6,14,14)

total = 0
num = 0

for a in range(16) :
    for b in range(6) :
        for c in range(14) :
            for d in range(14) :
                fix_act0[a][b][c][d] = flp2fix(layer_avgpool1[a][b][c][d],args.full_bits,args.frac_bits).fFull
                if (torch.equal(fix_act0[a][b][c][d],act0[a][b][c][d])==False):
                    print(fix_act0[a][b][c][d].item(), act0[a][b][c][d].item())
                    num += 1
                total += 1

print("number of different value is : {num}/{total}".format(num=num,total=total))

0.02734375 0.03125
number of different value is : 1/18816


In [64]:
torch.equal(fix_act0, act0)

False

act1

In [65]:
layer_conv2 = conv(fix_act0, filters2, bias2, stride=1,pad=0)

In [66]:
layer_relu2 = ReLU_4D(layer_conv2)

In [67]:
layer_avgpool2 = AvgPool2d(layer_relu2, (2,2),2,pad=0)

In [68]:
layer_avgpool2.shape

torch.Size([16, 16, 5, 5])

In [108]:
act1.size()

torch.Size([16, 16, 5, 5])

In [70]:
fix_act1 = torch.zeros(16,16,5,5)

total = 0
num = 0

for a in range(16) :
    for b in range(16) :
        for c in range(5) :
            for d in range(5) :
                fix_act1[a][b][c][d] = flp2fix(layer_avgpool2[a][b][c][d],args.full_bits,args.frac_bits).fFull
                if (torch.equal(fix_act1[a][b][c][d],act1[a][b][c][d])==False):
                    num += 1
                    print(fix_act1[a][b][c][d].item(), act1[a][b][c][d].item())
                total += 1

print("number of different value is : {num}/{total}".format(num=num,total=total))

number of different value is : 0/6400


In [71]:
torch.equal(fix_act1, act1)

True

act2

In [80]:
layer_conv3 = conv(fix_act1, filters3, bias3, stride=1,pad=0)

In [81]:
layer_relu3 = ReLU_4D(layer_conv3)

In [107]:
act2.size()

torch.Size([16, 120, 1, 1])

In [84]:
fix_act2 = torch.zeros(16,120,1,1)

total = 0
num = 0

for a in range(16) :
    for b in range(120) :
        for c in range(1) :
            for d in range(1) :
                fix_act2[a][b][c][d] = flp2fix(layer_relu3[a][b][c][d],args.full_bits,args.frac_bits).fFull
                if (torch.equal(fix_act2[a][b][c][d],act2[a][b][c][d])==False):
                    num += 1
                    print(fix_act2[a][b][c][d].item(), act2[a][b][c][d].item())
                total += 1

print("number of different value is : {num}/{total}".format(num=num,total=total))

number of different value is : 0/1920


In [85]:
torch.equal(fix_act2, act2)

True

act3(flatten)

In [87]:
layer_flatten = fix_act2.view(16,120)

In [105]:
act3.size()

torch.Size([16, 120])

In [89]:
fix_act3 = torch.zeros(16,120)

total = 0
num = 0

for a in range(16) :
    for b in range(120) :
        fix_act3[a][b] = flp2fix(layer_flatten[a][b],args.full_bits,args.frac_bits).fFull
        if (torch.equal(fix_act3[a][b],act3[a][b])==False):
            num += 1
            print(fix_act3[a][b].item(), act3[a][b].item())
        total += 1

print("number of different value is : {num}/{total}".format(num=num,total=total))

number of different value is : 0/1920


In [90]:
torch.equal(fix_act3, act3)

True

act4(fc1)

In [95]:
layer_fc1 = torch.matmul(fix_act3, fc1_filters1.t()) + fc1_bias1

In [97]:
layer_relu_fc1 = ReLU_2D(layer_fc1)

In [104]:
act4.size()

torch.Size([16, 84])

In [99]:
fix_act4 = torch.zeros(16,84)

total = 0
num = 0

for a in range(16) :
    for b in range(84) :
        fix_act4[a][b] = flp2fix(layer_relu_fc1[a][b],args.full_bits,args.frac_bits).fFull
        if (torch.equal(fix_act4[a][b],act4[a][b])==False):
            num += 1
            print(fix_act4[a][b].item(), act4[a][b].item())
        total += 1

print("number of different value is : {num}/{total}".format(num=num,total=total))

number of different value is : 0/1344


In [113]:
torch.equal(fix_act4, act4)

True

act5(fc2)

In [101]:
layer_fc2 = torch.matmul(fix_act4, fc2_filters2.t()) + fc2_bias2

In [103]:
act5.size()

torch.Size([16, 10])

In [112]:
fix_act5 = torch.zeros(16,10)

total = 0
num = 0

for a in range(16) :
    for b in range(10) :
        fix_act5[a][b] = flp2fix(layer_fc2[a][b],args.full_bits,args.frac_bits).fFull
        if (torch.equal(fix_act5[a][b],act5[a][b])==False):
            num += 1
            print(fix_act5[a][b].item(), act5[a][b].item())
        total += 1

print("number of different value is : {num}/{total}".format(num=num,total=total))

number of different value is : 0/160


In [114]:
torch.equal(fix_act5, act5)

True