# Module

In [1]:
import argparse
from tqdm import tqdm_notebook as tq
import tqdm
import os, time, math, copy
import numpy as np
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from collections import namedtuple
import matplotlib.pyplot as plt
import random
import time
import datetime

torch.set_printoptions(precision=8, linewidth=50000)
import warnings
warnings.filterwarnings(action='ignore')

# Print Colors

In [2]:
BLACK	= '\033[30m'
RED		= '\033[31m'
GREEN	= '\033[32m'
YELLOW	= '\033[33m'
BLUE	= '\033[34m'
MAGENTA	= '\033[35m'
CYAN	= '\033[36m'
RESET	= '\033[0m'
SEL		= '\033[7m'

In [3]:
def int2bin(iIn,iBW):
    iBW = iBW + 1
    if iIn >= 0:
        bOut = bin(iIn).replace('0b','').rjust(iBW,'0')
    else :
        bOut = bin(iIn & (pow(2,iBW)-1)).replace('0b','').rjust(iBW,'1')
    return bOut[1:]

In [4]:
def XOR(iA,iB):
    if iA != iB :
        iOut = '1'
    else : 
        iOut = '0'
    return iOut

In [5]:
def snum(a):
    if a >= 0 :
        return '0'
    else :
        return '1'

In [6]:
class fxp:
    def __init__(self, bIn, iBWF):
        self.iFullBW = len(bIn)
        self.iIntgBW = self.iFullBW - iBWF
        self.bSign = bIn[0]
        self.bIntg = bIn[:self.iIntgBW]
        self.bFrac = bIn[self.iIntgBW:]
        self.fFull = 0
        try:
            for idx, bit in enumerate(bIn):
                if idx == 0:
                    self.fFull = self.fFull + int(bit,2) * -pow(2, self.iIntgBW - 1)
                else:
                    self.fFull = self.fFull + int(bit,2) * pow(2, self.iIntgBW - 1 - idx)
        except:
            print(bIn)
        self.dispFull = self.bIntg +"."+ self.bFrac 
        return

In [7]:
class flp2fix:
    def __init__(self, fIn, iBW, iBWF):
        self.fMin = - 2 ** (iBW - iBWF - 1)
        self.fMax = (2 ** (iBW-1) - 1) * (2 ** -iBWF)
        self.fResol = 2 ** -iBWF
        #if fIn < self.fMin or fIn > self.fMax:
            #print(f'({fIn}): Out of input range ({self.fMax}/{self.fMin}) during flp -> fix converting ')
        self.iBW = iBW
        self.iBWI = iBW - iBWF
        self.iBWF = iBWF

        self.iFLP2INT = abs(int(fIn * 2 ** iBWF))
        if fIn < 0:
            self.iFLP2INT = 2 ** (iBW-1) - self.iFLP2INT

        if fIn >= 0:
            self.bFull = bin(self.iFLP2INT)[2:].rjust(iBW, '0')
        else:
            self.bFull = '1'+bin(self.iFLP2INT)[2:].rjust(iBW-1, '0')
            if len(self.bFull) > iBW:
                self.bFull = '0' * iBW

        self.cssFxp = fxp(self.bFull, self.iBWF)
        self.bSign = self.cssFxp.bSign
        self.bIntg = self.cssFxp.bIntg
        self.bFrac = self.cssFxp.bFrac
        self.fFull = self.cssFxp.fFull
        return

In [8]:
def flp2fixTensor(fIn, iBW, iBWF):
    fMin = - 2 ** (iBW - iBWF - 1)
    fMax = (2 ** (iBW-1) - 1) * (2 ** -iBWF)
    fList = []
    for aTensor in fIn.view(-1):
        fList.append(flp2fix(aTensor, iBW, iBWF).fFull)
    return torch.tensor(fList).view(fIn.size())

# User Define Variables

In [9]:
data_path = '~/dataset'

In [10]:
parser = argparse.ArgumentParser(description='PyTorch for MNIST dataset')
parser.add_argument('--device', type=str, default='cpu', help='Device')
parser.add_argument('--shuffle', action='store_true', default=False, help='enables data shuffle')
parser.add_argument('--dataset', type=str, default='mnist', help='training dataset')
parser.add_argument('--data_path', type=str, default=data_path, help='path to MNIST')
parser.add_argument('--batch_size', type=int, default=64, help='batch size')
parser.add_argument('--epochs', type=int, default=10, help='number of epochs to train')
parser.add_argument('--lr', type=float, default=0.001, help='learning rate')
parser.add_argument('--optimizer', type=str, default='adam', help='optimizer')
parser.add_argument('--loss_func', type=str, default='cel', help='optimizer')
parser.add_argument('--quant_opt', type=str, default='asym', help='Type of Quantization')
parser.add_argument('--full_bits', type=int, default=16, help='Number of Quantization Bits')
parser.add_argument('--frac_bits', type=int, default=7, help='Number of Quantization Bits')
#parser.add_argument('--pretrained', type=bool, default=True, help='Pretrained Model')
parser.add_argument('--act_quant', type=bool, default=False, help='Activation Quantization')
parser.add_argument('--disp', type=bool, default=False, help='Display Model Information')
parser.add_argument('--bBW',type=int,default=7,help='bit number')
args = parser.parse_args(args=[])

# Preparing Data

In [11]:
kwargs = {'num_workers': 1, 'pin_memory': True} if args.device == 'cuda' else {}
if args.dataset == 'mnist':
	train_loader = torch.utils.data.DataLoader(
		dataset=datasets.MNIST(
			root=args.data_path,
			train=True,
			download=True,
			transform=transforms.ToTensor()
		),
		batch_size=args.batch_size,
		shuffle=args.shuffle,
		**kwargs
	)

	test_loader = torch.utils.data.DataLoader(
		dataset=datasets.MNIST(
			root=args.data_path,
			train=False,
			download=True,
			transform=transforms.ToTensor()
		),
		batch_size=args.batch_size,
		shuffle=args.shuffle,
		**kwargs
	)

# Build Model

In [12]:
class MLP(nn.Module):
	def __init__(self):
		super(MLP, self).__init__()
		self.flatten = nn.Flatten()
		self.fc1 = nn.Linear(28*28, 32)
		self.relu1 = nn.ReLU()
		self.fc2 = nn.Linear(32, 64)
		self.relu2 = nn.ReLU()
		self.fc3 = nn.Linear(64, 10)
		
	def forward(self, x):
		x = self.flatten(x)
		x = self.fc1(x)
		x = self.relu1(x)
		x = self.fc2(x)
		x = self.relu2(x)
		logits = self.fc3(x)
		return logits

In [13]:
def genOptimizer(model, args):
	if args.optimizer == 'sgd':
		optimizer = torch.optim.SGD(model.parameters(), lr=args.lr)
	if args.optimizer == 'adam':
		optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
	return optimizer

def genLossFunc(args):
	if args.loss_func == 'cel':
		loss_func = nn.CrossEntropyLoss()
	return loss_func

In [14]:
def train(train_loader, model, epoch, args):
	model.train()
	loss_func = genLossFunc(args)
	optimizer = genOptimizer(model, args)
	max_batch_index = int(np.floor(len(train_loader.dataset)/args.batch_size))
	running_loss = 0
	for batch_index, (image, label) in enumerate(tq(train_loader, desc='Train', leave=False)):
		image, label = image.to(args.device), label.to(args.device)
		pred = model(image)
		loss = loss_func(pred, label)
		running_loss += loss.item()#*image.size(0)

		optimizer.zero_grad()
		loss.backward()
		optimizer.step()
				
	print(f'Epoch {epoch+1:<3d}: Avg. Loss: {running_loss/len(train_loader.dataset):.4f}', end = '\t')

In [15]:
def test(test_loader, model, args):
	model.eval()
	with torch.no_grad():
		loss_func = genLossFunc(args)
		loss, correct = 0, 0
# 		for batch_index, (image, label) in enumerate(tq(test_loader, desc='Test', leave=False)):
		for batch_index, (image, label) in enumerate(test_loader):
			image, label = image.to(args.device), label.to(args.device)
			pred = model(image)
			loss += loss_func(pred, label).item()#*image.size(0)
			correct += (pred.argmax(1) == label).type(torch.int).sum().item()
	loss /= len(test_loader.dataset)
	correct_rate = 100 * correct / len(test_loader.dataset)
	print(f'Accuracy: {correct}/{len(test_loader.dataset)} ({correct_rate:>.1f}%)')

In [16]:
def main(model):
	for epoch in range(args.epochs):
		train(train_loader, model, epoch, args)
		test(test_loader, model, args)
	print("Done!")
	return model

In [17]:
#if args.pretrained:
#    if os.path.isfile('preTrainedModel.pth'):
#        model = MLP().to(args.device)
#        model.load_state_dict(torch.load('preTrainedModel.pth'))
#        test(test_loader, model, args)
#    else:
#        model = main(MLP().to(args.device))
#        torch.save(model.state_dict(), 'preTrainedModel.pth')
#else:
model = main(MLP().to(args.device))
torch.save(model.state_dict(), 'preTrainedModel.pth')

Train:   0%|          | 0/938 [00:00<?, ?it/s]

Epoch 1  : Avg. Loss: 0.0071	Accuracy: 9224/10000 (92.2%)


Train:   0%|          | 0/938 [00:00<?, ?it/s]

Epoch 2  : Avg. Loss: 0.0035	Accuracy: 9413/10000 (94.1%)


Train:   0%|          | 0/938 [00:00<?, ?it/s]

Epoch 3  : Avg. Loss: 0.0027	Accuracy: 9487/10000 (94.9%)


Train:   0%|          | 0/938 [00:00<?, ?it/s]

Epoch 4  : Avg. Loss: 0.0022	Accuracy: 9530/10000 (95.3%)


Train:   0%|          | 0/938 [00:00<?, ?it/s]

Epoch 5  : Avg. Loss: 0.0019	Accuracy: 9557/10000 (95.6%)


Train:   0%|          | 0/938 [00:00<?, ?it/s]

Epoch 6  : Avg. Loss: 0.0016	Accuracy: 9577/10000 (95.8%)


Train:   0%|          | 0/938 [00:00<?, ?it/s]

Epoch 7  : Avg. Loss: 0.0015	Accuracy: 9594/10000 (95.9%)


Train:   0%|          | 0/938 [00:00<?, ?it/s]

Epoch 8  : Avg. Loss: 0.0013	Accuracy: 9615/10000 (96.2%)


Train:   0%|          | 0/938 [00:00<?, ?it/s]

Epoch 9  : Avg. Loss: 0.0012	Accuracy: 9622/10000 (96.2%)


Train:   0%|          | 0/938 [00:00<?, ?it/s]

Epoch 10 : Avg. Loss: 0.0011	Accuracy: 9630/10000 (96.3%)
Done!


In [18]:
print(model.fc1.weight.shape)

torch.Size([32, 784])


In [19]:
model.fc1.bias

Parameter containing:
tensor([ 0.03321286, -0.09126852, -0.04505986, -0.14803056,  0.03290765,  0.32821697,  0.29648983, -0.06592619, -0.15352549,  0.13001631,  0.08683386,  0.04764682, -0.00883541, -0.06734475,  0.11826833,  0.17528066,  0.03752080,  0.06540728,  0.08449285,  0.11017059,  0.23465872,  0.01827120, -0.04540134, -0.11596186, -0.03758489,  0.18912387,  0.07242502,  0.18917079,  0.14756361,  0.22807184,  0.00145409, -0.04245985], requires_grad=True)

# SNG

In [20]:
def Comp(a,lfsr,snum):
    for com in range(0,len(a)):
        oA = '0'
        if a[com]!=lfsr[com]:
            if(int(a[com]) > int(lfsr[com])):
                oA = '1'
            break
    return XOR(oA,snum)

In [21]:
def perm(a):
    al = len(a)
    blist = []
    for i in range(al) :
        #print(al-i-1)
        blist.append(a[al-i-1])
    
    b = "".join(blist)
    
    return b

In [22]:
class LFSR7:
    def Random(self):
        self.b0 = eval(f'str(random.randint(0,1))')
        self.b1 = eval(f'str(random.randint(0,1))')
        self.b2 = eval(f'str(random.randint(0,1))')
        self.b3 = eval(f'str(random.randint(0,1))')
        self.b4 = eval(f'str(random.randint(0,1))')
        self.b5 = eval(f'str(random.randint(0,1))')
        self.b6 = eval(f'str(random.randint(0,1))')
        return self.b0 + self.b1 + self.b2 + self.b3 + self.b4 + self.b5 + self.b6
    
    def Normal(self,stream):
        self.b0 = XOR(int(stream[5]),int(stream[6]))
        self.b1 = stream[0]
        self.b2 = stream[1]
        self.b3 = stream[2]
        self.b4 = stream[3]
        self.b5 = stream[4]
        self.b6 = stream[5]
        
        return self.b0 + self.b1 + self.b2 + self.b3 + self.b4 + self.b5 + self.b6
    
    def Allzero(self):
        self.b0 = '0'
        self.b1 = '0'
        self.b2 = '0'
        self.b3 = '0'
        self.b4 = '0'
        self.b5 = '0'
        self.b6 = '0'
        
        return self.b0 + self.b1 + self.b2 + self.b3 + self.b4 + self.b5 + self.b6

In [23]:
def LFSRlist7():
    lfsr = LFSR7()
    lfsrlist = []
    for k in range(2**(args.bBW)-1): #lfsr number generating
        if k == 0:
            lfsrlist.append(lfsr.Random())
        else :
            lfsrlist.append(lfsr.Normal(lfsrlist[k-1]))
        if (k == 2**(args.bBW)-2):
            lfsrlist.append(lfsr.Allzero())
    
    if (args.bBW) != args.frac_bits :
        if args.bBW < args.frac_bits :
            for i in range(len(lfsrlist)):
                lfsrlist[i] = lfsrlist[i] + (args.frac_bits-args.bBW)*'0'
        else :
            print("it can't work")
            return 0
    
    return lfsrlist

In [24]:
def SNG(iIN,lfsr):

    sNUM = snum(iIN)
    
    bIN = flp2fix(iIN,args.full_bits,args.frac_bits).bFull
    oAlist = []
    
    for k in range(2**(args.bBW)): #lfsr number generating
        lNUM = lfsr[k]
        a = Comp(bIN[-(args.frac_bits):],lNUM,sNUM)
        oAlist.append(a) #comparator of input a
    
    oAlist.insert(0,sNUM)
    sA = "".join(oAlist)
    if sNUM == '1' and bIN == args.full_bits*'0' :
        return '1'+(2**(args.bBW))*'0'
    else :
        return sA

In [25]:
def SNG_P(iIN,lfsr):
    sNUM = snum(iIN)
    
    bIN = flp2fix(iIN,args.full_bits,args.frac_bits).bFull
    oAlist = []
    
    for k in range(2**(args.bBW)): #lfsr number generating
        if (args.bBW == args.frac_bits) :    
            lNUM = perm(lfsr[k])
        elif (args.bBW < args.frac_bits) :
            lNUM = perm(lfsr[k][:args.bBW])+(args.frac_bits-args.bBW)*"0"
        a = Comp(bIN[-(args.frac_bits):],lNUM,sNUM)
        oAlist.append(Comp(bIN[-(args.frac_bits):],lNUM,sNUM)) #comparator of input a
    
    oAlist.insert(0,sNUM)
    sA = "".join(oAlist)
    if sNUM == '1' and bIN == args.full_bits*'0' :
        return '1'+(2**(args.bBW))*'0'
    else :
        return sA

In [26]:
def SNGnumpy(fIn,lfsr):
    start = time.time()
    sList = []
    for aNumpy in fIn.view(-1):
        sList.append(SNG(float(aNumpy),lfsr))
    end = time.time()
    sec = (end-start)
    result_list = str(datetime.timedelta(seconds=sec)).split(".")
    print(f'SNGnumpy : {result_list[0]}')                 
    return np.array(sList).reshape(fIn.size())

In [27]:
def SNGpnumpy(fIn,lfsr):
    start = time.time()
    sList = []
    for aNumpy in fIn.view(-1):
        sList.append(SNG_P(float(aNumpy),lfsr))
    end = time.time()
    sec = (end-start)
    result_list = str(datetime.timedelta(seconds=sec)).split(".")
    print(f'SNGpnumpy : {result_list[0]}')                   
    return  np.array(sList).reshape(fIn.size())

In [28]:
def CountOne(nIn):
    nlist = []
    for num in nIn.reshape(-1):
        n = 0
        for a in num:
            if a == '1' :
                n += 1
        if a[0] == '1' :
            nlist.append(n-1)
        else :
            nlist.append(n)
    return torch.tensor(nlist).view(nIn.shape)

In [29]:
def defSign(nIn):
    nlist = []
    for num in nIn.reshape(-1):
        if num[0] == '1' :
            nlist.append(-1)
        else :
            nlist.append(1)
    return torch.tensor(nlist).view(nIn.shape)

In [30]:
def mul(a,b):
    al = len(a)
    bl = len(b)
    
    outlist = []
    
    if al != bl :
        print("length of string is different")
        return 0
    
    outlist.append(XOR(a[0],b[0]))
    
    for i in range(al-1) :
        outlist.append(str(int(a[i+1]) & int(b[i+1])))
    
    #print(outlist)
    out = "".join(outlist)
    
    return out    

## Inter Blocks : Output Revision(OUR) Scheme

In [31]:
def pos(SN):
    return SN[0] == '0'
def neg(SN):
    return SN[0] == '1'
def counter(x):
    return x.count('1')

In [32]:
def gen_acclist(bBW, IN):
    A_list=[]
    acc_A=0
    acc_Alist=[]
    for i in range(0,2**(bBW-1)):
        A = 0
        for k in range(len(IN)):
            if IN[k][i] == '1':
                A += 1
        A_list.append(A)
        acc_A += A_list[i]
        acc_Alist.append(acc_A)     
    
    return acc_Alist

In [33]:
def gen_So(bBW, diff):
    So_list=[]
    Ao_list=[]
    global So
    for o in range(2**(bBW-1)):
        if o == 0:
            Ao = 0 # first A_o is fixed
            if diff[o] > 0:
                So = 1
            else:
                So = 0
            Ao_list.append(Ao)
            So_list.append(So)
        else:
            Ao += So_list[o-1]
            Ao_list.append(Ao)
            if diff[o] > Ao_list[o]:
                So = 1
            elif diff[o] <= Ao_list[o]:
                So = 0
            So_list.append(So)
    
    return So_list

In [34]:
def block(IN):
    BLOCK1=[]
    BLOCK2=[]
    BLOCK3=[]
    BLOCK4=[]
    BLOCK5=[]
    BLOCK6=[]
    BLOCK7=[]
    BLOCK8=[]
    for i in range(len(IN)):
        block1 = IN[i][1:17]
        block2 = IN[i][17:33]
        block3 = IN[i][33:49]
        block4 = IN[i][49:65]
        block5 = IN[i][65:81]
        block6 = IN[i][81:97]
        block7 = IN[i][97:113]
        block8 = IN[i][113:]
        BLOCK1.append(block1)
        BLOCK2.append(block2)
        BLOCK3.append(block3)
        BLOCK4.append(block4)
        BLOCK5.append(block5)
        BLOCK6.append(block6)
        BLOCK7.append(block7)
        BLOCK8.append(block8)
    return BLOCK1, BLOCK2, BLOCK3, BLOCK4, BLOCK5, BLOCK6, BLOCK7, BLOCK8

In [35]:
def OUR(iA): # bBW = 8
    global diff1, diff2, diff3, diff4, diff5, diff6, diff7, diff8, sign
  
    # sorting random input bit-stream(positive/negative)
    pos_IN = list(filter(pos, iA))
    neg_IN = list(filter(neg, iA))

    # Block devision
    pBLOCK1, pBLOCK2, pBLOCK3, pBLOCK4, pBLOCK5, pBLOCK6, pBLOCK7, pBLOCK8 = block(pos_IN)
    nBLOCK1, nBLOCK2, nBLOCK3, nBLOCK4, nBLOCK5, nBLOCK6, nBLOCK7, nBLOCK8 = block(neg_IN)

    # generate list of number of accumulated 1s
    Ap1_list = gen_acclist(5, pBLOCK1)
    An1_list = gen_acclist(5, nBLOCK1)
    Ap2_list = gen_acclist(5, pBLOCK2)
    An2_list = gen_acclist(5, nBLOCK2)
    Ap3_list = gen_acclist(5, pBLOCK3)
    An3_list = gen_acclist(5, nBLOCK3)
    Ap4_list = gen_acclist(5, pBLOCK4)
    An4_list = gen_acclist(5, nBLOCK4)
    Ap5_list = gen_acclist(5, pBLOCK5)
    An5_list = gen_acclist(5, nBLOCK5)
    Ap6_list = gen_acclist(5, pBLOCK6)
    An6_list = gen_acclist(5, nBLOCK6)
    Ap7_list = gen_acclist(5, pBLOCK7)
    An7_list = gen_acclist(5, nBLOCK7)
    Ap8_list = gen_acclist(5, pBLOCK8)
    An8_list = gen_acclist(5, nBLOCK8)

    # determine sign of output
    if Ap1_list[-1]+Ap2_list[-1]+Ap3_list[-1]+Ap4_list[-1] > An1_list[-1]+An2_list[-1]+An3_list[-1]+An4_list[-1]:
        diff1 = [x-y for x,y in zip(Ap1_list, An1_list)]
        diff2 = [x-y for x,y in zip(Ap2_list, An2_list)]
        diff3 = [x-y for x,y in zip(Ap3_list, An3_list)]
        diff4 = [x-y for x,y in zip(Ap4_list, An4_list)]
        diff5 = [x-y for x,y in zip(Ap5_list, An5_list)]
        diff6 = [x-y for x,y in zip(Ap6_list, An6_list)]
        diff7 = [x-y for x,y in zip(Ap7_list, An7_list)]
        diff8 = [x-y for x,y in zip(Ap8_list, An8_list)]
        sign  = ['0']        
    elif Ap1_list[-1]+Ap2_list[-1]+Ap3_list[-1]+Ap4_list[-1] < An1_list[-1]+An2_list[-1]+An3_list[-1]+An4_list[-1]:
        diff1 = [x-y for x,y in zip(An1_list, Ap1_list)]
        diff2 = [x-y for x,y in zip(An2_list, Ap2_list)]
        diff3 = [x-y for x,y in zip(An3_list, Ap3_list)]
        diff4 = [x-y for x,y in zip(An4_list, Ap4_list)]
        diff5 = [x-y for x,y in zip(An5_list, Ap5_list)]
        diff6 = [x-y for x,y in zip(An6_list, Ap6_list)]
        diff7 = [x-y for x,y in zip(An7_list, Ap7_list)]
        diff8 = [x-y for x,y in zip(An8_list, Ap8_list)]
        sign  = ['1']
    
    So1 = gen_So(5, diff1)
    So2 = gen_So(5, diff2)
    So3 = gen_So(5, diff3)
    So4 = gen_So(5, diff4)
    So5 = gen_So(5, diff5)
    So6 = gen_So(5, diff6)
    So7 = gen_So(5, diff7)
    So8 = gen_So(5, diff8)
    So_list = So1 + So2 + So3 + So4 + So5 + So6 + So7 + So8

    result = list(map(str, So_list))
    tempout = ''.join(result)

    p = abs((Ap1_list[-1]+Ap2_list[-1]+Ap3_list[-1]+Ap4_list[-1]+Ap5_list[-1]+Ap6_list[-1]+Ap7_list[-1]+Ap8_list[-1])-(An1_list[-1]+An2_list[-1]+An3_list[-1]+An4_list[-1]+An5_list[-1]+An6_list[-1]+An7_list[-1]+An8_list[-1]))
    q = counter(tempout)

    p_list=[]
    q_list=[counter(tempout)]
    out=[]
    for k in range(2**7):
        p_list.append(p)
        if q < p:
            q += 1
            out.append('1')
        elif q > p:
            q -= 1
            out.append('0')
        else:
            q = p
            out.append(tempout[k])
        q_list.append(q)
    sout = sign + out
    output = ''.join(sout)
    
    return output

In [36]:
def defSign1(nIn):
    if nIn[0] == '1' :
        return -1
    else :
        return 1

In [37]:
def CountOne1(nIn):
    n = 0
    for num in nIn:
        if num == '1' :
            n += 1
    if nIn[0] == '1' :
        return n-1
    else :
        return n

In [38]:
def S2None(sIn,SF): # not using
    s = defSign1(sIn)
    o = (CountOne1(sIn)/(2**args.bBW))*SF*s
    return o

In [39]:
def macNumpy(aIn,bIn,aSF,wSF):
    mList = []
    out = 0
    result = []
    start = time.time()
    for i in range(aIn.shape[0]): # 64
        for j in range(bIn.T.shape[1]): # 32 
            acc = []
            for k in range(aIn.shape[1]): # 784
                acc.append(mul((aIn[i][k].astype(str)),(bIn.T)[k][j].astype(str))) 
            mList.append(acc)
    for k in range(aIn.shape[0]*bIn.T.shape[1]):
        out = S2None(OUR(mList[k]),aSF*wSF)
        result.append(out)
    end = time.time()
    sec = (end-start)
    result_list = str(datetime.timedelta(seconds=sec)).split(".")
    print(aIn.shape, bIn.shape)
    print(f'macNumpy : {result_list[0]}')
    print(len(mList)) # 64*32=2048
    return torch.tensor(result).view(aIn.shape[0], bIn.T.shape[1]) 

In [40]:
def bin_macNumpy(aIn,bIn,aSF,wSF):
    start = time.time()
    mList = []
    for i in range(aIn.shape[0]): # 64
        for j in range(bIn.T.shape[1]): #32
            sum = 0
            for k in range(aIn.shape[1]): #784
                sum += S2None(mul((aIn[i][k].astype(str)),(bIn.T)[k][j].astype(str)),aSF*wSF)
            mList.append(sum)
    end = time.time()
    sec = (end-start)
    result_list = str(datetime.timedelta(seconds=sec)).split(".")
    print(f'mulNumpy : {result_list[0]}')
    print(aIn.shape, bIn.shape)
    print(len(mList))
    return torch.tensor(mList).view(aIn.shape[0],bIn.T.shape[1])

In [41]:
model.fc1.weight.shape[1], model.fc1.weight.T.shape[1]

(784, 32)

In [42]:
model.fc1.weight.shape

torch.Size([32, 784])

In [43]:
model.fc3.bias.size()

torch.Size([10])

In [44]:
model

MLP(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=784, out_features=32, bias=True)
  (relu1): ReLU()
  (fc2): Linear(in_features=32, out_features=64, bias=True)
  (relu2): ReLU()
  (fc3): Linear(in_features=64, out_features=10, bias=True)
)

## Find max, min 

In [45]:
def findMaxMin(data):
    start = time.time()
    
    max = torch.max(data)
    min = torch.min(data)
    SF=torch.max(abs(max),abs(min)).item()
    
    end = time.time()
    sec = (end-start)
    result_list = str(datetime.timedelta(seconds=sec)).split(".")
    print(f'findMaxMin : {result_list[0]}')
    
    return SF

## Fixed model

In [46]:
def model2fix(model, args):
	for name, _ in model.named_parameters():
		exec(f'model.{name}.data = flp2fixTensor(model.{name}.data, {args.full_bits}, {args.frac_bits})')
	return model

In [47]:
def N2S2N(model, iX, iW, iB, args):
    lfsr = LFSRlist7()
    
    xSF = findMaxMin(iX)
    wSF = findMaxMin(iW)
    
    x = SNGnumpy(iX/xSF,lfsr)
    w = SNGpnumpy(iW/wSF,lfsr)
    
    sout = macNumpy(x,w,xSF,wSF) + iB
    
    return sout

In [48]:
def bin_N2S2N(model, iX, iW, iB, args):
    lfsr = LFSRlist7()
    
    xSF = findMaxMin(iX)
    wSF = findMaxMin(iW)
    
    x = SNGnumpy(iX/xSF,lfsr)
    w = SNGpnumpy(iW/wSF,lfsr)
    
    sout = bin_macNumpy(x,w,xSF,wSF) + iB
    
    return sout

In [49]:
def quantFixForward(model, x, args):
    cmodel = copy.deepcopy(model).to(args.device)
    
    with torch.no_grad():
        i0 = cmodel.flatten(x)
        act0 = bin_N2S2N(cmodel,i0,cmodel.fc1.weight,cmodel.fc1.bias,args)
        print("act0 successed")
        
        i1 = model.relu1(act0)
        act1 = N2S2N(cmodel,i1,cmodel.fc2.weight,cmodel.fc2.bias,args)
        print("act1 successed")
        
        i2 = cmodel.relu2(act1)
        act2 = bin_N2S2N(cmodel,i2,cmodel.fc3.weight,cmodel.fc3.bias,args)
        print("act2 successed")
        
        act3 = flp2fixTensor(act2, args.full_bits, args.frac_bits)
        print("act3 successed")
    return cmodel, act0, act1, act2, act3 

In [50]:
def testQuant(model, test_loader, args):
    
    start = time.time()
    
    qmodel = copy.deepcopy(model).to(args.device)
    qmodel = model2fix(qmodel, args)
    qmodel.eval()
    
    with torch.no_grad():
        loss_func = genLossFunc(args)
        loss, correct = 0, 0
        for batch_index, (image, label) in enumerate(tq(test_loader,desc='Test',leave=False)):
            start = time.time()
            image, label = image.to(args.device), label.to(args.device)
            qmodel, act0, act1, act2, act3  = quantFixForward(qmodel, image, args)
            y = act3
            loss += loss_func(y, label).item()#*image.size(0)
            correct += (y.argmax(1) == label).type(torch.int).sum().item()
            end = time.time()
            sec = end - start
            result_list = str(datetime.timedelta(seconds=sec)).split(".")
            print(f'image {batch_index} time  : {result_list[0]}')
    correct_rate = 100 * correct / len(test_loader.dataset)
    print(f'Accuracy: {correct}/{len(test_loader.dataset)} ({correct_rate:>.1f}%) Loss: {loss/len(test_loader.dataset):.2f}')
    
    end = time.time()
    sec = (end-start)
    result_list = str(datetime.timedelta(seconds=sec)).split(".")
    print(f'Total time is : {result_list[0]}')
    return qmodel, act0, act1, act2, act3

In [51]:
qmodel, act0, act1, act2, act3 = testQuant(model, test_loader, args)

Test:   0%|          | 0/157 [00:00<?, ?it/s]

findMaxMin : 0:00:00
findMaxMin : 0:00:00
SNGnumpy : 0:00:05
SNGpnumpy : 0:00:07
mulNumpy : 0:01:28
(64, 784) (32, 784)
2048
act0 successed
findMaxMin : 0:00:00
findMaxMin : 0:00:00
SNGnumpy : 0:00:00
SNGpnumpy : 0:00:00
(64, 32) (64, 32)
macNumpy : 0:00:08
4096
act1 successed
findMaxMin : 0:00:00
findMaxMin : 0:00:00
SNGnumpy : 0:00:00
SNGpnumpy : 0:00:00
mulNumpy : 0:00:02
(64, 64) (10, 64)
640
act2 successed
act3 successed
image 0 time  : 0:01:53
findMaxMin : 0:00:00
findMaxMin : 0:00:00
SNGnumpy : 0:00:05
SNGpnumpy : 0:00:07
mulNumpy : 0:01:28
(64, 784) (32, 784)
2048
act0 successed
findMaxMin : 0:00:00
findMaxMin : 0:00:00
SNGnumpy : 0:00:00
SNGpnumpy : 0:00:00
(64, 32) (64, 32)
macNumpy : 0:00:08
4096
act1 successed
findMaxMin : 0:00:00
findMaxMin : 0:00:00
SNGnumpy : 0:00:00
SNGpnumpy : 0:00:00
mulNumpy : 0:00:02
(64, 64) (10, 64)
640
act2 successed
act3 successed
image 1 time  : 0:01:53
findMaxMin : 0:00:00
findMaxMin : 0:00:00
SNGnumpy : 0:00:05
SNGpnumpy : 0:00:07
mulNumpy : 

## Check

### ReLU_2D

In [52]:
def ReLU_2D(X):
    n, c = X.shape
    
    out = torch.zeros(n,c)
    
    for i in range(n):
        for ch in range(c):
            x = X[i,ch]
            if x > 0:
                out[i, ch] = x
            else:
                out[i, ch] = 0
    return out

In [53]:
qmodel = copy.deepcopy(model).to(args.device)
for batch_index, (image, label) in enumerate(tq(test_loader,desc='Test',leave=False)):
    image, label = image.to(args.device), label.to(args.device)
    i0 = qmodel.flatten(image)

Test:   0%|          | 0/157 [00:00<?, ?it/s]

In [54]:
i0.shape

torch.Size([16, 784])

In [55]:
a_input = image
a_input.size()

torch.Size([16, 1, 28, 28])

In [56]:
fc1_weight = qmodel.fc1.weight
fc2_weight = qmodel.fc2.weight
fc3_weight = qmodel.fc3.weight

In [57]:
fc1_bias = qmodel.fc1.bias
fc2_bias = qmodel.fc2.bias
fc3_bias = qmodel.fc3.bias

In [58]:
fc1_weight.shape

torch.Size([32, 784])

In [59]:
fc2_weight.shape

torch.Size([64, 32])

In [60]:
fc3_weight.shape

torch.Size([10, 64])

In [61]:
act0.shape

torch.Size([16, 32])

In [62]:
act1.shape

torch.Size([16, 64])

In [63]:
act2.shape

torch.Size([16, 10])

In [64]:
act3.shape

torch.Size([16, 10])

### act0

In [65]:
X = a_input.view(16, 784)

In [66]:
X.shape # input값

torch.Size([16, 784])

In [67]:
layer_act0 = torch.matmul(X, fc1_weight.t()) + fc1_bias  #(16 x 784) x (784 X 32) = (16 x 32)

In [68]:
layer_act0.shape

torch.Size([16, 32])

In [69]:
layer_act0_ReLU1 = ReLU_2D(layer_act0)

In [70]:
fix_act0 = torch.zeros(16, 32)

total = 0
num = 0

for a in range(16):
    for b in range(32):
        fix_act0[a][b] = flp2fixTensor(layer_act0_ReLU1[a][b], args.full_bits, args.frac_bits) #ideal value
        if (torch.equal(fix_act0[a][b], flp2fixTensor(qmodel.relu1(act0[a][b]), args.full_bits, args.frac_bits))==False): #ideal value, calculated value
            num += 1
        total += 1
        
print(f"number of different value is : {num}/{total}")

number of different value is : 376/512


In [71]:
print(torch.__version__)

1.10.2+cu102


### act1

In [72]:
layer_act1 = torch.matmul(fix_act0, fc2_weight.t()) + fc2_bias  #(16 x 32) x (32 X 64) = (16 x 64)

In [73]:
layer_act1.shape

torch.Size([16, 64])

In [74]:
layer_act1_ReLU2 = ReLU_2D(layer_act1)

In [75]:
fix_act1 = torch.zeros(16, 64)

total = 0
num = 0

for a in range(16):
    for b in range(64):
        fix_act1[a][b] = flp2fixTensor(layer_act1_ReLU2[a][b], args.full_bits, args.frac_bits)
        if (torch.equal(fix_act1[a][b], flp2fixTensor(qmodel.relu2(act1[a][b]),args.full_bits, args.frac_bits)) == False):
            num += 1
        total += 1
        
print(f'number of different value is : {num}/{total}')

number of different value is : 906/1024


### act2

In [76]:
layer_act2 = torch.matmul(fix_act1, fc3_weight.t()) + fc3_bias #(16 x 64) x (64 X 10) = (16 x 10)

In [77]:
layer_act2.shape

torch.Size([16, 10])

In [78]:
layer_act2_ReLU3 = ReLU_2D(layer_act2)

In [79]:
fix_act2 = torch.zeros(16, 10)

total = 0
num = 0

for a in range(16):
    for b in range(10):
        fix_act2[a][b] = flp2fixTensor(layer_act2_ReLU3[a][b], args.full_bits, args.frac_bits)
        if(torch.equal(fix_act2[a][b], act3[a][b]) == False):
            num += 1
        total += 1
        
print(f'number of different value is : {num}/{total}')

number of different value is : 160/160


In [80]:
print(X)

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])


### Input value's SCC

In [81]:
def delta(a,b):
    Pand = Percent(ANDstr(a,b))
    Por = Percent(a[1:])*Percent(b[1:])
    return Pand - Por

In [82]:
def SCC(a,b):
    lfsr = LFSRlist()
    
    sA = SNG(a,lfsr)
    sB = SNG(b,lfsr)
    
    PerA = Percent(sA[1:])
    PerB = Percent(sB[1:])
    delt = delta(sA,sB)
    if delt > 0 :
        return delt / (min(PerA,PerB)-(PerA*PerB))
    elif delt < 0 :
        return delt / ((PerA*PerB)-max(PerA+PerB-1,0))
    else :
        return 0

In [83]:
def SCCp(a,b):
    lfsr = LFSRlist()
    
    sA = SNG(a,lfsr)
    sB = SNG_P(b,lfsr)
    
    PerA = Percent(sA[1:])
    PerB = Percent(sB[1:])
    delt = delta(sA,sB)
    if delt > 0 :
        return delt / (min(PerA,PerB)-(PerA*PerB))
    elif delt < 0 :
        return delt / ((PerA*PerB)-max(PerA+PerB-1,0))
    else :
        return 0

In [84]:
def SCCpTensor(A,B):
    SCClist = []
    for aTensor,bTensor in zip(A.view(-1),B.view(-1)):
        SCClist.append(SCCp(aTensor,bTensor))
    return torch.tensor(SCClist).view(A.size())

In [85]:
def SCCTensor(A,B):
    SCClist = []
    for aTensor,bTensor in zip(A.view(-1),B.view(-1)):
        SCClist.append(SCC(aTensor,bTensor))
    return torch.tensor(SCClist).view(A.size())