In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch import nn
# import torch.nn.utils.prune as prune
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from torch import Tensor
from torch.nn.parameter import Parameter

In [2]:
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.engine = nn.Sequential(
                nn.Linear(784,512,bias=False),
                nn.BatchNorm1d(512),
                nn.ReLU6(inplace=True),
                nn.Linear(512,256,bias=False),
                nn.BatchNorm1d(256),
                nn.ReLU6(inplace=True),
                nn.Linear(256, 256,bias=False),
                nn.BatchNorm1d(256),
                nn.ReLU6(inplace=True),
                nn.Linear(256, 10,bias=False)
        )    
    def forward(self, x):
        x = x.view(-1, 784)
        ret = self.engine(x)
        return ret
    

def get_datasets(*args, **kwargs):
    transform = transforms.Compose(
        [
            transforms.ToTensor()
#             transforms.Normalize((0.1307,), (0.3081,))
        ]
    )

    trainset = torchvision.datasets.MNIST(train=True, transform=transform, *args, **kwargs)
    testset = torchvision.datasets.MNIST(train=False, transform=transform, *args, **kwargs)
    return trainset, testset

def get_dataloaders(trainset, testset, batch_size=100, num_worker=4):
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=num_worker)
    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=num_worker)

    return trainloader, testloader

mlp = MLP()
criterion = nn.CrossEntropyLoss()
lr_ = 0.002
optimizer = optim.SGD(mlp.engine.parameters(), lr=lr_)


trainset, testset = get_datasets(root='./data', download=True)
trainloader, testloader = get_dataloaders(trainset, testset, batch_size=100, num_worker=16)
loss_ = np.zeros(15)
gradient = np.zeros((4,15))

for epoch in range(20):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()
        outputs = mlp(inputs)

        
        loss = criterion(outputs, labels)
        loss.backward()

        optimizer.step()

        running_loss += loss.item()

    
print('Finished Training')

PATH = './mnist_mlp.pth'
torch.save(mlp.state_dict(), PATH)

Finished Training


In [3]:
mlp.load_state_dict(torch.load(PATH), strict=False)
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = mlp(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

Accuracy of the network on the 10000 test images: 97 %


In [None]:
BW=6
BX=6
VBLMAX = 0.8 
T0 = 100e-12
kn = 220e-6
Vt = 0.4
alpha = 1.8
CBL = 270e-15
VWL = 0.9
Icell = kn*np.power(VWL-Vt,alpha) # Ideal cell current of the discharge path
delta_VBL_LSB = T0*Icell/CBL #The voltage difference on VBL created by the LSB
kclip = VBLMAX/delta_VBL_LSB 
#kclip = 10000+VBLMAX/delta_VBL_LSB
sigma_Vt = 23.8e-3
sigma_D = alpha*sigma_Vt/(VWL-Vt)

class MLP_DIMA(nn.Module):
    def __init__(self):
        super(MLP_DIMA, self).__init__()
        self.engine = nn.Sequential(
                nn.Linear(784,512,bias=False),
                nn.BatchNorm1d(512),
                nn.ReLU6(inplace=True),
                DIMALinear(512,256,bias=False,var=sigma_D,layer_index = 2),
                nn.BatchNorm1d(256),
                nn.ReLU6(inplace=True),
                nn.Linear(256, 256,bias=False),
                nn.BatchNorm1d(256),
                nn.ReLU6(inplace=True),
                DIMALinear(256, 10,bias=False,var=sigma_D,layer_index = 3)
        )    
    def forward(self, x):
        x = x.view(-1, 784)
        ret = self.engine(x)
        return ret
    
# def get_datasets(*args, **kwargs):
#     transform = transforms.Compose(
#         [
#             transforms.ToTensor(),
# #             transforms.Normalize((0.1307,), (0.3081,))
#         ]
#     )

#     trainset = torchvision.datasets.MNIST(train=True, transform=transform, *args, **kwargs)
#     testset = torchvision.datasets.MNIST(train=False, transform=transform, *args, **kwargs)
#     return trainset, testset

# def get_dataloaders(trainset, testset, batch_size=100, num_worker=4):
#     trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=num_worker)
#     testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=num_worker)

#     return trainloader, testloader

def quantizeInput(X,BX):
    X = np.minimum(X,1.0-np.power(2.0,-BX))
    Xbs = []
    for i in range(BX):
        Xbi = np.greater_equal(X,0.5).astype(float)
        Xbs.append(Xbi)
        X = 2.0*X - Xbi
    carry = np.greater_equal(X,0.5).astype(float)
    for i in range(BX):
        j=BX-1-i
        Xbs[j] = Xbs[j]+carry
        carry = np.greater(Xbs[j],1.5).astype(float)
        Xbs[j] = Xbs[j]*np.not_equal(Xbs[j],2.0)
    return Xbs
def reconstructInput(Xbs,BX):
    X=np.zeros_like(Xbs[0])
    for l in range(BX):
        multiplier = np.power(0.5,l+1.0)
        X+=Xbs[l]*multiplier
        X.astype(float)
    return X

# def quantizeWeight(W,BW):
#     W = np.minimum(W,1.0-np.power(2.0,-(BW-1.0)))
#     Wbs = []
#     Wbi = np.less(W,0).astype(float)
#     Wbs.append(Wbi)
#     W = (W + Wbi)
#     for i in range(BW-1):
#         Wbi = np.greater_equal(W,0.5).astype(float)
#         Wbs.append(Wbi)
#         W = 2.0*W - Wbi
#     carry = np.greater_equal(W,0.5).astype(float)
#     for i in range(BW):#-1):
#         j=BW-1-i
#         Wbs[j] = Wbs[j]+carry
#         carry = np.greater(Wbs[j],1.5).astype(float)
#         Wbs[j] = Wbs[j]*np.not_equal(Wbs[j],2.0)
#     return Wbs
def quantizeWeight(W,BW):
    W = torch.min(W,(1.0-(2**(-(BW-1.0))))*torch.ones_like(W))
    Wbs = []
    Wbi = torch.lt(W,torch.zeros_like(W)).float()
    Wbs.append(Wbi)
    W = (W + Wbi)
    for i in range(BW-1):
        Wbi = torch.ge(W,0.5*torch.ones_like(W)).float()
        Wbs.append(Wbi)
        W = 2.0*W - Wbi
    carry = torch.ge(W,0.5*torch.ones_like(W)).float()
    for i in range(BW):#-1):
        j = BW-1-i
        Wbs[j] = Wbs[j]+carry
        carry = torch.gt(Wbs[j],1.5*torch.ones_like(Wbs[j])).float()
        Wbs[j] = Wbs[j]*torch.ne(Wbs[j],2.0*torch.ones_like(Wbs[j]))
    return Wbs

# def reconstructWeight(Wbs,BW):
#     W=np.zeros_like(Wbs[0])
#     for j in range(BW):
#         multiplier = np.power(0.5,j)
#         if (j==0):
#             multiplier=-1.0
#         W+=Wbs[j]*multiplier
#     return W
def reconstructWeight(Wbs,BW):
    W = torch.zeros_like(Wbs[0])
    for j in range(BW):
        multiplier = (0.5)**j
        if (j == 0):
            multiplier = -1.0
        W += Wbs[j] * multiplier
    return W

class DIMALinear(nn.Linear):
    __constants__ = ['in_features', 'out_features']
    in_features: int
    out_features: int
    weight: Tensor

    def __init__(self, in_features: int, out_features: int, bias: bool = True, var: sigma_D = 0, layer_index = 0) -> None:
        super(DIMALinear, self).__init__(in_features,out_features)
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(torch.Tensor(out_features, in_features))
        if bias:
            self.bias = Parameter(torch.Tensor(out_features))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()
        self.noise = np.random.normal(0,sigma_D,(BW,self.weight.size()[0],self.weight.size()[1]))
        print(self.weight.size())
        self.layer_index = layer_index

    def quantize_activations(self,input):
        if(self.layer_index != 0):
            input = torch.clamp(input,0,6) / 6
            input = 6 * torch.min(torch.round(input*(2**BX))*(2**(-BX)) ,(1.0-(2**(-BX)))*torch.ones_like(input))
            
        else:
            input = torch.min(torch.round(input*(2**BX))*(2**(-BX)) ,(1.0-(2**(-BX)))*torch.ones_like(input))
        return input
    def quantize_outputs(self,output):
        output = torch.clamp(output,-6,6)
        output = torch.min(torch.round((output/6)*(2**(BW-1.0)))*(2.0**(1.0-BW)),(1.0-(2.0**(1.0-BW)))*torch.ones_like(output))
        output = output * 6
        return output 
               
    def quantize_weights(self):
#         weight = self.weight.data.numpy()
        weight_q = quantizeWeight(self.weight.data,BW)
        for b in range(BW-1):
            weight_q[b+1] = weight_q[b+1]*(1+self.noise[b])
        weight = reconstructWeight(weight_q,BW)
        Wmax = kclip*np.power(2.0,-(BW-1))
#         weight = np.clip(weight,-Wmax,Wmax)
        weight = torch.clamp(weight,-Wmax,Wmax)
        self.weight = nn.Parameter(weight)
        return weight 
#         self.weight = nn.Parameter(torch.from_numpy(weight))
#         self.weight = nn.Parameter(self.weight.float())
        
    def forward(self, input: Tensor) -> Tensor:
        weight = self.quantize_weights()
        input = self.quantize_activations(input)
        if(self.weight.size()[1] > 256):
            inputs = []
            weights = []
            val = (self.weight.size()[1] // 256) + 1
            for i in range (val):
                if(i != val-1):
                    inputs.append(input[:,(i*256):(255+(i*256))])
                    weights.append(self.weight[:,(i*256):(255+(i*256))])
                else:
                    m = nn.ZeroPad2d((0,self.weight.size()[1]-(256*i),0,0))
                    temp_w = m(self.weight[:,(i*256):])
                    temp_i = m(input[:,(i*256):])
                    inputs.append(temp_i)
                    weights.append(temp_w)
            output = torch.zeros(input.size()[0],self.weight.size()[0])
            for i in range (val):
                output += self.quantize_outputs(F.linear(inputs[i],weights[i]))
#         print(self.weight.size(),input.size())
#         print(torch.max(input))
        else:
            output = F.linear(input, self.weight)
            output = self.quantize_outputs(output)
        return output

In [4]:
BW=6
BX=6
VBLMAX = 0.8 
T0 = 100e-12
kn = 220e-6
Vt = 0.4
alpha = 1.8
CBL = 270e-15
VWL = 0.9
Icell = kn*np.power(VWL-Vt,alpha) # Ideal cell current of the discharge path
delta_VBL_LSB = T0*Icell/CBL #The voltage difference on VBL created by the LSB
kclip = VBLMAX/delta_VBL_LSB 
#kclip = 10000+VBLMAX/delta_VBL_LSB
sigma_Vt = 23.8e-3
sigma_D = alpha*sigma_Vt/(VWL-Vt)

def quantizeWeight(W,BW):
    W = torch.min(W,(1.0-(2**(-(BW-1.0))))*torch.ones_like(W))
    Wbs = []
    Wbi = torch.lt(W,torch.zeros_like(W)).float()
    Wbs.append(Wbi)
    W = (W + Wbi)
    for i in range(BW-1):
        Wbi = torch.ge(W,0.5*torch.ones_like(W)).float()
        Wbs.append(Wbi)
        W = 2.0*W - Wbi
    carry = torch.ge(W,0.5*torch.ones_like(W)).float()
    for i in range(BW):#-1):
        j = BW-1-i
        Wbs[j] = Wbs[j]+carry
        carry = torch.gt(Wbs[j],1.5*torch.ones_like(Wbs[j])).float()
        Wbs[j] = Wbs[j] * torch.ne(Wbs[j],(2*torch.ones_like(Wbs[j])))
    return Wbs

def reconstructWeight(Wbs,BW):
    W = torch.zeros_like(Wbs[0])
    for j in range(BW):
        multiplier = (0.5)**j
        if (j == 0):
            multiplier = -1.0
        W += Wbs[j] * multiplier
    return W

class DIMALinear(nn.Linear):
    __constants__ = ['in_features', 'out_features']
    in_features: int
    out_features: int
    weight: Tensor

    def __init__(self, in_features: int, out_features: int, bias: bool = True, var: sigma_D = 0, layer_index = 0) -> None:
        super(DIMALinear, self).__init__(in_features,out_features)
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(torch.Tensor(out_features, in_features))
        if bias:
            self.bias = Parameter(torch.Tensor(out_features))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()
        self.noise = np.random.normal(0,sigma_D,(BW,self.weight.size()[0],self.weight.size()[1]))
        print(self.weight.size())
        self.layer_index = layer_index

    def quantize_activations(self,input):
        if(self.layer_index != 0):
            input = torch.clamp(input,0,6) / 6
            input = 6 * torch.min(self.round_f(input*(2**BX))*(2**(-BX)) ,(1.0-(2**(-BX)))*torch.ones_like(input))
        else:
            input = torch.min(self.round_f(input*(2**BX))*(2**(-BX)) ,(1.0-(2**(-BX)))*torch.ones_like(input))
        return input
    def quantize_outputs(self,output):
        output = torch.clamp(output,-6,6)
        output = torch.min(self.round_f((output/6)*(2**(BW-1.0)))*(2.0**(1.0-BW)),(1.0-(2.0**(1.0-BW)))*torch.ones_like(output))
        output = output * 6
        return output 
               
    def round_f(self, x): #rounds a number to the nearest integer with STE for gradients
        x_r = torch.round(x)
        x_g = x
        return (x_r - x_g).detach() + x_g
    
    def quantize_weights(self):
        weight_q = quantizeWeight(self.weight.data,BW)
        for b in range(BW-1):
            weight_q[b+1] = weight_q[b+1]*(1+self.noise[b])
        weight = reconstructWeight(weight_q,BW)
        Wmax = kclip*np.power(2.0,-(BW-1))
        weight = torch.clamp(weight,-Wmax,Wmax)
        return (weight - self.weight).detach() + self.weight
        
    def forward(self, input: Tensor) -> Tensor:
        weight = self.quantize_weights()
#         self.weight = nn.Parameter(weight)
        input = self.quantize_activations(input)
        if(self.weight.size()[1] > 256):
            inputs = []
            weights = []
            val = (self.weight.size()[1] // 256) + 1
            for i in range (val):
                if(i != val-1):
                    inputs.append(input[:,(i*256):(256+(i*256))])
                    weights.append(weight[:,(i*256):(256+(i*256))])
                else:
                    m = nn.ZeroPad2d((0,(256*(i+1))-self.weight.size()[1],0,0))
                    temp_w = m(weight[:,(i*256):])
                    temp_i = m(input[:,(i*256):])
                    inputs.append(temp_i)
                    weights.append(temp_w)
            output = torch.zeros(input.size()[0],self.weight.size()[0])
            for i in range (val):
                output += self.quantize_outputs(F.linear(inputs[i],weights[i]))
        else:
            output = F.linear(input, weight)
            output = self.quantize_outputs(output)
        return output
    
class MLP_DIMA(nn.Module):
    def __init__(self):
        super(MLP_DIMA, self).__init__()
        self.engine = nn.Sequential(
                DIMALinear(784,512,bias=False,var=sigma_D,layer_index=0),
                nn.BatchNorm1d(512),
                nn.ReLU6(inplace=True),
                DIMALinear(512,256,bias=False,var=sigma_D,layer_index=1),
                nn.BatchNorm1d(256),
                nn.ReLU6(inplace=True),
                DIMALinear(256, 256,bias=False,var=sigma_D,layer_index=2),
                nn.BatchNorm1d(256),
                nn.ReLU6(inplace=True),
                DIMALinear(256, 10,bias=False,var=sigma_D,layer_index = 3)
        )    
    def forward(self, x):
        x = x.view(-1, 784)
        ret = self.engine(x)
        return ret
    
# def get_datasets(*args, **kwargs):
#     transform = transforms.Compose(
#         [
#             transforms.ToTensor(),
# #             transforms.Normalize((0.1307,), (0.3081,))
#         ]
#     )

#     trainset = torchvision.datasets.MNIST(train=True, transform=transform, *args, **kwargs)
#     testset = torchvision.datasets.MNIST(train=False, transform=transform, *args, **kwargs)
#     return trainset, testset

# def get_dataloaders(trainset, testset, batch_size=100, num_worker=4):
#     trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=num_worker)
#     testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=num_worker)

#     return trainloader, testloader





In [5]:
import time
# start_time = time.time()
mlp_DIMA = MLP_DIMA()
mlp_DIMA.load_state_dict(torch.load(PATH), strict=False)
# BW=6
# BX=6
# VBLMAX = 0.8 
# T0 = 100e-12
# kn = 220e-6
# Vt = 0.4
# alpha = 1.8
# CBL = 270e-15
# VWL = 0.9
# Icell = kn*np.power(VWL-Vt,alpha) # Ideal cell current of the discharge path
# delta_VBL_LSB = T0*Icell/CBL #The voltage difference on VBL created by the LSB
# kclip = VBLMAX/delta_VBL_LSB 
# #kclip = 10000+VBLMAX/delta_VBL_LSB
# sigma_Vt = 23.8e-3
# sigma_D = alpha*sigma_Vt/(VWL-Vt)

time_ = []
acc = []
# for i in range (1):
start_time = time.time()
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = mlp_DIMA(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
time_.append(time.time() - start_time)
acc.append(100 * correct / total)
#     print('Accuracy of the network on the 10000 test images: %d %%' % (
#         100 * correct / total))

#     print("--- %s seconds ---" % (time.time() - start_time))

print('ACC:',np.average(np.array(acc)))
print('Time:',np.average(np.array(time_)))

torch.Size([512, 784])
torch.Size([256, 512])
torch.Size([256, 256])
torch.Size([10, 256])
ACC: 51.38
Time: 29.699488878250122


In [6]:
lr_ = 0.0005
optimizer = optim.SGD(mlp_DIMA.engine.parameters(), lr=lr_)

for epoch in range(2):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()
        outputs = mlp_DIMA(inputs)

        
        loss = criterion(outputs, labels)
        loss.backward()

        optimizer.step()

        running_loss += loss.item()

    
print('Finished Training')

PATH_ = './mnist_mlp_dima.pth'
torch.save(mlp_DIMA.state_dict(), PATH_)

Finished Training


In [7]:
# mlp_DIMA = MLP_DIMA()
# mlp_DIMA.load_state_dict(torch.load(PATH_), strict=False)

time_ = []
acc = []
# for i in range (1):
start_time = time.time()
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = mlp_DIMA(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
time_.append(time.time() - start_time)
acc.append(100 * correct / total)
#     print('Accuracy of the network on the 10000 test images: %d %%' % (
#         100 * correct / total))

#     print("--- %s seconds ---" % (time.time() - start_time))

print('ACC:',np.average(np.array(acc)))
print('Time:',np.average(np.array(time_)))

ACC: 88.49
Time: 28.37289333343506
