In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch import nn
# import torch.nn.utils.prune as prune
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from torch import Tensor
from torch.nn.parameter import Parameter

from torchvision import datasets, transforms
from torch.utils.data import DataLoader

import math
import warnings

In [41]:
BW=6
BX=6
VBLMAX = 0.8 
T0 = 100e-12
kn = 220e-6
Vt = 0.4
alpha = 1.8
CBL = 270e-15
VWL = 0.9
Icell = kn*np.power(VWL-Vt,alpha) # Ideal cell current of the discharge path
delta_VBL_LSB = T0*Icell/CBL #The voltage difference on VBL created by the LSB
kclip = VBLMAX/delta_VBL_LSB 
#kclip = 10000+VBLMAX/delta_VBL_LSB
sigma_Vt = 23.8e-3
sigma_D = alpha*sigma_Vt/(VWL-Vt)

def quantizeWeight(W,BW):
    W = torch.min(W,(1.0-(2**(-(BW-1.0))))*torch.ones_like(W))
    Wbs = []
    Wbi = torch.lt(W,torch.zeros_like(W)).float()
    Wbs.append(Wbi)
    W = (W + Wbi)
    for i in range(BW-1):
        Wbi = torch.ge(W,0.5*torch.ones_like(W)).float()
        Wbs.append(Wbi)
        W = 2.0*W - Wbi
    carry = torch.ge(W,0.5*torch.ones_like(W)).float()
    for i in range(BW):#-1):
        j = BW-1-i
        Wbs[j] = Wbs[j]+carry
        carry = torch.gt(Wbs[j],1.5*torch.ones_like(Wbs[j])).float()
        Wbs[j] = Wbs[j]*torch.ne(Wbs[j],2.0*torch.ones_like(Wbs[j]))
    return Wbs

def reconstructWeight(Wbs,BW):
    W = torch.zeros_like(Wbs[0])
    for j in range(BW):
        multiplier = (0.5)**j
        if (j == 0):
            multiplier = -1.0
        W += Wbs[j] * multiplier
    return W

class DIMAConv2d(nn.Conv2d):
    def __init__(
        self,
        sigma_D = 0,
        layer_index = 0,
        *kargs,
        **kwargs
    ):
#         self.groups = ((self.kernel_size[0]*self.kernel_size[1]*self.in_channels) // 256) + 1
        super(DIMAConv2d, self).__init__(*kargs,**kwargs)
#         self.groups = ((self.kernel_size[0]*self.kernel_size[1]*self.in_channels) // 256) + 1
#         self.output_h = floor( ((h_w[0] + (2 * pad) - ( dilation * (kernel_size[0] - 1) ) - 1 )/ stride) + 1)
#         self.output_w = floor( ((h_w[1] + (2 * pad) - ( dilation * (kernel_size[1] - 1) ) - 1 )/ stride) + 1)
        self.layer_index = layer_index
        self.noise = np.random.normal(0,sigma_D,(BW,self.weight.size()[0],self.weight.size()[1]
                                                 ,self.weight.size()[2],self.weight.size()[3]))
        
    def quantize_activations(self,input):
        if(self.layer_index != 0):
            input = torch.clamp(input,0,6) / 6
            input = 6 * torch.min(self.round_f(input*(2**BX))*(2**(-BX)) ,(1.0-(2**(-BX)))*torch.ones_like(input))
        else:
            input = torch.min(self.round_f(input*(2**BX))*(2**(-BX)) ,(1.0-(2**(-BX)))*torch.ones_like(input))
        return input
    
    def quantize_outputs(self,output):
        output = torch.clamp(output,-6,6)
        output = torch.min(self.round_f((output/6)*(2**(BW-1.0)))*(2.0**(1.0-BW)),(1.0-(2.0**(1.0-BW)))*torch.ones_like(output))
        output = output * 6
        return output 
               
    def round_f(self, x): #rounds a number to the nearest integer with STE for gradients
        x_r = torch.round(x)
        x_g = x
        return (x_r - x_g).detach() + x_g
    
    def quantize_weights(self):
        weight_q = quantizeWeight(self.weight.data,BW)
        for b in range(BW-1):
            weight_q[b+1] = weight_q[b+1]*(1+self.noise[b])
        weight = reconstructWeight(weight_q,BW)
        Wmax = kclip*np.power(2.0,-(BW-1))
        weight = torch.clamp(weight,-Wmax,Wmax)
        return (weight - self.weight).detach() + self.weight
        
    def _conv_forward(self, input, weight):
        if self.padding_mode != 'zeros':
            return F.conv2d(F.pad(input, self._reversed_padding_repeated_twice, mode=self.padding_mode),
                            weight, self.bias, self.stride,
                            _pair(0), self.dilation, self.groups)
        return F.conv2d(input, weight, self.bias, self.stride,
                        self.padding, self.dilation, self.groups)

    def forward(self, input: Tensor) -> Tensor:
        weight = self.quantize_weights()
        input = self.quantize_activations(input)
        output_h = int((((input.size()[2] + (2 * self.padding[0]) - 
                            ( self.dilation[0] * (self.kernel_size[0] - 1) ) - 1 )/ self.stride[0]) + 1) // 1)
        output_w = int((((input.size()[3] + (2 * self.padding[1]) - 
                            ( self.dilation[1] * (self.kernel_size[1] - 1) ) - 1 )/ self.stride[1]) + 1) // 1)
        
        if(self.kernel_size[0]*self.kernel_size[1]*self.in_channels > 256):
            weights = []
            inputs = []
            val = ((self.kernel_size[0]*self.kernel_size[1]*self.in_channels) // 256) + 1
            coeff = self.in_channels // val
            for i in range (val):
                if(i != val-1):
                    temp_weight = torch.zeros_like(weight)
                    temp_input = torch.zeros_like(input)
                    temp_weight[:,i*coeff:(i+1)*coeff,:,:] = weight[:,i*coeff:(i+1)*coeff,:,:]
                    weights.append(temp_weight)
                    temp_input[:,i*coeff:(i+1)*coeff,:,:] = input[:,i*coeff:(i+1)*coeff,:,:]
                    inputs.append(temp_input)
                else:
                    temp_weight = torch.zeros_like(weight)
                    temp_input = torch.zeros_like(input)
                    temp_weight[:,i*coeff:,:,:] = weight[:,i*coeff:,:,:]
                    weights.append(temp_weight)
                    temp_input[:,i*coeff:,:,:] = input[:,i*coeff:,:,:]
                    inputs.append(temp_input)
            output = torch.zeros((input.size()[0],self.out_channels,output_h,output_w))
            for i in range (val):
                out = self._conv_forward(inputs[i],weights[i])
                output += self.quantize_outputs(out)  
#                 output += out
            
#         self.groups = ((self.kernel_size[0]*self.kernel_size[1]*self.in_channels) // 256) + 1
#         print(self.groups)
#         if(self.groups != 1):
#             weight = weight.reshape(self.groups,self.out_channels//self.groups, self.in_channels, weight.size()[2], weight.size()[3])
#         output =  self._conv_forward(input, weight)
#         output = self.quantize_outputs(output)
#         print(output.size())
#         if(self.groups != 1):
#             output = torch.sum(output, 2)
        else:
            output = self._conv_forward(input, weight)
            output = self.quantize_outputs(output)
        return output

In [2]:
class LeNet5(nn.Module):

    def __init__(self, n_classes):
        super(LeNet5, self).__init__()
        
        self.feature_extractor = nn.Sequential(            
            nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1,bias=False),
            nn.Tanh(),
            nn.AvgPool2d(kernel_size=2),
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1,bias=False),
            nn.Tanh(),
            nn.AvgPool2d(kernel_size=2),
            nn.Conv2d(in_channels=16, out_channels=120, kernel_size=5, stride=1,bias=False),
            nn.Tanh()
        )

        self.classifier = nn.Sequential(
            nn.Linear(in_features=120, out_features=84,bias=False),
            nn.Tanh(),
            nn.Linear(in_features=84, out_features=n_classes,bias=False),
        )


    def forward(self, x):
        x = self.feature_extractor(x)
        x = torch.flatten(x, 1)
        logits = self.classifier(x)
        probs = F.softmax(logits, dim=1)
        return logits, probs
    
# torch.manual_seed(RANDOM_SEED)

# model = LeNet5(N_CLASSES).to(DEVICE)
# optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
# criterion = nn.CrossEntropyLoss()
    
# model, optimizer, _ = training_loop(model, criterion, optimizer, train_loader, valid_loader, N_EPOCHS, DEVICE)

def get_datasets(*args, **kwargs):
    transform = transforms.Compose(
        [
            transforms.Resize((32, 32)),
            transforms.ToTensor()
#             transforms.Normalize((0.1307,), (0.3081,))
        ]
    )

    trainset = torchvision.datasets.MNIST(train=True, transform=transform, *args, **kwargs)
    testset = torchvision.datasets.MNIST(train=False, transform=transform, *args, **kwargs)
    return trainset, testset

def get_dataloaders(trainset, testset, batch_size=100, num_worker=4):
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=num_worker)
    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=num_worker)

    return trainloader, testloader

LeNET = LeNet5(10)
criterion = nn.CrossEntropyLoss()
lr_ = 0.002
optimizer = torch.optim.SGD(LeNET.parameters(), lr=lr_)


trainset, testset = get_datasets(root='./data', download=True)
trainloader, testloader = get_dataloaders(trainset, testset, batch_size=100, num_worker=16)
loss_ = np.zeros(15)
gradient = np.zeros((4,15))

for epoch in range(20):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()
        outputs, _ = LeNET(inputs)

        loss = criterion(outputs, labels)
        loss.backward()

        optimizer.step()
        running_loss += loss.item()

    
print('Finished Training')

PATH = './mnist_LeNET.pth'
torch.save(model.state_dict(), PATH)

Finished Training


NameError: name 'model' is not defined

In [4]:
LeNET.load_state_dict(torch.load(PATH), strict=False)

correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs, _ = LeNET(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

Accuracy of the network on the 10000 test images: 98 %


In [44]:
class LeNet5_DIMA(nn.Module):

    def __init__(self, n_classes):
        super(LeNet5_DIMA, self).__init__()
        
        self.feature_extractor = nn.Sequential(            
            DIMAConv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1,bias=False, sigma_D = sigma_D, layer_index = 0),
            nn.Tanh(),
            nn.AvgPool2d(kernel_size=2),
            DIMAConv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1,bias=False,sigma_D = sigma_D, layer_index = 1),
            nn.Tanh(),
            nn.AvgPool2d(kernel_size=2),
            DIMAConv2d(in_channels=16, out_channels=120, kernel_size=5, stride=1,bias=False, sigma_D = sigma_D, layer_index = 2),
            nn.Tanh()
        )

        self.classifier = nn.Sequential(
            nn.Linear(in_features=120, out_features=84,bias=False),
            nn.Tanh(),
            nn.Linear(in_features=84, out_features=n_classes,bias=False),
        )


    def forward(self, x):
        x = self.feature_extractor(x)
        x = torch.flatten(x, 1)
        logits = self.classifier(x)
        probs = F.softmax(logits, dim=1)
        return logits, probs

LeNET_DIMA = LeNet5_DIMA(10)
LeNET_DIMA.load_state_dict(torch.load(PATH), strict=False)

correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs, _ = LeNET_DIMA(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

Accuracy of the network on the 10000 test images: 56 %


In [47]:
lr_ = 0.0005
optimizer = torch.optim.SGD(LeNET_DIMA.parameters(), lr=lr_)

for epoch in range(2):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()
        outputs, _ = LeNET_DIMA(inputs)

        
        loss = criterion(outputs, labels)
        loss.backward()

        optimizer.step()

        running_loss += loss.item()

    
print('Finished Training')

PATH_ = './mnist_LeNET_dima.pth'
torch.save(LeNET_DIMA.state_dict(), PATH_)

Finished Training


In [49]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs, _ = LeNET_DIMA(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
# time_.append(time.time() - start_time)
# acc.append(100 * correct / total)
#     print('Accuracy of the network on the 10000 test images: %d %%' % (
#         100 * correct / total))

#     print("--- %s seconds ---" % (time.time() - start_time))

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))
# print('Time:',np.average(np.array(time_)))

Accuracy of the network on the 10000 test images: 94 %
