In [67]:
# Load in relevant libraries, and alias where appropriate
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import numpy as np

In [68]:
def zeropoint_quantize(X):
    print("shapeX=",X.shape)
    # Calculate value range (denominator)
    x_range = torch.max(X) - torch.min(X)
    x_range = 1 if x_range == 0 else x_range

    # Calculate scale
    scale = 255 / x_range


    # Scale and round the inputs
    X_quant = torch.clip((X * scale ).round(), 0, 255)
    print("shapeX_quant=",X_quant.shape)
    return X_quant.to(torch.int32),scale


In [69]:
def zeropoint_dequantize(X,scale):

    # Dequantize
    X_dequant = X / scale
    print("shapeX_dequant=",X_dequant.shape)
    return  X_dequant

In [70]:
XBit=8
QBit=8
QTZRange=2**QBit

def DetectTheMidValueOfRange(WeightBuffer):
    print("WeightBuffer=",WeightBuffer.shape)
    F = WeightBuffer.shape[0]
    C = WeightBuffer.shape[1]
    K = WeightBuffer.shape[2]

    SumArrayF = np.zeros(F*C*K*K)
    WeightBuffer=WeightBuffer.reshape(-1)
    #print("WeightBuffer=",WeightBuffer)
    SumArrayF=WeightBuffer


    #changing value of weightBuffer with the first value of each range
    for w in range(F*C*K*K):
      for x in range(QTZRange):
          Pfirst=((x/QTZRange)*(2**XBit))
          Plast=((x+1)/QTZRange)*(2**XBit)

          if (SumArrayF[w]>=Pfirst) &  (SumArrayF[w]<Plast):
              #the first value of each range
              SumArrayF[w]=(Pfirst+Plast)/2
              break
    WeightBuffer=SumArrayF
    WeightBuffer=WeightBuffer.reshape(F,C,K,K)
    #print("WeightBuffer=",WeightBuffer)

    return WeightBuffer

In [71]:
from sklearn.cluster import KMeans
QBit=2
QTZRange=2**QBit

def QuantizeWithKmeansEachKernel(WeightBuffer):
    F = WeightBuffer.shape[0]
    C = WeightBuffer.shape[1]
    K = WeightBuffer.shape[2]
    for i in range(F):
        for j in range(C):
            layer=WeightBuffer[i,j,:].reshape(-1)
            data =layer
            #print("layer=",layer)
            kmeans = KMeans(n_clusters=QTZRange)
            kmeans.fit(data.reshape(-1,1))
            label=kmeans.labels_
            #print("label=",label)
            layerk=torch.zeros(K*K)
            for k in range(K*K):
                layerk[k]=torch.tensor(kmeans.cluster_centers_[label[k]])

            layerk=layerk.reshape(K,K)
            WeightBuffer[i,j,:]=layerk
            #print("layerk=",layerk)
    return WeightBuffer

In [72]:
from sklearn.cluster import KMeans
QBit=2
QTZRange=2**QBit

def QuantizeWithKmeansWholeFilter(WeightBuffer):
    F = WeightBuffer.shape[0]
    C = WeightBuffer.shape[1]
    K = WeightBuffer.shape[2]
    SumArrayF = np.zeros(F*C*K*K)
    layer=WeightBuffer.reshape(-1)
    data =layer
    print("layer=",layer)
    kmeans = KMeans(n_clusters=QTZRange)
    kmeans.fit(data.reshape(-1,1))
    label=kmeans.labels_
    print("label=",label)
    layerk=torch.zeros(F*C*K*K)
    for k in range(F*C*K*K):
        layerk[k]=torch.tensor(kmeans.cluster_centers_[label[k]])

    layerk=layerk.reshape(F,C,K,K)
    WeightBuffer=layerk
    print("layerk=",layerk)
    return WeightBuffer

In [73]:

# Define relevant variables for the ML task
batch_size = 64
num_classes = 10
learning_rate = 0.001
num_epochs = 10

# Device will determine whether to run the training on GPU or CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


#Loading the dataset and preprocessing
train_dataset = torchvision.datasets.MNIST(root = './data',
                                           train = True,
                                           transform = transforms.Compose([
                                                  transforms.Resize((32,32)),
                                                  transforms.ToTensor(),
                                                  transforms.Normalize(mean = (0.1307,), std = (0.3081,))]),
                                           download = True)


test_dataset = torchvision.datasets.MNIST(root = './data',
                                          train = False,
                                          transform = transforms.Compose([
                                                  transforms.Resize((32,32)),
                                                  transforms.ToTensor(),
                                                  transforms.Normalize(mean = (0.1325,), std = (0.3105,))]),
                                          download=True)


train_loader = torch.utils.data.DataLoader(dataset = train_dataset,
                                           batch_size = batch_size,
                                           shuffle = True)


test_loader = torch.utils.data.DataLoader(dataset = test_dataset,
                                           batch_size = batch_size,
                                           shuffle = True)





#Defining the convolutional neural network
class LeNet5(nn.Module):
    def __init__(self, num_classes):
        super(LeNet5, self).__init__()

        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.relu1=nn.ReLU()
        self.maxpool1=nn.MaxPool2d(kernel_size = 2, stride = 2)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        self.relu2=nn.ReLU()
        self.maxpool2=nn.MaxPool2d(kernel_size = 2, stride = 2)
        self.conv3 = nn.Conv2d(16, 120, kernel_size=5)

        self.fc1 = nn.Linear(120, 84)
        self.relu4 = nn.ReLU()
        self.fc2 = nn.Linear(84, num_classes)

    def forward(self, x):

        out = self.conv1(x)
        out = self.relu1(out)
        out = self.maxpool1(out)
        out = self.conv2(out)
        out = self.relu2(out)
        out = self.maxpool2(out)
        out = self.conv3(out)
        out = out.reshape(out.size(0), -1)
        out = self.fc1(out)
        out = self.relu4(out)
        out = self.fc2(out)

        return out


model = LeNet5(num_classes).to(device)

#Setting the loss function
cost = nn.CrossEntropyLoss()

#Setting the optimizer with the model parameters and learning rate
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

#this is defined to print how many steps are remaining when training
total_step = len(train_loader)

total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        #Forward pass
        outputs = model(images)
        loss = cost(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()



        if (i+1) % 400 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
        		           .format(epoch+1, num_epochs, i+1, total_step, loss.item()))
    #torch.save(model.state_dict(),'/content/drive/MyDrive/lenet-weight/weightsLenetPytorch3Conv-2.pt')




    #print("model.state_dict()['conv1.weight']=",model.state_dict()['conv1.weight'])
    layer_weights1 = model.state_dict()['conv1.weight']
    Conv1=DetectTheMidValueOfRange(layer_weights1)
    model.state_dict()['conv1.weight'].data.copy_(Conv1)
    #print("model.state_dict()['conv1.weight']=",model.state_dict()['conv1.weight'])


    layer_weights2 = model.state_dict()['conv2.weight']
    Conv2=DetectTheMidValueOfRange(layer_weights2)
    model.state_dict()['conv2.weight'].data.copy_(Conv2)


    layer_weights3 = model.state_dict()['conv3.weight']
    Conv3=DetectTheMidValueOfRange(layer_weights3)
    model.state_dict()['conv3.weight'].data.copy_(Conv3)
    #input()

    #model.load_state_dict(torch.load('/content/drive/MyDrive/lenet-weight/weightsLenetPytorch3Conv-2.pt'))



Epoch [1/10], Step [400/938], Loss: 0.0389
Epoch [1/10], Step [800/938], Loss: 0.0473
WeightBuffer= torch.Size([6, 1, 5, 5])
WeightBuffer= torch.Size([16, 6, 5, 5])
WeightBuffer= torch.Size([120, 16, 5, 5])
Epoch [2/10], Step [400/938], Loss: 2.3046
Epoch [2/10], Step [800/938], Loss: 2.2988
WeightBuffer= torch.Size([6, 1, 5, 5])
WeightBuffer= torch.Size([16, 6, 5, 5])
WeightBuffer= torch.Size([120, 16, 5, 5])
Epoch [3/10], Step [400/938], Loss: 2.3169
Epoch [3/10], Step [800/938], Loss: 2.2987
WeightBuffer= torch.Size([6, 1, 5, 5])
WeightBuffer= torch.Size([16, 6, 5, 5])
WeightBuffer= torch.Size([120, 16, 5, 5])
Epoch [4/10], Step [400/938], Loss: 2.2973
Epoch [4/10], Step [800/938], Loss: 2.3024
WeightBuffer= torch.Size([6, 1, 5, 5])
WeightBuffer= torch.Size([16, 6, 5, 5])
WeightBuffer= torch.Size([120, 16, 5, 5])
Epoch [5/10], Step [400/938], Loss: 2.3038
Epoch [5/10], Step [800/938], Loss: 2.3049
WeightBuffer= torch.Size([6, 1, 5, 5])
WeightBuffer= torch.Size([16, 6, 5, 5])
WeightB

In [74]:
# Test the model
# In test phase, we don't need to compute gradients (for memory efficiency)

with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))

Accuracy of the network on the 10000 test images: 11.35 %


In [75]:
torch.save(model.state_dict(),'/content/drive/MyDrive/lenet-weight/weightsLenetPytorch3Conv-retrain-Kmeans8Q.pt')

RuntimeError: ignored