In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchsummaryX import summary
import matplotlib.pyplot as plt
import time
import os

from UnarySim.sw.kernel.nn_utils import *

In [3]:
cwd = os.getcwd()
print(cwd)

D:\project\Anaconda3\Lib\site-packages\UnarySim\sw\test


In [4]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [5]:
# MNIST data loader
transform=transforms.Compose([transforms.Resize((32, 32)), transforms.ToTensor()])

trainset = torchvision.datasets.MNIST(root=cwd+'/data/mnist', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=4)

testset = torchvision.datasets.MNIST(root=cwd+'/data/mnist', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=128, num_workers=4)

In [6]:
# model = LeNet()
# model = LeNet_clamp()
# model = MLP3()
# model = MLP3_clamp()
model = MLP3_clamp_train()
model.to(device)
summary(model, torch.zeros((1, 1, 32, 32)).to(device))

      Kernel Shape Output Shape  Params  Mult-Adds
Layer                                             
0_fc1  [1024, 512]     [1, 512]  524800     524288
1_fc2   [512, 512]     [1, 512]  262656     262144
2_fc3    [512, 10]      [1, 10]    5130       5120
--------------------------------------------------
                      Totals
Total params          792586
Trainable params      792586
Non-trainable params       0
Mult-Adds             791552


Unnamed: 0_level_0,Kernel Shape,Output Shape,Params,Mult-Adds
Layer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0_fc1,"[1024, 512]","[1, 512]",524800,524288
1_fc2,"[512, 512]","[1, 512]",262656,262144
2_fc3,"[512, 10]","[1, 10]",5130,5120


In [7]:
bitwidth = 8
clipper = NN_SC_Weight_Clipper(bitwidth=bitwidth)

In [12]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.00000001)

In [13]:
for epoch in range(20):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data[0].to(device), data[1].to(device)
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()
        
    model.apply(clipper)
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('Train - Epoch %d, Loss: %f, Test Accuracy: %f %%' \
          % (epoch, loss.detach().cpu().item(), 100 * correct / total))

print('Finished Training')

Train - Epoch 0, Loss: 2.302593, Test Accuracy: 11.350000 %
Train - Epoch 1, Loss: 2.302552, Test Accuracy: 11.350000 %
Train - Epoch 2, Loss: 2.302569, Test Accuracy: 11.350000 %
Train - Epoch 3, Loss: 2.302586, Test Accuracy: 11.350000 %
Train - Epoch 4, Loss: 2.302588, Test Accuracy: 11.350000 %
Train - Epoch 5, Loss: 2.302573, Test Accuracy: 11.350000 %
Train - Epoch 6, Loss: 2.302582, Test Accuracy: 11.350000 %
Train - Epoch 7, Loss: 2.302578, Test Accuracy: 11.350000 %
Train - Epoch 8, Loss: 2.302577, Test Accuracy: 11.350000 %
Train - Epoch 9, Loss: 2.302576, Test Accuracy: 11.350000 %
Train - Epoch 10, Loss: 2.302583, Test Accuracy: 11.350000 %
Train - Epoch 11, Loss: 2.302598, Test Accuracy: 11.350000 %
Train - Epoch 12, Loss: 2.302569, Test Accuracy: 11.350000 %
Train - Epoch 13, Loss: 2.302585, Test Accuracy: 11.350000 %
Train - Epoch 14, Loss: 2.302580, Test Accuracy: 11.350000 %
Train - Epoch 15, Loss: 2.302591, Test Accuracy: 11.350000 %
Train - Epoch 16, Loss: 2.302575, 

In [10]:
model_path = cwd+"\saved_model_state_dict"+"_"+str(bitwidth)+"_no_clamp"
torch.save(model.state_dict(), model_path)

# test load from state_dict

In [11]:
model = MLP3()
model.load_state_dict(torch.load(model_path))
model.eval()
model.to(device)
model.apply(clipper)
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %f %%' % (
    100 * correct / total))

Accuracy of the network on the 10000 test images: 96.870000 %


In [12]:
print(model.fc1.weight.max())
print(model.fc1.weight.min())

tensor(1., device='cuda:0', grad_fn=<MaxBackward1>)
tensor(-1., device='cuda:0', grad_fn=<MinBackward1>)
