In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchsummaryX import summary
import matplotlib.pyplot as plt
import time
import os

from UnarySim.sw.kernel.nn_utils import *

In [3]:
cwd = os.getcwd()
cwd = "D:/project/Anaconda3/Lib/site-packages/UnarySim/sw/app/mlp/"
print(cwd)

D:/project/Anaconda3/Lib/site-packages/UnarySim/sw/app/mlp/


In [4]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [5]:
# MNIST data loader
transform=transforms.Compose([transforms.Resize((32, 32)), transforms.ToTensor()])

trainset = torchvision.datasets.MNIST(root=cwd+'/data/mnist', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=4)

testset = torchvision.datasets.MNIST(root=cwd+'/data/mnist', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=128, num_workers=4)

In [6]:
layer_width = 512
total_epoch = 40
lr = 0.001
print(total_epoch)

40


In [7]:
class ScaleReLU1(nn.Hardtanh):
    """
    clip the input when it is larger than 1.
    """
    def __init__(self, inplace: bool = False):
        super(ScaleReLU1, self).__init__(0., 1., inplace)

    def extra_repr(self) -> str:
        inplace_str = 'inplace=True' if self.inplace else ''
        return inplace_str

In [8]:
class ScaleHardsigmoid(nn.Module):
    """
    valid input range is (-1, +1).
    """
    def __init__(self, scale=3):
        super(ScaleHardsigmoid, self).__init__()
        self.scale = scale

    def forward(self, x) -> str:
        return nn.Hardsigmoid()(x * self.scale)

In [9]:
class MLP3_hardsig(nn.Module):
    def __init__(self, width=512, p=0.5):
        super(MLP3_hardsig, self).__init__()
        self.fc1 = nn.Linear(32*32, width)
        self.fc2 = nn.Linear(width, width)
        self.fc3 = nn.Linear(width, 10)
        
        self.fc1_out = torch.zeros(1)
        self.do1 = nn.Dropout(p=p)
        self.relu1_out = torch.zeros(1)
        self.fc2_out = torch.zeros(1)
        self.do2 = nn.Dropout(p=p)
        self.relu2_out = torch.zeros(1)
        self.fc3_out = torch.zeros(1)

    def forward(self, x):
        x = x.view(-1, 32*32)
        self.fc1_out = self.fc1(x)
#         self.relu1_out = ScaleHardsigmoid()(self.do1(self.fc1_out))
        self.relu1_out = nn.Sigmoid()(self.do1(self.fc1_out))
#         self.relu1_out = nn.Hardtanh()(self.do1(self.fc1_out))
#         self.relu1_out = nn.Tanh()(self.do1(self.fc1_out))
#         self.relu1_out = F.relu6(self.do1(self.fc1_out))
#         self.relu1_out = ScaleReLU1()(self.do1(self.fc1_out))
#         self.relu1_out = F.relu(self.do1(self.fc1_out))
        self.fc2_out = self.fc2(self.relu1_out)
#         self.relu2_out = ScaleHardsigmoid()(self.do2(self.fc2_out))
        self.relu2_out = nn.Sigmoid()(self.do2(self.fc2_out))
#         self.relu2_out = nn.Hardtanh()(self.do2(self.fc2_out))
#         self.relu2_out = nn.Tanh()(self.do2(self.fc2_out))
#         self.relu2_out = F.relu6(self.do2(self.fc2_out))
#         self.relu2_out = ScaleReLU1()(self.do2(self.fc2_out))
#         self.relu2_out = F.relu(self.do2(self.fc2_out))
        self.fc3_out = self.fc3(self.relu2_out)
        return F.softmax(self.fc3_out, dim=1)

In [10]:
model = MLP3_hardsig(layer_width)
model.to(device)
summary(model, torch.zeros((1, 1, 32, 32)).to(device))

      Kernel Shape Output Shape    Params Mult-Adds
Layer                                              
0_fc1  [1024, 512]     [1, 512]    524.8k  524.288k
1_do1            -     [1, 512]         -         -
2_fc2   [512, 512]     [1, 512]  262.656k  262.144k
3_do2            -     [1, 512]         -         -
4_fc3    [512, 10]      [1, 10]     5.13k     5.12k
----------------------------------------------------
                        Totals
Total params          792.586k
Trainable params      792.586k
Non-trainable params       0.0
Mult-Adds             791.552k


Unnamed: 0_level_0,Kernel Shape,Output Shape,Params,Mult-Adds
Layer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0_fc1,"[1024, 512]","[1, 512]",524800.0,524288.0
1_do1,-,"[1, 512]",,
2_fc2,"[512, 512]","[1, 512]",262656.0,262144.0
3_do2,-,"[1, 512]",,
4_fc3,"[512, 10]","[1, 10]",5130.0,5120.0


In [11]:
bitwidth = 8
clipper = NN_SC_Weight_Clipper(bitwidth=bitwidth)

In [12]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=lr)

In [13]:
for epoch in range(total_epoch):  # loop over the dataset multiple times
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data[0].to(device), data[1].to(device)
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()
    
    model.eval()
#     model.apply(clipper)
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('Train - Epoch %d, Loss: %f, Test Accuracy: %f %%' \
          % (epoch, loss.detach().cpu().item(), 100 * correct / total))

print('Finished Training')

tensor([[0.1021, 0.0882, 0.1281,  ..., 0.1267, 0.0745, 0.0849],
        [0.1023, 0.0850, 0.1332,  ..., 0.1257, 0.0736, 0.0853],
        [0.1007, 0.0831, 0.1374,  ..., 0.1280, 0.0677, 0.0901],
        ...,
        [0.1048, 0.0859, 0.1315,  ..., 0.1236, 0.0720, 0.0863],
        [0.1022, 0.0942, 0.1277,  ..., 0.1220, 0.0728, 0.0861],
        [0.1037, 0.0959, 0.1314,  ..., 0.1275, 0.0723, 0.0860]],
       device='cuda:0', grad_fn=<SoftmaxBackward>)
tensor([6, 6, 6, 5, 4, 0, 8, 9, 0, 6, 5, 2, 2, 2, 1, 0, 6, 6, 2, 2, 3, 6, 6, 3,
        4, 8, 1, 7, 2, 4, 5, 1, 1, 6, 4, 1, 6, 7, 6, 0, 9, 1, 0, 9, 4, 1, 3, 7,
        8, 6, 0, 6, 2, 8, 8, 7, 7, 7, 1, 5, 4, 3, 2, 8, 9, 7, 9, 4, 0, 2, 9, 9,
        4, 6, 0, 7, 9, 9, 9, 8, 0, 5, 6, 7, 8, 8, 4, 6, 8, 2, 6, 0, 7, 2, 2, 9,
        4, 5, 2, 9, 7, 8, 0, 0, 0, 2, 3, 2, 6, 8, 9, 0, 8, 2, 0, 5, 0, 4, 1, 5,
        2, 0, 1, 4, 6, 8, 8, 3], device='cuda:0')
tensor([[0.1405, 0.0569, 0.1867,  ..., 0.0815, 0.1020, 0.0904],
        [0.1424, 0.0577, 0.1934,  ...

KeyboardInterrupt: 

ScaleHardsigmoid: 95

sigmoid: 94

Hardtanh: 97

Tanh: 97

relu6: 97

ScaleReLU1: 97

relu: 97