In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch import nn
# import torch.nn.utils.prune as prune
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from torch import Tensor
from torch.nn.parameter import Parameter

In [18]:
class MLP(nn.Module):
    """Baseline fully connected classifier: 784 -> 512 -> 256 -> 256 -> 10.

    All layers live in ``self.engine`` (an ``nn.Sequential``), so parameter
    names have the form ``engine.<index>.weight`` / ``engine.<index>.bias``
    with the Linear layers at indices 0, 2, 4 and 6. Each hidden Linear is
    followed by an in-place ``ReLU6``; the output layer has no activation.
    """

    def __init__(self):
        super().__init__()
        widths = (784, 512, 256, 256, 10)
        stages = []
        # Hidden stages: affine map followed by ReLU6.
        for fan_in, fan_out in zip(widths[:-2], widths[1:-1]):
            stages.append(nn.Linear(fan_in, fan_out, bias=True))
            stages.append(nn.ReLU6(inplace=True))
        # Output stage: raw class scores.
        stages.append(nn.Linear(widths[-2], widths[-1], bias=True))
        self.engine = nn.Sequential(*stages)

    def forward(self, x):
        """Reshape ``x`` into rows of 784 values and return 10 scores per row."""
        flat = x.view(-1, 784)
        return self.engine(flat)
    

def get_datasets(*args, **kwargs):
    """Create the MNIST training and test datasets.

    Every positional and keyword argument is forwarded unchanged to
    ``torchvision.datasets.MNIST`` (for example ``root=...`` and
    ``download=...``). The only preprocessing applied is ``ToTensor``;
    no normalisation is performed.

    Returns:
        A ``(trainset, testset)`` tuple.
    """
    to_tensor = transforms.Compose([transforms.ToTensor()])

    def build(is_train):
        # Same arguments for both splits; only the ``train`` flag differs.
        return torchvision.datasets.MNIST(*args, train=is_train, transform=to_tensor, **kwargs)

    trainset = build(True)
    testset = build(False)
    return trainset, testset

def get_dataloaders(trainset, testset, batch_size=100, num_worker=4):
    """Wrap the two datasets in ``DataLoader`` objects.

    Args:
        trainset: dataset to iterate in shuffled order.
        testset: dataset to iterate in its stored order.
        batch_size: samples per batch, used for both loaders.
        num_worker: passed as ``num_workers`` to both loaders.

    Returns:
        A ``(trainloader, testloader)`` tuple.
    """
    common = dict(batch_size=batch_size, num_workers=num_worker)
    make_loader = torch.utils.data.DataLoader
    return (
        make_loader(trainset, shuffle=True, **common),
        make_loader(testset, shuffle=False, **common),
    )

# Train the baseline MLP with plain SGD and cross-entropy loss, then save its
# state_dict to PATH. Everything assigned here is a notebook-level global that
# later cells read (mlp, criterion, trainloader, testloader, PATH).
mlp = MLP()
criterion = nn.CrossEntropyLoss()
lr_ = 0.002
optimizer = optim.SGD(mlp.engine.parameters(), lr=lr_)


# Downloads MNIST into ./data on first use.
trainset, testset = get_datasets(root='./data', download=True)
trainloader, testloader = get_dataloaders(trainset, testset, batch_size=100, num_worker=16)
# NOTE(review): loss_ and gradient are allocated here but never written or read
# in the visible cells; their length (15) also does not match the 20 epochs below.
loss_ = np.zeros(15)
gradient = np.zeros((4,15))

for epoch in range(20):  # loop over the dataset multiple times
    # NOTE(review): running_loss is accumulated per epoch but never printed or stored.
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()
        outputs = mlp(inputs)

        
        loss = criterion(outputs, labels)
        loss.backward()

        optimizer.step()

        running_loss += loss.item()

    
print('Finished Training')

# Checkpoint file; later cells load it into both MLP and MLP_DIMA.
PATH = './mnist_mlp.pth'
torch.save(mlp.state_dict(), PATH)

Finished Training


In [19]:
# Reload the saved baseline weights and measure top-1 accuracy on the test loader.
# Depends on mlp, PATH and testloader from the training cell.
# strict=False means missing or unexpected checkpoint keys are ignored, not reported.
mlp.load_state_dict(torch.load(PATH), strict=False)
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = mlp(images)
        # Predicted class = index of the largest score in each row.
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# %d truncates the percentage to an integer.
print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

Accuracy of the network on the 10000 test images: 87 %


In [6]:
# Module-level parameters read by the DIMA layer code in this cell.
# BW is the number of weight bit-planes passed to quantizeWeight/reconstructWeight;
# BX is the number of fractional bits used when quantizing activations.
BW=6
BX=6
# NOTE(review): the values below look like circuit parameters (bit-line voltage
# swing, pulse width, transistor gain, threshold voltage, alpha-power exponent,
# bit-line capacitance, word-line voltage); units are presumably SI — TODO confirm.
VBLMAX = 0.8 
T0 = 100e-12
kn = 220e-6
Vt = 0.4
alpha = 1.8
CBL = 270e-15
VWL = 0.9
Icell = kn*np.power(VWL-Vt,alpha) # Ideal cell current of the discharge path
delta_VBL_LSB = T0*Icell/CBL #The voltage difference on VBL created by the LSB
# kclip scales the weight clipping limit: weights are clamped to +/- kclip * 2**-(BW-1).
kclip = VBLMAX/delta_VBL_LSB 
#kclip = 10000+VBLMAX/delta_VBL_LSB
sigma_Vt = 23.8e-3
# sigma_D is the standard deviation of the zero-mean Gaussian noise applied to the weight bit-planes.
sigma_D = alpha*sigma_Vt/(VWL-Vt)

class MLP_DIMA(nn.Module):
    """784 -> 512 -> 256 -> 256 -> 10 classifier mixing standard and DIMA layers.

    Layer order inside ``self.engine``: Linear (no bias), BatchNorm1d, ReLU6,
    DIMALinear (``layer_index=2``), BatchNorm1d, ReLU6, Linear (with bias),
    ReLU6, DIMALinear (``layer_index=3``). Both DIMA layers are built without
    bias and with ``var=sigma_D``.
    """

    def __init__(self):
        super().__init__()
        stack = [
            nn.Linear(784, 512, bias=False),
            nn.BatchNorm1d(512),
            nn.ReLU6(inplace=True),
            DIMALinear(512, 256, bias=False, var=sigma_D, layer_index=2),
            nn.BatchNorm1d(256),
            nn.ReLU6(inplace=True),
            nn.Linear(256, 256, bias=True),
            nn.ReLU6(inplace=True),
            DIMALinear(256, 10, bias=False, var=sigma_D, layer_index=3),
        ]
        self.engine = nn.Sequential(*stack)

    def forward(self, x):
        """Reshape ``x`` into rows of 784 values and run them through ``engine``."""
        flat = x.view(-1, 784)
        return self.engine(flat)
    
# def get_datasets(*args, **kwargs):
#     transform = transforms.Compose(
#         [
#             transforms.ToTensor(),
# #             transforms.Normalize((0.1307,), (0.3081,))
#         ]
#     )

#     trainset = torchvision.datasets.MNIST(train=True, transform=transform, *args, **kwargs)
#     testset = torchvision.datasets.MNIST(train=False, transform=transform, *args, **kwargs)
#     return trainset, testset

# def get_dataloaders(trainset, testset, batch_size=100, num_worker=4):
#     trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=num_worker)
#     testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=num_worker)

#     return trainloader, testloader

def quantizeInput(X,BX):
    """Split values into ``BX`` binary bit-planes, most significant first.

    ``X`` is first capped at ``1 - 2**-BX``. Plane ``j`` carries weight
    ``2**-(j+1)`` (the weighting ``reconstructInput`` applies). The residue
    left after ``BX`` bits is rounded to nearest by rippling a carry from the
    last plane towards the first.

    Returns:
        A list of ``BX`` float arrays holding 0.0 / 1.0.
    """
    ceiling = 1.0 - np.power(2.0, -BX)
    residue = np.minimum(X, ceiling)
    planes = []
    for _ in range(BX):
        bit = (residue >= 0.5).astype(float)
        planes.append(bit)
        residue = 2.0 * residue - bit
    # Round-to-nearest: a leftover of at least half an LSB becomes a carry-in.
    carry = (residue >= 0.5).astype(float)
    for j in reversed(range(BX)):
        summed = planes[j] + carry
        carry = (summed > 1.5).astype(float)
        # 1 + 1 wraps to 0 in this plane; the carry moves on to the next one.
        planes[j] = summed * (summed != 2.0)
    return planes
def reconstructInput(Xbs,BX):
    """Recombine the first ``BX`` bit-planes into values.

    Plane ``l`` is weighted by ``0.5 ** (l + 1)``, i.e. ``Xbs[0]`` is the most
    significant bit, matching the order produced by ``quantizeInput``.

    Args:
        Xbs: sequence of at least ``BX`` equally shaped arrays of 0/1 values.
            Any numeric or boolean dtype is accepted.
        BX: number of planes to combine.

    Returns:
        A float array with the shape of ``Xbs[0]``.
    """
    # Accumulate in float explicitly. The original inherited the dtype of
    # Xbs[0] and its trailing ``X.astype(float)`` discarded the result, so
    # integer or boolean bit-planes made the in-place add fail.
    X = np.zeros_like(Xbs[0], dtype=float)
    for l in range(BX):
        X += Xbs[l] * np.power(0.5, l + 1.0)
    return X

# def quantizeWeight(W,BW):
#     W = np.minimum(W,1.0-np.power(2.0,-(BW-1.0)))
#     Wbs = []
#     Wbi = np.less(W,0).astype(float)
#     Wbs.append(Wbi)
#     W = (W + Wbi)
#     for i in range(BW-1):
#         Wbi = np.greater_equal(W,0.5).astype(float)
#         Wbs.append(Wbi)
#         W = 2.0*W - Wbi
#     carry = np.greater_equal(W,0.5).astype(float)
#     for i in range(BW):#-1):
#         j=BW-1-i
#         Wbs[j] = Wbs[j]+carry
#         carry = np.greater(Wbs[j],1.5).astype(float)
#         Wbs[j] = Wbs[j]*np.not_equal(Wbs[j],2.0)
#     return Wbs
def quantizeWeight(W,BW):
    """Split a weight tensor into ``BW`` two's-complement bit-planes.

    ``W`` is capped at ``1 - 2**-(BW-1)``. Plane 0 is the sign plane (1 where
    the capped weight is negative); planes 1..BW-1 hold the fractional bits,
    most significant first. The residue after the last bit is rounded to
    nearest by rippling a carry from the last plane up through the sign plane.

    Returns:
        A list of ``BW`` float32 tensors of 0.0 / 1.0 with the shape of ``W``.
    """
    residue = torch.clamp(W, max=1.0 - 2 ** (-(BW - 1.0)))
    sign = (residue < 0).float()
    planes = [sign]
    # Adding the sign bit maps negative values onto their fractional part.
    residue = residue + sign
    for _ in range(BW - 1):
        bit = (residue >= 0.5).float()
        planes.append(bit)
        residue = 2.0 * residue - bit
    # Round-to-nearest: a leftover of at least half an LSB becomes a carry-in.
    carry = (residue >= 0.5).float()
    for j in reversed(range(BW)):
        summed = planes[j] + carry
        carry = (summed > 1.5).float()
        # 1 + 1 wraps to 0 in this plane; the carry moves on to the next one.
        planes[j] = summed * (summed != 2.0)
    return planes

# def reconstructWeight(Wbs,BW):
#     W=np.zeros_like(Wbs[0])
#     for j in range(BW):
#         multiplier = np.power(0.5,j)
#         if (j==0):
#             multiplier=-1.0
#         W+=Wbs[j]*multiplier
#     return W
def reconstructWeight(Wbs,BW):
    """Recombine ``BW`` bit-planes into weights.

    Plane 0 is weighted by -1 and plane ``j >= 1`` by ``0.5 ** j``. The sum is
    accumulated in place into a tensor created with ``zeros_like(Wbs[0])``,
    so the result takes the dtype and shape of the first plane.
    """
    scales = [-1.0 if j == 0 else (0.5) ** j for j in range(BW)]
    W = torch.zeros_like(Wbs[0])
    for j, scale in enumerate(scales):
        W += Wbs[j] * scale
    return W

class DIMALinear(nn.Linear):
    """``nn.Linear`` variant whose forward pass emulates a noisy, bit-sliced dot product.

    On every forward pass the stored weights are split into ``BW`` bit-planes
    (``quantizeWeight``), each non-sign plane is scaled by ``1 + noise``, and
    the planes are recombined (``reconstructWeight``) and clipped to
    ``+/- kclip * 2**-(BW-1)``. Activations and outputs are quantized too,
    and when ``in_features > 256`` the dot product is accumulated over
    256-column slices, each quantized separately.

    Relies on the module-level names ``BW``, ``BX``, ``kclip``,
    ``quantizeWeight`` and ``reconstructWeight``.

    NOTE(review): ``forward`` never adds ``self.bias``, even when the layer
    is built with ``bias=True``.
    """
    __constants__ = ['in_features', 'out_features']
    in_features: int
    out_features: int
    weight: Tensor

    def __init__(self, in_features: int, out_features: int, bias: bool = True, var: float = 0, layer_index = 0) -> None:
        """
        Args:
            in_features: input width.
            out_features: output width.
            bias: whether the layer owns a bias parameter.
            var: standard deviation of the zero-mean Gaussian noise drawn once
                per weight bit; 0 gives a noise-free layer. (Previously this
                argument was ignored and the global ``sigma_D`` was used.)
            layer_index: 0 selects the activation quantizer without the
                clamp/rescale by 6; any other value selects the one with it.
        """
        # nn.Linear already creates and initialises weight/bias; pass ``bias``
        # through instead of rebuilding the parameters by hand.
        super(DIMALinear, self).__init__(in_features, out_features, bias=bias)
        self.noise = np.random.normal(0, var, (BW, out_features, in_features))
        print(self.weight.size())
        self.layer_index = layer_index

    def quantize_activations(self, input):
        """Round activations to multiples of ``2**-BX``, capped at ``1 - 2**-BX``.

        For ``layer_index != 0`` the input is first clamped to [0, 6] and
        divided by 6, and the result is multiplied by 6 again.
        """
        if(self.layer_index != 0):
            input = torch.clamp(input,0,6) / 6
            input = 6 * torch.min(torch.round(input*(2**BX))*(2**(-BX)) ,(1.0-(2**(-BX)))*torch.ones_like(input))
        else:
            input = torch.min(torch.round(input*(2**BX))*(2**(-BX)) ,(1.0-(2**(-BX)))*torch.ones_like(input))
        return input

    def quantize_outputs(self, output):
        """Clamp to [-6, 6] and round ``output / 6`` to multiples of ``2**-(BW-1)``.

        The rounded value is capped at ``1 - 2**-(BW-1)`` and scaled back by 6.
        """
        output = torch.clamp(output,-6,6)
        output = torch.min(torch.round((output/6)*(2**(BW-1.0)))*(2.0**(1.0-BW)),(1.0-(2.0**(1.0-BW)))*torch.ones_like(output))
        output = output * 6
        return output

    def quantize_weights(self):
        """Return the noisy, quantized, clipped copy of ``self.weight``.

        ``self.weight`` itself is left untouched. The previous implementation
        replaced it with the noisy result, so noise compounded on every call
        and the parameter held by an optimizer was no longer the one in use.
        """
        planes = quantizeWeight(self.weight.data, BW)
        for b in range(BW-1):
            # Plane 0 is the sign plane and stays noise-free.
            scale = torch.as_tensor(self.noise[b], dtype=planes[b+1].dtype, device=planes[b+1].device)
            planes[b+1] = planes[b+1]*(1+scale)
        weight = reconstructWeight(planes, BW)
        Wmax = kclip*np.power(2.0,-(BW-1))
        return torch.clamp(weight,-Wmax,Wmax)

    def forward(self, input: Tensor) -> Tensor:
        """Quantize weights and activations, then accumulate quantized partial sums."""
        weight = self.quantize_weights()
        input = self.quantize_activations(input)
        n_in = weight.size()[1]
        if(n_in > 256):
            # Same slice count as before: an exact multiple of 256 yields one
            # extra, all-zero slice.
            val = (n_in // 256) + 1
            output = torch.zeros(input.size()[0], weight.size()[0], dtype=input.dtype, device=input.device)
            for i in range(val):
                lo = i*256
                # Full 256-wide slices; the old upper bound 255+i*256 dropped
                # the last column of every slice.
                x_i = input[:, lo:lo+256]
                w_i = weight[:, lo:lo+256]
                missing = 256 - w_i.size()[1]
                if missing > 0:
                    # Zero-pad the final slice up to 256 columns.
                    x_i = F.pad(x_i, (0, missing))
                    w_i = F.pad(w_i, (0, missing))
                output += self.quantize_outputs(F.linear(x_i, w_i))
        else:
            output = F.linear(input, weight)
            output = self.quantize_outputs(output)
        return output

In [32]:
# Module-level parameters read by the DIMA layer code in this cell.
# BW is the number of weight bit-planes passed to quantizeWeight/reconstructWeight;
# BX is the number of fractional bits used when quantizing activations.
BW=6
BX=6
# NOTE(review): the values below look like circuit parameters (bit-line voltage
# swing, pulse width, transistor gain, threshold voltage, alpha-power exponent,
# bit-line capacitance, word-line voltage); units are presumably SI — TODO confirm.
VBLMAX = 0.8 
T0 = 100e-12
kn = 220e-6
Vt = 0.4
alpha = 1.8
CBL = 270e-15
VWL = 0.9
Icell = kn*np.power(VWL-Vt,alpha) # Ideal cell current of the discharge path
delta_VBL_LSB = T0*Icell/CBL #The voltage difference on VBL created by the LSB
# kclip scales the clipping limit: weights and biases are clamped to +/- kclip * 2**-(BW-1).
kclip = VBLMAX/delta_VBL_LSB 
#kclip = 10000+VBLMAX/delta_VBL_LSB
sigma_Vt = 23.8e-3
# sigma_D is the standard deviation of the zero-mean Gaussian noise applied to the weight and bias bit-planes.
sigma_D = alpha*sigma_Vt/(VWL-Vt)

def quantizeWeight(W,BW):
    """Decompose a tensor into ``BW`` two's-complement bit-planes.

    Values are capped at ``1 - 2**-(BW-1)``. The first plane is the sign
    plane (1 where the capped value is negative); the remaining ``BW - 1``
    planes are the fractional bits, most significant first. Whatever is left
    after the last bit is rounded to nearest via a carry that ripples from
    the last plane up through the sign plane.

    Returns:
        A list of ``BW`` float32 tensors of 0.0 / 1.0 with the shape of ``W``.
    """
    upper = 1.0 - 2 ** (-(BW - 1.0))
    rest = torch.clamp(W, max=upper)
    negative = (rest < 0).float()
    bit_planes = [negative]
    # Shift negative values by +1 so only the fractional part remains.
    rest = rest + negative
    for _ in range(BW - 1):
        current = (rest >= 0.5).float()
        bit_planes.append(current)
        rest = 2.0 * rest - current
    # A remainder of at least half an LSB rounds up.
    carry = (rest >= 0.5).float()
    for idx in range(BW - 1, -1, -1):
        total = bit_planes[idx] + carry
        carry = (total > 1.5).float()
        # A plane that reached 2 wraps to 0 and hands the carry onwards.
        bit_planes[idx] = total * (total != 2.0)
    return bit_planes

def reconstructWeight(Wbs,BW):
    """Sum the first ``BW`` bit-planes into weights.

    The first plane counts as -1 and plane ``j >= 1`` as ``0.5 ** j``.
    Accumulation happens in place on ``zeros_like(Wbs[0])``, so the result
    keeps the dtype and shape of the first plane.
    """
    plane_scales = [(0.5) ** j if j else -1.0 for j in range(BW)]
    W = torch.zeros_like(Wbs[0])
    for j, factor in enumerate(plane_scales):
        W += Wbs[j] * factor
    return W

class DIMALinear(nn.Linear):
    """``nn.Linear`` variant whose forward pass emulates a noisy, bit-sliced dot product.

    On every forward pass the weights (and bias, if present) are split into
    ``BW`` bit-planes (``quantizeWeight``), each non-sign plane is scaled by
    ``1 + noise``, and the planes are recombined (``reconstructWeight``) and
    clipped to ``+/- kclip * 2**-(BW-1)``. Activations and outputs are
    quantized as well. Rounding and parameter quantization use a
    straight-through estimator so gradients still reach the stored parameters.
    When ``in_features > 256`` the dot product is accumulated over 256-column
    slices, each quantized separately.

    Relies on the module-level names ``BW``, ``BX``, ``kclip``,
    ``quantizeWeight`` and ``reconstructWeight``.
    """
    __constants__ = ['in_features', 'out_features']
    in_features: int
    out_features: int
    weight: Tensor

    def __init__(self, in_features: int, out_features: int, bias: bool = True, var: float = 0, layer_index = 0) -> None:
        """
        Args:
            in_features: input width.
            out_features: output width.
            bias: whether the layer owns a bias parameter. ``False`` is now
                honoured; previously it was dropped and the constructor then
                failed on ``self.bias.size()``.
            var: standard deviation of the zero-mean Gaussian noise drawn once
                per weight/bias bit; 0 gives a noise-free layer. (Previously
                this argument was ignored and the global ``sigma_D`` was used.)
            layer_index: 0 selects the activation quantizer without the
                clamp/rescale by 6; any other value selects the one with it.
        """
        # nn.Linear already creates and initialises weight/bias; pass ``bias``
        # through instead of rebuilding the parameters by hand.
        super(DIMALinear, self).__init__(in_features, out_features, bias=bias)
        self.noise = np.random.normal(0, var, (BW, out_features, in_features))
        if self.bias is not None:
            self.bias_noise = np.random.normal(0, var, (BW, out_features))
            print(self.bias.size())
        else:
            self.bias_noise = None
        print(self.weight.size())
        self.layer_index = layer_index

    def quantize_activations(self, input):
        """Round activations to multiples of ``2**-BX``, capped at ``1 - 2**-BX``.

        For ``layer_index != 0`` the input is first clamped to [0, 6] and
        divided by 6, and the result is multiplied by 6 again.
        """
        if(self.layer_index != 0):
            input = torch.clamp(input,0,6) / 6
            input = 6 * torch.min(self.round_f(input*(2**BX))*(2**(-BX)) ,(1.0-(2**(-BX)))*torch.ones_like(input))
        else:
            input = torch.min(self.round_f(input*(2**BX))*(2**(-BX)) ,(1.0-(2**(-BX)))*torch.ones_like(input))
        return input

    def quantize_outputs(self, output):
        """Clamp to [-6, 6] and round ``output / 6`` to multiples of ``2**-(BW-1)``.

        The rounded value is capped at ``1 - 2**-(BW-1)`` and scaled back by 6.
        """
        output = torch.clamp(output,-6,6)
        output = torch.min(self.round_f((output/6)*(2**(BW-1.0)))*(2.0**(1.0-BW)),(1.0-(2.0**(1.0-BW)))*torch.ones_like(output))
        output = output * 6
        return output

    def round_f(self, x): #rounds a number to the nearest integer with STE for gradients
        """Return ``torch.round(x)`` in the forward pass with the gradient of the identity."""
        x_r = torch.round(x)
        x_g = x
        return (x_r - x_g).detach() + x_g

    def _noisy_quantize(self, param, noise):
        """Bit-slice ``param``, scale each non-sign plane by ``1 + noise[b]``, recombine and clip.

        The quantized value is returned through a straight-through estimator,
        so the backward pass treats this step as the identity on ``param``.
        """
        planes = quantizeWeight(param.data, BW)
        for b in range(BW-1):
            # Plane 0 is the sign plane and stays noise-free.
            scale = torch.as_tensor(noise[b], dtype=planes[b+1].dtype, device=planes[b+1].device)
            planes[b+1] = planes[b+1]*(1+scale)
        value = reconstructWeight(planes, BW)
        limit = kclip*np.power(2.0,-(BW-1))
        value = torch.clamp(value,-limit,limit)
        return (value - param).detach() + param

    def quantize_weights(self):
        """Return the noisy, quantized, clipped weights; ``self.weight`` is not modified."""
        return self._noisy_quantize(self.weight, self.noise)

    def quantize_bias(self):
        """Return the noisy, quantized, clipped bias; requires the layer to have a bias."""
        return self._noisy_quantize(self.bias, self.bias_noise)

    def forward(self, input: Tensor) -> Tensor:
        """Quantize parameters and activations, then accumulate quantized partial sums.

        With a bias, it is added to the partial sum of the last slice (or to
        the single product when ``in_features <= 256``) before that sum is
        quantized.
        """
        weight = self.quantize_weights()
        # ``bias`` is always bound now; it used to be undefined for bias-free layers.
        bias = self.quantize_bias() if self.bias is not None else None
        input = self.quantize_activations(input)
        n_in = weight.size()[1]
        if(n_in > 256):
            # Same slice count as before: an exact multiple of 256 yields one
            # extra, all-zero slice, which is the one that carries the bias.
            val = (n_in // 256) + 1
            output = torch.zeros(input.size()[0], weight.size()[0], dtype=input.dtype, device=input.device)
            for i in range(val):
                lo = i*256
                x_i = input[:, lo:lo+256]
                w_i = weight[:, lo:lo+256]
                if(i == val - 1):
                    # Zero-pad the final slice up to 256 columns.
                    missing = 256 - w_i.size()[1]
                    x_i = F.pad(x_i, (0, missing))
                    w_i = F.pad(w_i, (0, missing))
                    partial = F.linear(x_i, w_i, bias)
                else:
                    partial = F.linear(x_i, w_i)
                output += self.quantize_outputs(partial)
        else:
            output = F.linear(input, weight, bias)
            output = self.quantize_outputs(output)
        return output
    
class MLP_DIMA(nn.Module):
    """784 -> 512 -> 256 -> 256 -> 10 classifier with DIMA layers after the first Linear.

    ``self.engine`` has the same layout as the baseline ``MLP`` (Linear-type
    layers at indices 0, 2, 4, 6 with a ``ReLU6`` after each hidden one), so
    a baseline ``state_dict`` maps onto it by name. Index 0 is a plain
    ``nn.Linear``; indices 2, 4 and 6 are ``DIMALinear`` layers with
    ``layer_index`` 1, 2 and 3, each built with a bias and ``var=sigma_D``.
    """

    def __init__(self):
        super().__init__()
        stack = [
            nn.Linear(784, 512, bias=True),
            nn.ReLU6(inplace=True),
            DIMALinear(512, 256, bias=True, var=sigma_D, layer_index=1),
            nn.ReLU6(inplace=True),
            DIMALinear(256, 256, bias=True, var=sigma_D, layer_index=2),
            nn.ReLU6(inplace=True),
            DIMALinear(256, 10, bias=True, var=sigma_D, layer_index=3),
        ]
        self.engine = nn.Sequential(*stack)

    def forward(self, x):
        """Reshape ``x`` into rows of 784 values and run them through ``engine``."""
        flat = x.view(-1, 784)
        return self.engine(flat)
    
# def get_datasets(*args, **kwargs):
#     transform = transforms.Compose(
#         [
#             transforms.ToTensor(),
# #             transforms.Normalize((0.1307,), (0.3081,))
#         ]
#     )

#     trainset = torchvision.datasets.MNIST(train=True, transform=transform, *args, **kwargs)
#     testset = torchvision.datasets.MNIST(train=False, transform=transform, *args, **kwargs)
#     return trainset, testset

# def get_dataloaders(trainset, testset, batch_size=100, num_worker=4):
#     trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=num_worker)
#     testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=num_worker)

#     return trainloader, testloader





In [33]:
# Build the DIMA model, load the baseline checkpoint into it and time one
# accuracy pass over the test loader.
# Depends on PATH and testloader from earlier cells.
# NOTE(review): this import lives mid-notebook; later cells also rely on it.
import time
# start_time = time.time()
mlp_DIMA = MLP_DIMA()
# strict=False means missing or unexpected checkpoint keys are ignored, not reported.
mlp_DIMA.load_state_dict(torch.load(PATH), strict=False)
# BW=6
# BX=6
# VBLMAX = 0.8 
# T0 = 100e-12
# kn = 220e-6
# Vt = 0.4
# alpha = 1.8
# CBL = 270e-15
# VWL = 0.9
# Icell = kn*np.power(VWL-Vt,alpha) # Ideal cell current of the discharge path
# delta_VBL_LSB = T0*Icell/CBL #The voltage difference on VBL created by the LSB
# kclip = VBLMAX/delta_VBL_LSB 
# #kclip = 10000+VBLMAX/delta_VBL_LSB
# sigma_Vt = 23.8e-3
# sigma_D = alpha*sigma_Vt/(VWL-Vt)

# Lists so several runs could be averaged; only one run is recorded here.
time_ = []
acc = []
# for i in range (1):
start_time = time.time()
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = mlp_DIMA(images)
        # Predicted class = index of the largest score in each row.
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
time_.append(time.time() - start_time)
acc.append(100 * correct / total)
#     print('Accuracy of the network on the 10000 test images: %d %%' % (
#         100 * correct / total))

#     print("--- %s seconds ---" % (time.time() - start_time))

# Accuracy in percent and wall-clock seconds, averaged over the recorded runs.
print('ACC:',np.average(np.array(acc)))
print('Time:',np.average(np.array(time_)))

torch.Size([256])
torch.Size([256, 512])
torch.Size([256])
torch.Size([256, 256])
torch.Size([10])
torch.Size([10, 256])
tensor([[-0.1709,  0.1564,  0.0200,  ..., -0.0026,  0.0697,  0.0341],
        [ 1.0000, -0.1640,  0.0830,  ..., -0.2818, -0.1110, -0.0348],
        [ 1.0000,  0.2533,  0.1853,  ...,  0.4531,  1.0000,  0.3212],
        ...,
        [ 0.0195,  0.0819,  0.0316,  ...,  0.1696, -0.1886, -0.0337],
        [-0.5132,  0.2371,  0.0574,  ..., -0.4033, -0.0387, -0.0234],
        [ 0.0679, -0.1255, -0.0501,  ...,  0.0870,  1.0000, -0.0508]])
tensor([[ 1.0000, -0.2181,  0.0603,  ..., -1.3076, -0.9441,  0.0222],
        [-0.3641, -0.0628,  0.0267,  ...,  0.1727, -0.0992,  0.1877],
        [-0.2592, -0.0161,  0.0063,  ..., -0.0282, -0.3386,  0.1106],
        ...,
        [-0.5262,  0.2460,  0.0247,  ..., -0.4320, -0.2543,  0.0204],
        [-0.0354, -0.0178,  0.1222,  ...,  0.4646, -0.1285,  0.0829],
        [-0.3113, -0.6743,  0.0761,  ..., -0.0272,  0.2829,  0.0208]])
tensor([[-0

          1.0000e+00, -6.1192e-02]])
tensor([[-0.0434, -0.0272,  0.0198,  ..., -0.0310,  0.1088,  1.0000],
        [ 1.0000, -0.0496,  0.2846,  ...,  0.4494, -0.6017,  0.0828],
        [-0.5622, -0.0159,  0.1384,  ...,  0.1487, -0.7035,  0.0551],
        ...,
        [-0.2128, -0.0794, -0.0383,  ..., -0.0316,  0.1335,  0.3226],
        [-0.0560, -0.0258, -0.1699,  ..., -0.0585, -0.3183,  0.2109],
        [-0.1742,  0.0481,  0.0412,  ...,  0.0564, -0.7824, -0.1890]])
tensor([[ 0.0123,  0.0778,  0.0119,  ...,  1.0000,  1.0000,  0.0054],
        [-0.1130,  0.1034,  0.0728,  ..., -0.0649,  0.0540,  0.0114],
        [-0.2284,  0.3959,  0.1343,  ..., -1.7979,  0.3744,  0.0361],
        ...,
        [-0.1963, -0.6156, -0.0360,  ...,  0.1188,  1.0000, -0.1228],
        [ 0.1355,  0.0170,  0.0721,  ...,  1.0000, -0.1582,  0.2597],
        [ 1.0000, -0.0152,  0.1232,  ...,  0.1806, -0.6020,  0.0932]])
tensor([[ 5.8245e-02, -3.8487e-01,  1.4759e-02,  ...,  1.0000e+00,
          1.0000e+00,  1.122

tensor([[ 1.0000,  0.0439,  0.2042,  ...,  0.4741, -0.2435,  0.1637],
        [-1.5058, -0.3217, -0.0947,  ...,  1.0000,  0.0913, -0.0105],
        [-0.0512,  0.0046,  0.1109,  ...,  1.0000, -0.5283,  0.0853],
        ...,
        [-0.1142, -0.0850, -0.0451,  ...,  1.0000, -0.5114,  0.0195],
        [ 1.0000,  0.3967,  0.0020,  ..., -0.5952, -0.2627,  0.0063],
        [ 0.0803,  0.1012,  0.0062,  ..., -0.0045,  0.0089, -0.2810]])
tensor([[-0.1702,  0.0606,  0.0106,  ...,  0.0641, -1.1497, -0.3559],
        [ 1.0000,  0.0678,  0.0515,  ..., -0.4458,  0.1736,  0.0748],
        [-1.1666, -0.0560,  0.1397,  ..., -0.0309, -0.2281, -0.0114],
        ...,
        [ 0.2903, -0.1506,  0.0609,  ...,  1.0000, -2.3699,  0.1494],
        [ 0.0921,  0.0857, -0.0204,  ..., -0.0284,  1.0000, -0.0198],
        [ 0.3017,  0.1177, -0.0132,  ...,  0.1190, -0.4914,  0.0428]])
tensor([[ 0.0790,  0.0938, -0.0014,  ..., -0.5422,  1.0000,  0.0148],
        [ 0.3095, -0.0053, -0.0729,  ...,  0.0432, -0.3123,  0

tensor([[ 1.0000e+00,  2.2547e-01, -9.0668e-02,  ..., -6.6274e-02,
         -2.8623e-01,  6.1982e-02],
        [-2.3985e-01,  7.2366e-02,  6.1700e-02,  ...,  1.9106e-01,
         -2.5487e-01,  4.3271e-02],
        [-1.6122e-01,  1.0037e-01,  1.0816e-01,  ...,  3.4168e-02,
          1.7148e-01, -4.0781e-04],
        ...,
        [-3.2896e-01,  5.9241e-02,  7.8663e-03,  ...,  3.4501e-01,
         -1.9902e-02,  2.9492e-02],
        [-2.7570e-01,  2.9761e-02,  9.2756e-02,  ...,  1.0784e-01,
         -1.8893e-01, -4.3455e-02],
        [ 1.0000e+00, -2.0398e-01,  7.1617e-02,  ...,  2.5457e-01,
         -7.2826e-01,  1.4397e-02]])
tensor([[-3.6210e-01,  4.5748e-03,  7.5893e-03,  ...,  7.6717e-02,
         -5.4019e-01,  2.1015e-01],
        [ 1.0000e+00,  6.1166e-02, -1.3510e-01,  ...,  2.7085e-01,
          2.5419e-02,  4.5232e-02],
        [ 6.2161e-02,  1.7137e-01,  4.8831e-03,  ...,  1.3799e-01,
          2.2357e-02, -3.2877e-01],
        ...,
        [ 8.8598e-02, -7.1930e-03,  2.5033e-02

tensor([[ 1.0000,  0.3362,  0.0278,  ..., -0.0941, -1.5049,  0.3131],
        [ 1.0000,  1.0000, -0.0030,  ..., -0.1081, -0.0781,  0.0904],
        [-0.2428,  0.1055, -0.0246,  ...,  1.0000, -0.0899, -0.0281],
        ...,
        [-0.0122,  0.1183,  0.0212,  ..., -0.1401,  0.2569,  0.0421],
        [-0.2273,  0.0632,  0.1038,  ..., -0.0133, -0.0167, -0.0947],
        [-0.0601,  0.0872, -0.1196,  ...,  0.1130, -0.4862,  0.0427]])
tensor([[-0.4051,  1.0000, -0.0538,  ...,  1.0000, -0.0068,  0.1980],
        [-0.0480,  0.0940,  0.0684,  ...,  0.1278,  1.0000,  0.0171],
        [-0.4316,  0.2016,  0.0689,  ...,  0.1772,  1.0000,  0.1282],
        ...,
        [-0.2659,  0.1315,  0.1965,  ...,  0.1167,  0.1753,  1.0000],
        [-0.1226,  0.4259, -0.0432,  ...,  0.1961, -0.4263,  0.0197],
        [-0.1106, -0.0419,  0.0402,  ...,  0.2015, -0.0748,  0.1877]])
tensor([[ 0.0567,  0.0311,  0.0200,  ..., -0.1181, -0.1133,  0.0660],
        [-0.3373,  0.1036, -0.1694,  ..., -0.0275,  1.0000,  0

          1.0000e+00,  4.2666e-03]])
tensor([[ 1.0000e+00,  6.2844e+01, -2.7699e-02,  ...,  4.0386e-02,
         -4.7592e-04,  6.3835e-02],
        [-4.7652e-01, -8.0417e-03,  5.9526e-02,  ...,  1.0679e-01,
          7.3645e-03,  2.8978e-02],
        [-3.1513e-02,  2.5624e-02, -1.4736e-02,  ...,  1.0000e+00,
         -4.2686e-01,  9.1122e-02],
        ...,
        [-7.7180e-02,  2.1645e-02, -9.4187e-03,  ...,  3.0766e-01,
         -2.2650e-01,  1.2924e-01],
        [ 4.1696e-01, -3.7626e-02,  1.0988e-01,  ...,  1.0000e+00,
         -4.4778e-01, -1.5663e-03],
        [ 1.4172e-01,  3.3972e-02,  6.4973e-02,  ..., -4.0635e-02,
         -7.4508e-02,  7.2774e-02]])
tensor([[-0.2301,  0.0429, -0.0120,  ..., -0.1285, -1.5939,  0.1046],
        [ 0.2689,  0.0432, -0.0038,  ...,  0.3150, -0.2697,  0.0190],
        [ 0.2301,  0.2844,  0.0086,  ..., -0.4666, -0.2244,  0.0884],
        ...,
        [ 0.0371,  0.0028,  0.0670,  ...,  1.0000,  0.0212,  0.0461],
        [ 0.0021,  0.0784, -0.0137,  .

In [6]:
# Fine-tune the DIMA model for two epochs with plain SGD, then save its state_dict.
# Depends on mlp_DIMA, criterion and trainloader created in earlier cells.
# Rebinds the notebook-level lr_ and optimizer used for the baseline model.
lr_ = 0.0005
optimizer = optim.SGD(mlp_DIMA.engine.parameters(), lr=lr_)

for epoch in range(2):  # loop over the dataset multiple times
    # NOTE(review): running_loss is accumulated per epoch but never printed or stored.
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()
        outputs = mlp_DIMA(inputs)

        
        loss = criterion(outputs, labels)
        loss.backward()

        optimizer.step()

        running_loss += loss.item()

    
print('Finished Training')

# Separate checkpoint file so the baseline weights in PATH are not overwritten.
PATH_ = './mnist_mlp_dima.pth'
torch.save(mlp_DIMA.state_dict(), PATH_)

Finished Training


In [7]:
# Time one accuracy pass of the in-memory mlp_DIMA over the test loader.
# The reload from PATH_ below is commented out, so this evaluates whatever
# mlp_DIMA currently holds in the kernel.
# Depends on mlp_DIMA, testloader and the `time` import from earlier cells.
# mlp_DIMA = MLP_DIMA()
# mlp_DIMA.load_state_dict(torch.load(PATH_), strict=False)

# Lists so several runs could be averaged; only one run is recorded here.
time_ = []
acc = []
# for i in range (1):
start_time = time.time()
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = mlp_DIMA(images)
        # Predicted class = index of the largest score in each row.
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
time_.append(time.time() - start_time)
acc.append(100 * correct / total)
#     print('Accuracy of the network on the 10000 test images: %d %%' % (
#         100 * correct / total))

#     print("--- %s seconds ---" % (time.time() - start_time))

# Accuracy in percent and wall-clock seconds, averaged over the recorded runs.
print('ACC:',np.average(np.array(acc)))
print('Time:',np.average(np.array(time_)))

ACC: 88.49
Time: 28.37289333343506
