In [2]:
import torch
import torch.nn as nn
import torchvision.models as models
import torch.nn.functional as F
import numpy as np
import torch.optim as optim

In [12]:
class LinearBNAC(nn.Module):
    """Fully connected block: an ``nn.Linear`` followed by a role-dependent tail.

    The layers are stored in ``self.linear`` (an ``nn.Sequential``) whose first
    element is always the ``nn.Linear``:

    * hidden block (``is_output=False``):
      Linear -> Dropout -> BatchNorm1d -> LeakyReLU
    * output block with a single unit (``is_output=True``, ``out_channels == 1``):
      Linear -> Sigmoid
    * output block with several units (``is_output=True``):
      Linear -> Softmax over dim 1

    Args:
        in_channels: number of input features of the linear layer.
        out_channels: number of output features of the linear layer.
        bias: whether the linear layer learns an additive bias.
        dropout: drop probability used by hidden blocks; ignored for output blocks.
        is_output: build an output head (Sigmoid/Softmax) instead of a hidden block.
    """

    def __init__(self, in_channels, out_channels, bias=True, dropout=0.3, is_output=False):
        super(LinearBNAC, self).__init__()
        if is_output and out_channels == 1:
            # Must be an assignment: a comparison here would leave `self.linear`
            # unregistered and fail with AttributeError during construction.
            self.linear = nn.Sequential(
                nn.Linear(in_channels, out_channels, bias=bias),
                nn.Sigmoid()
            )
        elif is_output:
            self.linear = nn.Sequential(
                nn.Linear(in_channels, out_channels, bias=bias),
                nn.Softmax(dim=1)
            )
        else:
            self.linear = nn.Sequential(
                nn.Linear(in_channels, out_channels, bias=bias),
                nn.Dropout(dropout),
                nn.BatchNorm1d(out_channels),
                nn.LeakyReLU(inplace=True)
            )

    def forward(self, x):
        """Run ``x`` through the sequential stack and return the result."""
        out = self.linear(x)
        return out

In [13]:
class Model(nn.Module):
    """Feed-forward network: three hidden LinearBNAC blocks and an output head.

    Hidden widths are 128 -> 64 -> 32; the final block is built with
    ``is_output=True`` and ``output_classes`` units.

    Args:
        input_dimension: number of features expected by the first block.
        output_classes: number of units produced by the output block.
    """

    def __init__(self, input_dimension, output_classes=1):
        super().__init__()
        # Attribute names and creation order are kept as-is: they define the
        # parameter names and the order in which parameters are registered.
        self.layer1 = LinearBNAC(input_dimension, 128)
        self.layer2 = LinearBNAC(128, 64)
        self.layer3 = LinearBNAC(64, 32)
        self.output = LinearBNAC(32, output_classes, is_output=True)

    def forward(self, x):
        """Apply the four blocks in sequence and return the head's output."""
        for stage in (self.layer1, self.layer2, self.layer3, self.output):
            x = stage(x)
        return x

In [14]:
# Ten-output network over 256 input features, optimised with Adam using its
# default hyper-parameters.
model = Model(256, output_classes=10)
optimizer = optim.Adam(model.parameters())

In [16]:
# Synthetic mini-batch: standard-normal features plus one integer class index
# per sample (int64, the dtype the classification losses expect for targets).
batch_size, input_features = 4, 256
dummy_input = torch.randn(batch_size, input_features)
target = torch.tensor([9, 5, 4, 4], dtype=torch.long)

In [19]:
# Forward pass on the synthetic batch. The model was built with 10 output
# classes, so its head ends in Softmax(dim=1): each printed row is a
# probability distribution over the classes.
# NOTE(review): no model.eval() call is visible before this cell, so Dropout and
# BatchNorm presumably run in training mode here — confirm that is intended.
output = model(dummy_input)
print(output)

tensor([[0.1024, 0.1546, 0.0892, 0.1649, 0.0778, 0.0962, 0.0914, 0.1122, 0.0570,
         0.0542],
        [0.0560, 0.1267, 0.0485, 0.1055, 0.1204, 0.1097, 0.0739, 0.2145, 0.0831,
         0.0617],
        [0.1279, 0.1137, 0.0737, 0.0759, 0.0620, 0.1002, 0.0716, 0.1317, 0.1512,
         0.0922],
        [0.0818, 0.2158, 0.1194, 0.1474, 0.0805, 0.0651, 0.1217, 0.0614, 0.0638,
         0.0432]], grad_fn=<SoftmaxBackward>)


In [20]:
from torch.nn import NLLLoss, LogSoftmax, CrossEntropyLoss

In [21]:
# The model's output head already applies Softmax, so `output` holds
# probabilities and torch.log(output) holds log-probabilities. NLLLoss is the
# criterion that consumes log-probabilities directly; CrossEntropyLoss expects
# raw logits and would apply its own log-softmax on top of them.
criterion = NLLLoss()
loss = criterion(torch.log(output), target)

In [22]:
# Backpropagate the loss; this fills the .grad attribute of the model
# parameters (inspected in the following cell).
loss.backward()

In [23]:
# Show the first Linear layer's weights and the gradient stored for them.
first_linear = model.layer1.linear[0]
print('weight : {}'.format(first_linear.weight))
print('\n')
print('grad : {}'.format(first_linear.weight.grad))

weight : Parameter containing:
tensor([[ 0.0597, -0.0338,  0.0356,  ..., -0.0240,  0.0238, -0.0577],
        [-0.0329, -0.0046,  0.0065,  ...,  0.0557,  0.0048,  0.0243],
        [-0.0542,  0.0012, -0.0263,  ..., -0.0415,  0.0573, -0.0622],
        ...,
        [ 0.0257,  0.0041,  0.0276,  ...,  0.0189, -0.0202, -0.0189],
        [-0.0609, -0.0333, -0.0160,  ...,  0.0046, -0.0371, -0.0122],
        [ 0.0574, -0.0518,  0.0545,  ..., -0.0020, -0.0300,  0.0072]],
       requires_grad=True)


grad : tensor([[ 0.0004,  0.0009, -0.0003,  ...,  0.0019, -0.0021,  0.0017],
        [-0.0143, -0.0106, -0.0040,  ...,  0.0020, -0.0077,  0.0074],
        [-0.0342,  0.0344,  0.0189,  ..., -0.0214,  0.0172,  0.0139],
        ...,
        [-0.0934, -0.0122, -0.0095,  ..., -0.0139, -0.0013,  0.0062],
        [-0.0213, -0.0314, -0.0022,  ..., -0.0121, -0.0003,  0.0091],
        [-0.0149, -0.0524,  0.0185,  ..., -0.0533,  0.0306,  0.0165]])


In [24]:
# Apply one Adam update to the parameters using the gradients currently held
# in their .grad attributes.
optimizer.step()

In [25]:
# Same inspection after optimizer.step(): the weights have moved, while the
# stored gradient is left untouched by the update.
first_linear = model.layer1.linear[0]
print('weight : {}'.format(first_linear.weight))
print('\n')
print('grad : {}'.format(first_linear.weight.grad))

weight : Parameter containing:
tensor([[ 0.0587, -0.0348,  0.0366,  ..., -0.0250,  0.0248, -0.0587],
        [-0.0319, -0.0036,  0.0075,  ...,  0.0547,  0.0058,  0.0233],
        [-0.0532,  0.0002, -0.0273,  ..., -0.0405,  0.0563, -0.0632],
        ...,
        [ 0.0267,  0.0051,  0.0286,  ...,  0.0199, -0.0192, -0.0199],
        [-0.0599, -0.0323, -0.0150,  ...,  0.0056, -0.0361, -0.0132],
        [ 0.0584, -0.0508,  0.0535,  ..., -0.0010, -0.0310,  0.0062]],
       requires_grad=True)


grad : tensor([[ 0.0004,  0.0009, -0.0003,  ...,  0.0019, -0.0021,  0.0017],
        [-0.0143, -0.0106, -0.0040,  ...,  0.0020, -0.0077,  0.0074],
        [-0.0342,  0.0344,  0.0189,  ..., -0.0214,  0.0172,  0.0139],
        ...,
        [-0.0934, -0.0122, -0.0095,  ..., -0.0139, -0.0013,  0.0062],
        [-0.0213, -0.0314, -0.0022,  ..., -0.0121, -0.0003,  0.0091],
        [-0.0149, -0.0524,  0.0185,  ..., -0.0533,  0.0306,  0.0165]])


In [26]:
# Clear the stored gradients so they do not accumulate into the next
# backward() call. In the recorded run the gradient prints as all zeros
# afterwards.
# NOTE(review): newer PyTorch releases may set .grad to None here instead of
# zeroing it — confirm against the installed version.
optimizer.zero_grad()

In [27]:
# Same inspection after optimizer.zero_grad(): the weights are unchanged and
# only the stored gradient has been reset.
first_linear = model.layer1.linear[0]
print('weight : {}'.format(first_linear.weight))
print('\n')
print('grad : {}'.format(first_linear.weight.grad))

weight : Parameter containing:
tensor([[ 0.0587, -0.0348,  0.0366,  ..., -0.0250,  0.0248, -0.0587],
        [-0.0319, -0.0036,  0.0075,  ...,  0.0547,  0.0058,  0.0233],
        [-0.0532,  0.0002, -0.0273,  ..., -0.0405,  0.0563, -0.0632],
        ...,
        [ 0.0267,  0.0051,  0.0286,  ...,  0.0199, -0.0192, -0.0199],
        [-0.0599, -0.0323, -0.0150,  ...,  0.0056, -0.0361, -0.0132],
        [ 0.0584, -0.0508,  0.0535,  ..., -0.0010, -0.0310,  0.0062]],
       requires_grad=True)


grad : tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])
