## Neural Network

In [1]:
from collections import OrderedDict

import torch
import torch.nn as nn
import torch.optim as optim
from torchviz import make_dot, make_dot_from_trace

In [2]:
# Seed PyTorch's global RNG before the first random operation so that the layer
# initialisation and the random tensors created in later cells are reproducible
# under Restart Kernel -> Run All.
SEED = 0
torch.manual_seed(SEED)
# creating a linear layer: an affine map from 10 input features to 2 output features (with bias)
fc = nn.Linear(10, 2)
print(fc)

Linear(in_features=10, out_features=2, bias=True)


In [3]:
# Random input batch: 3 samples x 10 features, drawn from a standard normal distribution.
inp = torch.randn((3, 10))
print(inp)
print(inp.size())

tensor([[ 0.8222, -1.2160,  1.0806,  1.0623,  1.0681,  0.7773,  0.8428, -0.5415,
         -0.1704, -1.5846],
        [-0.1070, -0.6044, -0.0212,  1.3340,  1.6091,  0.8053,  0.7550,  0.9276,
          0.8516,  0.8873],
        [ 1.5932,  0.1221,  2.1838,  0.8292,  1.5914, -0.4277, -0.9490,  0.7399,
          1.2625,  0.0245]])
torch.Size([3, 10])


In [4]:
# Forward pass through the linear layer: the 3x10 batch yields one row of 2 values per sample.
outp = fc(inp)

In [5]:
# Show the layer output followed by the autograd node that produced it.
print(outp, outp.grad_fn, sep="\n")

tensor([[ 0.5288,  0.5150],
        [ 0.3385,  0.0246],
        [-1.0714,  1.3634]], grad_fn=<AddmmBackward>)
<AddmmBackward object at 0x000001F5E8EEED00>


In [6]:
# Activation function: applied element-wise, it passes each real-valued input through a fixed non-linear function.
# ReLU keeps positive values unchanged and clamps negative ones to 0 (compare the last row with `outp`).
relu = nn.ReLU()
relu_outp = relu(outp)
print(relu_outp)

tensor([[0.5288, 0.5150],
        [0.3385, 0.0246],
        [0.0000, 1.3634]], grad_fn=<ReluBackward0>)


In [7]:
# Sigmoid activation: squashes every element of `outp` into the open interval (0, 1).
sig = nn.Sigmoid()
sig_outp = sig(outp)
print(sig_outp)

tensor([[0.6292, 0.6260],
        [0.5838, 0.5061],
        [0.2551, 0.7963]], grad_fn=<SigmoidBackward>)


In [31]:
# Build a one-layer perceptron. Passing an OrderedDict gives each sub-module an
# explicit name (reachable later as mlp.W0 / mlp.relu); nn.Sequential does not
# accept bare (name, module) tuples as positional arguments.
mlp = nn.Sequential(
    OrderedDict(
        [
            ("W0", nn.Linear(10, 2)),  # affine layer: 10 input features -> 2 outputs
            ("relu", nn.ReLU()),  # element-wise non-linearity
        ]
    )
)

In [32]:
print(mlp)

Sequential(
  (W0): Linear(in_features=10, out_features=2, bias=True)
  (relu): ReLU()
)


In [33]:
# Optional: render the autograd graph with torchviz via
#   make_dot(mlp(inp), params=dict(mlp.named_parameters()))
mlp(inp) # forward pass; the result is displayed as the cell's output

tensor([[0.5275, 0.0000],
        [0.0000, 0.1020],
        [0.3626, 0.3920]], grad_fn=<ReluBackward0>)

In [34]:
## optimizers: e.g. SGD, Adam
# `optim` is imported together with the other dependencies at the top of the notebook.
# Adam is handed every parameter of `mlp` and uses a learning rate of 0.1.
adam_opt = optim.Adam(mlp.parameters(), lr=1e-1)

In [35]:
print(adam_opt)

Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.1
    weight_decay: 0
)


In [36]:
## training loop
# train_ex = torch.randn(100,10) + 1
# print(train_ex)

In [37]:
# Clear the gradients of the parameters managed by the optimizer before running backward().
adam_opt.zero_grad()

In [38]:
# Forward pass of the same random batch through the MLP (stored under a new name, so `outp` is untouched).
outpp = mlp(inp)

In [39]:
# Mean absolute deviation of every output element from the constant target 1.
loss = (1 - outpp).abs().mean()

In [40]:
# Back-propagate the loss; this fills .grad on the MLP's parameters (inspected in later cells).
loss.backward()

In [41]:
# NOTE(review): `p` is not used by any cell visible in this part of the notebook.
# Module.parameters() returns an iterator, so it can be consumed only once —
# wrap it in list(...) if it has to be reused.
p = mlp.parameters()

In [42]:
# Snapshot of the weights and bias *before* the optimizer step, for comparison with later cells.
mlp.state_dict()

OrderedDict([('W0.weight',
              tensor([[-0.2551,  0.1458,  0.2463,  0.1426,  0.2499,  0.1214, -0.2068, -0.2181,
                       -0.1807, -0.1357],
                      [-0.0268,  0.2871, -0.1330,  0.2699, -0.0475, -0.2209, -0.2293,  0.1705,
                        0.1033,  0.1367]])),
             ('W0.bias', tensor([-0.0540, -0.0303]))])

In [43]:
# Apply one Adam update using the gradients computed by loss.backward().
adam_opt.step()

In [44]:
# This is the loss computed before the optimizer step; it is not re-evaluated with the updated weights.
print(loss)

tensor(0.7693, grad_fn=<MeanBackward0>)


In [45]:
# Total number of scalar values across all parameter tensors of the model.
total_params = sum(map(torch.numel, mlp.parameters()))

In [46]:
# 10 x 2 weights + 2 biases = 22 values.
print(total_params)

22


In [47]:
# Print the name under which each parameter tensor is registered in the model.
for name,param in mlp.named_parameters():
    print(name)
# To show the sizes as well, print inside the loop instead: print(name, param.numel())

W0.weight
W0.bias


In [51]:
# Gradient of the loss w.r.t. the first layer's weights; the optimizer step did not clear it.
mlp.W0.weight.grad

tensor([[-0.4026,  0.1823, -0.5441, -0.3153, -0.4433, -0.0583,  0.0177, -0.0331,
         -0.1820,  0.2600],
        [-0.2477,  0.0804, -0.3604, -0.3605, -0.5334, -0.0629,  0.0323, -0.2779,
         -0.3523, -0.1520]])

In [52]:
# Weights after the Adam step: compared with the state_dict shown earlier, every entry moved by
# about 0.1 (the learning rate) in the direction opposite to the sign of its gradient.
mlp.W0.weight

Parameter containing:
tensor([[-0.1551,  0.0458,  0.3463,  0.2426,  0.3499,  0.2214, -0.3068, -0.1181,
         -0.0807, -0.2357],
        [ 0.0732,  0.1871, -0.0330,  0.3699,  0.0525, -0.1209, -0.3293,  0.2705,
          0.2033,  0.2367]], requires_grad=True)

In [53]:
# Same values as the previous gradient display — reading the weights does not alter the stored gradient.
mlp.W0.weight.grad

tensor([[-0.4026,  0.1823, -0.5441, -0.3153, -0.4433, -0.0583,  0.0177, -0.0331,
         -0.1820,  0.2600],
        [-0.2477,  0.0804, -0.3604, -0.3605, -0.5334, -0.0629,  0.0323, -0.2779,
         -0.3523, -0.1520]])

In [54]:
# Reset the gradients of all model parameters. In the run recorded here they become zero tensors.
# NOTE(review): newer PyTorch releases default to set_to_none=True, which leaves .grad as None — confirm for your version.
mlp.zero_grad()

In [55]:
# zero_grad() only touches gradients: the weights are identical to those shown before the reset.
mlp.W0.weight

Parameter containing:
tensor([[-0.1551,  0.0458,  0.3463,  0.2426,  0.3499,  0.2214, -0.3068, -0.1181,
         -0.0807, -0.2357],
        [ 0.0732,  0.1871, -0.0330,  0.3699,  0.0525, -0.1209, -0.3293,  0.2705,
          0.2033,  0.2367]], requires_grad=True)

In [56]:
# After zero_grad() the stored gradient is all zeros in this run.
mlp.W0.weight.grad

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])