### Layers: Fundamental building blocks of a neural network

In [16]:
import math

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.nn import Linear, ReLU

In [2]:
# Display the installed PyTorch version so readers know which release produced the outputs below.
torch.__version__

'1.0.0'

In [3]:
# Prefer the first CUDA GPU when one is visible; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

## Linear (dense/fully connected) layer

In [5]:
# A single random sample with 10 features: shape (1, 10).
inp = torch.randn(1, 10)
# Fully connected layer mapping 10 input features to 5 outputs, with a learnable bias.
myLayer = Linear(10, 5, bias=True)
# Show the layer's output next to its shape.
myLayer(inp), myLayer(inp).shape

(tensor([[-0.8729, -0.5990,  0.1757,  0.6025, -0.7942]],
        grad_fn=<AddmmBackward>), torch.Size([1, 5]))

In [7]:
# The weight is a learnable Parameter laid out as (out_features, in_features), i.e. (5, 10) here.
myLayer.weight, myLayer.weight.shape

(Parameter containing:
 tensor([[-0.2369,  0.0671,  0.2368,  0.0310,  0.1671,  0.1415,  0.0013,  0.2144,
          -0.1217, -0.0554],
         [ 0.1078,  0.2038,  0.1662, -0.0033,  0.2745, -0.1021,  0.0875,  0.2908,
           0.1262,  0.2703],
         [-0.2449,  0.0744,  0.1469, -0.2009, -0.2778,  0.2306,  0.1444, -0.1536,
           0.0673, -0.0356],
         [ 0.2840, -0.2210, -0.2120,  0.0253, -0.0694, -0.2035,  0.0589,  0.1176,
          -0.3014,  0.0294],
         [-0.1369,  0.0943,  0.0795,  0.2927, -0.0427, -0.1963,  0.1683, -0.2761,
           0.1941, -0.2954]], requires_grad=True), torch.Size([5, 10]))

In [10]:
# One learnable bias value per output feature; .type() reports the tensor type as a string.
myLayer.bias, myLayer.bias.type()

(Parameter containing:
 tensor([ 0.0230, -0.1650,  0.2917, -0.2488, -0.2449], requires_grad=True),
 'torch.FloatTensor')

### Stacking Linear layers

In [12]:
# Chain two linear layers: the first layer's 5 outputs feed the second layer's 5 inputs.
myLayer1 = Linear(in_features=10, out_features=5)
myLayer2 = Linear(in_features=5, out_features=2)
# Feed the (1, 10) sample through both layers and show the result with its shape.
myLayer2(myLayer1(inp)), myLayer2(myLayer1(inp)).shape

(tensor([[-0.3072,  0.5236]], grad_fn=<AddmmBackward>), torch.Size([1, 2]))

### PyTorch Non-linear Activations

In [13]:
# One row of four values, two of them negative, used to demonstrate the activations.
sample_data = torch.tensor([[1, 2, -1, -1]], dtype=torch.float32)
sample_data.shape

torch.Size([1, 4])

In [17]:
# ReLU as a module object: instantiate it once, then call it like a function.
myRelu = ReLU()
# Negative entries become zero; positive entries pass through unchanged.
myRelu(sample_data)

tensor([[1., 2., 0., 0.]])

In [18]:
# Functional form of ReLU: no module object to construct, just call F.relu on the tensor.
sample_data = torch.tensor([[1, 2, -1, -1]], dtype=torch.float32)
f = F.relu(sample_data)
f

tensor([[1., 2., 0., 0.]])

### Neural Network 

In [19]:
class MyFirstNetwork(nn.Module):
    """Two-layer fully connected network: Linear -> ReLU -> Linear.

    Args:
        input_size: number of features in each input sample.
        hidden_size: number of units in the hidden layer.
        output_size: number of values produced per sample.
    """

    def __init__(self,input_size,hidden_size,output_size):
        super(MyFirstNetwork,self).__init__()
        self.layer1 = nn.Linear(input_size,hidden_size)
        self.layer2 = nn.Linear(hidden_size,output_size)

    def forward(self,input):
        """Run a forward pass.

        Args:
            input: tensor whose last dimension equals ``input_size``.

        Returns:
            Tensor whose last dimension equals ``output_size``.
        """
        # nn.Module.__call__ dispatches to a method named exactly `forward`;
        # any other name leaves the model un-callable.
        out = self.layer1(input)
        # Apply the activation to the tensor. nn.ReLU(out) would only construct
        # a module object instead of transforming `out`.
        out = F.relu(out)
        out = self.layer2(out)
        return out

### Loss

In [20]:
# Mean-squared-error demo: random predictions against random targets, both of shape (3, 5).
loss = nn.MSELoss()
input = torch.randn(3, 5).requires_grad_()  # leaf tensor that tracks gradients
target = torch.randn(3, 5)
# The criterion returns a scalar; backward() fills input.grad with d(loss)/d(input).
output = loss(input, target)
output.backward()

In [21]:
def cross_entropy(true_label, prediction):
    """Binary cross-entropy loss for a single example.

    The loss is high when the prediction is bad and low when it is good.

    Args:
        true_label: the ground-truth class; the value 1 selects the
            ``-log(prediction)`` branch, anything else selects
            ``-log(1 - prediction)``.
        prediction: predicted probability of class 1.

    Returns:
        The loss as a float.

    Raises:
        ValueError: if the argument passed to the logarithm is not positive
            (e.g. ``prediction == 0`` with ``true_label == 1``).
    """
    # Use math.log explicitly: a bare `log` is not defined in this notebook.
    if true_label == 1:
        return -math.log(prediction)
    else:
        return -math.log(1 - prediction)

In [23]:
# Multi-class demo: 3 samples, each with raw scores for 5 classes.
loss = nn.CrossEntropyLoss()
input = torch.randn(3, 5).requires_grad_()
# Targets are integer class indices drawn from 0..4, one per sample.
target = torch.empty(3, dtype=torch.long).random_(5)
output = loss(input, target)
output.backward()

### Optimizer

In [25]:
# Demo: a minimal SGD training loop.
# The model, data and loss function below are small stand-ins so that this cell
# runs on a fresh kernel without relying on names defined elsewhere.
model = nn.Sequential(nn.Linear(10, 5), nn.ReLU(), nn.Linear(5, 2))
dataset = [(torch.randn(1, 10), torch.randn(1, 2)) for _ in range(4)]
loss_fn = nn.MSELoss()

optimizer = optim.SGD(model.parameters(), lr = 0.01)
for input, target in dataset:
    # Gradients accumulate across backward() calls, so clear them at the start
    # of every iteration before computing the new ones.
    optimizer.zero_grad()
    output = model(input)
    loss = loss_fn(output, target)
    loss.backward()
    # Update the parameters using the gradients just computed.
    optimizer.step()

'\nimport torch.optim as optim\noptimizer = optim.SGD(model.parameters(), lr = 0.01)\nfor input, target in dataset:\n    #call zero_grad() inside our loop, as the parameters will accumulate the gradients created during the previous optimizer call\n    optimizer.zero_grad()\n    output = model(input)\n    loss = loss_fn(output, target)\n    loss.backward()\n    optimizer.step()\n'