# NN Building Blocks

In [3]:
import torch.nn as nn
import torch

In [8]:
l = nn.Linear(2,5) # 2 input, 5 output
v = torch.Tensor([1,2]) # 2 inputs

l1 = l(v)

In [9]:
print(l1.grad)
print(l1)

None
tensor([ 1.1355, -0.9446, -0.3920, -1.6683,  0.0601], grad_fn=<AddBackward0>)


In [10]:
l

Linear(in_features=2, out_features=5, bias=True)

### Useful Methods
These are some useful methods that all ```nn.Module``` children provide

* ```parameters()```: A method that returns iterator of all variables which require gradient computation 
* ```zero_grad()```: This method initializes all gradients of all parameters to zero
* ```to(device)```: This moves all module parameters to a given (CPU or GPU)
* ```state_dict()```: This returns the dictionary with all module parameters and is useful for model serialization
* ```load_state_dict()```: This initializes the module with the state dictionary
* **```Sequential()```**: Allows you to combine other layers into the pipe

In [13]:
"""
3 layer neural network with softmax output, applied along dimension 1, ReLU non-linear functions and dropout. 
"""
s = nn.Sequential(
    nn.Linear(2,5),
    nn.ReLU(),
    nn.Linear(5,20),
    nn.ReLU(),
    nn.Linear(20, 10),
    nn.Dropout(p=0.3),
    nn.Softmax(dim=1)
)

In [14]:
s

Sequential(
  (0): Linear(in_features=2, out_features=5, bias=True)
  (1): ReLU()
  (2): Linear(in_features=5, out_features=20, bias=True)
  (3): ReLU()
  (4): Linear(in_features=20, out_features=10, bias=True)
  (5): Dropout(p=0.3)
  (6): Softmax()
)

In [15]:
"""
Forward prop example
"""
# input
x = torch.Tensor([[1,2]])

# feeding input to our model
s(x)

tensor([[0.0969, 0.0419, 0.0900, 0.0969, 0.1271, 0.0969, 0.1642, 0.1227, 0.1111,
         0.0522]], grad_fn=<SoftmaxBackward>)

At it's core, ```nn.Module``` provides quite rich functionality to its children:
* It tracks all submodules that the current module includes. For example, your building block can have two feed-forward layers used somehow to perform the block's transformation
* It provides functions to deal with all parameters of the registered submodules. 
    * ```parameters()```: obtain a full list of the module's parameters
    * ```zero_grads()```: zero its gradients
    * ```to(decide)```: move to CPU or GPU
    * ```apply()```: perform generic transformation using your own callable method
* Establishes the convention of module application to data. Every module needs to perform its data transformation in the ```forward()``` method by overriding it. 

In [20]:
# Our Model A
class OurModel_A(nn.Module):
    def __init__(self, input_dim, num_classes, dropout_prob=0.3):
        super(OurModel_A, self).__init__()
        self.pipe = nn.Sequential(
            nn.Linear(input_dim, 5),
            nn.ReLU(),
            nn.Linear(5,20),
            nn.ReLU(),
            nn.Linear(20, num_classes),
            nn.Dropout(p=dropout_prob),
            nn.Softmax(dim=1)
        )
        
    def forward(self, x):
        return self.pipe(x)
    
if __name__ == "__main__":
    net = OurModel_A(input_dim=2, num_classes=3)
    v = torch.Tensor([[2,3]])
    out = net(v)
    print(net)
    print(out)

OurModel_A(
  (pipe): Sequential(
    (0): Linear(in_features=2, out_features=5, bias=True)
    (1): ReLU()
    (2): Linear(in_features=5, out_features=20, bias=True)
    (3): ReLU()
    (4): Linear(in_features=20, out_features=3, bias=True)
    (5): Dropout(p=0.3)
    (6): Softmax()
  )
)
tensor([[0.2626, 0.4146, 0.3229]], grad_fn=<SoftmaxBackward>)


In [30]:
# Our Model B
class OurModel_B(nn.Module):
    def __init__(self, input_dim, num_classes, dropout_prob=0.3):
        super(OurModel_B, self).__init__()
        
        # FC layers
        self.fc1 = nn.Linear(input_dim, 5)
        self.fc2 = nn.Linear(5, 20)
        self.fc3 = nn.Linear(20, num_classes)
        
        # Non-Linear Functions
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)

        # regularization 
        self.drop_layer = nn.Dropout(p=dropout_prob)
        
    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.relu(out)
        out = self.fc3(out)
        out = self.drop_layer(out)
        out = self.softmax(out)
        
        return out
    
if __name__ == "__main__":
    net = OurModel_B(input_dim=2, num_classes=3)
    v = torch.Tensor([[2,3]])
    out = net(v)
    print(net)
    print(out)

OurModel_B(
  (fc1): Linear(in_features=2, out_features=5, bias=True)
  (fc2): Linear(in_features=5, out_features=20, bias=True)
  (fc3): Linear(in_features=20, out_features=3, bias=True)
  (relu): ReLU()
  (softmax): Softmax()
  (drop_layer): Dropout(p=0.3)
)
tensor([[0.3162, 0.3162, 0.3676]], grad_fn=<SoftmaxBackward>)
