# PyTorch Models Intro
### Building Deep Learning Models using nn.Module

In [1]:
import torch
import torch.nn as nn

# Basic Linear Classifier
* $y = w_1x_1 + w_2x_2 + w_3x_3 + \dots + w_0$
* $w_i$ and $w_0$ are the trainable parameters
* $x_i$ is the input (data), $y$ is the output (prediction)
* All models (and layers) are subclasses of **nn.Module** in PyTorch
* nn.Parameter() is the basic learnable parameter inside the module

## Using Dot Product

In [2]:
class BasicLinearDot(nn.Module):
    """Linear model for one un-batched sample: ``y = w_1 . x + w_0``.

    ``forward`` is built on ``torch.dot``, so ``x`` has to be a 1D tensor
    with ``features_dim`` elements.
    """

    def __init__(self, features_dim):
        # nn.Module has to be initialised before parameters are assigned.
        super().__init__()

        # Bias first, then weights: named_parameters() lists them in the
        # order in which they are registered here.
        self.w_0 = nn.Parameter(torch.zeros(1))             # bias
        self.w_1 = nn.Parameter(torch.randn(features_dim))  # weights

    def forward(self, x):
        """Return the dot product of the weights with ``x``, plus the bias."""
        weighted_sum = self.w_1.dot(x)
        return weighted_sum + self.w_0

In [3]:
# Dot-product model for 1D inputs that carry 20 features.
model_dot = BasicLinearDot(features_dim=20)

In [4]:
# List every registered parameter by name; the extra newline leaves a blank
# line after each entry to keep the output readable.
for param_name, param_value in model_dot.named_parameters():
    print(f'{param_name}: {param_value}', end='\n\n')

w_0: Parameter containing:
tensor([0.], requires_grad=True)

w_1: Parameter containing:
tensor([ 1.7503, -0.7612, -1.3693,  0.1056, -1.1083,  0.0432,  1.7436,  0.1492,
        -0.6401, -0.3198, -0.6363,  2.6257, -0.3170,  0.5642, -0.4097, -0.8689,
        -1.0067,  1.8920, -1.3034, -0.8915], requires_grad=True)



### Test with dummy data

In [5]:
# One un-batched sample: a 1D tensor with a random value for each feature.
features = 20
dummy_data = torch.randn((features,))

dummy_data.shape

torch.Size([20])

In [6]:
# Calling the module runs forward(). The scalar dot product plus the
# 1-element bias w_0 yields a tensor of shape (1,).
pred = model_dot(dummy_data)
pred.shape

torch.Size([1])

### Test with batched dummy data
* Shape: (N, F)
* *N* is the batch size, *F* is the features dim

In [7]:
# A batch of N samples with F features each -> shape (N, F).
batch_size, features = 32, 20
dummy_data_batch = torch.randn((batch_size, features))

dummy_data_batch.shape

torch.Size([32, 20])

### Dot product doesn't work anymore!
* https://pytorch.org/docs/stable/generated/torch.dot.html
* `torch.dot` only works on 1D tensors

In [24]:
# torch.dot accepts only 1D tensors, so the (N, F) batch is rejected.
# The error is caught and printed so the notebook still runs top to bottom.
try:
    model_dot(dummy_data_batch)
except RuntimeError as error:
    print(f'{type(error).__name__}: {error}')

## Using Matrix Multiplication

In [9]:
class BasicLinearMM(nn.Module):
    """Linear model for a batch of samples, built on ``torch.mm``.

    Maps a 2D input of shape (N, F) to an output of shape (N, 1).
    """

    def __init__(self, features_dim):
        # nn.Module has to be initialised before parameters are assigned.
        super().__init__()

        # Bias: a single value that broadcasts over the rows of the output.
        self.w_0 = nn.Parameter(torch.zeros(1))
        # Weights are stored as a 2D (1, F) matrix because torch.mm
        # multiplies two matrices.
        self.w_1 = nn.Parameter(torch.randn(1, features_dim))

    def forward(self, x):
        """Return ``x @ w_1.T + w_0`` for a 2D ``x`` of shape (N, F)."""
        # w_1 is (1, F); transposed it is (F, 1), so
        # (N, F) x (F, 1) -> (N, 1).
        weights_t = self.w_1.T
        projected = torch.mm(x, weights_t)
        return projected + self.w_0

In [10]:
# Batched model: 20 input features, one output value per sample.
model_mm = BasicLinearMM(features_dim=20)

In [11]:
# dummy_data_batch.shape: (N, F) = (32, 20)
# One prediction per row of the batch, so the result is (N, 1).
pred = model_mm(dummy_data_batch)
pred.shape

torch.Size([32, 1])

## What if we need a specific output feature dim?

In [12]:
class BasicLinearMMImproved(nn.Module):
    """Batched linear model with a configurable output width.

    Maps a 2D input of shape (N, F_in) to an output of shape (N, F_out)
    using ``torch.mm``.
    """

    def __init__(self, in_features, out_features):
        # nn.Module has to be initialised before parameters are assigned.
        super().__init__()

        # Bias: one value per output feature.
        self.w_0 = nn.Parameter(torch.zeros(out_features))
        # Weights are stored as a 2D (F_out, F_in) matrix because torch.mm
        # multiplies two matrices.
        self.w_1 = nn.Parameter(torch.randn(out_features, in_features))

    def forward(self, x):
        """Return ``x @ w_1.T + w_0`` for a 2D ``x`` of shape (N, F_in)."""
        # w_1 is (F_out, F_in); transposed it is (F_in, F_out), so
        # (N, F_in) x (F_in, F_out) -> (N, F_out).
        weights_t = self.w_1.T
        projected = torch.mm(x, weights_t)
        return projected + self.w_0

In [29]:
# Batched model that maps 20 input features to 4 output features.
model_mm_improved = BasicLinearMMImproved(in_features=20, out_features=4)

In [30]:
# dummy_data_batch.shape: (N, F) = (32, 20)
# Each row is mapped to out_features values, so the result is (N, 4).
pred = model_mm_improved(dummy_data_batch)
pred.shape

torch.Size([32, 4])

# What if we have high-dimensional data (3 or more dimensions)?

### Test with higher-dimensional batched dummy data
* Shape: (N, S, F)
* *N* is the batch size, *S* is the sequence length, *F* is the features dim

In [15]:
# A batch of N sequences, each with S steps of F features -> shape (N, S, F).
batch_size, sequence_length, features = 32, 10, 20
dummy_multidim_data_batch = torch.randn((batch_size, sequence_length, features))

dummy_multidim_data_batch.shape

torch.Size([32, 10, 20])

### Matrix multiplication doesn't work anymore!
* https://pytorch.org/docs/stable/generated/torch.mm.html
* `torch.mm` only works on 2D tensors

In [32]:
# dummy_multidim_data_batch.shape: (N, S, F)
# torch.mm accepts only 2D tensors, so the 3D batch is rejected.
# The error is caught and printed so the notebook still runs top to bottom.
try:
    model_mm_improved(dummy_multidim_data_batch)
except RuntimeError as error:
    print(f'{type(error).__name__}: {error}')

## Using Broadcastable Matrix Multiplication
### matmul is the most generic function that can perform everything above and more!
* https://pytorch.org/docs/stable/generated/torch.matmul.html
* Performs a different operation depending on the input dimensions!

In [17]:
class BasicLinearBroadcastable(nn.Module):
    """Linear model built on ``torch.matmul``.

    The weight matrix is applied to the last dimension of ``x`` and the
    leading dimensions are kept, e.g. (N, S, F_in) -> (N, S, F_out).
    """

    def __init__(self, in_features, out_features):
        # nn.Module has to be initialised before parameters are assigned.
        super().__init__()

        # Bias: one value per output feature.
        self.w_0 = nn.Parameter(torch.zeros(out_features))
        # Weights: a 2D (F_out, F_in) matrix.
        self.w_1 = nn.Parameter(torch.randn(out_features, in_features))

    def forward(self, x):
        """Return ``x @ w_1.T + w_0`` with ``x`` of shape (..., F_in)."""
        # w_1 is (F_out, F_in); transposed it is (F_in, F_out).
        # For a 3D x: (N, S, F_in) x (F_in, F_out) -> (N, S, F_out);
        # matmul broadcasts the 2D weight matrix over the leading dims.
        weights_t = self.w_1.T
        projected = torch.matmul(x, weights_t)
        return projected + self.w_0

In [18]:
# matmul-based model that maps 20 input features to 4 output features.
model_broadcastable = BasicLinearBroadcastable(in_features=20, out_features=4)

In [19]:
# dummy_multidim_data_batch.shape: (N, S, F) = (32, 10, 20)
# N and S are kept; only the last dim changes to out_features -> (32, 10, 4).
pred = model_broadcastable(dummy_multidim_data_batch)
pred.shape

torch.Size([32, 10, 4])

In [None]:
# Input layouts by data type. N, S and F are the batch size, sequence length
# and features dim used above. C, H and W are presumably channels, height and
# width (not defined elsewhere in this notebook).
# Kept as comments: as bare expressions these lines raise NameError.
# (N, S, F)        text
# (N, C, H, W)     image
# (N, S, C, H, W)  video