# PyTorch Models Intro
### Building Deep Learning Models using nn.Module

In [1]:
import torch
import torch.nn as nn

# Basic Linear Classifier
* $y = w_1x_1 + w_2x_2 + w_3x_3 + \dots + w_0$
* $w_i$ and $w_0$ are the trainable parameters
* $x_i$ is the input (data), $y$ is the output (prediction)
* All models (and layers) are subclasses of `nn.Module` in PyTorch
* `nn.Parameter()` is the basic learnable parameter inside the module

## Using Dot Product

In [None]:
class BasicLinearDot(nn.Module):
    """Linear classifier for one un-batched sample: y = dot(w_1, x) + w_0.

    Args:
        features_dim: Number of input features, i.e. the length of ``w_1``.
    """

    def __init__(self, features_dim):
        # nn.Module has to be initialised before any nn.Parameter is attached.
        super().__init__()

        bias = torch.zeros(1)
        weights = torch.randn(features_dim)

        # Assignment order (w_0, then w_1) is the order named_parameters() yields.
        self.w_0 = nn.Parameter(bias)
        self.w_1 = nn.Parameter(weights)

    def forward(self, x):
        """Return ``dot(w_1, x) + w_0`` as a tensor of shape ``(1,)``.

        ``torch.dot`` accepts only 1-D tensors, so a batched (2-D) ``x``
        raises ``RuntimeError``.
        """
        weighted_sum = torch.dot(self.w_1, x)
        return weighted_sum + self.w_0

In [3]:
# Instantiate the dot-product classifier for 20 input features.
model_dot = BasicLinearDot(features_dim=20)

In [4]:
# List every learnable parameter; the extra newline keeps entries visually apart.
for name, parameter in model_dot.named_parameters():
    print(f'{name}: {parameter}', end='\n\n')

w_0: Parameter containing:
tensor([0.], requires_grad=True)

w_1: Parameter containing:
tensor([-4.6346e-01, -7.3105e-01, -4.6454e-01,  9.5200e-01, -1.4984e+00,
         2.6727e-02, -1.6820e+00, -3.8680e-01,  1.2621e+00,  8.9513e-02,
         4.5511e-01,  7.5176e-01,  1.9143e+00,  1.0037e+00,  1.6121e-01,
        -1.6499e+00, -1.8510e+00, -1.6325e+00,  1.0073e+00,  2.5170e-04],
       requires_grad=True)



### Test with dummy data

In [5]:
# One un-batched sample: a 1-D tensor holding a value per feature.
features = 20
dummy_data = torch.randn((features,))

dummy_data.shape

torch.Size([20])

In [6]:
# Forward pass on the single 1-D sample.
pred = model_dot(dummy_data)
pred.shape

torch.Size([1])

### Test with batched dummy data
* Shape: (N, F)
* *N* is the batch size, *F* is the features dim

In [7]:
# A mini-batch: N samples stacked along the first dimension -> (N, F).
batch_size, features = 32, 20
dummy_data_batch = torch.randn((batch_size, features))

dummy_data_batch.shape

torch.Size([32, 20])

### Dot product doesn't work anymore!
* https://pytorch.org/docs/stable/generated/torch.dot.html
* `torch.dot` only works on 1D tensors

In [8]:
# torch.dot only accepts 1-D tensors, so the 2-D batch is expected to fail.
# The error is caught and printed so that "Restart & Run All" continues past
# this cell instead of stopping here.
try:
    model_dot(dummy_data_batch)
except RuntimeError as err:
    print(f'{type(err).__name__}: {err}')

RuntimeError: 1D tensors expected, but got 1D and 2D tensors

## Using Matrix Multiplication

In [None]:
class BasicLinearMM(nn.Module):
    """Linear classifier for a 2-D batch, computed with ``torch.mm``.

    Args:
        features_dim: Number of input features F.
    """

    def __init__(self, features_dim):
        # nn.Module has to be initialised before any nn.Parameter is attached.
        super().__init__()

        bias = torch.zeros(1)
        # Kept as a (1, F) matrix because torch.mm needs 2-D operands.
        weights = torch.randn(1, features_dim)

        self.w_0 = nn.Parameter(bias)
        self.w_1 = nn.Parameter(weights)

    def forward(self, x):
        """Map ``x`` of shape ``(N, F)`` to predictions of shape ``(N, 1)``."""
        # (N, F) @ (F, 1) -> (N, 1); .T transposes w_1 from (1, F) to (F, 1).
        projected = torch.mm(x, self.w_1.T)
        # The single-element bias broadcasts across all N rows.
        return projected + self.w_0

In [10]:
# Instantiate the torch.mm-based classifier for 20 input features.
model_mm = BasicLinearMM(features_dim=20)

In [11]:
# dummy_data_batch.shape: (N, F) -> pred.shape: (N, 1)
pred = model_mm(dummy_data_batch)
pred.shape

torch.Size([32, 1])

## What if we need a specific output feature dim?

In [None]:
class BasicLinearMMImproved(nn.Module):
    """Linear layer for a 2-D batch with a configurable output width.

    Args:
        in_features: Number of input features F_in.
        out_features: Number of output features F_out.
    """

    def __init__(self, in_features, out_features):
        # nn.Module has to be initialised before any nn.Parameter is attached.
        super().__init__()

        # One bias value per output feature.
        bias = torch.zeros(out_features)
        # Kept as a (F_out, F_in) matrix because torch.mm needs 2-D operands.
        weights = torch.randn(out_features, in_features)

        self.w_0 = nn.Parameter(bias)
        self.w_1 = nn.Parameter(weights)

    def forward(self, x):
        """Map ``x`` of shape ``(N, F_in)`` to an output of shape ``(N, F_out)``."""
        # (N, F_in) @ (F_in, F_out) -> (N, F_out); .T transposes w_1.
        projected = torch.mm(x, self.w_1.T)
        # The (F_out,) bias broadcasts across all N rows.
        return projected + self.w_0

In [13]:
# Instantiate the layer mapping 20 input features to 4 output features.
model_mm_improved = BasicLinearMMImproved(in_features=20, out_features=4)

In [14]:
# dummy_data_batch.shape: (N, F_in) -> pred.shape: (N, F_out)
pred = model_mm_improved(dummy_data_batch)
pred.shape

torch.Size([32, 4])

# What if we have high-dimensional data (3 or more dimensions)?

### Test with higher-dimensional batched dummy data
* Shape: (N, S, F)
* *N* is the batch size, *S* is the sequence length, *F* is the features dim

#### Information:
Typical shapes of batched data, by data type:

- (N, S, F) for `textual`
- (N, C, H, W) for `image`
- (N, S, C, H, W) for `video`

In [15]:
# A batch of sequences: N samples, each with S steps of F features -> (N, S, F).
batch_size, sequence_length, features = 32, 10, 20
dummy_multidim_data_batch = torch.randn((batch_size, sequence_length, features))

dummy_multidim_data_batch.shape

torch.Size([32, 10, 20])

### Matrix multiplication doesn't work anymore!
* https://pytorch.org/docs/stable/generated/torch.mm.html
* `torch.mm` only works on 2D tensors

In [16]:
# dummy_multidim_data_batch.shape: (N, S, F)
# torch.mm only accepts 2-D operands, so the 3-D batch is expected to fail.
# The error is caught and printed so that "Restart & Run All" continues past
# this cell instead of stopping here.
try:
    model_mm_improved(dummy_multidim_data_batch)
except RuntimeError as err:
    print(f'{type(err).__name__}: {err}')

RuntimeError: self must be a matrix

In [17]:
# Show only the first few rows of the 2-D batch; printing all 32 rows of 20
# values floods the notebook output.
dummy_data_batch[:5]

tensor([[-1.0213e+00, -4.5882e-02,  2.2614e+00, -4.0166e-01, -7.1017e-01,
          1.3010e+00, -1.0071e+00, -1.5348e+00, -4.1546e-02, -2.2139e-01,
         -2.3881e-01,  2.1980e-01,  7.7252e-02,  4.3983e-02, -4.8397e-02,
          1.8628e+00,  7.2579e-02,  6.4934e-01,  1.7202e+00, -9.9910e-01],
        [-1.6038e+00, -5.3442e-01, -1.3830e+00, -1.1762e+00,  5.0216e-01,
         -1.6949e+00,  1.0289e+00, -2.0343e+00, -3.0293e-01, -4.3212e-01,
         -5.8980e-01,  2.5014e-01,  8.2625e-01,  2.8508e-01,  5.2712e-01,
         -9.5925e-01,  1.0689e+00,  1.4853e+00, -2.5934e+00, -1.8201e+00],
        [-3.4345e-01, -9.9762e-01, -7.8091e-02, -7.7035e-01, -3.7791e-01,
         -3.3490e+00,  7.1416e-01, -3.5499e-01, -1.5817e+00,  5.5390e-01,
         -2.2393e+00,  1.3136e+00, -1.0834e+00, -3.1529e+00,  3.1122e+00,
          9.0029e-01,  7.0263e-01, -1.5140e-01,  4.7981e-01, -2.1705e-01],
        [ 5.7410e-01,  1.0395e+00, -4.4477e-01,  1.5567e+00,  1.3732e+00,
          9.5662e-01, -8.0153e-01, 

## Using Broadcastable Matrix Multiplication
### matmul is the most generic function that can perform everything above and more!
* https://pytorch.org/docs/stable/generated/torch.matmul.html
* Performs a different operation depending on the input dimensions!

In [None]:
class BasicLinearBroadcastable(nn.Module):
    """Linear layer built on ``torch.matmul`` so it also accepts 3-D batches.

    Args:
        in_features: Number of input features F_in.
        out_features: Number of output features F_out.
    """

    def __init__(self, in_features, out_features):
        # nn.Module has to be initialised before any nn.Parameter is attached.
        super().__init__()

        # One bias value per output feature.
        bias = torch.zeros(out_features)
        # Weight matrix of shape (F_out, F_in); transposed in forward().
        weights = torch.randn(out_features, in_features)

        self.w_0 = nn.Parameter(bias)
        self.w_1 = nn.Parameter(weights)

    def forward(self, x):
        """Map ``x`` of shape ``(N, S, F_in)`` to an output of shape ``(N, S, F_out)``."""
        # (N, S, F_in) @ (F_in, F_out) -> (N, S, F_out); .T transposes w_1.
        # torch.matmul broadcasts the 2-D weight over the leading dimensions of x.
        projected = torch.matmul(x, self.w_1.T)
        # The (F_out,) bias broadcasts over every leading dimension.
        return projected + self.w_0

In [19]:
# Instantiate the matmul-based layer mapping 20 input features to 4 output features.
model_broadcastable = BasicLinearBroadcastable(in_features=20, out_features=4)

In [20]:
# dummy_multidim_data_batch.shape: (N, S, F_in) -> pred.shape: (N, S, F_out)
pred = model_broadcastable(dummy_multidim_data_batch)
pred.shape

torch.Size([32, 10, 4])