# Application: How Much Did it Rain? 2
https://www.kaggle.com/c/how-much-did-it-rain-ii/data

![image.png](attachment:image.png)

This notebook has been edited by Hróbjartur Höskuldsson (KU ID: VDH406)

Input: polarimetric radar values

Target: the hourly rain gauge total

In [20]:
# linear algebra
import numpy as np 
# data processing
import pandas as pd 
# io handling
import os

In [21]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## 5. Create and train a recurrent Model

#### Define Model

In [22]:
# make sure torch-summary is installed
!pip install torch-summary



In [23]:
# import torch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchsummary import summary

### Select the type of model

*   Original model = "LSTM"
*   Bidirectional model = "LSTM_bi"
*   2 layer model = "LSTM_2layer"
*   1-d convolution model = "LSTM_conv"
*   All-in-one = "LSTM_all"



In [35]:
model_type = "LSTM"
gradient_clipping = False
clipping_threshold = 0.5

In [25]:
if not os.path.exists('DsltUgfzAc'):
    # windows:
    if os.name == 'nt':
        !python -m wget https://sid.erda.dk/share_redirect/DsltUgfzAc
    # linux:
    else:
        !wget https://sid.erda.dk/share_redirect/DsltUgfzAc
data_dict = torch.load("DsltUgfzAc")
X_train = data_dict["X_train"]
y_train = data_dict["y_train"]
sl_train = data_dict["sl_train"]
X_val = data_dict["X_val"]
y_val = data_dict["y_val"]
sl_val = data_dict["sl_val"]
_, series_length, dims = X_train.shape

In [26]:
device = "cuda" if torch.cuda.is_available() else "cpu"

##### Fully Connected Model

In [27]:

class FCN(nn.Module):
    def __init__(self, dims):
        super().__init__()
        self.model = nn.Sequential(
            nn.Flatten(),
            nn.Linear(dims * series_length, 64),
            nn.Tanh(),
            nn.Linear(64, 1),
        )
    
    def forward(self, x, seq_len=None):
        return self.model(x)

In [28]:
model_fc = FCN(dims)
fc_str = summary(model_fc, (series_length, dims))

Layer (type:depth-idx)                   Output Shape              Param #
├─Sequential: 1-1                        [-1, 1]                   --
|    └─Flatten: 2-1                      [-1, 418]                 --
|    └─Linear: 2-2                       [-1, 64]                  26,816
|    └─Tanh: 2-3                         [-1, 64]                  --
|    └─Linear: 2-4                       [-1, 1]                   65
Total params: 26,881
Trainable params: 26,881
Non-trainable params: 0
Total mult-adds (M): 0.05
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.10
Estimated Total Size (MB): 0.10


###### Recurrent Model

In [36]:
# Original model
class LSTM(nn.Module):
    def __init__(self, dims):
        super().__init__()
        self.rnn = nn.LSTM(dims, 64, batch_first=True)
        self.out = nn.Linear(64, 1)
        
    def forward(self, x, seq_len=None):
        if seq_len is None:
            # ignore seq_len and process complete series
            seq_len = [x.size(1)] * x.size(0)
        
        # pack the sequence for efficient computation
        x_packed = nn.utils.rnn.pack_padded_sequence(
            x, seq_len, batch_first=True, enforce_sorted=False
        )

        # apply the rnn to the packed sequence, no need to keep the hidden and cell state here
        x, (h, c) = self.rnn(x_packed)
        
        out = h[-1] # get the last hidden state as an input to the final layer
        return self.out(out)

In [37]:
# Bi-directional model
class LSTM_bi(nn.Module):
    def __init__(self, dims):
        super().__init__()
        self.rnn = nn.LSTM(dims, 64, batch_first=True, bidirectional = True)
        self.out = nn.Linear(64, 1)
        
    def forward(self, x, seq_len=None):
        if seq_len is None:
            # ignore seq_len and process complete series
            seq_len = [x.size(1)] * x.size(0)
        
        # pack the sequence for efficient computation
        x_packed = nn.utils.rnn.pack_padded_sequence(
            x, seq_len, batch_first=True, enforce_sorted=False
        )

        # apply the rnn to the packed sequence, no need to keep the hidden and cell state here
        x, (h, c) = self.rnn(x_packed)
        
        out = h[-1] # get the last hidden state as an input to the final layer
        return self.out(out)

In [38]:
# 2 layer stacked model
class LSTM_2layer(nn.Module):
    def __init__(self, dims):
        super().__init__()
        self.rnn = nn.LSTM(dims, 64, batch_first=True, num_layers = 2)
        self.out = nn.Linear(64, 1)
        
    def forward(self, x, seq_len=None):
        if seq_len is None:
            # ignore seq_len and process complete series
            seq_len = [x.size(1)] * x.size(0)
        
        # pack the sequence for efficient computation
        x_packed = nn.utils.rnn.pack_padded_sequence(
            x, seq_len, batch_first=True, enforce_sorted=False
        )

        # apply the rnn to the packed sequence, no need to keep the hidden and cell state here
        x, (h, c) = self.rnn(x_packed)
        
        out = h[-1] # get the last hidden state as an input to the final layer
        return self.out(out)

In [39]:
# 1-d convolutional model
class LSTM_conv(nn.Module):
    def __init__(self, dims):
        super().__init__()
        self.rnn = nn.LSTM(dims, 64, batch_first=True)
        self.out = nn.Linear(64, 1)
        self.conv1 = nn.Conv1d(dims, 64, 3)
        
    def forward(self, x, seq_len=None):
        if seq_len is None:
            # ignore seq_len and process complete series
            seq_len = [x.size(1)] * x.size(0)
        
        # pack the sequence for efficient computation
        x_packed = nn.utils.rnn.pack_padded_sequence(
            x, seq_len, batch_first=True, enforce_sorted=False
        )
  
        x = x.transpose(1, 2)
        x = self.conv1(x)

        # apply the rnn to the packed sequence, no need to keep the hidden and cell state here
        x, (h, c) = self.rnn(x_packed)
        
        out = h[-1] # get the last hidden state as an input to the final layer
        return self.out(out)

In [40]:
# All-in-one model
class LSTM_all(nn.Module):
    def __init__(self, dims):
        super().__init__()
        self.rnn = nn.LSTM(dims, 64, batch_first=True, num_layers = 2, bidirectional = True)
        self.out = nn.Linear(64, 1)
        self.conv1 = nn.Conv1d(dims, 64, 3)
        
    def forward(self, x, seq_len=None):
        if seq_len is None:
            # ignore seq_len and process complete series
            seq_len = [x.size(1)] * x.size(0)
        
        # pack the sequence for efficient computation
        x_packed = nn.utils.rnn.pack_padded_sequence(
            x, seq_len, batch_first=True, enforce_sorted=False
        )
  
        x = x.transpose(1, 2)
        x = self.conv1(x)

        # apply the rnn to the packed sequence, no need to keep the hidden and cell state here
        x, (h, c) = self.rnn(x_packed)
        
        out = h[-1] # get the last hidden state as an input to the final layer
        return self.out(out)

In [41]:
# recurrent model
model = eval(model_type + "(dims)")
rnn_str = summary(model, (series_length, dims))

Layer (type:depth-idx)                   Output Shape              Param #
├─LSTM: 1-1                              [-1, 64]                  22,528
├─Linear: 1-2                            [-1, 1]                   65
Total params: 22,593
Trainable params: 22,593
Non-trainable params: 0
Total mult-adds (M): 0.02
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.09
Estimated Total Size (MB): 0.09


#### Define Training Function

In [42]:
from tqdm.auto import tqdm

def fit(X_train, y_train, model, optimizer, batch_size=32, epochs=5, loss_fn=F.mse_loss):
    n_iterations = len(X_train) // batch_size
    X_train = torch.tensor(X_train).float()
    y_train = torch.tensor(y_train).float()
    
    pbar = tqdm(total=n_iterations)
    for epoch in range(epochs):
        # shuffle iterator        
        idx = torch.randperm(len(X_train))
        for i in range(n_iterations):
            optimizer.zero_grad()
            
            batch_idx = idx[i * batch_size: (i + 1) * batch_size]
            X_batch = X_train[batch_idx].to(device)
            y_batch = y_train[batch_idx].to(device)
            y_pred = model(X_batch)
            loss = loss_fn(y_batch, y_pred)
            loss.backward()
            if gradient_clipping:
              nn.utils.clip_grad_norm(model.parameters(), clipping_threshold)
            optimizer.step()
            
            # logging
            if i % 10 == 0:
                pbar.set_description_str(f"Epoch {epoch + 1}/{epochs}")
                pbar.set_postfix_str(f"loss: {loss.item():.4f}")
                pbar.update(10)
        
        pbar.refresh()
        pbar.reset()
    return model
            
            

#### Define Loss Function

In [43]:
def mae(x, y):
    return torch.abs(x - y).mean()

### Train models

In [44]:
n_epochs = 5

#### Fully Connected Model

In [45]:
model.to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, nesterov=True)
fit(X_train, y_train, model, optimizer, batch_size=64, epochs=n_epochs, loss_fn=mae)

HBox(children=(FloatProgress(value=0.0, max=9925.0), HTML(value='')))

LSTM(
  (rnn): LSTM(22, 64, batch_first=True)
  (out): Linear(in_features=64, out_features=1, bias=True)
)

In [46]:
model_fc.to(device)
optimizer_fc = optim.SGD(model_fc.parameters(), lr=0.01, momentum=0.9, nesterov=True)
fit(X_train, y_train, model_fc, optimizer_fc, batch_size=64, epochs=n_epochs, loss_fn=mae)

HBox(children=(FloatProgress(value=0.0, max=9925.0), HTML(value='')))

FCN(
  (model): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=418, out_features=64, bias=True)
    (2): Tanh()
    (3): Linear(in_features=64, out_features=1, bias=True)
  )
)

### Save the model

In [48]:
torch.save(model, "/content/drive/MyDrive/Colab Notebooks/AI_A2/model.pt")

## Evaluate Models

#### Define Prediction Function

In [47]:
def predict(X, model, batch_size=32):
    n_iterations = len(X) // batch_size
    X = torch.tensor(X).float()
    idx = torch.arange(len(X))
    
    with torch.no_grad():
        y_pred = []
        for i in tqdm(range(n_iterations + 1)):
            batch_idx = idx[i * batch_size: (i + 1) * batch_size]
            X_batch = X[batch_idx].to(device)
            y_pred.append(model(X_batch))

    y_pred = torch.cat(y_pred, 0)
    return y_pred.cpu().numpy()

In [49]:
model = torch.load("/content/drive/MyDrive/Colab Notebooks/AI_A2/model.pt")
val_preds = predict(X_val, model, batch_size=64)
val_preds_fc = predict(X_val, model_fc, batch_size=64)

HBox(children=(FloatProgress(value=0.0, max=2482.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2482.0), HTML(value='')))




In [50]:
absolute_error_fc = abs(y_val - val_preds_fc).flatten()
absolute_error = abs(y_val - val_preds).flatten()
print(np.mean(absolute_error_fc))
print(np.mean(absolute_error))

2.0527886672525684
1.9459202774184539


In [51]:
errors = pd.DataFrame(
    np.stack((absolute_error_fc, absolute_error), 1), 
    columns=["fully_connected", "new model"]
)

In [52]:
errors.describe()

Unnamed: 0,fully_connected,new model
count,158811.0,158811.0
mean,2.052789,1.94592
std,3.873537,3.718744
min,1.2e-05,5e-06
25%,0.30347,0.237078
50%,0.737794,0.689527
75%,1.901156,1.875194
max,36.495666,36.956431


Unnamed: 0,fully_connected,rnn,bidirectional,2-layers,conv1d,clipping,clipping2
count,158811.0,158811.0,158811.0,158811.0,158811.0,158811.0,158811.0
mean,2.05057,1.952269,1.960768,1.96295,1.941518,1.953154,1.963894
std,3.934394,3.751223,3.726526,3.656548,3.742184,3.770596,3.831893
min,5e-06,2.5e-05,1.4e-05,1.7e-05,9e-06,1.1e-05,5.960464e-08
25%,0.235434,0.255247,0.22965,0.254368,0.237938,0.220704,0.2085824
50%,0.683711,0.677375,0.669201,0.712593,0.643864,0.640949,0.6383122
75%,1.946392,1.821752,1.916545,1.959368,1.849658,1.859775,1.859
max,36.78876,36.787056,36.864229,36.801545,36.778338,36.715905,37.18522


More things to try:
* more layers (stacked LSTM)
* filtering the signal first via 1d convultion
* different optimizer