## predicting drought with deep learning

In [1]:
import numpy as np
from viz_report import *

## load the data

In [2]:
def np_load(filename):
    path = ''
    return np.load(path + filename + '.npy', allow_pickle=True)

In [3]:
X_train = np_load('X_train')
y_train = np_load('y_train')
X_valid = np_load('X_valid')
y_valid = np_load('y_valid')

In [4]:
input_size = X_train.shape[1]
output_size = 6

In [14]:
import torch
from torch.utils.data import TensorDataset, DataLoader

FORMAT = torch.float32

train_data = TensorDataset(
    torch.tensor(X_train, dtype=FORMAT),
    torch.tensor(y_train, dtype=FORMAT)
)
train_loader = DataLoader(
    train_data, shuffle=True, batch_size=1500, drop_last=False
)
valid_data = TensorDataset(
    torch.tensor(X_valid, dtype=FORMAT),
    torch.tensor(y_valid, dtype=FORMAT)
)
valid_loader = DataLoader(
    valid_data, shuffle=False, batch_size=1500, drop_last=False
)

In [6]:
torch.from_numpy(X_train).size()

torch.Size([103390, 3810])

In [20]:
from torch import nn
from torchvision import transforms
import pytorch_lightning as pl

from sklearn.metrics import f1_score, mean_absolute_error

def mae_multi(output, target):
    y_hat = torch.clamp(torch.round(output), 0, 5.0)
    y = torch.clamp(torch.round(target), 0, 5.0)
    
    return torch.mean(torch.abs(torch.sub(y, y_hat)))
        

class MLP(pl.LightningModule):
  
    def __init__(self, input_size, output_size):
        
        super().__init__()
        self.layers = nn.Sequential(
          nn.Linear(input_size, 1000),
          nn.ReLU(),
          nn.Linear(1000, 500),
          nn.ReLU(),
          nn.Linear(500, 1000),
          nn.ReLU(),
          nn.Linear(1000, output_size) #no drought, D0 - D5
        )
        self.loss = mae_multi #or F1 macro loss
    
    def forward(self, x):
        return self.layers(x)
  
    def training_step(self, batch, batch_idx):
        x, y = batch
        x = x.view(x.size(0), -1)
        y_hat = self.layers(x)
        loss = self.loss(y_hat, y)
        self.log('train_loss', loss)
        return loss
    
    def test_step(self, batch, batch_idx):
        x, y = batch
        x = x.view(x.size(0), -1)
        y_hat = self.layers(x)
        test_loss = self.loss(y_hat, y)
        self.log("test_loss", test_loss)
  
    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-4)
        return optimizer


In [21]:
pl.seed_everything(42)
mlp = MLP(input_size,output_size)
trainer = pl.Trainer(auto_scale_batch_size='power', gpus=0, deterministic=True, max_epochs=10)
trainer.fit(mlp, train_loader)

Global seed set to 42
  rank_zero_deprecation(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

  | Name   | Type       | Params
--------------------------------------
0 | layers | Sequential | 4.8 M 
--------------------------------------
4.8 M     Trainable params
0         Non-trainable params
4.8 M     Total params
19.274    Total estimated model params size (MB)
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


In [19]:
trainer.test(model=mlp, dataloaders=valid_loader)

  rank_zero_warn(


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_loss           0.5822092890739441
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'test_loss': 0.5822092890739441}]