In [1]:
import sys
sys.path.insert(0, "../src")

In [2]:
%reload_ext autoreload
%autoreload 2
%reload_ext nb_black

<IPython.core.display.Javascript object>

In [3]:
import gc
import functools
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor
from tqdm.notebook import tqdm

import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import metrics

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as T
import pytorch_lightning as pl
from torch.utils.data import SequentialSampler, RandomSampler

import optim
from data import NowcastingDataset
from loss import LogCoshLoss
from utils import visualize, radar2precipitation

<IPython.core.display.Javascript object>

In [4]:
args = dict(
    dams=(6071, 6304, 7026, 7629, 7767, 8944, 11107),
    train_folds_csv=Path("../input/train_folds.csv"),
    train_data_path=Path("../input/train-128"),
    test_data_path=Path("../input/test-128"),
    num_workers=4,
    gpus=1,
    lr=1e-4,
    max_epochs=50,
    batch_size=64,
    precision=16,
    optimizer="adamw",
    scheduler="cosine",
    gradient_accumulation_steps=1,
)

<IPython.core.display.Javascript object>

# 🔥 RainNet ⚡️

## Resize data

In [7]:
def resize_data(path, folder="train-128"):
    data = np.load(path)
    img1 = data[:, :, :3]
    img2 = data[:, :, 2:]
    img1 = cv2.copyMakeBorder(img1, 4, 4, 4, 4, cv2.BORDER_REFLECT)
    img2 = cv2.copyMakeBorder(img2, 4, 4, 4, 4, cv2.BORDER_REFLECT)
    img2 = img2[:, :, 1:]
    data = np.concatenate([img1, img2], axis=-1)
    np.save(PATH / folder / path.name, data)

<IPython.core.display.Javascript object>

In [5]:
(PATH / "train-128").mkdir(exist_ok=True)

<IPython.core.display.Javascript object>

In [21]:
files = list((PATH / "train").glob("*.npy"))
with ThreadPoolExecutor(8) as e:
    e.map(resize_data, files)

<IPython.core.display.Javascript object>

In [8]:
(PATH / "test-128").mkdir(exist_ok=True)

<IPython.core.display.Javascript object>

In [9]:
test_files = list((PATH / "test").glob("*.npy"))
with ThreadPoolExecutor(8) as e:
    e.map(functools.partial(resize_data, folder="test-128"), test_files)

<IPython.core.display.Javascript object>

## Dataset

In [5]:
class NowcastingDataModule(pl.LightningDataModule):
    def __init__(
        self, train_df, val_df, batch_size=args["batch_size"], num_workers=args["num_workers"]
    ):
        super().__init__()
        self.train_df = train_df
        self.val_df = val_df
        self.batch_size = batch_size
        self.num_workers = num_workers

    def setup(self, stage="train"):
        if stage == "train":
            train_paths = [
                args["train_data_path"] / fn for fn in self.train_df.filename.values
            ]
            val_paths = [args["train_data_path"] / fn for fn in self.val_df.filename.values]
            self.train_dataset = NowcastingDataset(train_paths)
            self.val_dataset = NowcastingDataset(val_paths)
        else:
            test_paths = list(args["test_data_path"].glob("*.npy"))
            self.test_dataset = NowcastingDataset(test_paths, test=True)

    def train_dataloader(self):
        return torch.utils.data.DataLoader(
            self.train_dataset,
            batch_size=self.batch_size,
            sampler=RandomSampler(self.train_dataset),
            pin_memory=True,
            num_workers=self.num_workers,
            drop_last=True,
        )

    def val_dataloader(self):
        return torch.utils.data.DataLoader(
            self.val_dataset,
            batch_size=2 * self.batch_size,
            sampler=SequentialSampler(self.val_dataset),
            pin_memory=True,
            num_workers=self.num_workers,
        )

    def test_dataloader(self):
        return torch.utils.data.DataLoader(
            self.test_dataset,
            batch_size=2 * self.batch_size,
            sampler=SequentialSampler(self.test_dataset),
            pin_memory=True,
            num_workers=self.num_workers,
        )

<IPython.core.display.Javascript object>

In [6]:
# df = pd.read_csv(args["train_folds_csv"])
# datamodule = NowcastingDataModule(df, fold=0, batch_size=2)
# datamodule.setup()
# for batch in datamodule.train_dataloader():
#     xs, ys = batch
#     idx = np.random.randint(len(xs))
#     x, y = xs[idx], ys[idx]
#     x = x.permute(1, 2, 0).numpy()
#     y = y.permute(1, 2, 0).numpy()
#     visualize(x, y)
#     break

<IPython.core.display.Javascript object>

## RainNet

### Layers

In [10]:
class Block(nn.Module):
    def __init__(self, in_ch, out_ch):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1, bias=False),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(out_ch),
            nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=1, bias=False),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(out_ch),
        )

    def forward(self, x):
        return self.net(x)


class Encoder(nn.Module):
    def __init__(self, chs=[4, 64, 128, 256, 512, 1024], drop_rate=0.5):
        super().__init__()
        self.blocks = nn.ModuleList(
            [Block(chs[i], chs[i + 1]) for i in range(len(chs) - 1)]
        )
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout = nn.Dropout(p=drop_rate)

    def forward(self, x):
        ftrs = []
        for i, block in enumerate(self.blocks):
            x = block(x)
            ftrs.append(x)
            if i >= 3:
                x = self.dropout(x)
            if i < 4:
                x = self.pool(x)
        return ftrs


class Decoder(nn.Module):
    def __init__(self, chs=[1024, 512, 256, 128, 64]):
        super().__init__()
        self.chs = chs
        #         self.ups = nn.ModuleList(
        #             [nn.Upsample(scale_factor=2, mode="nearest") for i in range(len(chs) - 1)]
        #         )
        self.ups = nn.ModuleList(
            [
                nn.ConvTranspose2d(chs[i], chs[i + 1], kernel_size=2, stride=2)
                for i in range(len(chs) - 1)
            ]
        )
        self.convs = nn.ModuleList(
            [Block(chs[i] + chs[i + 1], chs[i + 1]) for i in range(len(chs) - 1)]
        )

    def forward(self, x, ftrs):
        for i in range(len(self.chs) - 1):
            x = self.ups[i](x)
            x = torch.cat([ftrs[i], x], dim=1)
            x = self.convs[i](x)
        return x

<IPython.core.display.Javascript object>

### RainNet

In [11]:
class RainNet(pl.LightningModule):
    def __init__(
        self,
        lr=1e-4,
        enc_chs=[4, 64, 128, 256, 512, 1024],
        dec_chs=[1024, 512, 256, 128, 64],
        num_train_steps=None,
    ):
        super().__init__()

        # Parameters
        self.lr = lr
        self.num_train_steps = num_train_steps

        #         self.criterion = LogCoshLoss()
        self.criterion = nn.L1Loss()

        # Layers
        self.encoder = Encoder(enc_chs)
        self.decoder = Decoder(dec_chs)
        self.out = nn.Sequential(
            nn.Conv2d(64, 2, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(2),
            nn.Conv2d(2, 1, kernel_size=1),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        ftrs = self.encoder(x)
        ftrs = ftrs[::-1]
        x = self.decoder(ftrs[0], ftrs[1:])
        out = self.out(x)
        return out

    def shared_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = self.criterion(y_hat, y)
        return loss, y, y_hat

    def training_step(self, batch, batch_idx):
        loss, y, y_hat = self.shared_step(batch, batch_idx)
        self.log("train_loss", loss)
        return {"loss": loss}

    def validation_step(self, batch, batch_idx):
        loss, y, y_hat = self.shared_step(batch, batch_idx)
        return {"loss": loss, "y": y.detach(), "y_hat": y_hat.detach()}

    def validation_epoch_end(self, outputs):
        avg_loss = torch.stack([x["loss"] for x in outputs]).mean()
        self.log("val_loss", avg_loss)

        tfms = nn.Sequential(
            T.CenterCrop(120),
        )

        y = torch.cat([x["y"] for x in outputs])
        y = tfms(y)
        y = y.detach().cpu().numpy()
        y = y.reshape(-1, 120 * 120)

        y_hat = torch.cat([x["y_hat"] for x in outputs])
        y_hat = tfms(y_hat)
        y_hat = y_hat.detach().cpu().numpy()
        y_hat = y_hat.reshape(-1, 120 * 120)

        y = 255.0 * y[:, args["dams"]]
        y = np.round(y).clip(0, 255)
        y_hat = 255.0 * y_hat[:, args["dams"]]
        y_hat = np.round(y_hat).clip(0, 255)
        #         mae = metrics.mean_absolute_error(y, y_hat)

        y_true = radar2precipitation(y)
        y_true = np.where(y_true >= 0.1, 1, 0)
        y_pred = radar2precipitation(y_hat)
        y_pred = np.where(y_pred >= 0.1, 1, 0)

        y = y * y_true
        y_hat = y_hat * y_true
        #         mae = np.abs(y - y_hat).sum() / y_true.sum()
        mae = np.abs(y - y_hat).mean()

        tn, fp, fn, tp = metrics.confusion_matrix(
            y_true.reshape(-1), y_pred.reshape(-1)
        ).ravel()
        csi = tp / (tp + fn + fp)

        comp_metric = mae / (csi + 1e-12)

        print(
            f"Epoch {self.current_epoch} | MAE/CSI: {comp_metric} | MAE: {mae} | CSI: {csi} | Loss: {avg_loss}"
        )

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, T_max=self.num_train_steps
        )
        return [optimizer], [{"scheduler": scheduler, "interval": "step"}]

<IPython.core.display.Javascript object>

## Train

In [12]:
df = pd.read_csv(args["train_folds_csv"])

for fold in range(5):
    train_df = df[df.fold != fold]
    val_df = df[df.fold == fold]

    datamodule = NowcastingDataModule(
        train_df, val_df, batch_size=args["batch_size"], num_workers=args["num_workers"]
    )
    datamodule.setup()

    num_train_steps = (
        int(
            np.ceil(
                len(train_df)
                // args["batch_size"]
                / args["gradient_accumulation_steps"]
            )
        )
        * args["max_epochs"]
    )

    model = RainNet(num_train_steps=num_train_steps)

    trainer = pl.Trainer(
        gpus=args["gpus"],
        max_epochs=args["max_epochs"],
        precision=args["precision"],
        progress_bar_refresh_rate=50,
        benchmark=True,
        auto_lr_find=True,
    )

    trainer.fit(model, datamodule)
    trainer.save_checkpoint(f"rainnet_fold{fold}_bs64_epoch50.ckpt")

    del datamodule, model, trainer
    gc.collect()
    torch.cuda.empty_cache()
    break

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.

  | Name      | Type       | Params
-----------------------------------------
0 | criterion | L1Loss     | 0     
1 | encoder   | Encoder    | 18 M  
2 | decoder   | Decoder    | 15 M  
3 | out       | Sequential | 1 K   


HBox(children=(HTML(value='Validation sanity check'), FloatProgress(value=1.0, bar_style='info', layout=Layout…

RuntimeError: Given groups=1, weight of size [512, 1536, 3, 3], expected input[128, 1024, 16, 16] to have 1536 channels, but got 1024 channels instead

<IPython.core.display.Javascript object>

## Inference

In [11]:
model = RainNet.load_from_checkpoint("rainnet_fold0_bs64_epoch50.ckpt")
model.to("cuda")

RainNet(
  (criterion): L1Loss()
  (encoder): Encoder(
    (blocks): ModuleList(
      (0): Block(
        (net): Sequential(
          (0): Conv2d(4, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (1): ReLU(inplace=True)
          (2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (4): ReLU(inplace=True)
          (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
      )
      (1): Block(
        (net): Sequential(
          (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (1): ReLU(inplace=True)
          (2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (4): ReLU(inplace=True)
          (5): 

<IPython.core.display.Javascript object>

In [13]:
datamodule = NowcastingDataModule(train_df, val_df, batch_size=2 * args["batch_size"])
datamodule.setup("test")

<IPython.core.display.Javascript object>

In [14]:
preds = []
model.eval()
with torch.no_grad():
    for batch in tqdm(datamodule.test_dataloader()):
        batch = batch.to("cuda")
        imgs = model(batch)
        imgs = imgs.detach().cpu().numpy()
        imgs = imgs[:, 0, 4:124, 4:124]
        imgs = 255.0 * imgs
        imgs = np.round(imgs)
        imgs = np.clip(imgs, 0, 255)
        preds.append(imgs)

preds = np.concatenate(preds)
preds = preds.astype(np.uint8)
preds = preds.reshape(len(preds), -1)

<IPython.core.display.Javascript object>

In [15]:
test_paths = datamodule.test_dataset.paths
test_filenames = [path.name for path in test_paths]

<IPython.core.display.Javascript object>

In [16]:
subm = pd.DataFrame()
subm["file_name"] = test_filenames
for i in tqdm(range(14400)):
    subm[str(i)] = preds[:, i]

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=14400.0), HTML(value='')))




<IPython.core.display.Javascript object>

In [17]:
subm.to_csv("rainnet_fold0_epoch50.csv", index=False)
subm.head()

Unnamed: 0,file_name,0,1,2,3,4,5,6,7,8,...,14390,14391,14392,14393,14394,14395,14396,14397,14398,14399
0,test_00402.npy,0,0,0,0,0,0,0,0,8,...,0,0,0,0,0,0,0,0,0,0
1,test_00365.npy,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,test_00122.npy,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,test_01822.npy,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,test_01769.npy,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


<IPython.core.display.Javascript object>

In [None]:
del model