<a href="https://colab.research.google.com/github/PradyumnaGupta/rainnet/blob/master/RainNet_Training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# RainNet Training Notebook

Author:[Pradyumna Gupta](https://github.com/PradyumnaGupta)

This notebook is created and executed on Google Colaboratory with Google Drive Mounted.

* Mounting Google Drive



* Importing libraries

In [1]:
import pandas as pd
import numpy
from sklearn.model_selection import train_test_split
import os

import matplotlib.pyplot as plt 
import imageio
import PIL
from PIL import ImageFile
import cv2
import numpy as np
from IPython.display import display
from tqdm import tqdm
from imblearn.over_sampling import RandomOverSampler

import torch
import torch.nn as nn
from torch.nn import functional as F
# import pretrainedmodels
import torch.optim as optim
from sklearn.metrics import f1_score,accuracy_score,roc_auc_score
import math
import time
import albumentations
import random
from tqdm import tqdm
import tensorflow as tf

ImageFile.LOAD_TRUNCATED_IMAGES = True
%matplotlib inline

* Defining some data preparing and preprocessing functions

In [2]:
def Scaler(array):
    return np.log(array+0.01)


def invScaler(array):
    return np.exp(array) - 0.01


def pad_to_shape(array, from_shape=900, to_shape=928, how="mirror"):
    # calculate how much to pad in respect with native resolution
    padding = int( (to_shape - from_shape) / 2)
    # for input shape as (batch, W, H, channels)
    if how == "zero":
        array_padded = np.pad(array, ((0,0),(padding,padding),(padding,padding),(0,0)), mode="constant", constant_values=0)
    elif how == "mirror":
        array_padded = np.pad(array, ((0,0),(padding,padding),(padding,padding),(0,0)), mode="reflect")
    return array_padded


def pred_to_rad(pred, from_shape=928, to_shape=900):
    # pred shape 12,928,928
    padding = int( (from_shape - to_shape) / 2)
    return pred[::, padding:padding+to_shape, padding:padding+to_shape].copy()


def data_preprocessing(X):
    
    # 0. Right shape for batch
    X = np.moveaxis(X, 0, -1)
    X = X[np.newaxis, ::, ::, ::]
    # 1. To log scale
    X = Scaler(X)
    # 2. from 900x900 to 928x928
    X = pad_to_shape(X)
    
    return X


def data_postprocessing(nwcst):

    # 0. Squeeze empty dimensions
    nwcst = np.squeeze(np.array(nwcst))
    # 1. Convert back to rainfall depth
    nwcst = invScaler(nwcst)
    # 2. Convert from 928x928 back to 900x900
    nwcst = pred_to_rad(nwcst)
    # 3. Return only positive values
    nwcst = np.where(nwcst>0, nwcst, 0)
    return nwcst

* Constructing Dataloaders

In [3]:
class Dataset(tf.keras.utils.Sequence):
    
    def __init__(
            self, 
            dataset_dict,
            image_names,
            batch_size
    ):
        self.keys = image_names
        self.dataset = dataset_dict
        self.bs = batch_size

    def get_index(self,i):
      x = []
      for j in range(4):
        try:
          arr = np.array(self.dataset.get(self.keys[i+j]))
        except:
          print(i,j)
        x.append(arr)
      
      x = data_preprocessing(np.stack(x,0))
      # x = np.transpose(np.squeeze(x),(2,0,1)) 
      x = np.squeeze(x)
      y = np.squeeze(data_preprocessing(np.array(self.dataset[self.keys[i+3]])[np.newaxis,:,:]))

      return x.astype('float32'),y.astype('float32')
    
    def __getitem__(self, index):
        
      X = []
      Y = []

      for i in range(index*self.bs,(index+1)*self.bs):
        x,y = self.get_index(i)
        X.append(x[np.newaxis,:])
        Y.append(y[np.newaxis,:])

      return X,Y
        
    def __len__(self):
      return (len(self.keys) - 4)//self.bs

* Loading the dataset dictionary.

The dataset consists of a large dictionary where the keys are the image names and the value is the actual image in the form of a numpy array.The dataset can be downloaded from [here](https://drive.google.com/file/d/1sZI4TbFkgJcpkZDBfbuWR_JBZSNhKow4/view?usp=sharing).

In [4]:
import h5py
local_file_path = r'D:\rain_net\RYDL.hdf5'
with h5py.File(local_file_path, 'r') as dataset_dict:
    pass

* Loading the image names.

The image names are present in the keys txt file which can be downloaded from [here](https://drive.google.com/file/d/1DvVUyrUvL4P8TRr_y_r5NrOxlRjwgWQr/view?usp=sharing). 

The data from the year 2012-2016 will be used for training purposes and the data from the year 2017 will be used for validation purposes.

In [5]:
import ast
from tqdm import tqdm

file_path = r'D:\rain_net\key_file.txt'

try:
    with open(file_path, 'r') as f:
        image_names = f.readlines()
        #print(image_names[1])
except SyntaxError as e:
    print(f"Error in file content: {e}")
    image_names = []

image_names = [name for name in image_names if name[:4] > '2012']

train_images = [name for name in tqdm(image_names) if "2017" not in name]
val_images = [name for name in tqdm(image_names) if name[0:4] == "2017"]

#print(len(image_names))

100%|██████████| 221211/221211 [00:00<00:00, 5253531.41it/s]
100%|██████████| 221211/221211 [00:00<00:00, 4062855.49it/s]


* Instantiating the dataloaders.

In [6]:
train_dataset = Dataset(
    dataset_dict=dataset_dict,
    image_names=train_images,
    batch_size=1
)

valid_dataset = Dataset(
    dataset_dict=dataset_dict,
    image_names=val_images,
    batch_size=1
)

* Constructing the RainNet model.

In [7]:
from tensorflow.keras.models import *
from tensorflow.keras.layers import *

def rainnet(input_shape=(928, 928, 4), mode="regression"):

    """
    The function for building the RainNet (v1.0) model from scratch
    using Keras functional API.
    Parameters:
    input size: tuple(W x H x C), where W (width) and H (height)
    describe spatial dimensions of input data (e.g., 928x928 for RY data);
    and C (channels) describes temporal (depth) dimension of 
    input data (e.g., 4 means accounting four latest radar scans at time
    t-15, t-10, t-5 minutes, and t)
    
    mode: "regression" (default) or "segmentation". 
    For "regression" mode the last activation function is linear, 
    while for "segmentation" it is sigmoid.
    To train RainNet to predict continuous precipitation intensities use 
    "regression" mode. 
    RainNet could be trained to predict the exceedance of specific intensity 
    thresholds. For that purpose, use "segmentation" mode.
    """

    inputs = Input(input_shape)

    conv1f = Conv2D(64, 3, padding='same', kernel_initializer='he_normal')(inputs)
    conv1f = Activation("relu")(conv1f)
    conv1s = Conv2D(64, 3, padding='same', kernel_initializer='he_normal')(conv1f)
    conv1s = Activation("relu")(conv1s)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1s)

    conv2f = Conv2D(128, 3, padding='same', kernel_initializer='he_normal')(pool1)
    conv2f = Activation("relu")(conv2f)
    conv2s = Conv2D(128, 3, padding='same', kernel_initializer='he_normal')(conv2f)
    conv2s = Activation("relu")(conv2s)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2s)

    conv3f = Conv2D(256, 3, padding='same', kernel_initializer='he_normal')(pool2)
    conv3f = Activation("relu")(conv3f)
    conv3s = Conv2D(256, 3, padding='same', kernel_initializer='he_normal')(conv3f)
    conv3s = Activation("relu")(conv3s)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3s)

    conv4f = Conv2D(512, 3, padding='same', kernel_initializer='he_normal')(pool3)
    conv4f = Activation("relu")(conv4f)
    conv4s = Conv2D(512, 3, padding='same', kernel_initializer='he_normal')(conv4f)
    conv4s = Activation("relu")(conv4s)
    drop4 = Dropout(0.5)(conv4s)
    pool4 = MaxPooling2D(pool_size=(2, 2))(drop4)

    conv5f = Conv2D(1024, 3, padding='same', kernel_initializer='he_normal')(pool4)
    conv5f = Activation("relu")(conv5f)
    conv5s = Conv2D(1024, 3, padding='same', kernel_initializer='he_normal')(conv5f)
    conv5s = Activation("relu")(conv5s)
    drop5 = Dropout(0.5)(conv5s)

    up6 = concatenate([UpSampling2D(size=(2, 2))(drop5), conv4s], axis=3)
    conv6 = Conv2D(512, 3, padding='same', kernel_initializer='he_normal')(up6)
    conv6 = Activation("relu")(conv6)
    conv6 = Conv2D(512, 3, padding='same', kernel_initializer='he_normal')(conv6)
    conv6 = Activation("relu")(conv6)

    up7 = concatenate([UpSampling2D(size=(2, 2))(conv6), conv3s], axis=3)
    conv7 = Conv2D(256, 3, padding='same', kernel_initializer='he_normal')(up7)
    conv7 = Activation("relu")(conv7)
    conv7 = Conv2D(256, 3, padding='same', kernel_initializer='he_normal')(conv7)
    conv7 = Activation("relu")(conv7)

    up8 = concatenate([UpSampling2D(size=(2, 2))(conv7), conv2s], axis=3)
    conv8 = Conv2D(128, 3, padding='same', kernel_initializer='he_normal')(up8)
    conv8 = Activation("relu")(conv8)
    conv8 = Conv2D(128, 3, padding='same', kernel_initializer='he_normal')(conv8)
    conv8 = Activation("relu")(conv8)

    up9 = concatenate([UpSampling2D(size=(2, 2))(conv8), conv1s], axis=3)
    conv9 = Conv2D(64, 3, padding='same', kernel_initializer='he_normal')(up9)
    conv9 = Activation("relu")(conv9)
    conv9 = Conv2D(64, 3, padding='same', kernel_initializer='he_normal')(conv9)
    conv9 = Activation("relu")(conv9)
    conv9 = Conv2D(2, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv9)
    
    if mode == "regression":
        outputs = Conv2D(1, 1, activation='linear')(conv9)
    elif mode == "segmentation":
        outputs = Conv2D(1, 1, activation='sigmoid')(conv9)

    model = Model(inputs=inputs, outputs=outputs)

    return model

* Instantiating and compiling the model with Adam optimizer and Log_Cosh loss function.


In [8]:
model = rainnet()

In [9]:
model.compile(optimizer=tf.keras.optimizers.Adam(lr=3e-4),loss='log_cosh')

  super().__init__(name, **kwargs)


In [10]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 928, 928, 4  0           []                               
                                )]                                                                
                                                                                                  
 conv2d (Conv2D)                (None, 928, 928, 64  2368        ['input_1[0][0]']                
                                )                                                                 
                                                                                                  
 activation (Activation)        (None, 928, 928, 64  0           ['conv2d[0][0]']                 
                                )                                                             

* Start Training

In [11]:
model.fit(x=train_dataset,validation_data=valid_dataset,epochs=10)

0 0


UnboundLocalError: local variable 'arr' referenced before assignment