<a href="https://colab.research.google.com/github/PradyumnaGupta/rainnet/blob/master/RainNet_Training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive (google.colab is only available on Colab).
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy
from sklearn.model_selection import train_test_split
import os

import matplotlib.pyplot as plt
import imageio
import PIL
from PIL import ImageFile
import cv2
import numpy as np
from IPython.display import display
from tqdm import tqdm
from imblearn.over_sampling import RandomOverSampler

import torch
import torch.nn as nn
from torch.nn import functional as F
import torch.optim as optim
from sklearn.metrics import f1_score,accuracy_score,roc_auc_score
import math
import time
import albumentations
import random
from tqdm import tqdm
import tensorflow as tf

ImageFile.LOAD_TRUNCATED_IMAGES = True
%matplotlib inline



In [None]:
def Scaler(array):
    """
    Log-scale an array.

    A constant offset of 0.01 is added before taking the natural logarithm,
    so an input of 0 maps to log(0.01) instead of -inf.
    """
    shifted = array + 0.01
    return np.log(shifted)


def invScaler(array):
    """
    Invert the log scaling: exponentiate, then subtract the 0.01 offset.
    """
    exponentiated = np.exp(array)
    return exponentiated - 0.01


def pad_to_shape(array, from_shape=900, to_shape=928, how="mirror"):
    """
    Pad axes 1 and 2 of a 4-D array symmetrically.

    The same amount, ``int((to_shape - from_shape) / 2)``, is added on both
    sides of axes 1 and 2; axes 0 and 3 are left untouched. Note that
    `from_shape` is only used to compute the padding amount; the array's
    actual size is not checked against it.

    Parameters
    ----------
    array : 4-D array-like (the pad specification has exactly four entries).
    from_shape : int
        Nominal size of the padded axes before padding.
    to_shape : int
        Desired size of the padded axes after padding.
    how : {"mirror", "zero"}
        "mirror" pads with ``np.pad(..., mode="reflect")``;
        "zero" pads with constant zeros.

    Returns
    -------
    numpy.ndarray
        The padded array.

    Raises
    ------
    ValueError
        If `how` is not one of the supported methods. (Previously this fell
        through to the return statement and failed with UnboundLocalError.)
    """
    # Padding added on each side, relative to the nominal input resolution.
    padding = int((to_shape - from_shape) / 2)
    # Pad only the two middle axes of a (batch, W, H, channels) input.
    pad_width = ((0, 0), (padding, padding), (padding, padding), (0, 0))

    if how == "zero":
        return np.pad(array, pad_width, mode="constant", constant_values=0)
    if how == "mirror":
        return np.pad(array, pad_width, mode="reflect")
    raise ValueError(
        "Unknown padding method {!r}; expected 'mirror' or 'zero'".format(how)
    )


def pred_to_rad(pred, from_shape=928, to_shape=900):
    """
    Crop the centre `to_shape` x `to_shape` window out of axes 1 and 2.

    The crop starts ``int((from_shape - to_shape) / 2)`` elements in on both
    axes; axis 0 is kept whole. A copy is returned, so the result does not
    share memory with `pred`.
    """
    # Predictions are expected as (12, 928, 928) per the original note.
    offset = int((from_shape - to_shape) / 2)
    window = slice(offset, offset + to_shape)
    return pred[:, window, window].copy()


def data_preprocessing(X):
    """
    Prepare a stack of frames as a single-sample model input.

    Steps:
    0. Move axis 0 to the last position and prepend a batch axis of size 1.
    1. Log-scale the values with `Scaler`.
    2. Pad with `pad_to_shape` using its defaults (900 -> 928, mirror).
    """
    channels_last = np.moveaxis(X, 0, -1)
    batch = channels_last[np.newaxis, :, :, :]
    return pad_to_shape(Scaler(batch))


def data_postprocessing(nwcst):
    """
    Convert raw model output back to the original scale and size.

    Steps:
    0. Convert to an array and drop all size-1 axes.
    1. Undo the log scaling with `invScaler`.
    2. Crop with `pred_to_rad` using its defaults (928 -> 900).
    3. Replace every value that is not strictly positive with 0.
    """
    squeezed = np.squeeze(np.array(nwcst))
    cropped = pred_to_rad(invScaler(squeezed))
    return np.where(cropped > 0, cropped, 0)

In [None]:
class Dataset(tf.keras.utils.Sequence):
    """
    Keras ``Sequence`` that serves (input frames, target frame) batches.

    Sample ``i`` uses the frames stored under ``keys[i] .. keys[i + 3]`` as
    the input channels and the frame under ``keys[i + 4]`` as the target.
    All frames go through `data_preprocessing` and are cast to float32.

    Parameters
    ----------
    dataset_dict : mapping from key to a 2-D frame
        Indexed as ``dataset_dict[key]``; presumably an open ``h5py.File`` --
        confirm against the caller.
    image_names : sequence of keys, assumed to be in temporal order
        NOTE(review): if frames were filtered out of this list, a window of
        five consecutive keys may span a time gap.
    batch_size : int
        Number of samples per batch; must be at least 1.
    """

    # Number of consecutive frames stacked into one input sample.
    n_input_frames = 4

    def __init__(
            self,
            dataset_dict,
            image_names,
            batch_size
    ):
        # The Keras base class may set up state of its own in its initializer.
        super().__init__()
        if batch_size < 1:
            raise ValueError("batch_size must be >= 1, got {!r}".format(batch_size))
        self.keys = image_names
        self.dataset = dataset_dict
        self.bs = batch_size

    def get_index(self, i):
        """
        Build the sample that starts at key position `i`.

        Returns
        -------
        (x, y) : tuple of float32 arrays
            `x` is the preprocessed stack of the input frames (frames on the
            last axis); `y` is the preprocessed frame that follows them.

        Raises
        ------
        IndexError / KeyError
            If a required key position or key does not exist. The original
            code swallowed such errors and silently reused the previous frame.
        """
        # Index with [] rather than .get() so a missing key fails loudly.
        frames = [
            np.array(self.dataset[self.keys[i + j]])
            for j in range(self.n_input_frames)
        ]
        x = np.squeeze(data_preprocessing(np.stack(frames, 0)))

        # The target is the frame AFTER the input window. Using i + 3 (the
        # last input frame) would let the model copy its input.
        target = np.array(self.dataset[self.keys[i + self.n_input_frames]])
        y = np.squeeze(data_preprocessing(target[np.newaxis, :, :]))

        return x.astype('float32'), y.astype('float32')

    def __getitem__(self, index):
        """
        Return batch number `index` as two stacked arrays ``(X, Y)``.

        The samples are stacked along a new leading batch axis, so a batch
        is one array per side rather than a list of per-sample arrays
        (Keras interprets a list as several separate model inputs).
        """
        start = index * self.bs
        samples = [self.get_index(i) for i in range(start, start + self.bs)]

        X = np.stack([x for x, _ in samples], axis=0)
        Y = np.stack([y for _, y in samples], axis=0)
        return X, Y

    def __len__(self):
        """Number of full batches; the last `n_input_frames` keys cannot start a sample."""
        usable = len(self.keys) - self.n_input_frames
        # Clamp at zero: len() must not return a negative number.
        return max(0, usable // self.bs)

In [None]:
import h5py
# Open the HDF5 archive read-only. The handle is intentionally left open:
# it is passed to the Dataset objects below, which index into it later.
# NOTE(review): the path is relative, so it resolves against the current
# working directory -- confirm that this matches the Drive mount point.
dataset_dict = h5py.File('drive/MyDrive/RYDL.hdf5', 'r')



Загружаем ключи (имена кадров), оставляем только данные начиная с 2013 года и разбиваем их на train (все годы, кроме 2017) и validation (2017 год).

In [None]:
import ast
# The keys file holds a Python literal; ast.literal_eval parses it without
# executing arbitrary code.
with open('drive/MyDrive/RYDL_keys.txt','r') as f:
  image_names = ast.literal_eval(f.read())
# Keep only names whose 4-character prefix sorts after '2012'.
image_names = [name for name in image_names if name[:4]>'2012']

# Split on the same 4-character prefix for both sets. The previous train
# filter used a substring test ("2017" not in name), which also discarded
# names that merely contain "2017" somewhere after the prefix; those names
# ended up in neither split.
train_images = [name for name in tqdm(image_names) if name[:4] != "2017"]
val_images = [name for name in tqdm(image_names) if name[0:4]=="2017"]

100%|██████████| 221211/221211 [00:00<00:00, 2273532.11it/s]
100%|██████████| 221211/221211 [00:00<00:00, 1789822.69it/s]


In [None]:
# Both splits read from the same open HDF5 handle with a batch size of 1;
# only the list of keys differs.
_dataset_kwargs = dict(dataset_dict=dataset_dict, batch_size=1)

train_dataset = Dataset(image_names=train_images, **_dataset_kwargs)

valid_dataset = Dataset(image_names=val_images, **_dataset_kwargs)

In [None]:
from tensorflow.keras.models import *
from tensorflow.keras.layers import *

def _conv_relu(tensor, filters):
    """Apply a 3x3 'same' he_normal convolution followed by a separate ReLU layer."""
    tensor = Conv2D(filters, 3, padding='same', kernel_initializer='he_normal')(tensor)
    return Activation("relu")(tensor)


def _double_conv(tensor, filters):
    """Apply two consecutive conv + ReLU stages with the same filter count."""
    return _conv_relu(_conv_relu(tensor, filters), filters)


def _up_block(tensor, skip, filters):
    """Upsample `tensor` 2x, concatenate `skip` on the channel axis, then double-conv."""
    merged = concatenate([UpSampling2D(size=(2, 2))(tensor), skip], axis=3)
    return _double_conv(merged, filters)


def rainnet(input_shape=(928, 928, 4), mode="regression"):
    """
    Build the RainNet encoder-decoder model.

    The encoder has five double-convolution stages (64, 128, 256, 512, 1024
    filters) separated by 2x2 max pooling, with dropout (0.5) after stages 4
    and 5. The decoder upsamples four times, each time concatenating the
    matching encoder output and applying a double convolution. Layers are
    created in the same order as before the helper extraction.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample, without the batch axis.
    mode : {"regression", "segmentation"}
        Selects the activation of the final 1x1 convolution:
        linear for "regression", sigmoid for "segmentation".

    Returns
    -------
    Model
        The uncompiled Keras model.

    Raises
    ------
    ValueError
        If `mode` is not supported. (Previously this surfaced as an
        UnboundLocalError after the whole network had been built.)
    """
    output_activations = {"regression": "linear", "segmentation": "sigmoid"}
    # Validate up front so a bad mode fails before any layer is created.
    if mode not in output_activations:
        raise ValueError(
            "Unknown mode {!r}; expected 'regression' or 'segmentation'".format(mode)
        )

    inputs = Input(input_shape)

    # Encoder
    conv1s = _double_conv(inputs, 64)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1s)

    conv2s = _double_conv(pool1, 128)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2s)

    conv3s = _double_conv(pool2, 256)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3s)

    conv4s = _double_conv(pool3, 512)
    drop4 = Dropout(0.5)(conv4s)
    pool4 = MaxPooling2D(pool_size=(2, 2))(drop4)

    conv5s = _double_conv(pool4, 1024)
    drop5 = Dropout(0.5)(conv5s)

    # Decoder. The first skip connection takes conv4s, i.e. the stage-4
    # output before dropout, exactly as in the original code.
    conv6 = _up_block(drop5, conv4s, 512)
    conv7 = _up_block(conv6, conv3s, 256)
    conv8 = _up_block(conv7, conv2s, 128)
    conv9 = _up_block(conv8, conv1s, 64)
    conv9 = Conv2D(2, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv9)

    # Output layer
    outputs = Conv2D(1, 1, activation=output_activations[mode])(conv9)

    model = Model(inputs=inputs, outputs=outputs)

    return model

In [None]:
# Build RainNet with its defaults (regression mode) and compile it with
# Adam and the log-cosh loss.
model = rainnet()
# `learning_rate` is the supported keyword; the `lr` alias is deprecated and
# is rejected by newer Keras optimizers.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=3e-4),loss='log_cosh')

In [None]:
# Train for 10 epochs, evaluating on the validation split after each one.
model.fit(
    train_dataset,
    validation_data=valid_dataset,
    epochs=10,
)