In [1]:
import pandas as pd
import numpy
from sklearn.model_selection import train_test_split
import os

import matplotlib.pyplot as plt 
import imageio
import PIL
from PIL import ImageFile
import cv2
import numpy as np
from IPython.display import display
from tqdm import tqdm
from imblearn.over_sampling import RandomOverSampler

#import torch
#import torch.nn as nn
#from torch.nn import functional as F
# import pretrainedmodels
#import torch.optim as optim
from sklearn.metrics import f1_score,accuracy_score,roc_auc_score
import math
import time
#import albumentations
import random
from tqdm import tqdm
import tensorflow as tf

# Ask Pillow to decode image files whose data is cut short instead of raising.
ImageFile.LOAD_TRUNCATED_IMAGES = True
%matplotlib inline




In [2]:
def Scaler(array):
    """Map raw values into log space.

    A fixed offset of 0.01 is added before the natural logarithm is taken;
    ``invScaler`` applies the inverse mapping.

    Parameters
    ----------
    array : numpy.ndarray or scalar
        Raw values; the operation is element-wise.

    Returns
    -------
    numpy.ndarray or numpy scalar
        ``log(array + 0.01)``.
    """
    shifted = array + 0.01
    return np.log(shifted)


def invScaler(array):
    """Undo ``Scaler``: exponentiate, then remove the 0.01 offset.

    Parameters
    ----------
    array : numpy.ndarray or scalar
        Values in log space; the operation is element-wise.

    Returns
    -------
    numpy.ndarray or numpy scalar
        ``exp(array) - 0.01``.
    """
    exponentiated = np.exp(array)
    return exponentiated - 0.01


def pad_to_shape(array, from_shape=160, to_shape=160, how="mirror"):
    """Pad the two middle axes of a 4-D array from ``from_shape`` to ``to_shape``.

    The first and last axes are left untouched. The padding is split as evenly
    as possible between both sides; when the difference is odd the extra pixel
    goes to the trailing side, so the padded axes always end up exactly
    ``to_shape`` long (given they were ``from_shape`` long on input).

    Parameters
    ----------
    array : numpy.ndarray
        4-D array; axes 1 and 2 are the ones being padded.
    from_shape : int
        Current length of axes 1 and 2.
    to_shape : int
        Desired length of axes 1 and 2; must not be smaller than ``from_shape``.
    how : {"mirror", "zero"}
        ``"mirror"`` pads with ``np.pad(..., mode="reflect")``, ``"zero"`` pads
        with constant zeros.

    Returns
    -------
    numpy.ndarray
        The padded array.

    Raises
    ------
    ValueError
        If ``how`` is not a supported mode or ``to_shape < from_shape``.
    """
    pad_modes = {
        "zero": {"mode": "constant", "constant_values": 0},
        "mirror": {"mode": "reflect"},
    }
    # Reject unknown modes explicitly rather than failing on an unbound local.
    if how not in pad_modes:
        raise ValueError(
            f"unknown padding mode {how!r}; expected one of {sorted(pad_modes)}"
        )

    total = int(to_shape - from_shape)
    if total < 0:
        raise ValueError(
            f"to_shape ({to_shape}) must be >= from_shape ({from_shape})"
        )

    # Leading side gets the floor half, trailing side the remainder, so an odd
    # difference no longer leaves the result one pixel short of to_shape.
    before = total // 2
    after = total - before
    pad_width = ((0, 0), (before, after), (before, after), (0, 0))
    return np.pad(array, pad_width, **pad_modes[how])


def pred_to_rad(pred, from_shape=160, to_shape=160):
    """Crop the centred ``to_shape`` x ``to_shape`` window out of a prediction.

    Parameters
    ----------
    pred : numpy.ndarray
        Either a single 2-D map, in which case both axes are cropped, or an
        array with three or more axes, in which case axes 1 and 2 are cropped
        and axis 0 is kept whole.
    from_shape : int
        Side length of the (padded) input maps.
    to_shape : int
        Side length of the window to keep.

    Returns
    -------
    numpy.ndarray
        A copy of the cropped region, so later writes do not touch ``pred``.
    """
    padding = int((from_shape - to_shape) / 2)
    window = slice(padding, padding + to_shape)
    # A lone 2-D map has no leading axis to keep; the three-index form below
    # would raise IndexError on it.
    if np.ndim(pred) == 2:
        return pred[window, window].copy()
    return pred[:, window, window].copy()


def data_preprocessing(X):
    """Turn a stack of frames into a model-ready, batched array.

    Steps, in order: move axis 0 to the last position, prepend a batch axis of
    length 1, apply ``Scaler`` and finally ``pad_to_shape`` with its default
    arguments.

    Parameters
    ----------
    X : numpy.ndarray
        3-D array; axis 0 becomes the trailing (channel) axis.

    Returns
    -------
    numpy.ndarray
        4-D array with a leading axis of length 1.
    """
    channels_last = np.moveaxis(X, 0, -1)
    batched = channels_last[np.newaxis, :, :, :]
    return pad_to_shape(Scaler(batched))


def data_postprocessing(nwcst):
    """Convert raw model output back to physical values.

    Steps, in order: convert to an array and drop all length-1 axes, invert the
    log scaling with ``invScaler``, crop with ``pred_to_rad`` (default
    arguments) and replace every value that is not strictly positive with 0.

    Parameters
    ----------
    nwcst : array-like
        Model output(s).

    Returns
    -------
    numpy.ndarray
        Cropped values with all non-positive entries set to 0.
    """
    squeezed = np.squeeze(np.array(nwcst))
    cropped = pred_to_rad(invScaler(squeezed))
    return np.where(cropped > 0, cropped, 0)

In [3]:
class Dataset(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that serves sliding windows of frames from a keyed store.

    Sample ``i`` consists of

    * ``x``: the ``n_frames`` consecutive frames ``keys[i] .. keys[i + n_frames - 1]``,
      passed through ``data_preprocessing`` (frames end up on the last axis);
    * ``y``: the single frame ``keys[i + target_offset]``, preprocessed the same way.

    NOTE(review): with the defaults (``n_frames=20``, ``target_offset=3``) the
    target frame is also one of the input frames, so the network can reproduce
    it by copying an input channel. For a genuine forecast pass
    ``target_offset >= n_frames`` (e.g. 20 for the frame right after the
    window) - confirm the intended setup.

    Parameters
    ----------
    dataset_dict : mapping
        Maps a key to a 2-D frame, e.g. an open ``h5py.File``.
    image_names : iterable of str
        Ordered frame keys; surrounding whitespace is stripped.
    batch_size : int
        Number of samples per batch; must be at least 1.
    n_frames : int, optional
        Number of consecutive input frames per sample (default 20).
    target_offset : int, optional
        Offset of the target frame relative to the first input frame (default 3).
    """

    def __init__(
            self,
            dataset_dict,
            image_names,
            batch_size,
            n_frames=20,
            target_offset=3
    ):
        # Initialise the Keras base class.
        super().__init__()
        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")
        self.keys = [name.strip() for name in image_names]
        self.dataset = dataset_dict
        self.bs = batch_size
        self.n_frames = n_frames
        self.target_offset = target_offset

    def _load_frame(self, position):
        """Return the frame stored under ``keys[position]`` as a NumPy array."""
        key = self.keys[position]
        try:
            # Index instead of .get(): a missing key must fail loudly here, not
            # surface later as a shape error inside np.stack.
            frame = self.dataset[key]
        except KeyError as exc:
            raise KeyError(
                f"frame {key!r} (position {position}) is missing from the dataset"
            ) from exc
        return np.array(frame)

    def get_index(self, i):
        """Build the ``(x, y)`` pair whose input window starts at position ``i``.

        Returns two float32 arrays: the preprocessed window and the
        preprocessed target frame, both with length-1 axes squeezed out.
        """
        frames = [self._load_frame(i + j) for j in range(self.n_frames)]
        x = np.squeeze(data_preprocessing(np.stack(frames, 0)))

        target = self._load_frame(i + self.target_offset)
        y = np.squeeze(data_preprocessing(target[np.newaxis, :, :]))

        return x.astype('float32'), y.astype('float32')

    def __getitem__(self, index):
        """Return batch ``index`` as ``(X, Y)`` arrays with a leading batch axis."""
        start = index * self.bs
        samples = [self.get_index(i) for i in range(start, start + self.bs)]

        # Stack into real batch arrays; a Python list of per-sample arrays is
        # only usable when the batch holds a single sample.
        X = np.stack([x for x, _ in samples], axis=0)
        Y = np.stack([y for _, y in samples], axis=0)
        return X, Y

    def __len__(self):
        """Number of complete batches that fit in the key list (never negative)."""
        # Every sample needs keys up to the later of the window end and the target.
        horizon = max(self.n_frames, self.target_offset)
        return max(0, (len(self.keys) - horizon) // self.bs)

In [4]:
import h5py

# Open the temperature archive read-only. The handle stays open because the
# Dataset objects read individual frames from it on demand.
dataset_dict = h5py.File('temperature_data_part.h5', mode='r')
print(dataset_dict)

<HDF5 file "temperature_data_part.h5" (mode r)>


In [5]:
import ast

# Read the list of frame keys, one key per line.
with open('78888.txt', 'r') as f:
    image_names = list(f)

# Keep only the keys whose first four characters sort after '2000'
# (string comparison on the raw, unstripped line).
image_names = [name for name in image_names if name[:4] > '2000']

# NOTE(review): the validation keys are simply the first 23000 training keys,
# so the two splits overlap and val_loss is not measured on unseen data.
# An earlier variant held out the keys of 2017 instead:
#   train_images = [name for name in tqdm(image_names) if "2017" not in name]
#   val_images = [name for name in tqdm(image_names) if name[0:4]=="2017"]
train_images = [name.strip() for name in image_names]
val_images = train_images[:23000]

print(len(train_images))
print(len(val_images))

78888
23000


In [6]:
# Training and validation generators read from the same open HDF5 handle and
# both yield one sample per batch.
train_dataset = Dataset(dataset_dict, image_names=train_images, batch_size=1)

valid_dataset = Dataset(dataset_dict, image_names=val_images, batch_size=1)

In [7]:
from tensorflow.keras.models import *
from tensorflow.keras.layers import *

def _conv_relu_stack(tensor, filters, depth=3):
    """Apply ``depth`` successive 3x3 'same' convolutions, each followed by ReLU."""
    for _ in range(depth):
        tensor = Conv2D(filters, 3, padding='same', kernel_initializer='he_normal')(tensor)
        tensor = Activation("relu")(tensor)
    return tensor


def _upsample_and_merge(tensor, skip):
    """Upsample ``tensor`` by 2x and concatenate it with ``skip`` on the channel axis."""
    return concatenate([UpSampling2D(size=(2, 2))(tensor), skip], axis=3)


def rainnet(input_shape=(160, 160, 20), mode="regression"):
    """Build the U-Net style encoder/decoder used for frame prediction.

    The encoder has five levels (64, 128, 256, 512, 1024 filters) of three
    Conv+ReLU layers each, separated by 2x2 max pooling, with dropout (0.5)
    after levels four and five. The decoder mirrors it with four
    upsample-and-concatenate steps that reuse the pre-dropout encoder features
    as skip connections. Layers are created in the same order as in a fully
    unrolled definition, so Keras' automatic layer names are unaffected by the
    helper functions.

    Parameters
    ----------
    input_shape : tuple of int
        ``(height, width, channels)``. Height and width must be divisible by
        16 so that the four pooling/upsampling steps line up with the skips.
    mode : {"regression", "segmentation"}
        ``"regression"`` ends in a 1x1 convolution with ReLU,
        ``"segmentation"`` in a 1x1 convolution with sigmoid.

    Returns
    -------
    Model
        The uncompiled Keras model with a single-channel output.

    Raises
    ------
    ValueError
        If ``mode`` is not one of the supported values.
    """
    head_activation = {"regression": "relu", "segmentation": "sigmoid"}
    # Validate before building anything: an unknown mode used to surface only
    # at the very end as an unbound-local error.
    if mode not in head_activation:
        raise ValueError(
            f"unknown mode {mode!r}; expected one of {sorted(head_activation)}"
        )

    inputs = Input(input_shape)

    # ---- encoder ----
    conv1 = _conv_relu_stack(inputs, 64)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)

    conv2 = _conv_relu_stack(pool1, 128)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)

    conv3 = _conv_relu_stack(pool2, 256)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)

    conv4 = _conv_relu_stack(pool3, 512)
    drop4 = Dropout(0.5)(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2))(drop4)

    # ---- bottleneck ----
    conv5 = _conv_relu_stack(pool4, 1024)
    drop5 = Dropout(0.5)(conv5)

    # ---- decoder (skip connections use the pre-dropout encoder outputs) ----
    up6 = _upsample_and_merge(drop5, conv4)
    conv6 = _conv_relu_stack(up6, 512)

    up7 = _upsample_and_merge(conv6, conv3)
    conv7 = _conv_relu_stack(up7, 256)

    up8 = _upsample_and_merge(conv7, conv2)
    conv8 = _conv_relu_stack(up8, 128)

    up9 = _upsample_and_merge(conv8, conv1)
    conv9 = _conv_relu_stack(up9, 64)
    conv9 = Conv2D(2, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv9)

    # NOTE(review): the regression head ends in ReLU, so the network cannot
    # emit negative values - confirm the log-scaled targets are never negative.
    outputs = Conv2D(1, 1, activation=head_activation[mode])(conv9)

    model = Model(inputs=inputs, outputs=outputs)

    return model

In [8]:
# Default architecture (160x160 input, 20 channels, regression head),
# trained with Adam at a small learning rate on the log-cosh loss.
temperature_model = rainnet()
temperature_model.compile(
    loss='log_cosh',
    optimizer=tf.keras.optimizers.Adam(learning_rate=3e-5),
)





In [9]:
# Print the layer-by-layer summary of the compiled network.
temperature_model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 160, 160, 20)]       0         []                            
                                                                                                  
 conv2d (Conv2D)             (None, 160, 160, 64)         11584     ['input_1[0][0]']             
                                                                                                  
 activation (Activation)     (None, 160, 160, 64)         0         ['conv2d[0][0]']              
                                                                                                  
 conv2d_1 (Conv2D)           (None, 160, 160, 64)         36928     ['activation[0][0]']          
                                                                                              

In [10]:
# One line per layer: name, trainable flag and output shape.
for lyr in temperature_model.layers:
    print(f"{lyr.name} {lyr.trainable} {lyr.output_shape}")


input_1 True [(None, 160, 160, 20)]
conv2d True (None, 160, 160, 64)
activation True (None, 160, 160, 64)
conv2d_1 True (None, 160, 160, 64)
activation_1 True (None, 160, 160, 64)
conv2d_2 True (None, 160, 160, 64)
activation_2 True (None, 160, 160, 64)
max_pooling2d True (None, 80, 80, 64)
conv2d_3 True (None, 80, 80, 128)
activation_3 True (None, 80, 80, 128)
conv2d_4 True (None, 80, 80, 128)
activation_4 True (None, 80, 80, 128)
conv2d_5 True (None, 80, 80, 128)
activation_5 True (None, 80, 80, 128)
max_pooling2d_1 True (None, 40, 40, 128)
conv2d_6 True (None, 40, 40, 256)
activation_6 True (None, 40, 40, 256)
conv2d_7 True (None, 40, 40, 256)
activation_7 True (None, 40, 40, 256)
conv2d_8 True (None, 40, 40, 256)
activation_8 True (None, 40, 40, 256)
max_pooling2d_2 True (None, 20, 20, 256)
conv2d_9 True (None, 20, 20, 512)
activation_9 True (None, 20, 20, 512)
conv2d_10 True (None, 20, 20, 512)
activation_10 True (None, 20, 20, 512)
conv2d_11 True (None, 20, 20, 512)
activation_11

In [11]:
# Write the model to disk whenever the validation loss reaches a new minimum.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath='model_weight.h5',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)

# Stop after 10 epochs without improvement and roll back to the best weights.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=10,
    restore_best_weights=True,
)

temperature_model.fit(
    x=train_dataset,
    validation_data=valid_dataset,
    epochs=20,
    callbacks=[checkpoint, early_stopping],
)


Epoch 1/20

Epoch 1: val_loss improved from inf to 0.00012, saving model to model_weight.h5


  saving_api.save_model(


Epoch 2/20
Epoch 2: val_loss improved from 0.00012 to 0.00001, saving model to model_weight.h5
Epoch 3/20
Epoch 3: val_loss did not improve from 0.00001
Epoch 4/20
Epoch 4: val_loss did not improve from 0.00001
Epoch 5/20
Epoch 5: val_loss improved from 0.00001 to 0.00000, saving model to model_weight.h5
Epoch 6/20
Epoch 6: val_loss did not improve from 0.00000
Epoch 7/20
Epoch 7: val_loss did not improve from 0.00000
Epoch 8/20
Epoch 8: val_loss did not improve from 0.00000
Epoch 9/20
Epoch 9: val_loss improved from 0.00000 to 0.00000, saving model to model_weight.h5
Epoch 10/20
Epoch 10: val_loss improved from 0.00000 to 0.00000, saving model to model_weight.h5
Epoch 11/20
Epoch 11: val_loss did not improve from 0.00000
Epoch 12/20
Epoch 12: val_loss improved from 0.00000 to 0.00000, saving model to model_weight.h5
Epoch 13/20
Epoch 13: val_loss did not improve from 0.00000
Epoch 14/20
Epoch 14: val_loss did not improve from 0.00000
Epoch 15/20
Epoch 15: val_loss did not improve from

<keras.src.callbacks.History at 0x1c2c9b082e0>