V1: Keep R/C; results improved.

V2: Add R**2; no improvement.

V3: Update model from https://www.kaggle.com/dlaststark/gb-vpp-pulp-fiction. Not much improvement compared to V1.

V4: Use whole db for test Fold-1
Fold-1 | OOF Score: 0.15912771348961205

V5: Add loading ScaleLayer from saved model

V6: Add TransformerEncoder. TransformerEncoder(128, 512, 8, name="transformer_layer")(x). Testing in GPU.

V7: Tried scaling the target to 0-1, but it slowed training because MASE cannot be scaled.

V8: Removed target scale.


In [1]:
# Update pandas version for Colab TPU
#!pip install pandas==1.3.2


## Import libraries

In [2]:
# Detect whether the notebook runs on Google Colab by probing for the
# Colab-only package. Only ImportError is treated as "not on Colab"; a bare
# except would also hide KeyboardInterrupt/SystemExit and unrelated failures.
try:
    import google.colab  # noqa: F401  (imported only as an availability probe)
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

# DEBUG shortens training (fewer epochs, first fold only) and disables the
# final scoring and submission cells further down in this notebook.
DEBUG = False

In [3]:
# On Colab, mount Google Drive at /gdrive; force_remount=True remounts it
# even if a previous mount is still present.
if IN_COLAB:
  from google.colab import drive
  drive.mount("/gdrive", force_remount=True)

In [4]:
# Environment setup (IPython shell magics): fetch the input data and create
# the ./model-gpu directory/link that the training cell writes checkpoints to.
if IN_COLAB:
  # Colab: install the Kaggle CLI and copy the API token from Google Drive.
  !pip install kaggle
  !mkdir /root/.kaggle
  !cp /gdrive/MyDrive/ventilator-pressure-prediction/kaggle.json /root/.kaggle
  # Download the competition archive and unpack it where the later cells read it.
  !kaggle competitions download -c ventilator-pressure-prediction
  !mkdir -p ../input/ventilator-pressure-prediction
  !unzip '*.zip' -d ../input/ventilator-pressure-prediction
  !ls ../input/ventilator-pressure-prediction
  !mkdir /gdrive/MyDrive/ventilator-pressure-prediction/gb-rescaling-eda-v3-gpu
  # Symlink the Google Drive folder to the local name model-gpu so that saved
  # models are written to Drive.
  !ln -s /gdrive/MyDrive/ventilator-pressure-prediction/gb-rescaling-eda-v3-gpu model-gpu
else:
  # Outside Colab: download the feature archive from Google Drive and unpack it.
  # NOTE(review): presumably features.zip provides the train.npy / test.npy
  # files loaded later — confirm.
  !pip install gdown
  !gdown https://drive.google.com/uc?id=1-Ds4O7pYglC4rFNc11jbOAw-mQInqSGc
  !unzip features.zip
  !mkdir ./model-gpu

In [5]:
import gc
import numpy as np
import pandas as pd
import random
import os
from sklearn.model_selection import KFold

from sklearn.metrics import mean_absolute_error

import tensorflow as tf
# Turn on memory growth for every visible GPU so TensorFlow allocates GPU
# memory as needed rather than claiming it all at start-up.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
  tf.config.experimental.set_memory_growth(gpu, True)

from tensorflow.keras.utils import plot_model
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.layers import Concatenate, LSTM, GRU
from tensorflow.keras.layers import Bidirectional, Multiply


# Seed shared by every random number generator seeded below.
RANDOM_SEED = 42


def seed_everything(seed=RANDOM_SEED):
    """Seed TensorFlow, Python's ``random``, NumPy and PYTHONHASHSEED.

    Args:
        seed: Integer seed applied to all four sources.
    """
    tf.random.set_seed(seed)
    random.seed(seed)
    np.random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)


seed_everything()

## Load datasets

In [6]:
# Raw training table; this cell only reads its 'pressure' column.
train_df = pd.read_csv('../input/ventilator-pressure-prediction/train.csv')

# Distinct pressure values in ascending order (np.unique returns them sorted).
all_pressure = np.unique(train_df['pressure'].to_numpy())

# Bounds of the observed pressure grid, as plain Python floats.
pressure_min, pressure_max = all_pressure[0].item(), all_pressure[-1].item()
# Grid spacing taken from the two smallest distinct values.
# NOTE(review): assumes the grid is uniformly spaced — confirm.
pressure_step = (all_pressure[1] - all_pressure[0]).item()

# Notebook display of the derived constants.
pressure_min,pressure_max,pressure_step

In [7]:
# Pre-computed feature arrays for the train and test sets.
train, test = np.load('train.npy'), np.load('test.npy')

# Pressure targets regrouped into rows of 80 consecutive values.
# NOTE(review): presumably 80 is the number of time steps per sequence — confirm.
targets = train_df['pressure'].to_numpy().reshape(-1, 80)

# The raw table is no longer needed; release it before training.
del train_df
gc.collect()

print(f"train: {train.shape} \ntest: {test.shape} \ntargets: {targets.shape}")

## Hardware config

In [8]:
# Choose the distribution strategy and batch size.
# If a TPU cluster can be resolved, use a TPU strategy with 64 samples per
# replica; TPUClusterResolver raising ValueError is treated as "no TPU" and the
# current default strategy is used with a fixed batch size of 512.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    # tf.distribute.experimental.TPUStrategy is deprecated; the stable
    # tf.distribute.TPUStrategy is its replacement.
    tpu_strategy = tf.distribute.TPUStrategy(tpu)
    BATCH_SIZE = tpu_strategy.num_replicas_in_sync * 64
    print("Running on TPU:", tpu.master())
    print(f"Batch Size: {BATCH_SIZE}")

except ValueError:
    # The variable keeps the name tpu_strategy even though no TPU is used here,
    # because later cells open `tpu_strategy.scope()` unconditionally.
    tpu_strategy = tf.distribute.get_strategy()
    BATCH_SIZE = 512
    print(f"Running on {tpu_strategy.num_replicas_in_sync} replicas")
    print(f"Batch Size: {BATCH_SIZE}")

## Rescaling layer

In [9]:
@tf.custom_gradient
def round_with_gradients(x):
    """Round ``x`` to the nearest integer while passing gradients through.

    The forward value is ``tf.round(x)``; the custom gradient returns the
    upstream gradient unchanged.

    Args:
        x: Tensor to round.

    Returns:
        A ``(rounded, grad_fn)`` pair as required by ``tf.custom_gradient``.
    """
    rounded = tf.round(x)

    def passthrough(upstream):
        # Identity gradient: hand the incoming gradient straight back.
        return upstream

    return rounded, passthrough

class ScaleLayer(tf.keras.layers.Layer):
    """Snap predictions onto the discrete pressure grid.

    The grid is defined by the module-level ``pressure_min``, ``pressure_max``
    and ``pressure_step`` values, captured as float32 constants when the layer
    is constructed. Rounding uses ``round_with_gradients`` so the layer stays
    trainable.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.min = tf.constant(pressure_min, dtype=np.float32)
        self.max = tf.constant(pressure_max, dtype=np.float32)
        self.step = tf.constant(pressure_step, dtype=np.float32)

    def call(self, inputs):
        """Return ``inputs`` rounded to the nearest grid value and clipped to [min, max]."""
        # Position on the grid, counted in steps from the minimum, then rounded.
        grid_index = round_with_gradients((inputs - self.min) / self.step)
        # Back to pressure units.
        snapped = grid_index * self.step + self.min
        return tf.clip_by_value(snapped, self.min, self.max)

    # NOTE: no get_config override is defined; the constructor takes only the
    # base Layer keyword arguments.


In [10]:
class TransformerEncoder(tf.keras.layers.Layer):
    """Self-attention encoder block with a feed-forward sub-layer.

    Each sub-layer (multi-head self-attention, then a two-layer dense
    projection) is followed by a residual addition and layer normalization.

    Args:
        embed_dim: Width of the block's output; also used as ``key_dim`` of
            the attention layer.
        dense_dim: Hidden width of the feed-forward projection (GELU).
        num_heads: Number of attention heads.
        **kwargs: Forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):
        super().__init__(**kwargs)
        # Kept so that get_config can serialize the constructor arguments.
        self.embed_dim = embed_dim
        self.dense_dim = dense_dim
        self.num_heads = num_heads

        keras_layers = tf.keras.layers
        self.attention = keras_layers.MultiHeadAttention(
            num_heads=num_heads,
            key_dim=embed_dim,
            dropout=0.3,
        )
        # Feed-forward part: widen to dense_dim, then project back to embed_dim.
        widen = keras_layers.Dense(dense_dim, activation=tf.nn.gelu)
        narrow = keras_layers.Dense(embed_dim)
        self.dense_proj = tf.keras.Sequential([widen, narrow])
        self.layernorm_1 = keras_layers.LayerNormalization()
        self.layernorm_2 = keras_layers.LayerNormalization()

    def call(self, inputs, mask=None):
        """Apply self-attention and the feed-forward projection to ``inputs``.

        Args:
            inputs: Tensor used as both query and value of the attention.
            mask: Optional mask; when given, an axis is inserted at position 1
                before it is passed as ``attention_mask``.
        """
        attention_mask = None if mask is None else mask[:, tf.newaxis, :]
        attended = self.attention(inputs, inputs, attention_mask=attention_mask)
        residual = self.layernorm_1(inputs + attended)
        return self.layernorm_2(residual + self.dense_proj(residual))

    def get_config(self):
        """Return the base layer config extended with the constructor arguments."""
        config = super().get_config()
        config["embed_dim"] = self.embed_dim
        config["dense_dim"] = self.dense_dim
        config["num_heads"] = self.num_heads
        return config

## Keras DNN Model

In [11]:
def dnn_model():
    """Build the (uncompiled) sequence model.

    Architecture, in order:
      * five stacked bidirectional LSTMs (768, 512, 384, 256, 128 units);
      * a parallel chain of bidirectional GRUs, each fed by the batch-normalized
        element-wise product of an LSTM output and the previous GRU output;
      * concatenation of the last LSTM output with all GRU outputs, projected
        to 128 features;
      * a TransformerEncoder branch whose output multiplies the projection;
      * a SELU dense layer, a single-unit dense output and a ScaleLayer.

    The input shape is taken from the last two dimensions of the global
    ``train`` array.

    Returns:
        A ``tf.keras.Model`` named ``'DNN_Model'``.
    """

    def bi_lstm(units, tensor):
        # Bidirectional LSTM returning the full sequence.
        return Bidirectional(LSTM(units=units, return_sequences=True))(tensor)

    def bi_gru(units, tensor):
        # Bidirectional GRU returning the full sequence.
        return Bidirectional(GRU(units=units, return_sequences=True))(tensor)

    def gated(lstm_out, gru_out):
        # Element-wise product of the two branches, then batch normalization.
        product = Multiply()([lstm_out, gru_out])
        return BatchNormalization()(product)

    x_input = Input(shape=(train.shape[-2:]))

    # LSTM stack.
    lstm1 = bi_lstm(768, x_input)
    lstm2 = bi_lstm(512, lstm1)
    lstm3 = bi_lstm(384, lstm2)
    lstm4 = bi_lstm(256, lstm3)
    lstm5 = bi_lstm(128, lstm4)

    # GRU chain, starting from the second LSTM output.
    gru2 = bi_gru(384, lstm2)
    gru3 = bi_gru(256, gated(lstm3, gru2))
    gru4 = bi_gru(128, gated(lstm4, gru3))
    gru5 = bi_gru(64, gated(lstm5, gru4))

    merged = Concatenate(axis=2)([lstm5, gru2, gru3, gru4, gru5])
    projected = Dense(units=128)(merged)

    # Attention branch, used as a multiplicative gate on the projection.
    normalized = BatchNormalization()(projected)
    attended = TransformerEncoder(128, 128, 4, name="transformer_layer")(normalized)
    gated_features = Multiply()([projected, attended])

    hidden = Dense(units=128, activation='selu')(gated_features)
    raw_output = Dense(units=1)(hidden)
    # Snap the raw prediction onto the discrete pressure grid.
    snapped_output = ScaleLayer()(raw_output)

    return Model(inputs=x_input, outputs=snapped_output, name='DNN_Model')

In [12]:
# Build one instance of the network and print its layer summary.
model = dnn_model()
model.summary()

In [13]:
# Render the model graph, with tensor shapes and layer names, to a PNG file.
plot_model(
    model, 
    to_file='Google_Brain_Keras_Model.png', 
    show_shapes=True,
    show_layer_names=True
)

In [14]:
# Drop the inspection model and collect garbage before training starts.
del model
gc.collect()

In [15]:
#reset Keras Session
def reset_keras():
    """Tear down the current Keras session and install a fresh one.

    Clears the Keras backend state, closes the previous TF1-compat session,
    then registers a new session restricted to GPU "0" with the full
    per-process memory fraction, and finally runs the garbage collector.
    """
    backend = tf.compat.v1.keras.backend

    previous_session = backend.get_session()
    backend.clear_session()
    previous_session.close()
    # Return value is not used; the call is kept for whatever side effect
    # get_session() has after the clear.
    backend.get_session()

    # use the same config as you used to create the session
    session_config = tf.compat.v1.ConfigProto()
    session_config.gpu_options.per_process_gpu_memory_fraction = 1
    session_config.gpu_options.visible_device_list = "0"
    backend.set_session(tf.compat.v1.Session(config=session_config))
    gc.collect()

reset_keras()

In [16]:
# Out-of-fold prediction buffer with the same shape as targets; the training
# cell fills in each fold's validation rows.
train_pred = np.zeros(targets.shape)

In [17]:
# Zero-based fold indices that should NOT be retrained: for these the training
# cell loads the saved checkpoint instead of calling fit.
#skip_folds = np.arange(1)
skip_folds = []

In [None]:
# K-fold cross-validation training.
# For every fold: build and compile a fresh model, train it (or load its saved
# checkpoint when the fold is listed in skip_folds), store the out-of-fold
# predictions in train_pred, and append the test-set predictions to test_preds.
with tpu_strategy.scope():

    VERBOSE = 0
    EPOCHS = 200
    FOLDS = 7
    if DEBUG:
        EPOCHS = 100
    test_preds = []
    kf = KFold(n_splits=FOLDS, shuffle=True, random_state=2021)

    for fold, (train_idx, test_idx) in enumerate(kf.split(train, targets)):
        # NOTE(review): test_preds is reset above and grows by at most one
        # entry per fold, so this condition cannot currently be true.
        if len(test_preds) > fold:
            print(f'Ignore corrent fold {fold+1} as we already predicted test data')

        # In DEBUG mode only the first fold is processed.
        if DEBUG and fold > 0:
            continue
        X_train, X_valid = train[train_idx], train[test_idx]
        y_train, y_valid = targets[train_idx], targets[test_idx]

        model = dnn_model()
        model.compile(optimizer="adam", loss="mae")

        # Multiply the learning rate by 0.75 after 10 epochs without
        # val_loss improvement.
        lr = ReduceLROnPlateau(monitor="val_loss", factor=0.75,
                               patience=10, verbose=VERBOSE)
        # Keep only the best (lowest val_loss) model of this fold on disk.
        checkpoint_name = f'./model-gpu/Bidirect_LSTM_model_{fold+1}C.h5'
        chk_point = ModelCheckpoint(checkpoint_name,
                                    monitor='val_loss', verbose=VERBOSE,
                                    save_best_only=True, mode='min')

        # Stop after 50 epochs without val_loss improvement.
        es = EarlyStopping(monitor="val_loss", patience=50,
                           verbose=VERBOSE, mode="min",
                           restore_best_weights=True)

        if fold not in skip_folds:
            model.fit(X_train, y_train,
                      validation_data=(X_valid, y_valid),
                      epochs=EPOCHS,
                      verbose=VERBOSE,
                      batch_size=BATCH_SIZE,
                      callbacks=[lr, chk_point, es])
        else:
            print('Load pretrain weight from ', checkpoint_name)
            load_locally = tf.saved_model.LoadOptions(experimental_io_device='/job:localhost')
            try:
                # The saved model contains two custom layer classes; both must
                # be registered, otherwise load_model fails on the one it does
                # not know and that error is not an OSError.
                custom_objects = {
                    "ScaleLayer": ScaleLayer,
                    "TransformerEncoder": TransformerEncoder,
                }
                with tf.keras.utils.custom_object_scope(custom_objects):
                    model = load_model(checkpoint_name, options=load_locally)
            except (OSError):
                # Checkpoint file missing or unreadable: stop processing folds.
                print('Cannot load pretrain weight from ', checkpoint_name)
                break

        # Out-of-fold evaluation on this fold's validation rows.
        y_true = y_valid.squeeze().reshape(-1, 1)
        y_pred = model.predict(X_valid, batch_size=BATCH_SIZE).squeeze().reshape(-1, 1)
        train_pred[test_idx] = y_pred.reshape(-1, 80)
        score = mean_absolute_error(y_true, y_pred)
        # Flat 1-D test predictions of this fold, ensembled later.
        test_preds.append(model.predict(test, batch_size=BATCH_SIZE).squeeze().reshape(-1, 1).squeeze())

        print(f"Fold-{fold+1} | OOF Score: {score}")

        # Free this fold's data and model before the next fold is built.
        del X_train, X_valid, y_train, y_valid, model
        reset_keras()
        gc.collect()


In [None]:
# Overall MAE of the out-of-fold predictions against the targets.
# Skipped in DEBUG mode, where only the first fold is predicted.
# NOTE(review): rows of any fold that was not predicted stay at their initial
# zeros and would be included in this score.
if not DEBUG:
  # flatten() rebinds both globals to 1-D arrays.
  train_pred = train_pred.flatten()
  targets = targets.flatten()
  score = mean_absolute_error(targets, train_pred)
  print(f"Train Score: {score}")

## Create submission file

In [None]:
# Write three submission files from the per-fold test predictions:
#   submission.csv              - mean over folds
#   submission_median.csv       - median over folds
#   submission_median_round.csv - median snapped to the pressure grid and clipped
if not DEBUG:
    submission = pd.read_csv('../input/ventilator-pressure-prediction/sample_submission.csv')

    # ENSEMBLE FOLDS WITH MEAN
    # Divide by the number of folds actually predicted; a hard-coded divisor
    # gives a wrongly scaled mean whenever the fold count differs from it.
    submission["pressure"] = sum(test_preds) / len(test_preds)
    submission.to_csv('submission.csv', index=False)

    # ENSEMBLE FOLDS WITH MEDIAN
    # Take the median across folds for every test row.
    submission["pressure"] = np.median(np.vstack(test_preds), axis=0)
    submission.to_csv('submission_median.csv', index=False)

    # ENSEMBLE FOLDS WITH MEDIAN AND ROUND PREDICTIONS
    # Snap the median to the nearest value of the pressure grid, then clip to
    # the observed pressure range.
    submission["pressure"] = (
        np.round((submission.pressure - pressure_min) / pressure_step) * pressure_step
        + pressure_min
    )
    submission.pressure = np.clip(submission.pressure, pressure_min, pressure_max)
    submission.to_csv('submission_median_round.csv', index=False)