In [1]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import mean_absolute_error

import gc
import tensorflow as tf
from tensorflow import keras 
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.optimizers.schedules import ExponentialDecay

from IPython.display import display
from warnings import filterwarnings
filterwarnings('ignore')

2021-10-09 19:45:21.472240: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /opt/conda/lib
2021-10-09 19:45:21.472356: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [2]:
path = "../input/ventilator-pressure-prediction/"

train_ori = pd.read_csv(path + 'train.csv')
test_ori  = pd.read_csv(path + 'test.csv')
sample = pd.read_csv(path + 'sample_submission.csv')

In [3]:
def add_features(df):
    df['R'] = df['R'].astype(str)
    df['C'] = df['C'].astype(str)
    df['RC']= df['R'] + df['C']
    df['u_in_lag'] = df['u_in'].shift(1).fillna(0)
    df['u_in_lag'] = df['u_in_lag'] * df['breath_id_lagsame']
    df['u_in_lag2']= df['u_in'].shift(2).fillna(0)
    df['u_in_lag2']= df['u_in_lag2']*df['breath_id_lag2same']
    df['u_out_lag2']= df['u_out'].shift(2).fillna(0)
    df['u_out_lag2']= df['u_out_lag2']*df['breath_id_lag2same']
    df['area'] = df['time_step'] * df['u_in']
    df['area'] = df.groupby('breath_id')['area'].cumsum()
    df['cross'] = df['u_in'] * df['u_out']
    df['cross2']=df['time_step'] * df['u_out']
    df['breath_id_lag']  = df['breath_id'].shift(1).fillna(0)
    df['breath_id_lag2'] = df['breath_id'].shift(2).fillna(0)
    df['breath_id_lagsame'] = np.select([df['breath_id_lag']==df['breath_id']], [1], 0)
    df['breath_id_lag2same']= np.select([df['breath_id_lag2']==df['breath_id']], [1], 0)
    df['u_in_cumsum'] = (df['u_in']).groupby(df['breath_id']).cumsum()
    df['one']  = 1
    df['count']= (df['one']).groupby(df['breath_id']).cumsum()
    df['u_in_cummean'] = df['u_in_cumsum'] / df['count']
    df = pd.get_dummies(df)
    return df
train = add_features(train_ori)
test  = add_features(test_ori)

In [4]:
del train_ori
del test_ori

In [5]:
targets = train[['pressure']].to_numpy().reshape(-1, 80)
train.drop(['pressure','id', 'breath_id','one','count','breath_id_lag','breath_id_lag2','breath_id_lagsame','breath_id_lag2same','u_out_lag2'], axis=1, inplace=True)
test = test.drop(['id', 'breath_id','one','count','breath_id_lag','breath_id_lag2','breath_id_lagsame','breath_id_lag2same','u_out_lag2'], axis=1)

print('')




In [6]:
# This Scaler removes the median and scales the data according to the quantile range (defaults to IQR: Interquartile Range). The IQR is the range between the 1st quartile (25th quantile) and the 3rd quartile (75th quantile).

RS = RobustScaler()
train = RS.fit_transform(train)
test = RS.transform(test)

In [7]:
train = train.reshape(-1, 80, train.shape[-1])
test  = test.reshape(-1, 80, train.shape[-1])

In [8]:
train.shape, test.shape

((75450, 80, 25), (50300, 80, 25))

In [9]:
gc.collect()

148

In [10]:
EPOCH = 300
BATCH_SIZE = 512
FOLDS = 6
tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect()

tpu_strategy = tf.distribute.experimental.TPUStrategy(tpu)

with tpu_strategy.scope():
    kf = KFold(n_splits = FOLDS, shuffle=True, random_state=2021)
    test_preds = []
    
    for fold, (trn_idx, val_idx) in enumerate(kf.split(train, targets)):
        print('-'*15, '>', f"Fold {fold+1}", '<', '-'*15)
        
        xtrain, xvalid = train[trn_idx], train[val_idx]
        ytrain, yvalid = targets[trn_idx], targets[val_idx]
        
        model = keras.models.Sequential([
            keras.layers.Input(shape=train.shape[-2:]),
            keras.layers.Bidirectional(keras.layers.LSTM(450, return_sequences=True)),
            keras.layers.Bidirectional(keras.layers.LSTM(350, return_sequences=True)),
            keras.layers.Bidirectional(keras.layers.LSTM(250, return_sequences=True)),
            keras.layers.Bidirectional(keras.layers.LSTM(150, return_sequences=True)),
            keras.layers.Bidirectional(keras.layers.LSTM(100, return_sequences=True)),
            keras.layers.Dense(50, activation='selu'),
            keras.layers.Dense(1),
        ])
        
        model.compile(optimizer="adam", loss="mae")
        
        scheduler = ExponentialDecay(1e-3, 400*((len(train)*0.8)/BATCH_SIZE), 1e-5)
        lr = LearningRateScheduler(scheduler, verbose=1)
        
        model.fit(xtrain, ytrain,
                  validation_data=(xvalid, yvalid),
                  epochs=EPOCH,
                  batch_size=BATCH_SIZE,
                  callbacks=[lr]
                 )
        test_preds.append(model.predict(test).squeeze().reshape(-1, 1).squeeze())
    

2021-10-09 19:46:29.520749: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-10-09 19:46:29.524281: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /opt/conda/lib
2021-10-09 19:46:29.524319: W tensorflow/stream_executor/cuda/cuda_driver.cc:326] failed call to cuInit: UNKNOWN ERROR (303)
2021-10-09 19:46:29.524357: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (931e7ab98537): /proc/driver/nvidia/version does not exist
2021-10-09 19:46:29.527761: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operation

--------------- > Fold 1 < ---------------
Epoch 1/300

Epoch 00001: LearningRateScheduler reducing learning rate to tf.Tensor(0.001, shape=(), dtype=float32).
Epoch 2/300

Epoch 00002: LearningRateScheduler reducing learning rate to tf.Tensor(0.0009997559, shape=(), dtype=float32).
Epoch 3/300

Epoch 00003: LearningRateScheduler reducing learning rate to tf.Tensor(0.0009995119, shape=(), dtype=float32).
Epoch 4/300

Epoch 00004: LearningRateScheduler reducing learning rate to tf.Tensor(0.0009992679, shape=(), dtype=float32).
Epoch 5/300

Epoch 00005: LearningRateScheduler reducing learning rate to tf.Tensor(0.0009990239, shape=(), dtype=float32).
Epoch 6/300

Epoch 00006: LearningRateScheduler reducing learning rate to tf.Tensor(0.00099878, shape=(), dtype=float32).
Epoch 7/300

Epoch 00007: LearningRateScheduler reducing learning rate to tf.Tensor(0.0009985362, shape=(), dtype=float32).
Epoch 8/300

Epoch 00008: LearningRateScheduler reducing learning rate to tf.Tensor(0.0009982925, 

In [11]:
output = pd.read_csv(path + 'sample_submission.csv')
output.pressure = sum(test_preds) / FOLDS
output.to_csv('submission.csv', index=False)