# Import Packages

In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import os
import random
from sklearn.model_selection import train_test_split

# Read Dataset

In [2]:
# Load the training split; expects train.csv in the current working directory.
train = pd.read_csv( "train.csv")
train.head()

Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out,pressure
0,1,1,20,50,0.0,0.083334,0,5.837492
1,2,1,20,50,0.033652,18.383041,0,5.907794
2,3,1,20,50,0.067514,22.509278,0,7.876254
3,4,1,20,50,0.101542,22.808822,0,11.742872
4,5,1,20,50,0.135756,25.35585,0,12.234987


In [3]:
np.unique(train['R'])

array([ 5, 20, 50], dtype=int64)

In [4]:
# Load the test split; it has the same columns as train except the `pressure` target.
# NOTE(review): head(n=400) asks for 400 rows; a plain head() would keep the output short.
test= pd.read_csv("test.csv")
test.head(n=400)

Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out
0,1,0,5,20,0.000000,0.000000,0
1,2,0,5,20,0.031904,7.515046,0
2,3,0,5,20,0.063827,14.651675,0
3,4,0,5,20,0.095751,21.230610,0
4,5,0,5,20,0.127644,26.320956,0
...,...,...,...,...,...,...,...
395,396,31,20,50,2.552009,4.977700,1
396,397,31,20,50,2.586213,4.981205,1
397,398,31,20,50,2.620192,4.984142,1
398,399,31,20,50,2.654192,4.986621,1


In [5]:
# Row count per breath_id. The reshape cells further down rely on every breath
# contributing exactly 80 rows.
np.unique(train['breath_id'], return_counts = True)

(array([     1,      2,      3, ..., 125743, 125745, 125749], dtype=int64),
 array([80, 80, 80, ..., 80, 80, 80], dtype=int64))

In [6]:
# GroupBy.head() returns the first 5 rows of *each* breath, not of the whole frame.
# NOTE(review): `final_dataset` is not used again in the visible notebook.
final_dataset = train.groupby('breath_id')
final_dataset.head()

Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out,pressure
0,1,1,20,50,0.000000,0.083334,0,5.837492
1,2,1,20,50,0.033652,18.383041,0,5.907794
2,3,1,20,50,0.067514,22.509278,0,7.876254
3,4,1,20,50,0.101542,22.808822,0,11.742872
4,5,1,20,50,0.135756,25.355850,0,12.234987
...,...,...,...,...,...,...,...,...
6035920,6035921,125749,50,10,0.000000,6.030572,0,3.939334
6035921,6035922,125749,50,10,0.033187,25.504196,0,5.345377
6035922,6035923,125749,50,10,0.066365,21.614707,0,9.563505
6035923,6035924,125749,50,10,0.099784,16.266744,0,15.117375


In [7]:
# Same grouping as the previous cell, with the key passed as a one-element list.
# NOTE(review): `data` is not used again in the visible notebook; this cell looks redundant.
data = train.groupby(["breath_id"])
data.head()

Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out,pressure
0,1,1,20,50,0.000000,0.083334,0,5.837492
1,2,1,20,50,0.033652,18.383041,0,5.907794
2,3,1,20,50,0.067514,22.509278,0,7.876254
3,4,1,20,50,0.101542,22.808822,0,11.742872
4,5,1,20,50,0.135756,25.355850,0,12.234987
...,...,...,...,...,...,...,...,...
6035920,6035921,125749,50,10,0.000000,6.030572,0,3.939334
6035921,6035922,125749,50,10,0.033187,25.504196,0,5.345377
6035922,6035923,125749,50,10,0.066365,21.614707,0,9.563505
6035923,6035924,125749,50,10,0.099784,16.266744,0,15.117375


# Feature Engineering

In [8]:
# Engineer per-breath features on the training frame.
#
# Column names and column order are kept exactly as before because the saved
# model/weights loaded later expect this 43-feature layout.

# Grouped views of the two control signals. `u_in` / `u_out` are never modified
# in this cell, so the views can be reused instead of re-grouping for every feature.
u_in_by_breath = train.groupby('breath_id')['u_in']
u_out_by_breath = train.groupby('breath_id')['u_out']

# Final u_in value of each breath, broadcast to every row of that breath.
train['last_value_u_in'] = u_in_by_breath.transform('last')

# Past (lagN) and future (lag_backN) values within the same breath, N = 1..3.
for step in (1, 2, 3):
    train[f'u_in_lag{step}'] = u_in_by_breath.shift(step)
    train[f'u_out_lag{step}'] = u_out_by_breath.shift(step)
    train[f'u_in_lag_back{step}'] = u_in_by_breath.shift(-step)
    train[f'u_out_lag_back{step}'] = u_out_by_breath.shift(-step)
# Shifts leave NaN at the breath edges; treat "no neighbour" as 0.
train = train.fillna(0)

# Combined R/C label, one-hot encoded below together with R and C.
train['R__C'] = train["R"].astype(str) + '__' + train["C"].astype(str)

# max value of u_in and u_out for each breath
train['breath_id__u_in__max'] = u_in_by_breath.transform('max')
train['breath_id__u_out__max'] = u_out_by_breath.transform('max')

# difference between consecutive values
# (lags were already filled with 0, so at the first rows of a breath the diff
# equals the raw value)
for step in (1, 2):
    train[f'u_in_diff{step}'] = train['u_in'] - train[f'u_in_lag{step}']
    train[f'u_out_diff{step}'] = train['u_out'] - train[f'u_out_lag{step}']
# from here: https://www.kaggle.com/yasufuminakama/ventilator-pressure-lstm-starter
# NOTE(review): these two assignments create NEW columns `u_in_diff` / `u_out_diff`
# (0 where time_step == 0, NaN elsewhere) rather than zeroing `u_in_diff1` /
# `u_out_diff1`. They are kept as-is because dropping or renaming them would change
# the feature count the saved weights were trained with — confirm before changing.
is_first_step = train['time_step'] == 0
train.loc[is_first_step, 'u_in_diff'] = 0
train.loc[is_first_step, 'u_out_diff'] = 0

# difference between the current value of u_in and the max / mean value within the breath
train['breath_id__u_in__diffmax'] = u_in_by_breath.transform('max') - train['u_in']
train['breath_id__u_in__diffmean'] = u_in_by_breath.transform('mean') - train['u_in']

# OHE
# dtype is pinned to uint8: newer pandas defaults get_dummies to bool, and a frame
# mixing bool with numeric columns converts to an object array in np.array(...),
# which Keras cannot consume.
for column in ('R', 'C', 'R__C'):
    one_hot = pd.get_dummies(train[column], prefix=column, dtype=np.uint8)
    train = train.join(one_hot).drop(columns=[column])

# https://www.kaggle.com/c/ventilator-pressure-prediction/discussion/273974
train['u_in_cumsum'] = u_in_by_breath.cumsum()
train['time_step_cumsum'] = train.groupby('breath_id')['time_step'].cumsum()

In [9]:

# Engineer the same per-breath features on the test frame. This cell mirrors the
# training feature cell; column names and order must stay identical to it so both
# arrays feed the same model.

# Grouped views of the two control signals. `u_in` / `u_out` are never modified
# in this cell, so the views can be reused instead of re-grouping for every feature.
u_in_by_breath = test.groupby('breath_id')['u_in']
u_out_by_breath = test.groupby('breath_id')['u_out']

# Final u_in value of each breath, broadcast to every row of that breath.
test['last_value_u_in'] = u_in_by_breath.transform('last')

# Past (lagN) and future (lag_backN) values within the same breath, N = 1..3.
for step in (1, 2, 3):
    test[f'u_in_lag{step}'] = u_in_by_breath.shift(step)
    test[f'u_out_lag{step}'] = u_out_by_breath.shift(step)
    test[f'u_in_lag_back{step}'] = u_in_by_breath.shift(-step)
    test[f'u_out_lag_back{step}'] = u_out_by_breath.shift(-step)
# Shifts leave NaN at the breath edges; treat "no neighbour" as 0.
test = test.fillna(0)
test['R__C'] = test["R"].astype(str) + '__' + test["C"].astype(str)

# max value of u_in and u_out for each breath
test['breath_id__u_in__max'] = u_in_by_breath.transform('max')
test['breath_id__u_out__max'] = u_out_by_breath.transform('max')

# difference between consecutive values (lags were already filled with 0)
for step in (1, 2):
    test[f'u_in_diff{step}'] = test['u_in'] - test[f'u_in_lag{step}']
    test[f'u_out_diff{step}'] = test['u_out'] - test[f'u_out_lag{step}']
# NOTE(review): these two assignments create NEW columns `u_in_diff` / `u_out_diff`
# (0 where time_step == 0, NaN elsewhere) rather than zeroing `u_in_diff1` /
# `u_out_diff1`. They are kept as-is so the test features keep the same layout as
# the training features — confirm before changing.
is_first_step = test['time_step'] == 0
test.loc[is_first_step, 'u_in_diff'] = 0
test.loc[is_first_step, 'u_out_diff'] = 0

# difference between the current value of u_in and the max / mean value within the breath
test['breath_id__u_in__diffmax'] = u_in_by_breath.transform('max') - test['u_in']
test['breath_id__u_in__diffmean'] = u_in_by_breath.transform('mean') - test['u_in']

# One-hot encode R, C and their combination.
# dtype is pinned to uint8: newer pandas defaults get_dummies to bool, and a frame
# mixing bool with numeric columns converts to an object array in np.array(...),
# which Keras cannot consume.
for column in ('R', 'C', 'R__C'):
    one_hot = pd.get_dummies(test[column], prefix=column, dtype=np.uint8)
    test = test.join(one_hot).drop(columns=[column])

# Running totals within each breath.
test['u_in_cumsum'] = u_in_by_breath.cumsum()
test['time_step_cumsum'] = test.groupby('breath_id')['time_step'].cumsum()

In [10]:
# NOTE(review): only the last expression of a cell is displayed, so this NaN count
# is computed and then discarded; only the shape below is shown.
train.isna().sum()
print(train.shape)

(6036000, 46)


In [11]:
# Model inputs: every column except the row/breath identifiers and, for the
# training frame, the `pressure` target.
features_train = train.drop(columns=['pressure', 'id', 'breath_id'])
features_test = test.drop(columns=['id', 'breath_id'])

In [12]:
print(features_train)

         time_step       u_in  u_out  last_value_u_in  u_in_lag1  u_out_lag1  \
0         0.000000   0.083334      0         4.987079   0.000000         0.0   
1         0.033652  18.383041      0         4.987079   0.083334         0.0   
2         0.067514  22.509278      0         4.987079  18.383041         0.0   
3         0.101542  22.808822      0         4.987079  22.509278         0.0   
4         0.135756  25.355850      0         4.987079  22.808822         0.0   
...            ...        ...    ...              ...        ...         ...   
6035995   2.504603   1.489714      1         1.482739   1.420711         1.0   
6035996   2.537961   1.488497      1         1.482739   1.489714         1.0   
6035997   2.571408   1.558978      1         1.482739   1.488497         1.0   
6035998   2.604744   1.272663      1         1.482739   1.558978         1.0   
6035999   2.638017   1.482739      1         1.482739   1.272663         1.0   

         u_in_lag_back1  u_out_lag_back

In [13]:
# The only NaNs left at this point come from the `u_in_diff` / `u_out_diff` columns,
# which were assigned only where time_step == 0; filling with 0 makes those two
# columns all-zero.
features_train = features_train.fillna(0)
features_test = features_test.fillna(0)

In [14]:
print(features_train)

         time_step       u_in  u_out  last_value_u_in  u_in_lag1  u_out_lag1  \
0         0.000000   0.083334      0         4.987079   0.000000         0.0   
1         0.033652  18.383041      0         4.987079   0.083334         0.0   
2         0.067514  22.509278      0         4.987079  18.383041         0.0   
3         0.101542  22.808822      0         4.987079  22.509278         0.0   
4         0.135756  25.355850      0         4.987079  22.808822         0.0   
...            ...        ...    ...              ...        ...         ...   
6035995   2.504603   1.489714      1         1.482739   1.420711         1.0   
6035996   2.537961   1.488497      1         1.482739   1.489714         1.0   
6035997   2.571408   1.558978      1         1.482739   1.488497         1.0   
6035998   2.604744   1.272663      1         1.482739   1.558978         1.0   
6035999   2.638017   1.482739      1         1.482739   1.272663         1.0   

         u_in_lag_back1  u_out_lag_back

In [15]:

# Regression target: one pressure value per row, in the same row order as features_train.
target = train['pressure']
# Sanity check: every feature column should report 0 missing values after the fill above.
features_train.isna().sum()

time_step                    0
u_in                         0
u_out                        0
last_value_u_in              0
u_in_lag1                    0
u_out_lag1                   0
u_in_lag_back1               0
u_out_lag_back1              0
u_in_lag2                    0
u_out_lag2                   0
u_in_lag_back2               0
u_out_lag_back2              0
u_in_lag3                    0
u_out_lag3                   0
u_in_lag_back3               0
u_out_lag_back3              0
breath_id__u_in__max         0
breath_id__u_out__max        0
u_in_diff1                   0
u_out_diff1                  0
u_in_diff2                   0
u_out_diff2                  0
u_in_diff                    0
u_out_diff                   0
breath_id__u_in__diffmax     0
breath_id__u_in__diffmean    0
R_5                          0
R_20                         0
R_50                         0
C_10                         0
C_20                         0
C_50                         0
R__C_20_

In [16]:
print(features_train)

         time_step       u_in  u_out  last_value_u_in  u_in_lag1  u_out_lag1  \
0         0.000000   0.083334      0         4.987079   0.000000         0.0   
1         0.033652  18.383041      0         4.987079   0.083334         0.0   
2         0.067514  22.509278      0         4.987079  18.383041         0.0   
3         0.101542  22.808822      0         4.987079  22.509278         0.0   
4         0.135756  25.355850      0         4.987079  22.808822         0.0   
...            ...        ...    ...              ...        ...         ...   
6035995   2.504603   1.489714      1         1.482739   1.420711         1.0   
6035996   2.537961   1.488497      1         1.482739   1.489714         1.0   
6035997   2.571408   1.558978      1         1.482739   1.488497         1.0   
6035998   2.604744   1.272663      1         1.482739   1.558978         1.0   
6035999   2.638017   1.482739      1         1.482739   1.272663         1.0   

         u_in_lag_back1  u_out_lag_back

In [17]:
# Convert both feature frames to plain NumPy arrays of shape (rows, features).
features_train, features_test = np.array(features_train), np.array(features_test)
features_train.shape

(6036000, 43)

In [18]:
# Fold the flat row axis into (breaths, 80 time steps, features). This relies on
# each breath occupying 80 consecutive rows.
features_train = features_train.reshape(len(features_train) // 80, 80, features_train.shape[-1])
features_test = features_test.reshape(len(features_test) // 80, 80, features_test.shape[-1])
features_train.shape

(75450, 80, 43)

In [19]:
target = np.array(target)
target.shape

(6036000,)

In [20]:
# One row of 80 pressures per breath, so target lines up with the first two axes
# of features_train.
target = target.reshape(-1, 80)
target.shape

(75450, 80)

# Deep Learning Model

In [21]:
# Standardisation layer placed at the front of the model. axis=-1 keeps separate
# statistics per feature column; input_shape fixes sequences at 80 steps.
norm = tf.keras.layers.Normalization(input_shape = [80, features_train.shape[2],], axis = -1)
# Compute the mean/variance from the training features.
norm.adapt(features_train)

In [22]:
# Conv1D -> BiLSTM regressor that maps one breath (80 steps x features) to its
# 80 pressure values in a single shot.
my_model = tf.keras.Sequential([
    # Feature-wise standardisation adapted in the previous cell.
    norm,
    # Three conv/pool/batch-norm stages. Each unpadded kernel-3 convolution trims 2
    # steps and each pooling halves the length (80 -> 78 -> 39 -> 37 -> ... in the summary).
    tf.keras.layers.Conv1D(128, 3, activation = "relu"),
    tf.keras.layers.MaxPooling1D(),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv1D(256, 3, activation = "relu"),
    tf.keras.layers.MaxPooling1D(),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv1D(256, 3, activation = "relu"),
    tf.keras.layers.MaxPooling1D(),
    tf.keras.layers.BatchNormalization(),
    # Two stacked bidirectional LSTMs over the shortened sequence; both return the full sequence.
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(512, return_sequences = True)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(512, return_sequences = True)),
    # Average over the remaining time axis, leaving one vector per breath.
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dropout(0.5),
    # One output unit per time step: all 80 pressures are predicted from the pooled vector.
    tf.keras.layers.Dense(80,)
])
# Optimise mean absolute error; RMSE is reported as an additional metric.
my_model.compile(optimizer=tf.optimizers.Adam(learning_rate=0.001), loss= "mae", metrics =[tf.keras.metrics.RootMeanSquaredError()])

In [23]:
# Restore weights from a previous run. This needs an existing weights.hdf5 whose
# layers match the model defined above, so the cell fails on a fresh checkout.
# NOTE(review): `my_model` is rebound by a later load_model() call, so these weights
# are not the ones that get evaluated — confirm this cell is still needed.
my_model.load_weights('weights.hdf5')

In [24]:
my_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 normalization (Normalizatio  (None, 80, 43)           87        
 n)                                                              
                                                                 
 conv1d (Conv1D)             (None, 78, 128)           16640     
                                                                 
 max_pooling1d (MaxPooling1D  (None, 39, 128)          0         
 )                                                               
                                                                 
 batch_normalization (BatchN  (None, 39, 128)          512       
 ormalization)                                                   
                                                                 
 conv1d_1 (Conv1D)           (None, 37, 256)           98560     
                                                        

# Training Model

In [25]:
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint
# Overwrite weights.hdf5 only when val_loss improves.
checkpoint = ModelCheckpoint('weights.hdf5', monitor='val_loss', save_best_only=True)
from tensorflow.keras.callbacks import ReduceLROnPlateau
# NOTE(review): patience=200 (LR reduction) and patience=250 (early stopping) both
# exceed the 100 epochs requested in the fit cell, so neither callback can trigger
# in that run — confirm the intended values.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', mode='min', verbose=0, patience=200,factor=0.2)
es = EarlyStopping(monitor = 'val_loss', patience = 250)

In [26]:
model1 = tf.keras.models.load_model('Ventilator_Pressure_Prediction1.h5')

In [27]:
# Re-compile the reloaded model with the same optimizer/loss/metric settings used for my_model.
# NOTE(review): compiling after load_model() presumably discards any optimizer state
# stored in the .h5 file — confirm a fresh Adam is intended.
model1.compile(optimizer=tf.optimizers.Adam(learning_rate=0.001), loss= "mae", metrics =[tf.keras.metrics.RootMeanSquaredError()])

In [30]:
# Continue training the reloaded model. Per the Keras docs, validation_split holds
# out the *last* 20% of the samples (taken before shuffling) as validation data.
# NOTE(review): the execution counter jumps from 27 to 30, so this cell was re-run
# out of sequence; check the notebook still works with Restart & Run All.
history1 = model1.fit(features_train, target, validation_split = 0.2, epochs = 100, batch_size = 512,
                callbacks = [reduce_lr,checkpoint,es])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
import matplotlib.pyplot as plt
# Training vs. validation loss per epoch for the fit above.
fig, ax = plt.subplots(figsize = (20, 5))
for curve in ('loss', 'val_loss'):
    ax.plot(history1.history[curve], label = curve)
ax.legend()

In [None]:
from tensorflow import keras
# NOTE(review): this rebinds `my_model` to a model read from
# Ventilator_Pressure_Prediction.h5. The model trained by fit() above is `model1`
# (loaded from Ventilator_Pressure_Prediction1.h5), so every evaluation, plot and
# prediction below uses this file's model, not the freshly trained one — confirm intended.
my_model = keras.models.load_model('Ventilator_Pressure_Prediction.h5')

In [None]:
# Loss and RMSE on the full training array; this is not a held-out score.
my_model.evaluate(features_train, target)

In [None]:
y_pred = my_model.predict(features_train)

In [None]:
target[0]

In [None]:
y_pred[0]

# Visualizing the Model's Performance

In [None]:
def draw_result(start, end, target, y_pred):
    """Overlay actual and predicted pressure for rows ``start`` to ``end - 1``.

    Both arrays are sliced along their first axis and flattened, so the
    selected rows are drawn back-to-back on one shared x axis.

    Args:
        start: First row index to plot (inclusive).
        end: Last row index to plot (exclusive).
        target: Array of actual pressure values.
        y_pred: Array of predicted pressure values, laid out like ``target``.
    """
    actual = np.reshape(target[start:end], -1)
    predicted = np.reshape(y_pred[start:end], -1)
    fig, ax = plt.subplots(figsize = (20, 7))
    # Thick line for the ground truth, thin line on top for the prediction.
    ax.plot(actual, linewidth=5, label = "actual Pressure values")
    ax.plot(predicted, linewidth=2, label = "predict Pressure values")
    ax.legend()

In [None]:
draw_result(0, 50, target, y_pred)

In [None]:
draw_result(50, 110, target, y_pred)

In [None]:
draw_result(110, 160, target, y_pred)

In [None]:
draw_result(160, 220, target, y_pred)

In [None]:
# Predicted vs. actual pressure for the first 500 rows of `target` / `y_pred`.
# The line is the y = x reference: points on it are perfect predictions.
actual_flat = np.reshape(target[:500], -1)
predicted_flat = np.reshape(y_pred[:500], -1)
plt.figure(figsize = (20, 7))
plt.plot(actual_flat, actual_flat, linewidth=3, label = "actual Pressure values")
plt.scatter(actual_flat, predicted_flat, c = 'g', label = "predict Pressure values")
plt.legend()

In [None]:
def create_table(target, y_pred):
    """Build a flat actual-vs-predicted comparison table.

    Args:
        target: Array-like of actual pressure values; any shape, flattened here.
        y_pred: Array-like of predicted pressure values with the same number of
            elements as ``target``.

    Returns:
        A DataFrame with one row per element and three columns: the actual
        value, the predicted value and their absolute difference.
    """
    actual = np.reshape(target, -1)
    predicted = np.reshape(y_pred, -1)
    absolute_error = np.abs(actual - predicted)
    columns = {
        "actual Pressure values": actual,
        "predict Pressure values": predicted,
        "diff": absolute_error,
    }
    return pd.DataFrame(columns)

In [None]:
create_table(target, y_pred).head(40)

In [None]:
# NOTE(review): this writes `my_model` (loaded from Ventilator_Pressure_Prediction.h5)
# over Ventilator_Pressure_Prediction1.h5, the file `model1` was loaded from. The
# model trained by fit() is `model1`, which is not saved here — confirm intended.
my_model.save("Ventilator_Pressure_Prediction1.h5")

In [None]:
y_pred_test = my_model.predict(features_test, batch_size = 512)

In [None]:
print(y_pred_test.shape)

In [None]:
y_pred_test[0]

In [None]:
features_test.shape

In [None]:
y_pred_test = np.array(y_pred_test).reshape(-1,1)

In [None]:
y_pred_test

In [None]:
# Spot-check the first few predictions only. Iterating over the whole array
# prints one line per test row and floods the notebook output.
for i in y_pred_test[:10]:
    print(i)

In [None]:
submission = pd.DataFrame()

In [None]:
submission['id'] = test['id']

In [None]:
# y_pred_test was flattened to shape (n, 1) above. The assignment is positional, so
# it relies on the predictions still being in the same row order as `test`.
submission['pressure'] = y_pred_test

In [None]:
print(submission)

In [None]:
def csv_download_link(df, csv_file_name, delete_prompt=True):
    """Display a download link to load a data frame as csv from within a Jupyter notebook.

    Args:
        df: DataFrame to export; it is written without its index.
        csv_file_name: Path of the CSV file to create.
        delete_prompt: If True, wait for the user to press enter and then delete
            the file again; if False, leave the file on disk.
    """
    import os
    # Import `display` explicitly instead of relying on the name the notebook
    # kernel injects, so the helper also works when imported from a module.
    from IPython.display import FileLink, display

    df.to_csv(csv_file_name, index=False)
    display(FileLink(csv_file_name))
    if delete_prompt:
        # Blocks until the user confirms; the typed text itself is not needed.
        input('Press enter to delete the file after you have downloaded it.')
        os.remove(csv_file_name)

In [None]:
csv_download_link(submission, 'submission.csv')

In [None]:
from IPython.display import HTML
import base64
def create_download_link( df, title = "Download CSV file", filename = "data.csv"):
    """Return an HTML anchor that downloads ``df`` as a CSV file.

    The CSV text is embedded in the link itself as a base64 ``data:`` URI, so
    no file is written to disk.

    Args:
        df: DataFrame to export. The index is not written, matching
            ``csv_download_link`` and keeping the file to the frame's own columns.
        title: Link text shown in the notebook.
        filename: File name suggested to the browser for the download.

    Returns:
        An ``IPython.display.HTML`` object wrapping the anchor tag.
    """
    # index=False: the default would add the row index as an extra unnamed
    # first column in the downloaded file.
    csv_text = df.to_csv(index=False)
    payload = base64.b64encode(csv_text.encode()).decode()
    html = '<a download="{filename}" href="data:text/csv;base64,{payload}" target="_blank">{title}</a>'
    return HTML(html.format(payload=payload, title=title, filename=filename))

create_download_link(submission)