In [None]:
import pandas as pd
import numpy as np

# Explicit 32-bit dtypes for the columns read from the CSV files.
dtype = {
    'id': np.int32,
    'breath_id': np.int32,
    'R': np.float32,
    'C': np.float32,
    'time_step': np.float32,
    'u_in': np.float32,
    'u_out': np.float32,
    'pressure': np.float32,
}

# Training rows, indexed by breath_id so that all rows of one breath share an index value.
df = pd.read_csv(
    '/kaggle/input/ventilator-pressure-prediction/train.csv',
    dtype=dtype,
    index_col=['breath_id'],
)

# id - globally-unique time step identifier across an entire file
# breath_id - globally-unique identifier of the breath each time step belongs to
# 
# R - lung attribute indicating how restricted the airway is (in cmH2O/L/S). 
# Physically, this is the change in pressure per change in flow (air volume per time). 
# Intuitively, one can imagine blowing up a balloon through a straw. 
# We can change R by changing the diameter of the straw, with higher R being harder to blow.
# 
# 
# C - lung attribute indicating how compliant the lung is (in mL/cmH2O). 
# Physically, this is the change in volume per change in pressure.
# Intuitively, one can imagine the same balloon example. 
# We can change C by changing the thickness of the balloon’s latex, with higher C having thinner latex and easier to blow.
# 
# time_step - the actual time stamp.
# 
# u_in - the control input for the inspiratory solenoid valve. Ranges from 0 to 100.
# u_out - the control input for the expiratory solenoid valve. Either 0 or 1.
# 
# pressure - the airway pressure measured in the respiratory circuit, measured in cmH2O.

# The row id and the timestamp are not used as model inputs: remove both columns.
df = df.drop(['id', 'time_step'], axis='columns')
df

In [None]:
# Total number of missing values over all columns of the training frame
df.isnull().sum(axis=0).sum()

In [None]:
# Name of the target column
graal = 'pressure'

# Every remaining column is a model input, kept in the frame's column order
features = list(df.columns)
features.remove(graal)
features

In [None]:
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = (20, 6)

# First 1200 rows, with breath_id moved back from the index to a column;
# each selected signal is drawn in its own subplot.
df_graph = df.head(1200).reset_index()
plot_columns = ['R', 'C',  'u_out', 'u_in', 'pressure']
df_graph[plot_columns].plot(subplots=True)

In [None]:
# Element-wise ratio u_in / pressure for the rows currently held in df_graph
df_graph['div'] = df_graph['u_in'].div(df_graph['pressure'])
df_graph['div'].plot()

In [None]:
# Zoom in on the first 80 rows (presumably a single breath — TODO confirm)
df_graph = df.head(80).reset_index()
df_graph[['pressure', 'u_in']].plot()

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Two independent [0, 1] scalers: one for the input columns and one for the target,
# so that the target scaling can be inverted on its own later.
scalerX = MinMaxScaler(feature_range=(0, 1))
scalerY = MinMaxScaler(feature_range=(0, 1))

# NOTE(review): both scalers are fitted on every row of df, i.e. before the
# train/test split performed in a later cell.
scaled_features = scalerX.fit_transform(df[features])
df_scaled = pd.DataFrame(scaled_features, columns=features, index=df.index)
df_scaled[graal] = scalerY.fit_transform(df[[graal]])
df_scaled

In [None]:
from sklearn.preprocessing import MinMaxScaler

def split(df1):
    """Turn a frame into one 2-D array per distinct index value, stacked together.

    Rows sharing an index value form one group (groups come out ordered by
    sorted index value). Each group is converted with ``DataFrame.to_numpy``
    and the list of per-group arrays is handed to ``np.array``.

    Returns:
        An array of shape (n_groups, rows_per_group, n_columns) when every
        group has the same number of rows.
    """
    per_group = [group.to_numpy() for _, group in df1.groupby(df1.index)]
    return np.array(per_group)

# Row position of the train/test cut: the first 92 % of the rows are used for training.
train_size = int(len(df_scaled) * 0.92)

# A cut in the middle of a breath would leave that breath with fewer rows on each side,
# and split() could then no longer stack the groups into a regular 3-D array.
# Move the cut back to the first row of the breath it falls in; this is a no-op when
# the cut is already on a breath boundary.
# (assumes the rows of one breath are contiguous, as the positional split already does)
while 0 < train_size < len(df_scaled) and df_scaled.index[train_size] == df_scaled.index[train_size - 1]:
    train_size -= 1

df_train, df_test = df_scaled.iloc[:train_size], df_scaled.iloc[train_size:]

# One array per breath: inputs and target are split the same way so they stay aligned.
X_train, X_test = split(df_train[features]), split(df_test[features])
y_train, y_test = split(df_train[[graal]]), split(df_test[[graal]])

print('train :', X_train.shape, ' -> ', y_train.shape)
print('test :', X_test.shape, ' -> ', y_test.shape)

In [None]:
import numpy as np

import keras_tuner
from tensorflow.keras.optimizers import Adam, Adadelta
import tensorflow as tf

from tensorflow.signal import fft, ifft, rfft, irfft
from keras.models import Sequential
from keras.layers import Dense, Conv1D, Conv1DTranspose, BatchNormalization, LayerNormalization, Dropout

def build_model(ksize, kernel, dense, dropout):
    """Build the (uncompiled) 1-D convolutional encoder/decoder regressor.

    Args:
        ksize: kernel size (window length) of every Conv1D / Conv1DTranspose layer.
        kernel: number of filters of every convolutional layer.
        dense: number of Dense(1) layers in the head; any value below 2 gives
            only the final Dense(1) layer.
        dropout: dropout rate applied before every Dense layer.

    Returns:
        A keras ``Sequential`` model whose input shape is ``X_train.shape[1:]``.
    """
    input_shape = X_train.shape[1:]
    activation = 'relu'
    n_stages = 4  # number of stride-2 stages down, and again back up

    model = Sequential()

    # Encoder: every stage downsamples the time axis by 2 (stride 2, 'same' padding).
    for stage in range(n_stages):
        # Only the first layer of a Sequential model has to declare the input shape.
        first_layer_args = {'input_shape': input_shape} if stage == 0 else {}
        model.add(Conv1D(kernel, ksize, padding='same', strides=2, activation=activation, **first_layer_args))
        model.add(LayerNormalization())

    # Decoder: every stage upsamples the time axis by 2. The original length is only
    # restored exactly when the input length is a multiple of 2 ** n_stages.
    for _ in range(n_stages):
        model.add(Conv1DTranspose(kernel, ksize, padding='same', strides=2, activation=activation))

    # Optional extra Dense(1) layers; the rate now comes from the `dropout` argument
    # (it used to be hard-coded to 0.1, which made the tuned hyperparameter a no-op).
    for _ in range(1, dense):
        model.add(Dropout(dropout))
        model.add(Dense(1))

    # Output layer: one value per time step.
    model.add(Dropout(dropout))
    model.add(Dense(1))
    return model

# Build one small configuration and print its layers
model = build_model(ksize=10, kernel=10, dense=2, dropout=0.1)

model.summary()

In [None]:
# Adam learning rate shared by the tuner trials and the final training
learning_rate = 1.8e-4

def tuner_model(hp):
    """Model factory handed to keras_tuner.

    Draws ``ksize``, ``kernel``, ``dense`` and ``dropout`` from fixed lists of
    choices through ``hp``, builds the network with ``build_model`` and compiles
    it with Adam (``learning_rate``) and a mean-absolute-error loss.
    """
    model = build_model(
        ksize=hp.Choice('ksize', [2, 5, 7]),
        kernel=hp.Choice('kernel', [64, 128, 256]),
        dense=hp.Choice('dense', [1, 2, 3]),
        dropout=hp.Choice('dropout', [0.1, 0.2, 0.3, 0.5]),
    )
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mean_absolute_error')
    return model

! rm -Rf untitled_project/
tuner = keras_tuner.BayesianOptimization(tuner_model, objective='val_loss', max_trials=30)

tuner.search(X_train, y_train, validation_data=(X_test, y_test), epochs=1)
tuner.results_summary()

best_model = tuner.get_best_models()[0]

In [None]:
# Best hyperparameters recorded from a tuner run:
#   ksize: 5
#   kernel: 256
#   dense: 1
#   dropout: 0.1
#   Score: 0.009928727522492409

# Use the tuned model when the search cell was run in this session; otherwise
# build one from hard-coded hyperparameters.
# NOTE(review): the fallback uses ksize=7 whereas the values recorded above say
# ksize: 5 — confirm which one is intended.
try:
    model = best_model
except NameError:
    model = build_model(7, 256, 1, 0.1)

model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mean_absolute_error')
model.summary()

In [None]:
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

# File that receives the best model seen during training
model_file = 'checkpoint.h5'

# Multiply the learning rate by 0.9 after 2 epochs without val_loss improvement
rlrop = ReduceLROnPlateau(monitor='val_loss', factor=0.9, patience=2, verbose=1)

# Save the whole model (not only the weights) each time val_loss improves
checkpoint = ModelCheckpoint(
    model_file,
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=False,
    save_freq='epoch',
    mode='auto',
    verbose=0,
)

# Stop training after 4 epochs without val_loss improvement
early = EarlyStopping(monitor='val_loss', min_delta=0, patience=4, mode='auto')

callbacks = [rlrop, checkpoint, early]

hist = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    batch_size=32,
    epochs=100,
    callbacks=callbacks,
    verbose=1,
)  # val_loss: 0.0052

In [None]:
import matplotlib.pyplot as plt

plt.figure(figsize=(10, 5))

# Training and validation loss per epoch (the compiled loss is the mean absolute error)
for history_key, curve_label in (('loss', 'mean absolute error'), ('val_loss', 'val mean absolute error')):
    plt.plot(hist.history[history_key], label=curve_label)

plt.xlabel('Epoch')
plt.ylabel('Metric')
plt.legend(loc="upper left")
plt.show()

In [None]:
from tensorflow import keras

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error

# Reload the model stored in the checkpoint file
model = keras.models.load_model(model_file)

# Predict the held-out sequences, flatten to one row per time step,
# then map the predictions back to the original pressure scale.
y_test_pred = np.array(model.predict(X_test, verbose=1))
n_outputs = y_test_pred.shape[2]
y_test_pred = scalerY.inverse_transform(y_test_pred.reshape(-1, n_outputs))

In [None]:
# Held-out rows back in their original units, next to the prediction and its error.
# Assigning y_test_pred row by row relies on the predictions being in the same order
# as the rows of df_test.
unscaled_features = scalerX.inverse_transform(df_test[features])
df_test_pred = pd.DataFrame(unscaled_features, columns=features, index=df_test.index)
df_test_pred['pressure'] = scalerY.inverse_transform(df_test[[graal]])
df_test_pred['pred'] = y_test_pred
df_test_pred['diff'] = df_test_pred['pressure'].sub(df_test_pred['pred'])
df_test_pred

In [None]:
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = (20,6)

# First 1200 held-out rows
df_graph = df_test_pred.head(1200).reset_index()

# Measured against predicted pressure, then the residual on a second figure
df_graph[['pressure', 'pred']].plot()
df_graph[['diff']].plot()

In [None]:
# Mean absolute error between measured and predicted pressure on the held-out rows
# (despite its name, trainScore holds the test score)
trainScore = mean_absolute_error(df_test_pred['pressure'], df_test_pred['pred'])
print('Test Score: %.2f MAE' % trainScore)

In [None]:
# Same explicit 32-bit dtypes as used for the training file
dtype = {
    'id': np.int32,
    'breath_id': np.int32,
    'R': np.float32,
    'C': np.float32,
    'time_step': np.float32,
    'u_in': np.float32,
    'u_out': np.float32,
    'pressure': np.float32,
}

# Rows to predict, indexed by breath_id, without the id and timestamp columns
df_score = pd.read_csv(
    '/kaggle/input/ventilator-pressure-prediction/test.csv',
    dtype=dtype,
    index_col=['breath_id'],
).drop(columns=['id', 'time_step'])
df_score

In [None]:
from tensorflow import keras

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error

# Reload the model stored in the checkpoint file
model = keras.models.load_model(model_file)

# Scale the competition inputs with the scaler already fitted on the training data.
# Using transform() instead of fit_transform() keeps the scaling identical to what the
# model was trained on and leaves scalerX untouched for the other cells.
# Selecting `features` explicitly guarantees the column order the scaler was fitted with.
df_scaled_score = pd.DataFrame(scalerX.transform(df_score[features]), columns=features, index=df_score.index)
X_score = split(df_scaled_score)

# Predict, flatten to one row per time step and map back to the original pressure scale
y_score_pred = np.array(model.predict(X_score, verbose=1))
y_score_pred = scalerY.inverse_transform(y_score_pred.reshape(-1, y_score_pred.shape[-1]))

In [None]:
# Fill the sample submission with the predictions and write it out.
# Assumes the rows of the sample file are in the same order as y_score_pred — TODO confirm.
submission = pd.read_csv(
    '/kaggle/input/ventilator-pressure-prediction/sample_submission.csv'
)
submission['pressure'] = y_score_pred
submission.to_csv('submission.csv', index=False)

# Score: 0.4376