In [None]:
from IPython.display import YouTubeVideo
# Embed the YouTube video with this ID as the cell's output (900x700 player).
YOUTUBE_ID = 'jNL8Iyhvx3o'
YouTubeVideo(
    YOUTUBE_ID,
    width=900,
    height=700,
)

# Google Brain - Ventilator Pressure Prediction 
What do doctors do when a patient has trouble breathing? They use a ventilator to pump oxygen into a sedated patient's lungs via a tube in the windpipe. But mechanical ventilation is a clinician-intensive procedure, a limitation that was prominently on display during the early days of the COVID-19 pandemic. At the same time, developing new methods for controlling mechanical ventilators is prohibitively expensive, even before reaching clinical trials. High-quality simulators could reduce this barrier.


<img src="https://storage.googleapis.com/kaggle-competitions/kaggle/29594/logos/thumb76_76.png?t=2021-07-29-12-46-57" width="600px">



### Data Description

The ventilator data used in this competition was produced using a modified open-source ventilator connected to an artificial bellows test lung via a respiratory circuit. The diagram below illustrates the setup, with the two control inputs highlighted in green and the state variable (airway pressure) to predict in blue. The first control input is a continuous variable from 0 to 100 representing the percentage the inspiratory solenoid valve is open to let air into the lung (i.e., 0 is completely closed and no air is let in and 100 is completely open). The second control input is a binary variable representing whether the expiratory valve is open (1) or closed (0) to let air out.

In this competition, participants are given numerous time series of breaths and will learn to predict the airway pressure in the respiratory circuit during the breath, given the time series of control inputs.


<img src="https://raw.githubusercontent.com/google/deluca-lung/main/assets/2020-10-02%20Ventilator%20diagram.svg" width="800px">


### Files
* train.csv - the training set
* test.csv - the test set
* sample_submission.csv - a sample submission file in the correct format


### Columns
* id - globally-unique time step identifier across an entire file
* breath_id - globally-unique identifier for each breath
* R - lung attribute indicating how restricted the airway is (in cmH2O/L/S). Physically, this is the change in pressure per change in flow (air volume per time). Intuitively, one can imagine blowing up a balloon through a straw. We can change R by changing the diameter of the straw, with higher R being harder to blow.
* C - lung attribute indicating how compliant the lung is (in mL/cmH2O). Physically, this is the change in volume per change in pressure. Intuitively, one can imagine the same balloon example. We can change C by changing the thickness of the balloon’s latex, with higher C having thinner latex and easier to blow.
* time_step - the actual time stamp.
* u_in - the control input for the inspiratory solenoid valve. Ranges from 0 to 100.
* u_out - the control input for the expiratory solenoid valve. Either 0 or 1.
* pressure - the airway pressure measured in the respiratory circuit, measured in cmH2O.

### Dataset link

[Here](https://www.kaggle.com/c/ventilator-pressure-prediction/overview)


In [None]:
!pip install dataprep by

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import mean_squared_error
from dataprep.eda import *
from dataprep.eda import plot
from dataprep.eda import plot_correlation
from dataprep.eda import plot_missing
import plotly.express as px
import plotly.figure_factory as ff
import warnings
warnings.filterwarnings("ignore")
%matplotlib inline
sns.set(rc={'figure.figsize': [10, 10]}, font_scale=1.3)

In [None]:
# Read the competition's train and test CSVs from the Kaggle input directory.
df_train, df_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/ventilator-pressure-prediction/train.csv',
        '../input/ventilator-pressure-prediction/test.csv',
    )
)

In [None]:
df_train.head()

In [None]:
df_train.info()

In [None]:
df_test

In [None]:
df_train.info()

In [None]:
# Remove the row and breath identifier columns from both frames.
# Rebinding (instead of inplace=True) together with errors='ignore' keeps this
# cell safe to re-run: a second execution finds the columns already gone and
# simply does nothing, rather than raising KeyError.
ID_COLUMNS = ['id', 'breath_id']
df_train = df_train.drop(columns=ID_COLUMNS, errors='ignore')
df_test = df_test.drop(columns=ID_COLUMNS, errors='ignore')
print(df_train.head())
print("*************************************")
print(df_test.head())

In [None]:
# dataprep missing-value report for the training frame.
plot_missing(df_train)

In [None]:
# Same missing-value report for the test frame.
plot_missing(df_test)

In [None]:
# dataprep plot() called with only the frame: overview across all columns.
plot(df_train)

In [None]:
# The cells below call plot() with a column name to look at one column at a time.
plot(df_train, 'R')

In [None]:
plot(df_train, 'C')

In [None]:
plot(df_train, 'time_step')

In [None]:
plot(df_train, 'u_in')

In [None]:
plot(df_train, 'u_out')

In [None]:
# The target column.
plot(df_train, 'pressure')

In [None]:
# Full dataprep report for the training frame.
# NOTE: create_report is not imported by name in this notebook; it is only in
# scope through `from dataprep.eda import *`.
create_report(df_train)

In [None]:
# Full dataprep report for the test frame.
create_report(df_test)

In [None]:
# dataprep correlation plots across the training columns.
plot_correlation(df_train)

In [None]:
# Per-column skewness of the training frame.
df_train.skew()

In [None]:
# Per-column skewness of the test frame.
df_test.skew()

In [None]:
df_train

In [None]:
from sklearn.model_selection import train_test_split
x = df_train.drop('pressure', axis=1)
y = df_train['pressure']

In [None]:
x

In [None]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.30, random_state=42)

In [None]:
print(x_train.shape)
print(x_test.shape)
print(y_train.shape)
print(y_test.shape)

In [None]:
from sklearn.preprocessing import StandardScaler
# Fit the scaler on the training split only, then apply that same fitted
# scaler to both splits so the held-out rows do not influence the statistics.
sc = StandardScaler().fit(x_train)

x_train, x_test = sc.transform(x_train), sc.transform(x_test)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

In [None]:
# Shape of the scaled training matrix.
x_train.shape

In [None]:
# Second dimension = number of input features; the model cell uses this value
# as the first layer's input_shape.
x_train.shape[1]

In [None]:
# Fully connected regression network: ReLU Dense layers with interleaved
# Dropout, ending in a single output unit with no activation.
# The input width is taken from the number of columns in x_train.
model = Sequential([
    Dense(1024, activation='relu', kernel_initializer='normal',
          input_shape=[x_train.shape[1]]),
    Dense(1024, activation='relu', kernel_initializer='normal'),
    layers.Dropout(.2),
    Dense(2024, activation='relu', kernel_initializer='normal'),
    Dense(2024, activation='relu', kernel_initializer='normal'),
    layers.Dropout(.2),
    Dense(2024, activation='relu', kernel_initializer='normal'),
    Dense(2024, activation='relu', kernel_initializer='normal'),
    layers.Dropout(.1),
    Dense(700, activation='relu', kernel_initializer='normal'),
    layers.Dropout(.1),
    Dense(512, activation='relu', kernel_initializer='normal'),
    Dense(64, activation='relu', kernel_initializer='normal'),
    Dense(32, activation='relu', kernel_initializer='normal'),
    Dense(1, kernel_initializer='normal'),
])

In [None]:
model.summary()

In [None]:
from tensorflow.keras.utils import plot_model
plot_model(model, 'model.png', show_shapes=True)

In [None]:
# Train against mean squared error using Adam with its default settings.
model.compile(
    optimizer=Adam(),
    loss='mean_squared_error',
)

In [None]:
# Training callbacks, all monitoring validation loss.

# Multiply the learning rate by 0.70 after 5 epochs without improvement.
#  * min_lr has to sit below the optimizer's starting rate (Adam defaults to
#    1e-3); a floor above the starting rate means the callback never lowers it.
#  * patience is shorter than EarlyStopping's so at least one reduction is
#    attempted before training is stopped.
lrd = ReduceLROnPlateau(monitor='val_loss',
                        patience=5,
                        verbose=1,
                        factor=0.70,
                        min_lr=1e-6)

# Only overwrite model.h5 when val_loss improves, so the file on disk is the
# best model seen rather than whatever the final epoch produced.
mcp = ModelCheckpoint('model.h5', monitor='val_loss', save_best_only=True)

# Stop after 10 epochs without improvement and roll the in-memory model back
# to its best weights, so later evaluate()/predict() calls use that model.
es = EarlyStopping(monitor='val_loss', verbose=1, patience=10,
                   restore_best_weights=True)

In [None]:
%time
history = model.fit(x_train, y_train, validation_split=0.2, batch_size=128, epochs=200, callbacks=[lrd, mcp, es])

In [None]:
# Loss on the held-out split (mean squared error, as set in compile()).
model.evaluate(x_test, y_test)

In [None]:
# Predictions for the held-out split.
y_pred = model.predict(x_test)
y_pred

In [None]:
# Spot check: one prediction ...
y_pred[10]

In [None]:
# ... against the true value at the same position.
y_test.iloc[10]

In [None]:
from sklearn.metrics import r2_score
# R^2 of the predictions on the held-out split.
r2_score(y_test, y_pred)

In [None]:
# Names of the per-epoch series recorded by fit().
history.history.keys()

In [None]:
import matplotlib.pyplot as plt
# Learning curves: training loss and validation loss per epoch.
for series_name in ('loss', 'val_loss'):
    plt.plot(history.history[series_name])
plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

In [None]:
# The network was trained on features transformed by the fitted StandardScaler
# `sc`, so the competition test features must pass through that same scaler
# before prediction. Feeding the raw frame would present inputs on a different
# scale from anything the model saw during training.
y_pred_df_tset = model.predict(sc.transform(df_test))
y_pred_df_tset

In [None]:
sub = pd.read_csv('../input/ventilator-pressure-prediction/sample_submission.csv')
sub

In [None]:
sub['pressure'] = y_pred_df_tset
sub.to_csv('submission.csv', index=False)

In [None]:
sub.head(10)