In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

import seaborn as sns


In [None]:
# Load the training table and discard its `id` column in one step.
train_csv = '../input/ventilator-pressure-prediction/train.csv'
df = pd.read_csv(train_csv).drop(columns=['id'])
df.head(2)

# How many series (breaths)?

In [None]:
# Number of distinct breath_id values in the training table.
len(df['breath_id'].unique())

In [None]:
# Index labels of every row whose time_step is exactly 0.
is_start = df['time_step'] == 0
time_idx = df.loc[is_start, 'time_step'].index

time_idx

In [None]:
# Overlay the pressure curves of the first 10 series on one figure.
series_len = 80  # rows drawn per series, counted from its time_step == 0 row
for i, t in enumerate(time_idx[:10]):
    # Explicit positional slice; the original chained `[]` slicing was positional too.
    window = df.iloc[t:t + series_len]
    plt.plot(window['time_step'], window['pressure'], lw=2, label=f"sample{i}")
# The axis labels are identical for every curve, so set them once after the loop.
plt.xlabel('time step')
plt.ylabel('pressure')
plt.legend()
plt.show()


# pressure seems to be affected by the other features

In [None]:
# time_step at row labels 1, 81 and 161: the values differ, i.e. the time steps are not equally spaced across series.
tuple(df.loc[row, 'time_step'] for row in (1, 81, 161))

In [None]:
# Number of series: one entry in time_idx per time_step == 0 row.
# Each series covers 80 time steps.
time_idx.size

In [None]:
# Drop the columns that are not used as model inputs.
# errors='ignore' keeps this cell re-runnable: without it, running the cell a
# second time raises KeyError because the columns are already gone.
df = df.drop(columns=['time_step', 'breath_id'], errors='ignore')

# Modeling Series

In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.layers import GRU,LSTM,Dense,Bidirectional

* For each series

 * x size :(1,80,4)
 
 * batch size :1

 * series length:80

 * dimension : 6 --> 4 (after dropping time_step and breath_id)
 
   * R
   * C
   * u_in
   * u_out
   
   
   
* Model 
 * Type : many-to-many (4 input features per time step --> one pressure value per time step)


In [None]:
# Training hyperparameters.
batch_size = 128
epochs = 200
lr = 0.001
time_slice = 80      # rows per window (one series)
units = 512          # units of each recurrent layer
units_dense = 256    # units of the hidden Dense layer

m = df.shape[0]

# Split on whole series. The train/validation boundary has to be a multiple of
# `time_slice`; otherwise the 80-row windows cut from the validation rows would
# straddle two series. `int(m * 0.7)` only hit such a boundary by floating-point
# coincidence, so the split is computed in units of series instead.
train_frac = 0.7
train_size = round((m // time_slice) * train_frac) * time_slice

* Read windows of 80 rows and slide forward by 80 rows each time (non-overlapping windows)

In [None]:
def build_ds(X, y, mean=None, std=None):
    """Build a batched, prefetched tf.data pipeline of non-overlapping windows.

    `X` is standardised column-wise, then `X` and `y` are each cut into
    windows of `time_slice` consecutive rows with a stride of `time_slice`
    (no overlap, no shuffling) and grouped into batches of `batch_size`
    windows. `time_slice` and `batch_size` are read from module scope.

    Args:
        X: feature table; must support `.mean(axis=0)` / `.std(axis=0)` and
            arithmetic broadcasting (the notebook passes a DataFrame).
        y: targets with the same number of rows as `X`.
        mean: optional per-column mean used for standardisation. Defaults to
            the mean of `X` itself.
        std: optional per-column standard deviation used for standardisation.
            Defaults to the standard deviation of `X` itself.

    Returns:
        A `tf.data.Dataset` yielding `(x, y)` pairs, where `x` is the batch of
        feature windows cast to float32 and `y` the matching batch of target
        windows.

    Note:
        With the defaults every call scales by its own data, so training,
        validation and test inputs end up on slightly different scales. Pass
        the training-set `mean` / `std` for validation and test data to give
        the model consistently scaled inputs.
    """
    if mean is None:
        mean = X.mean(axis=0)
    if std is None:
        std = X.std(axis=0)
    # A constant column has std == 0 and would turn into NaN/inf after the
    # division. `(std == 0)` adds 1 exactly where std is 0 and nothing
    # elsewhere, and works for pandas and NumPy inputs alike.
    std = std + (std == 0)
    X = (X - mean) / std

    def _windows(data):
        # One window per `time_slice` rows. targets=None makes the dataset
        # yield only the windows, so no dummy target tensor has to be built
        # and then mapped away.
        return tf.keras.preprocessing.timeseries_dataset_from_array(
            data=data,
            targets=None,
            sequence_length=time_slice,
            sequence_stride=time_slice,
            shuffle=False,
            batch_size=batch_size,
        )

    ds = tf.data.Dataset.zip((_windows(X), _windows(y)))
    ds = ds.map(lambda feats, target: (tf.cast(feats, 'float32'), target))
    ds = ds.prefetch(tf.data.experimental.AUTOTUNE)
    return ds

In [None]:
# Positional split: the first `train_size` rows are used for training,
# the remaining rows for validation.
features = df.drop(columns=['pressure'])
target = df['pressure']

x_train, x_val = features[:train_size], features[train_size:]
y_train, y_val = target[:train_size], target[train_size:]

ds_train = build_ds(x_train, y_train)
ds_val = build_ds(x_val, y_val)

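* Intended design: use only a one-directional (forward) RNN, because predicting pressure in the reverse time direction is not reasonable. Note: the model cell below currently wraps both LSTM layers in `Bidirectional`, which contradicts this statement.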

In [None]:
# Two stacked bidirectional LSTM layers that emit one vector per time step
# (return_sequences=True), followed by a Dense head applied at every time step
# that outputs a single value.
# (An earlier three-layer unidirectional GRU stack with dropout=0.5 used to sit
# in this cell as a commented-out string literal; it was dead code and has been
# removed.)
# NOTE(review): Keras documents recurrent_dropout == 0 as a requirement for the
# fast cuDNN LSTM kernel -- confirm that the slower generic kernel is acceptable.
model = tf.keras.Sequential([
    Bidirectional(LSTM(units=units, return_sequences=True, recurrent_dropout=0.2)),
    Bidirectional(LSTM(units=units, return_sequences=True, recurrent_dropout=0.2)),
    Dense(units=units_dense, activation='relu'),
    Dense(units=1, activation=None),
])

# Adam with the configured initial learning rate; MSE is the training loss,
# and MSE / MAE are additionally reported as metrics.
opt = tf.keras.optimizers.Adam(learning_rate=lr)
model.compile(optimizer=opt,
              loss='mse', metrics=['mse', 'mae'])

In [None]:
# Checkpoint tracking the optimizer and the model; the manager writes to
# ./ckpt and keeps only the most recent save.
ckpt = tf.train.Checkpoint(opt=opt, model=model)
manager = tf.train.CheckpointManager(
    checkpoint=ckpt,
    directory='./ckpt',
    max_to_keep=1,
)

In [None]:
def scheduler(epoch, lr):
    """Exponential learning-rate decay: multiply `lr` by exp(-0.01) on every call.

    Args:
        epoch: epoch index passed in by the callback; not used.
        lr: current learning rate.

    Returns:
        The decayed learning rate as a plain Python float. The original
        returned a `tf.Tensor`, which newer Keras versions of
        `LearningRateScheduler` reject because they require a float.
    """
    import math  # local import: keeps this cell self-contained

    return float(lr) * math.exp(-0.01)

In [None]:



# Train with per-epoch learning-rate decay and early stopping on val_loss.
# - ds_train / ds_val are already-batched tf.data datasets, so
#   validation_batch_size is not passed (Keras says not to specify it for
#   dataset inputs).
# - use_multiprocessing is dropped: it only applies to generator /
#   keras.utils.Sequence inputs, and newer Keras versions no longer accept it.
# - restore_best_weights=True: without it the model keeps the weights from
#   `patience` epochs after the best validation loss.
model.fit(
    ds_train,
    epochs=epochs,
    verbose=1,
    validation_data=ds_val,
    callbacks=[
        tf.keras.callbacks.LearningRateScheduler(scheduler),
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=3,
            verbose=0,
            restore_best_weights=True,
        ),
    ],
)

# Persist optimizer + model state through the checkpoint manager.
manager.save()

In [None]:
# Save the trained Keras model to './gru.pt'.
# NOTE(review): the file name suggests a GRU / PyTorch artifact, but the active
# model in this notebook is a Keras bidirectional LSTM -- consider renaming.
# Newer Keras versions presumably require a '.keras' or '.h5' suffix here;
# confirm against the installed version.
model.save('./gru.pt')

# Submission

In [None]:
# Load the test table, set its ids aside for the submission file, and drop
# the id / breath_id / time_step columns from the frame.
test_csv = '../input/ventilator-pressure-prediction/test.csv'
df_ = pd.read_csv(test_csv)

id_ = df_['id']

df_ = df_.drop(columns=['id', 'breath_id', 'time_step'])
df_.head(2)

In [None]:
# `df_` is passed twice: the second copy only fills build_ds's `y` slot, and
# the prediction loop discards that part of every batch.
# NOTE(review): build_ds standardises with the mean/std of the frame it is
# given, so the test inputs are scaled with test statistics rather than the
# training ones -- confirm this is intended.
ds_=build_ds(df_,df_)

In [None]:
# Run the model in inference mode on every batch and collect the outputs.
pred = [model(x, training=False) for x, _ in ds_]

In [None]:
# Join the per-batch outputs along the first axis, then flatten everything
# into a single 1-D NumPy array.
stacked = tf.concat(pred, axis=0)
pred = tf.reshape(stacked, [-1]).numpy()
pred.shape

In [None]:
# Stack ids and predictions as two rows, transpose them into two columns,
# and label the columns for the submission file.
id_and_pred = np.vstack([id_, pred])
sub = pd.DataFrame(data=id_and_pred.T, columns=['id', 'pressure'])

In [None]:
# np.vstack gave both columns one common dtype; cast the ids back to int64.
sub = sub.astype({'id': 'int64'})

In [None]:
# Write the submission table to ./sub.csv without the DataFrame index column.
sub.to_csv('./sub.csv',index=False)