<a href="https://colab.research.google.com/github/chi-hun/kaggle/blob/main/Google_Brain_Ventilator_Pressure_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Google Brain - Ventilator Pressure Prediction

https://www.kaggle.com/competitions/ventilator-pressure-prediction

In [1]:
!pip install kaggle

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [1]:
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras import layers

In [2]:
!mkdir -p ~/.kaggle
!cp '/content/drive/MyDrive/Colab Notebooks/kaggle/kaggle.json' ~/.kaggle
!chmod -600 ~/.kaggle/kaggle.json

In [3]:
# Download the competition archive with the Kaggle CLI; it authenticates with ~/.kaggle/kaggle.json.
!kaggle competitions download -c ventilator-pressure-prediction

Downloading ventilator-pressure-prediction.zip to /content
 99% 220M/223M [00:01<00:00, 201MB/s]
100% 223M/223M [00:01<00:00, 149MB/s]


In [4]:
!unzip /content/ventilator-pressure-prediction.zip

Archive:  /content/ventilator-pressure-prediction.zip
  inflating: sample_submission.csv   
  inflating: test.csv                
  inflating: train.csv               


In [5]:
# Load the extracted train and test CSV files into DataFrames.
train = pd.read_csv('/content/train.csv')
test = pd.read_csv('/content/test.csv')

In [None]:
# Inspect column names, dtypes and memory footprint of the training frame.
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6036000 entries, 0 to 6035999
Data columns (total 8 columns):
 #   Column     Dtype  
---  ------     -----  
 0   id         int64  
 1   breath_id  int64  
 2   R          int64  
 3   C          int64  
 4   time_step  float64
 5   u_in       float64
 6   u_out      int64  
 7   pressure   float64
dtypes: float64(3), int64(5)
memory usage: 368.4 MB


In [None]:
# Same inspection for the test frame.
test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4024000 entries, 0 to 4023999
Data columns (total 7 columns):
 #   Column     Dtype  
---  ------     -----  
 0   id         int64  
 1   breath_id  int64  
 2   R          int64  
 3   C          int64  
 4   time_step  float64
 5   u_in       float64
 6   u_out      int64  
dtypes: float64(2), int64(5)
memory usage: 214.9 MB


In [6]:
# Discard the identifier columns from both frames; the columns that remain
# (minus the label, which is split off separately) become the model inputs.
train = train.drop(columns=['id', 'breath_id'])
test = test.drop(columns=['id', 'breath_id'])

In [7]:
# Split the label off the feature frame: pop() removes 'pressure' from `train` and returns it.
# Not idempotent: re-running this cell raises KeyError because the column is already gone.
target = train.pop('pressure')

In [8]:
# Min-max scale every feature column. The scaler is re-fitted on the training
# column each iteration and the same fit is then applied to the test column,
# so test values are scaled with training statistics (and may leave [0, 1]).
mms = MinMaxScaler()
for feature in train.columns:
    mms.fit(train[[feature]])
    train[feature] = mms.transform(train[[feature]])
    test[feature] = mms.transform(test[[feature]])

In [9]:
def downcast(df, verbose=True):
    """Shrink the columns of ``df`` to the smallest numeric dtype that fits.

    The frame is modified in place and also returned.

    * bool columns are converted to ``int8``;
    * integer columns, and other numeric columns whose values are all whole
      numbers, are downcast with ``pd.to_numeric(..., downcast='integer')``;
    * remaining float columns are downcast with ``downcast='float'``;
    * non-numeric columns (object, string, category, datetime, ...) are left
      untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns should be downcast.
    verbose : bool, default True
        When true, print the memory usage in MiB before and after together
        with the percentage saved.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    dfm_0 = df.memory_usage().sum()/1024**2
    for col in df.columns:
        series = df[col]
        col_t = series.dtype.name
        if col_t == 'bool':
            # The converted column must be assigned back; astype() returns a new Series.
            df[col] = series.astype('int8')
        elif not pd.api.types.is_numeric_dtype(series):
            # round() / to_numeric are not meaningful for non-numeric columns.
            continue
        elif col_t.startswith('int') or (round(series) == series).all():
            df[col] = pd.to_numeric(series, downcast='integer')
        elif col_t.startswith('float'):
            df[col] = pd.to_numeric(series, downcast='float')
    if verbose:
        dfm_1 = df.memory_usage().sum()/1024**2
        # Guard the division in case the initial footprint is reported as zero.
        dfmz = round(((dfm_0 - dfm_1) / (dfm_0)) * 100, 2) if dfm_0 else 0.0
        # '압축률' is Korean for "compression ratio".
        print(f'{dfm_0} -> {dfm_1} 압축률 : {dfmz}%')
    return df

In [10]:
# Downcast both frames to reduce memory; each call prints the before/after size in MiB.
train = downcast(train)
test = downcast(test)

230.2552490234375 -> 97.85855102539062 압축률 : 57.5%
153.5035400390625 -> 65.23907470703125 압축률 : 57.5%


In [11]:
# Check the feature frame's (rows, columns) before reshaping it into sequences.
train.shape

(6036000, 5)

In [12]:
# Turn the flat feature tables into 3-D arrays of (sequences, 40 steps, 5 features)
# by cutting consecutive rows into chunks of 40.
# NOTE(review): this relies on the row order of the CSV; confirm that 40 consecutive
# rows are the intended sequence unit for this dataset.
train_n = train.to_numpy().reshape(-1, 40, 5)
test_n = test.to_numpy().reshape(-1, 40, 5)
train_n.shape, test_n.shape

((150900, 40, 5), (100600, 40, 5))

# LSTM

In [13]:
def model_maker(seq_len=40, n_features=5, lstm_units=(1024, 512, 256, 128)):
    """Build the stacked bidirectional-LSTM regression network (uncompiled).

    Every LSTM layer returns its full sequence, so the network produces one
    value per time step: the output has shape ``(batch, seq_len, 1)``.

    Parameters
    ----------
    seq_len : int, default 40
        Number of time steps in each input sequence.
    n_features : int, default 5
        Number of features per time step.
    lstm_units : sequence of int, default (1024, 512, 256, 128)
        Units of each successive bidirectional LSTM layer, in order.

    Returns
    -------
    tf.keras.Model
        A new model; the caller is responsible for compiling it.
    """
    inputs = layers.Input(shape=(seq_len, n_features))
    x = inputs
    # One bidirectional LSTM per entry; return_sequences=True keeps the time axis
    # so the next layer (and the final per-step head) sees every step.
    for units in lstm_units:
        x = layers.Bidirectional(
            layers.LSTM(units, activation='tanh', return_sequences=True)
        )(x)
    # Dense layers act on the last axis, i.e. independently at every time step.
    x = layers.Dense(128, activation='selu')(x)
    outputs = layers.Dense(1)(x)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    return model

In [21]:
# Build a fresh model and print its layer/parameter summary.
model = model_maker()
model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 40, 5)]           0         
                                                                 
 bidirectional_4 (Bidirectio  (None, 40, 2048)         8437760   
 nal)                                                            
                                                                 
 bidirectional_5 (Bidirectio  (None, 40, 1024)         10489856  
 nal)                                                            
                                                                 
 bidirectional_6 (Bidirectio  (None, 40, 512)          2623488   
 nal)                                                            
                                                                 
 bidirectional_7 (Bidirectio  (None, 40, 256)          656384    
 nal)                                                      

In [15]:
# First training configuration: RMSprop with default settings, MSE loss, MAE reported as a metric.
# `metrics` is given as a list, the documented form for Model.compile; a bare
# string is not accepted by every Keras version.
model.compile(optimizer=tf.keras.optimizers.RMSprop(), loss='mse', metrics=['mae'])

In [20]:
# The model predicts one value per time step, so the flat per-row target is reshaped
# to (n_sequences, steps, 1) to line up with `train_n`. Passing the flat vector would
# give x and y different sample counts (150,900 sequences vs 6,036,000 rows).
with tf.device('/GPU:0'):
    his = model.fit(
        train_n,
        target.values.reshape(*train_n.shape[:2], 1),
        epochs=3,
        validation_split=0.1,
        batch_size=1024,
    )

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [22]:
# Second training configuration: switch the optimizer to Adam (default settings),
# same MSE loss and MAE metric. The same `model` object is re-compiled, not rebuilt.
# `metrics` is given as a list, the documented form for Model.compile; a bare
# string is not accepted by every Keras version.
model.compile(optimizer=tf.keras.optimizers.Adam(), loss='mse', metrics=['mae'])

In [23]:
# Train for three more epochs with the current optimizer; `his` is overwritten with
# the new History object.
# The model predicts one value per time step, so the flat per-row target is reshaped
# to (n_sequences, steps, 1) to line up with `train_n`. Passing the flat vector would
# give x and y different sample counts.
with tf.device('/GPU:0'):
    his = model.fit(
        train_n,
        target.values.reshape(*train_n.shape[:2], 1),
        epochs=3,
        validation_split=0.1,
        batch_size=1024,
    )

Epoch 1/3
Epoch 2/3
Epoch 3/3
