In [1]:
import gc
import numpy as np
import pandas as pd

In [2]:
# Read the training split, report its dimensions, and preview the first rows.
train_df = pd.read_csv(
    filepath_or_buffer='../input/ventilator-pressure-prediction/train.csv'
)
print(f"train_df: {train_df.shape}")
train_df.head(n=5)

train_df: (6036000, 8)


Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out,pressure
0,1,1,20,50,0.0,0.083334,0,5.837492
1,2,1,20,50,0.033652,18.383041,0,5.907794
2,3,1,20,50,0.067514,22.509278,0,7.876254
3,4,1,20,50,0.101542,22.808822,0,11.742872
4,5,1,20,50,0.135756,25.35585,0,12.234987


In [3]:
# Read the test split, report its dimensions, and preview the first rows.
test_df = pd.read_csv(
    filepath_or_buffer='../input/ventilator-pressure-prediction/test.csv'
)
print(f"test_df: {test_df.shape}")
test_df.head(n=5)

test_df: (4024000, 7)


Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out
0,1,0,5,20,0.0,0.0,0
1,2,0,5,20,0.031904,7.515046,0
2,3,0,5,20,0.063827,14.651675,0
3,4,0,5,20,0.095751,21.23061,0
4,5,0,5,20,0.127644,26.320956,0


In [4]:
def add_features(df):
    """Return a copy of ``df`` extended with per-breath engineered features.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``breath_id``, ``time_step``, ``u_in``,
        ``u_out``, ``R`` and ``C``. The row-wise "previous breath" features
        (``breath_id_lag*``) look at physically adjacent rows, so rows of one
        breath are expected to be contiguous and in time order.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is left untouched) with cumulative, lag/lead,
        difference, per-breath aggregate, EWM and rolling features added, and
        with ``R``, ``C`` and ``R__C`` converted to strings and expanded by
        ``pd.get_dummies``.

    Notes
    -----
    * Missing values produced by the lag/lead and per-breath ``u_out`` stats
      are filled with 0. The rolling-window columns are created after that
      fill and therefore keep NaN in the first ``window - 1`` rows of each
      breath. NOTE(review): confirm downstream code handles those NaNs.
    * The helper column ``one`` is kept in the output, as before.
    """
    # Work on a copy: previously the caller's frame was half-mutated (columns
    # were added in place up to the fillna, which then rebound the name).
    df = df.copy()

    df['cross'] = df['u_in'] * df['u_out']
    df['cross2'] = df['time_step'] * df['u_out']
    df['area'] = df['time_step'] * df['u_in']
    df['area'] = df.groupby('breath_id')['area'].cumsum()
    df['time_step_cumsum'] = df.groupby(['breath_id'])['time_step'].cumsum()
    df['u_in_cumsum'] = (df['u_in']).groupby(df['breath_id']).cumsum()

    # feat-1
    # u_in statistics restricted to rows with u_out == 0 / u_out == 1. The
    # aggregate has one row per breath, so it is broadcast back through
    # breath_id; assigning it by row index (as was done before) put the values
    # on the first n_breaths rows only and on the wrong breaths.
    for flag in (0, 1):
        stats = (df.loc[df['u_out'] == flag]
                   .groupby('breath_id')['u_in']
                   .agg(['mean', 'std', 'max']))
        for stat in ('mean', 'std', 'max'):
            df[f'u_out{flag}_{stat}'] = df['breath_id'].map(stats[stat])

    print("Step-1...Completed")

    # Lags (shift k) and leads (shift -k) of both control signals, computed
    # within each breath. The lead-8 of u_out previously used shift(-4).
    u_in_by_breath = df.groupby('breath_id')['u_in']
    u_out_by_breath = df.groupby('breath_id')['u_out']
    for k in range(1, 9):
        df[f'u_in_lag{k}'] = u_in_by_breath.shift(k)
        df[f'u_out_lag{k}'] = u_out_by_breath.shift(k)
        df[f'u_in_lag_back{k}'] = u_in_by_breath.shift(-k)
        df[f'u_out_lag_back{k}'] = u_out_by_breath.shift(-k)

    df = df.fillna(0)
    print("Step-2...Completed")

    breath_u_in_max = df.groupby(['breath_id'])['u_in'].transform('max')
    breath_u_in_mean = df.groupby(['breath_id'])['u_in'].transform('mean')
    df['breath_id__u_in__max'] = breath_u_in_max
    df['breath_id__u_in__mean'] = breath_u_in_mean
    df['breath_id__u_in__diffmax'] = breath_u_in_max - df['u_in']
    df['breath_id__u_in__diffmean'] = breath_u_in_mean - df['u_in']
    print("Step-3...Completed")

    # Differences against the (zero-filled) lagged values.
    for k in range(1, 9):
        df[f'u_in_diff{k}'] = df['u_in'] - df[f'u_in_lag{k}']
        df[f'u_out_diff{k}'] = df['u_out'] - df[f'u_out_lag{k}']

    print("Step-4...Completed")

    df['one'] = 1
    df['count'] = (df['one']).groupby(df['breath_id']).cumsum()
    df['u_in_cummean'] = df['u_in_cumsum'] / df['count']

    # Row-wise (not grouped) look-back at the breath id of the previous rows.
    # The "same breath" flags are taken from the unfilled shift: comparing
    # against the 0-filled column wrongly flagged the first rows of a frame
    # whose first breath_id is 0 as continuing a previous breath.
    prev_breath1 = df['breath_id'].shift(1)
    prev_breath2 = df['breath_id'].shift(2)
    df['breath_id_lag'] = prev_breath1.fillna(0)
    df['breath_id_lag2'] = prev_breath2.fillna(0)
    df['breath_id_lagsame'] = (prev_breath1 == df['breath_id']).astype(int)
    df['breath_id_lag2same'] = (prev_breath2 == df['breath_id']).astype(int)
    df['breath_id__u_in_lag'] = df['u_in'].shift(1).fillna(0)
    df['breath_id__u_in_lag'] = df['breath_id__u_in_lag'] * df['breath_id_lagsame']
    df['breath_id__u_in_lag2'] = df['u_in'].shift(2).fillna(0)
    df['breath_id__u_in_lag2'] = df['breath_id__u_in_lag2'] * df['breath_id_lag2same']
    print("Step-5...Completed")

    df['time_step_diff'] = df.groupby('breath_id')['time_step'].diff().fillna(0)
    df['ewm_u_in_mean'] = (df
                           .groupby('breath_id')['u_in']
                           .ewm(halflife=9)
                           .mean()
                           .reset_index(level=0, drop=True))

    # feat-1
    # Rolling statistics of u_in inside each breath. Only the group level of
    # the resulting index is dropped, so values are aligned to their original
    # rows by label (same approach as the EWM feature) instead of relying on
    # the frame being sorted with a default 0..n-1 index.
    for stat in ('mean', 'max', 'min', 'std'):
        for window in (2, 4, 8):
            rolling = df.groupby('breath_id')['u_in'].rolling(window)
            df[f'u_in_rolling_{stat}{window}'] = (
                getattr(rolling, stat)().reset_index(level=0, drop=True)
            )

    print("Step-6...Completed")

    for k in (1, 2):
        df[f'u_in_lagback_diff{k}'] = df['u_in'] - df[f'u_in_lag_back{k}']
        df[f'u_out_lagback_diff{k}'] = df['u_out'] - df[f'u_out_lag_back{k}']
    print("Step-7...Completed")

    # Treat R, C and their combination as categories and one-hot encode them.
    df['R'] = df['R'].astype(str)
    df['C'] = df['C'].astype(str)
    df['R__C'] = df["R"].astype(str) + '__' + df["C"].astype(str)
    df = pd.get_dummies(df)
    print("Step-8...Completed")

    return df


# Run the feature pipeline on the training split, then on the test split,
# announcing each one before its step log is printed.
print("Train data...\n")
train = add_features(train_df)

print("\nTest data...\n")
test = add_features(test_df)

# Drop the names of the raw frames and ask the garbage collector to run.
del train_df, test_df
gc.collect()

Train data...

Step-1...Completed
Step-2...Completed
Step-3...Completed
Step-4...Completed
Step-5...Completed
Step-6...Completed
Step-7...Completed
Step-8...Completed

Test data...

Step-1...Completed
Step-2...Completed
Step-3...Completed
Step-4...Completed
Step-5...Completed
Step-6...Completed
Step-7...Completed
Step-8...Completed


0

In [5]:
# Write the engineered training frame to disk without the row index.
train.to_csv(path_or_buf='train.csv', index=False)

KeyboardInterrupt: 