# 主成分分析

In [6]:
# ====================================================
# Directory settings
# ====================================================
import os

EXP_NAME = 'work'

DATA_DIR = "../input/ventilator-pressure-prediction/"

In [7]:
# ====================================================
# Library
# ====================================================
import os
import gc
import sys
import json
import math
import random
from time import time
from datetime import datetime
from collections import Counter, defaultdict

import scipy as sp
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

from tqdm.auto import tqdm
import category_encoders as ce

from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import StratifiedKFold, GroupKFold, KFold
from sklearn.decomposition import PCA 

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau
from torch.cuda.amp import GradScaler
from torch.cuda.amp import autocast

from transformers import AdamW
from transformers import get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup

import warnings
warnings.filterwarnings("ignore")

#if CFG.apex:
#    from apex import amp

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [8]:
# ====================================================
# CFG
# ====================================================
class CFG:
    experiment_name=EXP_NAME
    competition='ventilator'
    apex=True
    print_freq=1000
    num_workers=4
    model_name='lstm'
    scheduler='CosineAnnealingWarmRestarts' # ['linear', 'cosine', 'ReduceLROnPlateau', 'CosineAnnealingLR', 'CosineAnnealingWarmRestarts']
    batch_scheduler=False
    #num_warmup_steps=100 # ['linear', 'cosine']
    #num_cycles=0.5 # 'cosine'
    factor=0.995 # ReduceLROnPlateau
    patience=7 # ReduceLROnPlateau
    eps=1e-6 # ReduceLROnPlateau
    T_max=50 # CosineAnnealingLR
    T_0=20 # CosineAnnealingWarmRestarts
    epochs=300
    max_grad_norm=1000
    gradient_accumulation_steps=1
    hidden_size=1024
    lr=1e-3
    min_lr=1e-5
    weight_decay=1e-6
    batch_size=256
    n_fold=5
    trn_fold=[0, 1, 2, 3, 4]
    cate_seq_cols=[]
    cont_seq_cols=['R', 'C', 'time_step', 'u_in', 'u_out']
    train=True
    inference=True
    feature_importance=True
    debug=False
    wandb=True

if CFG.debug:
    CFG.epochs = 2
    CFG.trn_fold=[0]

In [9]:
# ====================================================
# Data Loading
# ====================================================
train = pd.read_csv(DATA_DIR + 'train.csv')
if CFG.debug:
    train = train[:80*5000]
test = pd.read_csv(DATA_DIR + 'test.csv')
sub = pd.read_csv(DATA_DIR + 'sample_submission.csv')

display(train.head())
display(test.head())
display(sub.head())

unique_pressures = train["pressure"].unique()
sorted_pressures = np.sort(unique_pressures)
total_pressures_len = len(sorted_pressures)

Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out,pressure
0,1,1,20,50,0.0,0.083334,0,5.837492
1,2,1,20,50,0.033652,18.383041,0,5.907794
2,3,1,20,50,0.067514,22.509278,0,7.876254
3,4,1,20,50,0.101542,22.808822,0,11.742872
4,5,1,20,50,0.135756,25.35585,0,12.234987


Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out
0,1,0,5,20,0.0,0.0,0
1,2,0,5,20,0.031904,7.515046,0
2,3,0,5,20,0.063827,14.651675,0
3,4,0,5,20,0.095751,21.23061,0
4,5,0,5,20,0.127644,26.320956,0


Unnamed: 0,id,pressure
0,1,0
1,2,0
2,3,0
3,4,0
4,5,0


In [26]:
class AbstractBaseBlock:
    def fit(self, input_df: pd.DataFrame, y=None):
        return self.transform(input_df)

    def transform(self, input_df: pd.DataFrame) -> pd.DataFrame:
        raise NotImplementedError()


class AddMultiplyingDividing(AbstractBaseBlock):
    def transform(self, input_df):
        input_df['area'] = input_df['time_step'] * input_df['u_in']
        input_df['area'] = input_df.groupby('breath_id')['area'].cumsum()
        input_df['cross'] = input_df['u_in']*input_df['u_out']
        input_df['cross2'] = input_df['time_step']*input_df['u_out']
        input_df['u_in_cumsum'] = (input_df['u_in']).groupby(input_df['breath_id']).cumsum()
        input_df['one'] = 1
        input_df['count'] = (input_df['one']).groupby(input_df['breath_id']).cumsum()
        input_df['u_in_cummean'] = input_df['u_in_cumsum'] / input_df['count']
        # input_df = input_df.merge(
        #     input_df[input_df["u_out"]==0].groupby('breath_id')['u_in'].agg(["mean", "std", "max"]).add_prefix("u_out0_").reset_index(),
        #     on="breath_id"
        # )
        # input_df = input_df.merge(
        #     input_df[input_df["u_out"]==1].groupby('breath_id')['u_in'].agg(["mean", "std", "max"]).add_prefix("u_out1_").reset_index(),
        #     on="breath_id"
        # )

        # feat-11
        input_df['time_step_cumsum'] = input_df.groupby(['breath_id'])['time_step'].cumsum()
        input_df['breath_id__u_in__max'] = input_df.groupby(['breath_id'])['u_in'].transform('max')
        input_df['breath_id__u_in__mean'] = input_df.groupby(['breath_id'])['u_in'].transform('mean')
        input_df['breath_id__u_in__diffmax'] = input_df.groupby(['breath_id'])['u_in'].transform('max') - input_df['u_in']
        input_df['breath_id__u_in__diffmean'] = input_df.groupby(['breath_id'])['u_in'].transform('mean') - input_df['u_in']

        output_df = pd.DataFrame(
            {
                "area": input_df['area'],
                #"cross": input_df['cross'],
                #"cross2": input_df['cross2'],
                "u_in_cumsum": input_df['u_in_cumsum'],
                "u_in_cummean": input_df['u_in_cummean'],
                'time_step_cumsum': input_df['time_step_cumsum'],
                "breath_id__u_in__max": input_df['breath_id__u_in__max'],
                "breath_id__u_in__mean": input_df['breath_id__u_in__mean'],
                "breath_id__u_in__diffmax": input_df['breath_id__u_in__diffmax'],
                "breath_id__u_in__diffmean": input_df['breath_id__u_in__diffmean'],

            }
        )
        CFG.cont_seq_cols += output_df.add_suffix(f'@{self.__class__.__name__}').columns.tolist()
        return output_df


class RCDummry(AbstractBaseBlock):
    def transform(self, input_df):
        input_df['R_dummy'] = input_df['R'].astype(str)
        input_df['C_dummy'] = input_df['C'].astype(str)
        #input_df['RC_dummy'] = input_df['R_dummy'] + input_df['C_dummy']
        output_df = pd.get_dummies(input_df[["R_dummy", "C_dummy"]])
        CFG.cont_seq_cols += output_df.add_suffix(f'@{self.__class__.__name__}').columns.tolist()
        return output_df


class AddBreathTimeAndUInTime(AbstractBaseBlock):
    def transform(self, input_df):
        output_df = pd.DataFrame(
            {
                "breath_time": input_df['time_step'] - input_df['time_step'].shift(1),
                "u_in_time": input_df['u_in'] - input_df['u_in'].shift(1)
            }
        )
        output_df.loc[input_df['time_step'] == 0, 'breath_time'] = output_df['breath_time'].mean()
        output_df.loc[input_df['time_step'] == 0, 'u_in_time'] = output_df['u_in_time'].mean()
        CFG.cont_seq_cols += output_df.add_suffix(f'@{self.__class__.__name__}').columns.tolist()
        return output_df

class LagFeatures(AbstractBaseBlock):
    def transform(self, input_df):
        output_df = pd.DataFrame(
            {
                "u_in_lag1": input_df.groupby("breath_id")["u_in"].shift(1).fillna(0),
                "u_in_lag2": input_df.groupby("breath_id")["u_in"].shift(2).fillna(0),
                "u_in_lag3": input_df.groupby("breath_id")["u_in"].shift(3).fillna(0),
                "u_in_lag4": input_df.groupby("breath_id")["u_in"].shift(4).fillna(0),
                "u_in_lag-1": input_df.groupby("breath_id")["u_in"].shift(-1).fillna(0),
                "u_in_lag-2": input_df.groupby("breath_id")["u_in"].shift(-2).fillna(0),
                "u_in_lag-3": input_df.groupby("breath_id")["u_in"].shift(-3).fillna(0),
                "u_in_lag-4": input_df.groupby("breath_id")["u_in"].shift(-4).fillna(0),
                "u_out_lag1": input_df.groupby("breath_id")["u_out"].shift(1).fillna(0),
                "u_out_lag2": input_df.groupby("breath_id")["u_out"].shift(2).fillna(0),
                "u_out_lag3": input_df.groupby("breath_id")["u_out"].shift(3).fillna(0),
                "u_out_lag4": input_df.groupby("breath_id")["u_out"].shift(4).fillna(0),
                "u_out_lag-1": input_df.groupby("breath_id")["u_out"].shift(-1).fillna(0),
                "u_out_lag-2": input_df.groupby("breath_id")["u_out"].shift(-2).fillna(0),
                "u_out_lag-3": input_df.groupby("breath_id")["u_out"].shift(-3).fillna(0),
                "u_out_lag-4": input_df.groupby("breath_id")["u_out"].shift(-4).fillna(0),
            }
        )
        output_df["u_in_lag1_diff"] = input_df["u_in"] - output_df["u_in_lag1"]
        output_df["u_in_lag2_diff"] = input_df["u_in"] - output_df["u_in_lag2"]
        output_df["u_in_lag3_diff"] = input_df["u_in"] - output_df["u_in_lag3"]
        output_df["u_in_lag4_diff"] = input_df["u_in"] - output_df["u_in_lag4"]
        output_df["u_out_lag1_diff"] = input_df["u_out"] - output_df["u_out_lag1"]
        output_df["u_out_lag2_diff"] = input_df["u_out"] - output_df["u_out_lag2"]
        output_df["u_out_lag3_diff"] = input_df["u_out"] - output_df["u_out_lag3"]
        output_df["u_out_lag4_diff"] = input_df["u_out"] - output_df["u_out_lag4"]
        output_df["u_in_lag-1_diff"] = input_df["u_in"] - output_df["u_in_lag-1"]
        output_df["u_in_lag-2_diff"] = input_df["u_in"] - output_df["u_in_lag-2"]
        output_df["u_out_lag-1_diff"] = input_df["u_out"] - output_df["u_out_lag-1"]
        output_df["u_out_lag-2_diff"] = input_df["u_out"] - output_df["u_out_lag-2"]

        output_df['u_in_ewm9'] = (input_df.groupby('breath_id')['u_in'].ewm(halflife=9).mean().reset_index(level=0,drop=True))
        output_df['u_in_ewm15'] = (input_df.groupby('breath_id')['u_in'].ewm(halflife=15).mean().reset_index(level=0,drop=True))

        output_df["u_in_rolling_mean2"] = input_df[["breath_id", "u_in"]].groupby("breath_id").rolling(2).mean()["u_in"].reset_index(drop=True)
        output_df["u_in_rolling_mean4"] = input_df[["breath_id", "u_in"]].groupby("breath_id").rolling(4).mean()["u_in"].reset_index(drop=True)
        output_df["u_in_rolling_mean15"] = input_df[["breath_id", "u_in"]].groupby("breath_id").rolling(15).mean()["u_in"].reset_index(drop=True)
        if not CFG.debug:
            output_df["u_in_rolling_max2"] = input_df[["breath_id", "u_in"]].groupby("breath_id").rolling(2).max()["u_in"].reset_index(drop=True)
            output_df["u_in_rolling_max4"] = input_df[["breath_id", "u_in"]].groupby("breath_id").rolling(4).max()["u_in"].reset_index(drop=True)
            output_df["u_in_rolling_max15"] = input_df[["breath_id", "u_in"]].groupby("breath_id").rolling(15).max()["u_in"].reset_index(drop=True)
            output_df["u_in_rolling_min2"] = input_df[["breath_id", "u_in"]].groupby("breath_id").rolling(2).min()["u_in"].reset_index(drop=True)
            output_df["u_in_rolling_min4"] = input_df[["breath_id", "u_in"]].groupby("breath_id").rolling(4).min()["u_in"].reset_index(drop=True)
            output_df["u_in_rolling_min15"] = input_df[["breath_id", "u_in"]].groupby("breath_id").rolling(15).min()["u_in"].reset_index(drop=True)
            output_df["u_in_rolling_std2"] = input_df[["breath_id", "u_in"]].groupby("breath_id").rolling(2).std()["u_in"].reset_index(drop=True)
            output_df["u_in_rolling_std4"] = input_df[["breath_id", "u_in"]].groupby("breath_id").rolling(4).std()["u_in"].reset_index(drop=True)
            output_df["u_in_rolling_std15"] = input_df[["breath_id", "u_in"]].groupby("breath_id").rolling(15).std()["u_in"].reset_index(drop=True)
            output_df["u_in_rolling_sum2"] = input_df[["breath_id", "u_in"]].groupby("breath_id").rolling(2).sum()["u_in"].reset_index(drop=True)
            output_df["u_in_rolling_sum4"] = input_df[["breath_id", "u_in"]].groupby("breath_id").rolling(4).sum()["u_in"].reset_index(drop=True)
            output_df["u_in_rolling_sum15"] = input_df[["breath_id", "u_in"]].groupby("breath_id").rolling(15).sum()["u_in"].reset_index(drop=True)
        for col in output_df.columns:
            output_df[col] = output_df[col].fillna(output_df[col].mean())
        CFG.cont_seq_cols += output_df.add_suffix(f'@{self.__class__.__name__}').columns.tolist()
        return output_df
    
class AddPCA(AbstractBaseBlock):
    def transform(self, input_df):
        output_df = pd.DataFrame(
            {
                "pca1": input_df['pca1'],
                "pca2": input_df['pca2'],
                "pca3": input_df['pca3'],
                "pca4": input_df['pca4'],
                "pca5": input_df['pca5'],
                "pca6": input_df['pca6'],
                "pca7": input_df['pca7'],
                "pca8": input_df['pca8'],
                "pca9": input_df['pca9'],
                "pca10": input_df['pca10'],
                "pca11": input_df['pca11'],
                "pca12": input_df['pca12'],
                "pca13": input_df['pca13'],
                "pca14": input_df['pca14'],
                "pca15": input_df['pca15'],
            }
        )
        for col in output_df.columns:
            output_df[col] = output_df[col].fillna(output_df[col].mean())
        CFG.cont_seq_cols += output_df.add_suffix(f'@{self.__class__.__name__}').columns.tolist()
        return output_df

In [11]:
feature_blocks = [
    AddMultiplyingDividing(),
    AddBreathTimeAndUInTime(),
    RCDummry(),
    LagFeatures()
]

In [14]:
def decorate(s: str, decoration=None):
    if decoration is None:
        decoration = '★' * 20

    return ' '.join([decoration, str(s), decoration])


class Timer:
    def __init__(self, logger=None, format_str='{:.3f}[s]', prefix=None, suffix=None, sep=' ', verbose=0):

        if prefix: format_str = str(prefix) + sep + format_str
        if suffix: format_str = format_str + sep + str(suffix)
        self.format_str = format_str
        self.logger = logger
        self.start = None
        self.end = None
        self.verbose = verbose

    @property
    def duration(self):
        if self.end is None:
            return 0
        return self.end - self.start

    def __enter__(self):
        self.start = time()

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.end = time()
        if self.verbose is None:
            return
        out_str = self.format_str.format(self.duration)
        if self.logger:
            self.logger.info(out_str)
        else:
            print(out_str)


def run_blocks(input_df, blocks, y=None, test=False):
    out_df = pd.DataFrame()

    print(decorate('start run blocks...'))

    with Timer(prefix='run test={}'.format(test)):
        for block in feature_blocks:
            with Timer(prefix='out_df shape: {} \t- {}'.format(out_df.shape, str(block))):
                if not test:
                    out_i = block.fit(input_df.copy(), y=y)
                else:
                    out_i = block.transform(input_df.copy())

            assert len(input_df) == len(out_i), block
            name = block.__class__.__name__
            out_df = pd.concat([out_df, out_i.add_suffix(f'@{name}')], axis=1)
    print(f"out_df shape: {out_df.shape}")

    return pd.concat([input_df, out_df], axis=1)

train = run_blocks(train, blocks=feature_blocks)
test = run_blocks(test, blocks=feature_blocks, test=True)
CFG.cont_seq_cols = list(set(CFG.cont_seq_cols))
display(train.head())
display(test.head())

★★★★★★★★★★★★★★★★★★★★ start run blocks... ★★★★★★★★★★★★★★★★★★★★
out_df shape: (0, 0) 	- <__main__.AddMultiplyingDividing object at 0x7f557a589b50> 1.150[s]
out_df shape: (6036000, 8) 	- <__main__.AddBreathTimeAndUInTime object at 0x7f557a589a50> 0.162[s]
out_df shape: (6036000, 10) 	- <__main__.RCDummry object at 0x7f557a589c10> 11.184[s]
out_df shape: (6036000, 16) 	- <__main__.LagFeatures object at 0x7f557a589910> 84.150[s]
run test=False 98.316[s]
out_df shape: (6036000, 61)
★★★★★★★★★★★★★★★★★★★★ start run blocks... ★★★★★★★★★★★★★★★★★★★★
out_df shape: (0, 0) 	- <__main__.AddMultiplyingDividing object at 0x7f557a589b50> 0.670[s]
out_df shape: (4024000, 8) 	- <__main__.AddBreathTimeAndUInTime object at 0x7f557a589a50> 0.080[s]
out_df shape: (4024000, 10) 	- <__main__.RCDummry object at 0x7f557a589c10> 7.252[s]
out_df shape: (4024000, 16) 	- <__main__.LagFeatures object at 0x7f557a589910> 56.072[s]
run test=True 65.181[s]
out_df shape: (4024000, 61)


Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out,pressure,area@AddMultiplyingDividing,u_in_cumsum@AddMultiplyingDividing,u_in_cummean@AddMultiplyingDividing,time_step_cumsum@AddMultiplyingDividing,breath_id__u_in__max@AddMultiplyingDividing,breath_id__u_in__mean@AddMultiplyingDividing,breath_id__u_in__diffmax@AddMultiplyingDividing,breath_id__u_in__diffmean@AddMultiplyingDividing,breath_time@AddBreathTimeAndUInTime,u_in_time@AddBreathTimeAndUInTime,R_dummy_20@RCDummry,R_dummy_5@RCDummry,R_dummy_50@RCDummry,C_dummy_10@RCDummry,C_dummy_20@RCDummry,C_dummy_50@RCDummry,u_in_lag1@LagFeatures,u_in_lag2@LagFeatures,u_in_lag3@LagFeatures,u_in_lag4@LagFeatures,u_in_lag-1@LagFeatures,u_in_lag-2@LagFeatures,u_in_lag-3@LagFeatures,u_in_lag-4@LagFeatures,u_out_lag1@LagFeatures,u_out_lag2@LagFeatures,u_out_lag3@LagFeatures,u_out_lag4@LagFeatures,u_out_lag-1@LagFeatures,u_out_lag-2@LagFeatures,u_out_lag-3@LagFeatures,u_out_lag-4@LagFeatures,u_in_lag1_diff@LagFeatures,u_in_lag2_diff@LagFeatures,u_in_lag3_diff@LagFeatures,u_in_lag4_diff@LagFeatures,u_out_lag1_diff@LagFeatures,u_out_lag2_diff@LagFeatures,u_out_lag3_diff@LagFeatures,u_out_lag4_diff@LagFeatures,u_in_lag-1_diff@LagFeatures,u_in_lag-2_diff@LagFeatures,u_out_lag-1_diff@LagFeatures,u_out_lag-2_diff@LagFeatures,u_in_ewm9@LagFeatures,u_in_ewm15@LagFeatures,u_in_rolling_mean2@LagFeatures,u_in_rolling_mean4@LagFeatures,u_in_rolling_mean15@LagFeatures,u_in_rolling_max2@LagFeatures,u_in_rolling_max4@LagFeatures,u_in_rolling_max15@LagFeatures,u_in_rolling_min2@LagFeatures,u_in_rolling_min4@LagFeatures,u_in_rolling_min15@LagFeatures,u_in_rolling_std2@LagFeatures,u_in_rolling_std4@LagFeatures,u_in_rolling_std15@LagFeatures,u_in_rolling_sum2@LagFeatures,u_in_rolling_sum4@LagFeatures,u_in_rolling_sum15@LagFeatures
0,1,1,20,50,0.0,0.083334,0,5.837492,0.0,0.083334,0.083334,0.0,28.313036,10.146007,28.229702,10.062673,4.370474e-07,2.318432e-07,1,0,0,0,0,1,0.0,0.0,0.0,0.0,18.383041,22.509278,22.808822,25.35585,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083334,0.083334,0.083334,0.083334,0.0,0.0,0.0,0.0,-18.299707,-22.425944,0.0,0.0,0.083334,0.083334,7.296453,7.122734,6.293573,8.130658,9.024859,11.255598,6.462249,5.530754,2.927842,1.179743,1.644297,2.874732,14.592907,28.490936,94.403589
1,2,1,20,50,0.033652,18.383041,0,5.907794,0.618632,18.466375,9.233188,0.033652,28.313036,10.146007,9.929994,-8.237035,0.03365231,18.29971,1,0,0,0,0,1,0.083334,0.0,0.0,0.0,22.509278,22.808822,25.35585,27.259866,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.299707,18.383041,18.383041,18.383041,0.0,0.0,0.0,0.0,-4.126236,-4.425781,0.0,0.0,9.585358,9.444557,9.233188,7.122734,6.293573,18.383041,9.024859,11.255598,0.083334,5.530754,2.927842,12.939847,1.644297,2.874732,18.466375,28.490936,94.403589
2,3,1,20,50,0.067514,22.509278,0,7.876254,2.138333,40.975653,13.658551,0.101167,28.313036,10.146007,5.803758,-12.363271,0.03386211,4.126236,1,0,0,0,0,1,18.383041,0.083334,0.0,0.0,22.808822,25.35585,27.259866,27.127486,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.126236,22.425944,22.509278,22.509278,0.0,0.0,0.0,0.0,-0.299544,-2.846573,0.0,0.0,14.22904,14.002181,20.44616,7.122734,6.293573,22.509278,9.024859,11.255598,18.383041,5.530754,2.927842,2.91769,1.644297,2.874732,40.892319,28.490936,94.403589
3,4,1,20,50,0.101542,22.808822,0,11.742872,4.454391,63.784476,15.946119,0.202709,28.313036,10.146007,5.504214,-12.662816,0.03402781,0.2995445,1,0,0,0,0,1,22.509278,18.383041,0.083334,0.0,25.35585,27.259866,27.127486,26.807732,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.299544,4.425781,22.725488,22.808822,0.0,0.0,0.0,0.0,-2.547028,-4.451044,0.0,0.0,16.627759,16.358716,22.65905,15.946119,6.293573,22.808822,22.808822,11.255598,22.509278,0.083334,2.927842,0.21181,10.766279,2.874732,45.3181,63.784476,94.403589
4,5,1,20,50,0.135756,25.35585,0,12.234987,7.896588,89.140326,17.828065,0.338464,28.313036,10.146007,2.957185,-15.209844,0.0342133,2.547028,1,0,0,0,0,1,22.808822,22.509278,18.383041,0.083334,27.259866,27.127486,26.807732,27.864715,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.547028,2.846573,6.972809,25.272516,0.0,0.0,0.0,0.0,-1.904016,-1.771635,0.0,0.0,18.652046,18.328164,24.082336,22.264248,6.293573,25.35585,25.35585,11.255598,22.808822,18.383041,2.927842,1.801021,2.885502,2.874732,48.164673,89.056992,94.403589


Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out,area@AddMultiplyingDividing,u_in_cumsum@AddMultiplyingDividing,u_in_cummean@AddMultiplyingDividing,time_step_cumsum@AddMultiplyingDividing,breath_id__u_in__max@AddMultiplyingDividing,breath_id__u_in__mean@AddMultiplyingDividing,breath_id__u_in__diffmax@AddMultiplyingDividing,breath_id__u_in__diffmean@AddMultiplyingDividing,breath_time@AddBreathTimeAndUInTime,u_in_time@AddBreathTimeAndUInTime,R_dummy_20@RCDummry,R_dummy_5@RCDummry,R_dummy_50@RCDummry,C_dummy_10@RCDummry,C_dummy_20@RCDummry,C_dummy_50@RCDummry,u_in_lag1@LagFeatures,u_in_lag2@LagFeatures,u_in_lag3@LagFeatures,u_in_lag4@LagFeatures,u_in_lag-1@LagFeatures,u_in_lag-2@LagFeatures,u_in_lag-3@LagFeatures,u_in_lag-4@LagFeatures,u_out_lag1@LagFeatures,u_out_lag2@LagFeatures,u_out_lag3@LagFeatures,u_out_lag4@LagFeatures,u_out_lag-1@LagFeatures,u_out_lag-2@LagFeatures,u_out_lag-3@LagFeatures,u_out_lag-4@LagFeatures,u_in_lag1_diff@LagFeatures,u_in_lag2_diff@LagFeatures,u_in_lag3_diff@LagFeatures,u_in_lag4_diff@LagFeatures,u_out_lag1_diff@LagFeatures,u_out_lag2_diff@LagFeatures,u_out_lag3_diff@LagFeatures,u_out_lag4_diff@LagFeatures,u_in_lag-1_diff@LagFeatures,u_in_lag-2_diff@LagFeatures,u_out_lag-1_diff@LagFeatures,u_out_lag-2_diff@LagFeatures,u_in_ewm9@LagFeatures,u_in_ewm15@LagFeatures,u_in_rolling_mean2@LagFeatures,u_in_rolling_mean4@LagFeatures,u_in_rolling_mean15@LagFeatures,u_in_rolling_max2@LagFeatures,u_in_rolling_max4@LagFeatures,u_in_rolling_max15@LagFeatures,u_in_rolling_min2@LagFeatures,u_in_rolling_min4@LagFeatures,u_in_rolling_min15@LagFeatures,u_in_rolling_std2@LagFeatures,u_in_rolling_std4@LagFeatures,u_in_rolling_std15@LagFeatures,u_in_rolling_sum2@LagFeatures,u_in_rolling_sum4@LagFeatures,u_in_rolling_sum15@LagFeatures
0,1,0,5,20,0.0,0.0,0,0.0,0.0,0.0,0.0,37.542219,9.327338,37.542219,9.327338,6.623513e-07,1e-06,0,1,0,0,1,0,0.0,0.0,0.0,0.0,7.515046,14.651675,21.23061,26.320956,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-7.515046,-14.651675,0.0,0.0,0.0,0.0,7.312916,7.138082,6.302657,8.146446,9.042205,11.266123,6.479387,5.545624,2.935795,1.178789,1.645458,2.877506,14.625833,28.552326,94.53985
1,2,0,5,20,0.031904,7.515046,0,0.239758,7.515046,3.757523,0.031904,37.542219,9.327338,30.027173,1.812292,0.03190374,7.515046,0,1,0,0,1,0,0.0,0.0,0.0,0.0,14.651675,21.23061,26.320956,30.486938,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.515046,7.515046,7.515046,7.515046,0.0,0.0,0.0,0.0,-7.13663,-13.715564,0.0,0.0,3.902147,3.844325,3.757523,7.138082,6.302657,7.515046,9.042205,11.266123,0.0,5.545624,2.935795,5.31394,1.645458,2.877506,7.515046,28.552326,94.53985
2,3,0,5,20,0.063827,14.651675,0,1.174935,22.166721,7.388907,0.095731,37.542219,9.327338,22.890543,-5.324338,0.03192353,7.13663,0,1,0,0,1,0,7.515046,0.0,0.0,0.0,21.23061,26.320956,30.486938,33.54595,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.13663,14.651675,14.651675,14.651675,0.0,0.0,0.0,0.0,-6.578935,-11.669281,0.0,0.0,7.764551,7.614466,11.08336,7.138082,6.302657,14.651675,9.042205,11.266123,7.515046,5.545624,2.935795,5.046359,1.645458,2.877506,22.166721,28.552326,94.53985
3,4,0,5,20,0.095751,21.23061,0,3.207788,43.397331,10.849333,0.191482,37.542219,9.327338,16.311609,-11.903272,0.03192377,6.578935,0,1,0,0,1,0,14.651675,7.515046,0.0,0.0,26.320956,30.486938,33.54595,35.7176,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.578935,13.715564,21.23061,21.23061,0.0,0.0,0.0,0.0,-5.090346,-9.256328,0.0,0.0,11.529365,11.257957,17.941143,10.849333,6.302657,21.23061,21.23061,11.266123,14.651675,0.0,2.935795,4.652009,9.147936,2.877506,35.882285,43.397331,94.53985
4,5,0,5,20,0.127644,26.320956,0,6.567489,69.718287,13.943657,0.319126,37.542219,9.327338,11.221263,-16.993619,0.03189254,5.090346,0,1,0,0,1,0,21.23061,14.651675,7.515046,0.0,30.486938,33.54595,35.7176,36.971061,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.090346,11.669281,18.805911,26.320956,0.0,0.0,0.0,0.0,-4.165982,-7.224994,0.0,0.0,14.959946,14.555207,23.775783,17.429572,6.302657,26.320956,26.320956,11.266123,21.23061,7.515046,2.935795,3.599418,8.155144,2.877506,47.551566,69.718287,94.53985


# normalization

In [15]:
train_col_order = ["u_out"] + train.columns.drop("u_out").tolist()
test_col_order = ["u_out"] + test.columns.drop("u_out").tolist()
train = train[train_col_order]
test = test[test_col_order]
scaler = RobustScaler()
scaler_targets = [col for col in CFG.cont_seq_cols if col != "u_out"]
print(f"Apply Standerd Scaler these columns: {scaler_targets}")
for scaler_target in tqdm(scaler_targets):
    scaler.fit(train.loc[:,[scaler_target]])
    train.loc[:,[scaler_target]] = scaler.transform(train.loc[:,[scaler_target]])
    test.loc[:,[scaler_target]] = scaler.transform(test.loc[:,[scaler_target]])
display(train.head())
display(test.head())

Apply Standerd Scaler these columns: ['R', 'breath_id__u_in__diffmax@AddMultiplyingDividing', 'u_in_cumsum@AddMultiplyingDividing', 'u_in_rolling_mean2@LagFeatures', 'u_in_lag2@LagFeatures', 'u_in_rolling_sum4@LagFeatures', 'u_in_lag2_diff@LagFeatures', 'u_in_rolling_sum2@LagFeatures', 'u_in_cummean@AddMultiplyingDividing', 'u_in_lag-4@LagFeatures', 'u_in_lag3@LagFeatures', 'u_in_lag-1_diff@LagFeatures', 'C_dummy_10@RCDummry', 'u_in_lag1@LagFeatures', 'u_in_ewm15@LagFeatures', 'u_in_rolling_mean15@LagFeatures', 'C_dummy_50@RCDummry', 'u_in_rolling_std2@LagFeatures', 'u_in_lag-2@LagFeatures', 'u_out_lag1@LagFeatures', 'u_in_lag4_diff@LagFeatures', 'u_in_rolling_max4@LagFeatures', 'C', 'u_in_rolling_std15@LagFeatures', 'u_in_rolling_mean4@LagFeatures', 'u_out_lag1_diff@LagFeatures', 'u_in_rolling_min4@LagFeatures', 'u_out_lag4@LagFeatures', 'u_out_lag-3@LagFeatures', 'u_in_rolling_max2@LagFeatures', 'R_dummy_20@RCDummry', 'time_step', 'u_out_lag-1@LagFeatures', 'u_out_lag2_diff@LagFeatur

  0%|          | 0/65 [00:00<?, ?it/s]

Unnamed: 0,u_out,id,breath_id,R,C,time_step,u_in,pressure,area@AddMultiplyingDividing,u_in_cumsum@AddMultiplyingDividing,u_in_cummean@AddMultiplyingDividing,time_step_cumsum@AddMultiplyingDividing,breath_id__u_in__max@AddMultiplyingDividing,breath_id__u_in__mean@AddMultiplyingDividing,breath_id__u_in__diffmax@AddMultiplyingDividing,breath_id__u_in__diffmean@AddMultiplyingDividing,breath_time@AddBreathTimeAndUInTime,u_in_time@AddBreathTimeAndUInTime,R_dummy_20@RCDummry,R_dummy_5@RCDummry,R_dummy_50@RCDummry,C_dummy_10@RCDummry,C_dummy_20@RCDummry,C_dummy_50@RCDummry,u_in_lag1@LagFeatures,u_in_lag2@LagFeatures,u_in_lag3@LagFeatures,u_in_lag4@LagFeatures,u_in_lag-1@LagFeatures,u_in_lag-2@LagFeatures,u_in_lag-3@LagFeatures,u_in_lag-4@LagFeatures,u_out_lag1@LagFeatures,u_out_lag2@LagFeatures,u_out_lag3@LagFeatures,u_out_lag4@LagFeatures,u_out_lag-1@LagFeatures,u_out_lag-2@LagFeatures,u_out_lag-3@LagFeatures,u_out_lag-4@LagFeatures,u_in_lag1_diff@LagFeatures,u_in_lag2_diff@LagFeatures,u_in_lag3_diff@LagFeatures,u_in_lag4_diff@LagFeatures,u_out_lag1_diff@LagFeatures,u_out_lag2_diff@LagFeatures,u_out_lag3_diff@LagFeatures,u_out_lag4_diff@LagFeatures,u_in_lag-1_diff@LagFeatures,u_in_lag-2_diff@LagFeatures,u_out_lag-1_diff@LagFeatures,u_out_lag-2_diff@LagFeatures,u_in_ewm9@LagFeatures,u_in_ewm15@LagFeatures,u_in_rolling_mean2@LagFeatures,u_in_rolling_mean4@LagFeatures,u_in_rolling_mean15@LagFeatures,u_in_rolling_max2@LagFeatures,u_in_rolling_max4@LagFeatures,u_in_rolling_max15@LagFeatures,u_in_rolling_min2@LagFeatures,u_in_rolling_min4@LagFeatures,u_in_rolling_min15@LagFeatures,u_in_rolling_std2@LagFeatures,u_in_rolling_std4@LagFeatures,u_in_rolling_std15@LagFeatures,u_in_rolling_sum2@LagFeatures,u_in_rolling_sum4@LagFeatures,u_in_rolling_sum15@LagFeatures
0,0,1,1,0.0,0.75,-0.989052,-0.937384,5.837492,-0.516581,-0.725228,-0.648438,-0.496658,0.112208,0.908069,0.326941,1.273864,-15.475736,-0.044814,1.0,0.0,0.0,0.0,0.0,1.0,-0.864121,-0.842527,-0.819258,-0.794152,2.890207,3.664026,3.743915,4.277824,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.471409,0.217993,0.121028,0.068315,0.0,0.0,0.0,0.0,-124.171952,-67.568904,0.0,0.0,-0.674415,-0.645875,0.665678,0.479105,0.369347,0.726592,0.643385,0.88922,0.459244,0.344693,0.316541,2.593278,1.618068,0.575588,0.665678,0.479105,0.369347
1,0,2,1,0.0,0.75,-0.963608,3.049278,5.907794,-0.514031,-0.676829,0.162935,-0.496024,0.112208,0.908069,-0.178436,-1.450205,0.103311,124.171952,1.0,0.0,0.0,0.0,0.0,1.0,-0.84736,-0.842527,-0.819258,-0.794152,3.739712,3.724218,4.256189,4.661047,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,113.467773,58.76267,38.091005,26.30625,0.0,0.0,0.0,0.0,-27.963705,-13.306889,0.0,0.0,0.822722,0.602444,1.122077,0.479105,0.369347,2.832561,0.643385,0.88922,-0.823539,0.344693,0.316541,29.984876,1.618068,0.575588,1.122077,0.479105,0.369347
2,0,3,1,0.0,0.75,-0.938006,3.948195,7.876254,-0.507768,-0.617568,0.555359,-0.494752,0.112208,0.908069,-0.292389,-2.06443,0.200442,27.963705,1.0,0.0,0.0,0.0,0.0,1.0,2.83314,-0.825784,-0.819258,-0.794152,3.801381,4.236036,4.639136,4.634403,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,25.549576,71.696779,46.652513,32.222406,0.0,0.0,0.0,0.0,-1.988466,-8.54632,0.0,0.0,1.55438,1.210203,3.764462,0.479105,0.369347,3.680142,0.643385,0.88922,2.856483,0.344693,0.316541,6.641297,1.618068,0.575588,3.764462,0.479105,0.369347
3,0,4,1,0.0,0.75,-0.912278,4.013452,11.742872,-0.498222,-0.557517,0.758212,-0.492839,0.112208,0.908069,-0.300662,-2.10902,0.277152,1.988466,1.0,0.0,0.0,0.0,0.0,1.0,3.663022,2.850764,-0.802501,-0.794152,4.32576,4.618643,4.612511,4.570046,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,1.812565,14.110409,47.101127,32.65189,0.0,0.0,0.0,0.0,-17.244182,-13.383045,0.0,0.0,1.932321,1.524447,4.28594,2.122094,0.369347,3.741672,2.723238,0.88922,3.686258,-0.754615,0.316541,0.33877,11.831559,0.575588,4.28594,2.122094,0.369347
4,0,5,1,0.0,0.75,-0.886409,4.568332,12.234987,-0.484036,-0.490761,0.925096,-0.490282,0.112208,0.908069,-0.371002,-2.488167,0.363024,17.244182,1.0,0.0,0.0,0.0,0.0,1.0,3.723268,3.679756,2.877119,-0.777379,4.717757,4.592042,4.5482,4.782785,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,15.75373,9.058184,14.415967,36.184309,0.0,0.0,0.0,0.0,-12.879471,-5.30589,0.0,0.0,2.251268,1.787073,4.621343,3.298583,0.369347,4.264863,3.107558,0.88922,3.746496,2.938328,0.316541,4.040355,3.007792,0.575588,4.621343,3.298583,0.369347


Unnamed: 0,u_out,id,breath_id,R,C,time_step,u_in,area@AddMultiplyingDividing,u_in_cumsum@AddMultiplyingDividing,u_in_cummean@AddMultiplyingDividing,time_step_cumsum@AddMultiplyingDividing,breath_id__u_in__max@AddMultiplyingDividing,breath_id__u_in__mean@AddMultiplyingDividing,breath_id__u_in__diffmax@AddMultiplyingDividing,breath_id__u_in__diffmean@AddMultiplyingDividing,breath_time@AddBreathTimeAndUInTime,u_in_time@AddBreathTimeAndUInTime,R_dummy_20@RCDummry,R_dummy_5@RCDummry,R_dummy_50@RCDummry,C_dummy_10@RCDummry,C_dummy_20@RCDummry,C_dummy_50@RCDummry,u_in_lag1@LagFeatures,u_in_lag2@LagFeatures,u_in_lag3@LagFeatures,u_in_lag4@LagFeatures,u_in_lag-1@LagFeatures,u_in_lag-2@LagFeatures,u_in_lag-3@LagFeatures,u_in_lag-4@LagFeatures,u_out_lag1@LagFeatures,u_out_lag2@LagFeatures,u_out_lag3@LagFeatures,u_out_lag4@LagFeatures,u_out_lag-1@LagFeatures,u_out_lag-2@LagFeatures,u_out_lag-3@LagFeatures,u_out_lag-4@LagFeatures,u_in_lag1_diff@LagFeatures,u_in_lag2_diff@LagFeatures,u_in_lag3_diff@LagFeatures,u_in_lag4_diff@LagFeatures,u_out_lag1_diff@LagFeatures,u_out_lag2_diff@LagFeatures,u_out_lag3_diff@LagFeatures,u_out_lag4_diff@LagFeatures,u_in_lag-1_diff@LagFeatures,u_in_lag-2_diff@LagFeatures,u_out_lag-1_diff@LagFeatures,u_out_lag-2_diff@LagFeatures,u_in_ewm9@LagFeatures,u_in_ewm15@LagFeatures,u_in_rolling_mean2@LagFeatures,u_in_rolling_mean4@LagFeatures,u_in_rolling_mean15@LagFeatures,u_in_rolling_max2@LagFeatures,u_in_rolling_max4@LagFeatures,u_in_rolling_max15@LagFeatures,u_in_rolling_min2@LagFeatures,u_in_rolling_min4@LagFeatures,u_in_rolling_min15@LagFeatures,u_in_rolling_std2@LagFeatures,u_in_rolling_std4@LagFeatures,u_in_rolling_std15@LagFeatures,u_in_rolling_sum2@LagFeatures,u_in_rolling_sum4@LagFeatures,u_in_rolling_sum15@LagFeatures
0,0,1,0,-0.333333,0.0,-0.989052,-0.955539,-0.516581,-0.725447,-0.655828,-0.496658,0.334664,0.749398,0.584122,1.164403,-15.475632,-0.044807,0.0,1.0,0.0,0.0,1.0,0.0,-0.864121,-0.842527,-0.819258,-0.794152,0.652718,2.085062,3.426496,4.472072,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-0.045513,-0.04861,-0.051881,-0.051169,0.0,0.0,0.0,0.0,-50.966638,-44.133146,0.0,0.0,-0.687545,-0.656988,0.669557,0.481963,0.371377,0.729835,0.646003,0.890709,0.46269,0.347693,0.318837,2.591054,1.619368,0.576711,0.669557,0.481963,0.371377
1,0,2,0,-0.333333,0.0,-0.96493,0.681643,-0.515592,-0.705662,-0.322625,-0.496057,0.334664,0.749398,0.376581,0.045724,-0.706181,50.966638,0.0,1.0,0.0,0.0,1.0,0.0,-0.864121,-0.842527,-0.819258,-0.794152,2.121998,3.407081,4.450297,5.310562,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,46.570397,23.993628,15.541051,10.723828,0.0,0.0,0.0,0.0,-48.397986,-41.311212,0.0,0.0,-0.072723,-0.144347,-0.168287,0.481963,0.371377,0.600138,0.646003,0.890709,-0.840297,0.347693,0.318837,12.222635,1.619368,0.576711,-0.168287,0.481963,0.371377
2,0,3,0,-0.333333,0.0,-0.940793,2.236385,-0.511738,-0.667087,-0.000608,-0.494854,0.334664,0.749398,0.179491,-1.016625,-0.69702,48.397986,0.0,1.0,0.0,0.0,1.0,0.0,0.647331,-0.842527,-0.819258,-0.794152,3.476461,4.429972,5.288183,5.926252,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,44.22308,46.825233,30.348811,20.956255,0.0,0.0,0.0,0.0,-44.612401,-35.14263,0.0,0.0,0.535836,0.358401,1.558079,0.481963,0.371377,2.066091,0.646003,0.890709,0.670958,0.347693,0.318837,11.599388,1.619368,0.576711,1.558079,0.481963,0.371377
3,0,4,0,-0.333333,0.0,-0.916656,3.669632,-0.50336,-0.611192,0.306249,-0.493051,0.334664,0.749398,-0.002197,-1.995956,-0.696909,44.612401,0.0,1.0,0.0,0.0,1.0,0.0,2.082674,0.667302,-0.819258,-0.794152,4.524455,5.267114,5.90343,6.363341,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,40.76369,43.830413,43.999411,30.389065,0.0,0.0,0.0,0.0,-34.507996,-27.868713,0.0,0.0,1.12902,0.844261,3.174144,1.173029,0.371377,3.417487,2.485103,0.890709,2.106115,-0.771432,0.318837,10.680869,10.01957,0.576711,3.174144,1.173029,0.371377
4,0,5,0,-0.333333,0.0,-0.892543,4.778584,-0.489513,-0.541895,0.580642,-0.490646,0.334664,0.749398,-0.142776,-2.753697,-0.711369,34.507996,0.0,1.0,0.0,0.0,1.0,0.0,3.405852,2.101105,0.691833,-0.794152,5.382142,5.881814,6.340205,6.615627,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,31.52996,37.283914,38.968414,37.687552,0.0,0.0,0.0,0.0,-28.233492,-21.745198,0.0,0.0,1.669541,1.283949,4.549102,2.398325,0.371377,4.463108,3.253182,0.890709,3.429121,0.745129,0.318837,8.229177,8.907984,0.576711,4.549102,2.398325,0.371377


In [16]:
pca = PCA(n_components=15)
pca.fit(train[[col for col in CFG.cont_seq_cols if col != "u_out"]])
train_pca_feature = pca.transform(train[[col for col in CFG.cont_seq_cols if col != "u_out"]])
test_pca_feature = pca.transform(test[[col for col in CFG.cont_seq_cols if col != "u_out"]])

In [32]:
train_pca_df = pd.DataFrame(train_pca_feature, columns=["pca{}".format(x + 1) for x in range(15)])
test_pca_df = pd.DataFrame(test_pca_feature, columns=["pca{}".format(x + 1) for x in range(15)])

In [33]:
len(train_pca_feature)

6036000

In [35]:
train = pd.concat([train, train_pca_df], axis=1)
test = pd.concat([test, train_pca_df], axis=1)
CFG.cont_seq_cols += ["pca{}".format(x + 1) for x in range(15)]

In [36]:
train.tail()

Unnamed: 0,u_out,id,breath_id,R,C,time_step,u_in,pressure,area@AddMultiplyingDividing,u_in_cumsum@AddMultiplyingDividing,u_in_cummean@AddMultiplyingDividing,time_step_cumsum@AddMultiplyingDividing,breath_id__u_in__max@AddMultiplyingDividing,breath_id__u_in__mean@AddMultiplyingDividing,breath_id__u_in__diffmax@AddMultiplyingDividing,breath_id__u_in__diffmean@AddMultiplyingDividing,breath_time@AddBreathTimeAndUInTime,u_in_time@AddBreathTimeAndUInTime,R_dummy_20@RCDummry,R_dummy_5@RCDummry,R_dummy_50@RCDummry,C_dummy_10@RCDummry,C_dummy_20@RCDummry,C_dummy_50@RCDummry,u_in_lag1@LagFeatures,u_in_lag2@LagFeatures,u_in_lag3@LagFeatures,u_in_lag4@LagFeatures,u_in_lag-1@LagFeatures,u_in_lag-2@LagFeatures,u_in_lag-3@LagFeatures,u_in_lag-4@LagFeatures,u_out_lag1@LagFeatures,u_out_lag2@LagFeatures,u_out_lag3@LagFeatures,u_out_lag4@LagFeatures,u_out_lag-1@LagFeatures,u_out_lag-2@LagFeatures,u_out_lag-3@LagFeatures,u_out_lag-4@LagFeatures,u_in_lag1_diff@LagFeatures,u_in_lag2_diff@LagFeatures,u_in_lag3_diff@LagFeatures,u_in_lag4_diff@LagFeatures,u_out_lag1_diff@LagFeatures,u_out_lag2_diff@LagFeatures,u_out_lag3_diff@LagFeatures,u_out_lag4_diff@LagFeatures,u_in_lag-1_diff@LagFeatures,u_in_lag-2_diff@LagFeatures,u_out_lag-1_diff@LagFeatures,u_out_lag-2_diff@LagFeatures,u_in_ewm9@LagFeatures,u_in_ewm15@LagFeatures,u_in_rolling_mean2@LagFeatures,u_in_rolling_mean4@LagFeatures,u_in_rolling_mean15@LagFeatures,u_in_rolling_max2@LagFeatures,u_in_rolling_max4@LagFeatures,u_in_rolling_max15@LagFeatures,u_in_rolling_min2@LagFeatures,u_in_rolling_min4@LagFeatures,u_in_rolling_min15@LagFeatures,u_in_rolling_std2@LagFeatures,u_in_rolling_std4@LagFeatures,u_in_rolling_std15@LagFeatures,u_in_rolling_sum2@LagFeatures,u_in_rolling_sum4@LagFeatures,u_in_rolling_sum15@LagFeatures,pca1,pca2,pca3,pca4,pca5,pca6,pca7,pca8,pca9,pca10,pca11,pca12,pca13,pca14,pca15,pca1.1,pca2.1,pca3.1,pca4.1,pca5.1,pca6.1,pca7.1,pca8.1,pca9.1,pca10.1,pca11.1,pca12.1,pca13.1,pca14.1,pca15.1
6035995,1,6035996,125749,0.666667,-0.25,0.904641,-0.630999,3.869032,0.596562,0.293724,-0.204152,1.295715,0.044505,-0.106479,0.210531,0.285296,-0.008168,0.423565,0.0,0.0,1.0,1.0,0.0,0.0,-0.578382,-0.570658,-0.546281,-0.519965,-0.58802,-0.545879,-0.587564,-0.527135,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.382508,0.38811,0.222269,0.131571,0.0,0.0,0.0,0.0,0.053074,-0.174043,0.0,0.0,-0.431204,-0.350302,-0.710836,-0.585529,-0.750261,-0.637541,-0.493588,-0.492746,-0.554596,-0.498351,-0.207135,-0.040931,-0.151132,-0.540822,-0.710836,-0.585529,-0.750261,,,,,,,,,,,,,,,,-0.02227,-2.19315,-0.123503,3.145181,-3.036665,-2.007939,-3.242517,2.660101,1.495772,-0.673589,1.195218,0.379592,0.810241,0.278495,0.046489
6035996,1,6035997,125749,0.666667,-0.25,0.929862,-0.631264,3.869032,0.612131,0.297643,-0.208304,1.343525,0.044505,-0.106479,0.210564,0.285477,-0.033113,-0.053074,0.0,0.0,1.0,1.0,0.0,0.0,-0.564504,-0.557095,-0.547162,-0.520905,-0.57351,-0.603413,-0.545312,-0.825567,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,-0.05306,0.16825,0.228835,0.136529,0.0,0.0,0.0,0.0,-0.433603,0.685393,0.0,0.0,-0.432825,-0.355331,-0.702849,-0.579435,-0.744676,-0.637541,-0.493588,-0.492746,-0.540965,-0.498351,-0.200158,-0.152573,-0.150094,-0.54022,-0.702849,-0.579435,-0.744676,,,,,,,,,,,,,,,,-0.564868,-2.129272,-0.011665,2.281173,-2.874186,-2.059314,-3.84247,2.716583,1.469867,-0.594545,1.202232,0.279483,0.806999,0.2512,-0.138407
6035997,1,6035998,125749,0.666667,-0.25,0.955151,-0.61591,3.798729,0.628653,0.301748,-0.212269,1.391965,0.044505,-0.106479,0.208618,0.274986,0.008168,0.433603,0.0,0.0,1.0,1.0,0.0,0.0,-0.564749,-0.543232,-0.533588,-0.521787,-0.632456,-0.561199,-0.84353,-0.825567,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,-1.0,0.391682,0.172981,0.235007,0.243866,0.0,0.0,0.0,0.0,1.988293,0.26458,0.0,0.0,-0.4335,-0.359684,-0.694688,-0.569856,-0.738846,-0.623313,-0.483137,-0.482945,-0.540965,-0.484728,-0.200158,-0.038495,-0.159776,-0.536275,-0.694688,-0.569856,-0.738846,,,,,,,,,,,,,,,,0.552404,-0.473242,-0.83621,3.512519,-2.822012,-1.89559,-3.343076,2.700231,1.565531,-0.667235,1.130878,0.184303,0.790808,0.281753,-0.354565
6035998,1,6035999,125749,0.666667,-0.25,0.980357,-0.678284,4.079938,0.642315,0.305098,-0.216455,1.441033,0.044505,-0.106479,0.216525,0.317606,-0.042715,-1.988293,0.0,0.0,1.0,1.0,0.0,0.0,-0.550574,-0.543476,-0.519713,-0.5082,-0.589206,-0.859152,-0.84353,-0.825567,0.0,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,-1.821526,-0.739108,-0.502238,-0.263439,0.0,0.0,0.0,0.0,-1.381161,3.871236,0.0,1.0,-0.437477,-0.3656,-0.720119,-0.576748,-0.737051,-0.623313,-0.483137,-0.482945,-0.584368,-0.514605,-0.200158,0.316981,-0.083796,-0.539759,-0.720119,-0.576748,-0.737051,,,,,,,,,,,,,,,,-2.706575,-1.164545,-0.345889,-1.104142,-2.401967,-1.863116,-5.282372,3.229845,1.767725,-0.647028,1.24162,0.097265,0.73522,0.186516,-0.213221
6035999,1,6036000,125749,0.666667,-0.25,1.005514,-0.632519,3.869032,0.658436,0.309002,-0.220304,1.490728,0.044505,-0.106479,0.210723,0.286334,-0.072296,1.381161,0.0,0.0,1.0,1.0,0.0,0.0,-0.608158,-0.529316,-0.519958,-0.494312,-0.89447,-0.859152,-0.84353,-0.825567,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,1.25759,-0.292514,-0.063828,-0.061169,0.0,0.0,0.0,0.0,10.109515,4.504516,1.0,1.0,-0.438699,-0.369937,-0.729102,-0.577073,-0.731912,-0.638974,-0.483137,-0.482945,-0.584368,-0.514605,-0.186691,0.191416,-0.084523,-0.543402,-0.729102,-0.577073,-0.731912,,,,,,,,,,,,,,,,4.625591,6.830993,-4.529933,3.037909,-2.114951,-1.17925,-4.348264,3.198298,1.686654,-0.702875,1.202957,-0.020512,0.68124,0.060858,-0.280465


# reshape

In [12]:
print(set(train.drop(["id", "breath_id", "pressure"], axis=1).columns) - set(CFG.cont_seq_cols))
print(train.drop(["id", "breath_id", "pressure"], axis=1).shape)
print(len(CFG.cont_seq_cols))

X = np.float32(train.drop(["id", "breath_id", "pressure"], axis=1)).reshape(-1, 80, len(CFG.cont_seq_cols))
y = np.float32(train["pressure"]).reshape(-1, 80, 1)
X_test = np.float32(test.drop(["id", "breath_id"], axis=1)).reshape(-1, 80, len(CFG.cont_seq_cols))

set()
(6036000, 66)
66
