# About this notebook  
- PyTorch RNN starter code with W&B  
- Pytorch W&B Usage Examples from https://docs.wandb.ai/guides/integrations/pytorch  

If this notebook is helpful, feel free to upvote :)

![](https://raw.githubusercontent.com/google/deluca-lung/main/assets/2020-10-02%20Ventilator%20diagram.svg)

In [1]:
# for local
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [2]:
# ====================================================
# Directory settings
# ====================================================
import os

EXP_NAME='1005_base_3layer'

OUTPUT_DIR = f'./results/{EXP_NAME}/'
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

In [3]:
# ====================================================
# CFG
# ====================================================
class CFG:
    experiment_name=EXP_NAME
    competition='ventilator'
    _wandb_kernel='hypknot'
    apex=False
    print_freq=20
    num_workers=4
    model_name='rnn'
    scheduler='CosineAnnealingLR' # ['linear', 'cosine', 'ReduceLROnPlateau', 'CosineAnnealingLR', 'CosineAnnealingWarmRestarts']
    batch_scheduler=False
    #num_warmup_steps=100 # ['linear', 'cosine']
    #num_cycles=0.5 # 'cosine'
    #factor=0.2 # ReduceLROnPlateau
    #patience=4 # ReduceLROnPlateau
    #eps=1e-6 # ReduceLROnPlateau
    T_max=50 # CosineAnnealingLR
    #T_0=50 # CosineAnnealingWarmRestarts
    epochs=30
    max_grad_norm=1000
    gradient_accumulation_steps=1
    hidden_size=1024
    hidden2_size=256
    lr=5e-3
    min_lr=1e-6
    weight_decay=1e-6
    batch_size=1024
    n_fold=5
    trn_fold=[0, 1, 2, 3, 4]
    cate_seq_cols=['R', 'C']
    cont_seq_cols=['time_step', 'u_in', 'u_out'] \
            + ['area', 'u_in_cumsum', 'u_in_lag1', 'u_in_lag2', 'u_in_lag3', 'u_in_lag4',
               'u_out_lag1', 'u_out_lag2', 'u_out_lag3', 'u_out_lag4',
               'u_in_lag_back1', 'u_in_lag_back2', 'u_in_lag_back3', 'u_in_lag_back4',
               'u_out_lag_back1', 'u_out_lag_back2', 'u_out_lag_back3', 'u_out_lag_back4',
               'breath_id__u_in__max', 'breath_id__u_out__max',
               'u_in_diff1', 'u_in_diff2', 'u_out_diff1', 'u_out_diff2',
               'breath_id__u_in__diffmax', 'breath_id__u_in__diffmean',
               'u_in_diff3', 'u_in_diff4', 'u_out_diff3', 'u_out_diff4', 'cross', 'cross2']
    train=True
    inference=True

In [4]:
# ====================================================
# Library
# ====================================================
import os
import gc
import sys
import json
import time
import math
import random
from datetime import datetime
from collections import Counter, defaultdict

import scipy as sp
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

from tqdm.auto import tqdm
import category_encoders as ce

from sklearn import preprocessing
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import StratifiedKFold, GroupKFold, KFold

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau

from transformers import AdamW
from transformers import get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup

import warnings
warnings.filterwarnings("ignore")

if CFG.apex:
    from apex import amp

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
# ====================================================
# wandb
# ====================================================
import wandb

# try:
#     from kaggle_secrets import UserSecretsClient
#     user_secrets = UserSecretsClient()
#     secret_value_0 = user_secrets.get_secret("wandb_api")
#     wandb.login(key=secret_value_0)
#     anony = None
# except:
#     anony = "must"
#     print('If you want to use your W&B account, go to Add-ons -> Secrets and provide your W&B access token. Use the Label name as wandb_api. \nGet your W&B access token from here: https://wandb.ai/authorize')

anony=None # not for kaggle kernel
    
def class2dict(f):
    return dict((name, getattr(f, name)) for name in dir(f) if not name.startswith('__'))

run = wandb.init(project="Ventilator-Pressure-Public", 
                 # name=CFG.model_name,
                 config=class2dict(CFG),
                 group=CFG.model_name,
                 job_type="train",
                 anonymous=anony)

[34m[1mwandb[0m: Currently logged in as: [33mhypknot[0m (use `wandb login --relogin` to force relogin)

CondaEnvException: Unable to determine environment

Please re-run this command with one of the following options:

* Provide an environment name via --name or -n
* Re-run this command inside an activated conda environment.



In [6]:
# ====================================================
# Utils
# ====================================================
def get_score(y_trues, y_preds):
    score = mean_absolute_error(y_trues, y_preds)
    return score


def init_logger(log_file=OUTPUT_DIR+'train.log'):
    from logging import getLogger, INFO, FileHandler,  Formatter,  StreamHandler
    logger = getLogger(__name__)
    logger.setLevel(INFO)
    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=log_file)
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger

LOGGER = init_logger()


def seed_everything(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    
seed_everything()

In [7]:
# ====================================================
# Data Loading
# ====================================================
train = pd.read_csv('../input/ventilator-pressure-prediction/train.csv')
test = pd.read_csv('../input/ventilator-pressure-prediction/test.csv')
sub = pd.read_csv('../input/ventilator-pressure-prediction/sample_submission.csv')

for c in ['u_in']:
    train[c] = np.log1p(train[c])
    test[c] = np.log1p(test[c])
    
r_map = {5: 0, 20: 1, 50: 2}
c_map = {10: 0, 20: 1, 50: 2}
train['R'] = train['R'].map(r_map)
test['R'] = test['R'].map(r_map)
train['C'] = train['C'].map(c_map)
test['C'] = test['C'].map(c_map)

display(train.head())
display(test.head())
display(sub.head())

Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out,pressure
0,1,1,1,2,0.0,0.080043,0,5.837492
1,2,1,1,2,0.033652,2.964399,0,5.907794
2,3,1,1,2,0.067514,3.157395,0,7.876254
3,4,1,1,2,0.101542,3.170056,0,11.742872
4,5,1,1,2,0.135756,3.27169,0,12.234987


Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out
0,1,0,0,1,0.0,0.0,0
1,2,0,0,1,0.031904,2.141835,0
2,3,0,0,1,0.063827,2.750578,0
3,4,0,0,1,0.095751,3.10147,0
4,5,0,0,1,0.127644,3.307654,0


Unnamed: 0,id,pressure
0,1,0
1,2,0
2,3,0
3,4,0
4,5,0


In [8]:
# ====================================================
# FE
# ====================================================
def add_feature(df):
#     # breath_time
#     df['breath_time'] = df['time_step'] - df['time_step'].shift(1)
#     df.loc[df['time_step'] == 0, 'breath_time'] = 0
#     # u_in_time
#     df['u_in_time'] = df['u_in'] - df['u_in'].shift(1)
#     df.loc[df['time_step'] == 0, 'u_in_time'] = 0
    df['area'] = df['time_step'] * df['u_in']
    df['area'] = df.groupby('breath_id')['area'].cumsum()

    df['u_in_cumsum'] = (df['u_in']).groupby(df['breath_id']).cumsum()

    df['u_in_lag1'] = df.groupby('breath_id')['u_in'].shift(1)
    df['u_out_lag1'] = df.groupby('breath_id')['u_out'].shift(1)
    df['u_in_lag_back1'] = df.groupby('breath_id')['u_in'].shift(-1)
    df['u_out_lag_back1'] = df.groupby('breath_id')['u_out'].shift(-1)
    df['u_in_lag2'] = df.groupby('breath_id')['u_in'].shift(2)
    df['u_out_lag2'] = df.groupby('breath_id')['u_out'].shift(2)
    df['u_in_lag_back2'] = df.groupby('breath_id')['u_in'].shift(-2)
    df['u_out_lag_back2'] = df.groupby('breath_id')['u_out'].shift(-2)
    df['u_in_lag3'] = df.groupby('breath_id')['u_in'].shift(3)
    df['u_out_lag3'] = df.groupby('breath_id')['u_out'].shift(3)
    df['u_in_lag_back3'] = df.groupby('breath_id')['u_in'].shift(-3)
    df['u_out_lag_back3'] = df.groupby('breath_id')['u_out'].shift(-3)
    df['u_in_lag4'] = df.groupby('breath_id')['u_in'].shift(4)
    df['u_out_lag4'] = df.groupby('breath_id')['u_out'].shift(4)
    df['u_in_lag_back4'] = df.groupby('breath_id')['u_in'].shift(-4)
    df['u_out_lag_back4'] = df.groupby('breath_id')['u_out'].shift(-4)
    df = df.fillna(0)

    df['breath_id__u_in__max'] = df.groupby(['breath_id'])['u_in'].transform('max')
    df['breath_id__u_out__max'] = df.groupby(['breath_id'])['u_out'].transform('max')

    df['u_in_diff1'] = df['u_in'] - df['u_in_lag1']
    df['u_out_diff1'] = df['u_out'] - df['u_out_lag1']
    df['u_in_diff2'] = df['u_in'] - df['u_in_lag2']
    df['u_out_diff2'] = df['u_out'] - df['u_out_lag2']

    df['breath_id__u_in__diffmax'] = df.groupby(['breath_id'])['u_in'].transform('max') - df['u_in']
    df['breath_id__u_in__diffmean'] = df.groupby(['breath_id'])['u_in'].transform('mean') - df['u_in']

    df['u_in_diff3'] = df['u_in'] - df['u_in_lag3']
    df['u_out_diff3'] = df['u_out'] - df['u_out_lag3']
    df['u_in_diff4'] = df['u_in'] - df['u_in_lag4']
    df['u_out_diff4'] = df['u_out'] - df['u_out_lag4']
    df['cross']= df['u_in']*df['u_out']
    df['cross2']= df['time_step']*df['u_out']
    return df


train = add_feature(train)
test = add_feature(test)

In [9]:
# ====================================================
# CV split
# ====================================================
Fold = GroupKFold(n_splits=5)
groups = train['breath_id'].values
for n, (train_index, val_index) in enumerate(Fold.split(train, train['pressure'], groups)):
    train.loc[val_index, 'fold'] = int(n)
train['fold'] = train['fold'].astype(int)
print(train.groupby('fold').size())

fold
0    1207200
1    1207200
2    1207200
3    1207200
4    1207200
dtype: int64


In [10]:
# ====================================================
# Dataset
# ====================================================
class TrainDataset(Dataset):
    def __init__(self, df):
        self.df = df
        self.groups = df.groupby('breath_id').groups
        self.keys = list(self.groups.keys())
        
    def __len__(self):
        return len(self.groups)

    def __getitem__(self, idx):
        indexes = self.groups[self.keys[idx]]
        df = self.df.iloc[indexes]
        cate_seq_x = torch.LongTensor(df[CFG.cate_seq_cols].values)
        cont_seq_x = torch.FloatTensor(df[CFG.cont_seq_cols].values)
        u_out = torch.LongTensor(df['u_out'].values)
        label = torch.FloatTensor(df['pressure'].values)
        return cate_seq_x, cont_seq_x, u_out, label
    

class TestDataset(Dataset):
    def __init__(self, df):
        self.df = df
        self.groups = df.groupby('breath_id').groups
        self.keys = list(self.groups.keys())
        
    def __len__(self):
        return len(self.groups)

    def __getitem__(self, idx):
        indexes = self.groups[self.keys[idx]]
        df = self.df.iloc[indexes]
        cate_seq_x = torch.LongTensor(df[CFG.cate_seq_cols].values)
        cont_seq_x = torch.FloatTensor(df[CFG.cont_seq_cols].values)
        return cate_seq_x, cont_seq_x

In [11]:
# ====================================================
# Model
# ====================================================
class CustomModel(nn.Module):
    def __init__(self, cfg):
        super().__init__()
        self.cfg = cfg
        self.hidden_size = cfg.hidden_size
        self.hidden2_size = cfg.hidden2_size
        self.r_emb = nn.Embedding(3, 2, padding_idx=0)
        self.c_emb = nn.Embedding(3, 2, padding_idx=0)
        self.seq_emb = nn.Sequential(
            nn.Linear(4 + len(cfg.cont_seq_cols), self.hidden_size),
            nn.LayerNorm(self.hidden_size),
            nn.ReLU(),
            nn.Dropout(0.2),
        )
        self.lstm = nn.LSTM(self.hidden_size, self.hidden2_size, num_layers=3, dropout=0.2, batch_first=True, bidirectional=True)
        self.head = nn.Sequential(
            nn.Linear(self.hidden2_size * 2, self.hidden2_size * 2),
            nn.LayerNorm(self.hidden2_size * 2),
            nn.ReLU(),
            nn.Dropout(0.),
            nn.Linear(self.hidden2_size * 2, 1),
        )
        for n, m in self.named_modules():
            if isinstance(m, nn.LSTM):
                print(f'init {m}')
                for param in m.parameters():
                    if len(param.shape) >= 2:
                        nn.init.orthogonal_(param.data)
                    else:
                        nn.init.normal_(param.data)
            elif isinstance(m, nn.GRU):
                print(f"init {m}")
                for param in m.parameters():
                    if len(param.shape) >= 2:
                        init.orthogonal_(param.data)
                    else:
                        init.normal_(param.data)

    def forward(self, cate_seq_x, cont_seq_x):
        bs = cont_seq_x.size(0)
        r_emb = self.r_emb(cate_seq_x[:,:,0]).view(bs, 80, -1)
        c_emb = self.c_emb(cate_seq_x[:,:,1]).view(bs, 80, -1)
        seq_x = torch.cat((r_emb, c_emb, cont_seq_x), 2)
        seq_emb = self.seq_emb(seq_x)
        lstm_emb, _ = self.lstm(seq_emb)
        output = self.head(lstm_emb).view(bs, -1)
        return output

In [12]:
# ====================================================
# helper function
# ====================================================
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (remain %s)' % (asMinutes(s), asMinutes(rs))


def train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device):
    model.train()
    losses = AverageMeter()
    start = end = time.time()
    for step, (cate_seq_x, cont_seq_x, u_out, y) in enumerate(train_loader):
        loss_mask = u_out == 0
        cate_seq_x, cont_seq_x, y = cate_seq_x.to(device), cont_seq_x.to(device), y.to(device)
        batch_size = cont_seq_x.size(0)
        pred = model(cate_seq_x, cont_seq_x)
        loss = 2. * criterion(pred[loss_mask], y[loss_mask]) + criterion(pred[loss_mask == 0], y[loss_mask == 0])
        losses.update(loss.item(), batch_size)
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        if CFG.apex:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
        grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)
        if (step + 1) % CFG.gradient_accumulation_steps == 0:
            optimizer.step()
            optimizer.zero_grad()
            if CFG.batch_scheduler:
                scheduler.step()
        end = time.time()
        if step % CFG.print_freq == 0 or step == (len(train_loader)-1):
            print('Epoch: [{0}][{1}/{2}] '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  'Grad: {grad_norm:.4f}  '
                  'LR: {lr:.6f}  '
                  .format(
                   epoch+1, step, len(train_loader),
                   remain=timeSince(start, float(step+1)/len(train_loader)),
                   loss=losses,
                   grad_norm=grad_norm,
                   lr=scheduler.get_lr()[0],
                   ))
        wandb.log({f"[fold{fold}] loss": losses.val,
                   f"[fold{fold}] lr": scheduler.get_lr()[0]})
    return losses.avg


def valid_fn(valid_loader, model, criterion, device):
    model.eval()
    preds = []
    losses = AverageMeter()
    start = end = time.time()
    for step, (cate_seq_x, cont_seq_x, u_out, y) in enumerate(valid_loader):
        loss_mask = u_out == 0
        cate_seq_x, cont_seq_x, y = cate_seq_x.to(device), cont_seq_x.to(device), y.to(device)
        batch_size = cont_seq_x.size(0)
        with torch.no_grad():
            pred = model(cate_seq_x, cont_seq_x)
        loss = 2. * criterion(pred[loss_mask], y[loss_mask]) + criterion(pred[loss_mask == 0], y[loss_mask == 0])
        losses.update(loss.item(), batch_size)
        preds.append(pred.view(-1).detach().cpu().numpy())
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        end = time.time()
        if step % CFG.print_freq == 0 or step == (len(valid_loader)-1):
            print('EVAL: [{0}/{1}] '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  .format(
                   step, len(valid_loader),
                   remain=timeSince(start, float(step+1)/len(valid_loader)),
                   loss=losses,
                   ))
    preds = np.concatenate(preds)
    return losses.avg, preds


def inference_fn(test_loader, model, device):
    model.eval()
    model.to(device)
    preds = []
    tk0 = tqdm(enumerate(test_loader), total=len(test_loader))
    for step, (cate_seq_x, cont_seq_x) in tk0:
        cate_seq_x, cont_seq_x = cate_seq_x.to(device), cont_seq_x.to(device)
        with torch.no_grad():
            pred = model(cate_seq_x, cont_seq_x)
        preds.append(pred.view(-1).detach().cpu().numpy())
    preds = np.concatenate(preds)
    return preds

In [13]:
# ====================================================
# train loop
# ====================================================
def train_loop(folds, fold):

    LOGGER.info(f"========== fold: {fold} training ==========")

    # ====================================================
    # loader
    # ====================================================
    trn_idx = folds[folds['fold'] != fold].index
    val_idx = folds[folds['fold'] == fold].index
    
    train_folds = train.loc[trn_idx].reset_index(drop=True)
    valid_folds = train.loc[val_idx].reset_index(drop=True)
    y_true = valid_folds['pressure'].values
    non_expiratory_phase_val_idx = valid_folds[valid_folds['u_out'] == 0].index # The expiratory phase is not scored

    train_dataset = TrainDataset(train_folds)
    valid_dataset = TrainDataset(valid_folds)

    train_loader = DataLoader(train_dataset,
                              batch_size=CFG.batch_size,
                              shuffle=True,
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=True)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=CFG.batch_size,
                              shuffle=False,
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=False)

    # ====================================================
    # model & optimizer
    # ====================================================
    model = CustomModel(CFG)
    model.to(device)

    optimizer = AdamW(model.parameters(), lr=CFG.lr, weight_decay=CFG.weight_decay)
    num_train_steps = int(len(train_folds) / CFG.batch_size * CFG.epochs)
    
    def get_scheduler(optimizer):
        if CFG.scheduler=='linear':
            scheduler = get_linear_schedule_with_warmup(
                optimizer, num_warmup_steps=CFG.num_warmup_steps, num_training_steps=num_train_steps
            )
        elif CFG.scheduler=='cosine':
            scheduler = get_cosine_schedule_with_warmup(
                optimizer, num_warmup_steps=CFG.num_warmup_steps, num_training_steps=num_train_steps, num_cycles=CFG.num_cycles
            )
        elif CFG.scheduler=='ReduceLROnPlateau':
            scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=CFG.factor, patience=CFG.patience, verbose=True, eps=CFG.eps)
        elif CFG.scheduler=='CosineAnnealingLR':
            scheduler = CosineAnnealingLR(optimizer, T_max=CFG.T_max, eta_min=CFG.min_lr, last_epoch=-1)
        elif CFG.scheduler=='CosineAnnealingWarmRestarts':
            scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=CFG.T_0, T_mult=1, eta_min=CFG.min_lr, last_epoch=-1)
        return scheduler

    scheduler = get_scheduler(optimizer)

    # ====================================================
    # apex
    # ====================================================
    if CFG.apex:
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0)

    # ====================================================
    # loop
    # ====================================================
    criterion = nn.L1Loss()

    best_score = np.inf

    for epoch in range(CFG.epochs):

        start_time = time.time()

        # train
        avg_loss = train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device)

        # eval
        avg_val_loss, preds = valid_fn(valid_loader, model, criterion, device)
        
        if isinstance(scheduler, ReduceLROnPlateau):
            scheduler.step(avg_val_loss)
        elif isinstance(scheduler, CosineAnnealingLR):
            scheduler.step()
        elif isinstance(scheduler, CosineAnnealingWarmRestarts):
            scheduler.step()

        # scoring
        score = get_score(y_true[non_expiratory_phase_val_idx], preds[non_expiratory_phase_val_idx])

        elapsed = time.time() - start_time

        LOGGER.info(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s')
        LOGGER.info(f'Epoch {epoch+1} - MAE Score (without expiratory phase): {score:.4f}')
        wandb.log({f"[fold{fold}] epoch": epoch+1, 
                   f"[fold{fold}] avg_train_loss": avg_loss, 
                   f"[fold{fold}] avg_val_loss": avg_val_loss,
                   f"[fold{fold}] score": score})
        
        if score < best_score:
            best_score = score
            LOGGER.info(f'Epoch {epoch+1} - Save Best Score: {score:.4f} Model')
            torch.save({'model': model.state_dict(),
                        'preds': preds},
                        OUTPUT_DIR+f"fold{fold}_best.pth")
            
    preds = torch.load(OUTPUT_DIR+f"fold{fold}_best.pth", map_location=torch.device('cpu'))['preds']
    valid_folds['preds'] = preds

    torch.cuda.empty_cache()
    gc.collect()
    
    return valid_folds

In [14]:
# ====================================================
# main
# ====================================================
def main():
    
    """
    Prepare: 1.train 2.test
    """
    
    def get_result(result_df):
        preds = result_df['preds'].values
        labels = result_df['pressure'].values
        non_expiratory_phase_val_idx = result_df[result_df['u_out'] == 0].index # The expiratory phase is not scored
        score = get_score(labels[non_expiratory_phase_val_idx], preds[non_expiratory_phase_val_idx])
        LOGGER.info(f'Score (without expiratory phase): {score:<.4f}')
    
    if CFG.train:
        # train 
        oof_df = pd.DataFrame()
        for fold in range(CFG.n_fold):
            if fold in CFG.trn_fold:
                _oof_df = train_loop(train, fold)
                oof_df = pd.concat([oof_df, _oof_df])
                LOGGER.info(f"========== fold: {fold} result ==========")
                get_result(_oof_df)
        # CV result
        LOGGER.info(f"========== CV ==========")
        get_result(oof_df)
        # save result
        oof_df.to_csv(OUTPUT_DIR+'oof_df.csv', index=False)
    
    if CFG.inference:
        test_dataset = TestDataset(test)
        test_loader = DataLoader(test_dataset, batch_size=CFG.batch_size * 2, shuffle=False, num_workers=CFG.num_workers, pin_memory=True)
        for fold in CFG.trn_fold:
            model = CustomModel(CFG)
            path = OUTPUT_DIR+f"fold{fold}_best.pth"
            state = torch.load(path, map_location=torch.device('cpu'))
            model.load_state_dict(state['model'])
            predictions = inference_fn(test_loader, model, device)
            test[f'fold{fold}'] = predictions
            del state, predictions; gc.collect()
            torch.cuda.empty_cache()
        # submission
        test['pressure'] = test[[f'fold{fold}' for fold in range(CFG.n_fold)]].mean(1)
        test[['id', 'pressure']+[f'fold{fold}' for fold in range(CFG.n_fold)]].to_csv(OUTPUT_DIR+'raw_submission.csv', index=False)
        test[['id', 'pressure']].to_csv(OUTPUT_DIR+'submission.csv', index=False)
    
    wandb.finish()

In [None]:
if __name__ == '__main__':
    main()



init LSTM(1024, 256, num_layers=3, batch_first=True, dropout=0.2, bidirectional=True)
Epoch: [1][0/58] Elapsed 0m 3s (remain 3m 18s) Loss: 41.8990(41.8990) Grad: 67.9699  LR: 0.005000  
Epoch: [1][20/58] Elapsed 0m 19s (remain 0m 34s) Loss: 21.4790(22.9011) Grad: 1.5393  LR: 0.005000  
Epoch: [1][40/58] Elapsed 0m 35s (remain 0m 14s) Loss: 14.6366(21.1521) Grad: 11.1205  LR: 0.005000  
Epoch: [1][57/58] Elapsed 0m 45s (remain 0m 0s) Loss: 12.4337(18.7844) Grad: 10.3181  LR: 0.005000  
EVAL: [0/15] Elapsed 0m 2s (remain 0m 39s) Loss: 11.7498(11.7498) 


Epoch 1 - avg_train_loss: 18.7844  avg_val_loss: 12.0200  time: 55s
Epoch 1 - MAE Score (without expiratory phase): 5.1806
Epoch 1 - Save Best Score: 5.1806 Model


EVAL: [14/15] Elapsed 0m 9s (remain 0m 0s) Loss: 11.6493(12.0200) 
Epoch: [2][0/58] Elapsed 0m 3s (remain 3m 7s) Loss: 12.0228(12.0228) Grad: 11.1056  LR: 0.004990  
Epoch: [2][20/58] Elapsed 0m 19s (remain 0m 34s) Loss: 11.2283(11.5993) Grad: 14.0395  LR: 0.004990  
Epoch: [2][40/58] Elapsed 0m 35s (remain 0m 14s) Loss: 8.0023(10.6250) Grad: 10.0130  LR: 0.004990  
Epoch: [2][57/58] Elapsed 0m 49s (remain 0m 0s) Loss: 8.4334(10.0705) Grad: 38.0136  LR: 0.004990  
EVAL: [0/15] Elapsed 0m 2s (remain 0m 37s) Loss: 7.4847(7.4847) 


Epoch 2 - avg_train_loss: 10.0705  avg_val_loss: 7.4496  time: 58s
Epoch 2 - MAE Score (without expiratory phase): 3.3194
Epoch 2 - Save Best Score: 3.3194 Model


EVAL: [14/15] Elapsed 0m 9s (remain 0m 0s) Loss: 7.2540(7.4496) 
Epoch: [3][0/58] Elapsed 0m 2s (remain 2m 35s) Loss: 7.7152(7.7152) Grad: 22.8557  LR: 0.004966  
Epoch: [3][20/58] Elapsed 0m 13s (remain 0m 24s) Loss: 8.6772(7.2669) Grad: 75.5875  LR: 0.004966  
Epoch: [3][40/58] Elapsed 0m 27s (remain 0m 11s) Loss: 6.3280(7.1809) Grad: 51.7602  LR: 0.004966  
Epoch: [3][57/58] Elapsed 0m 41s (remain 0m 0s) Loss: 6.7072(6.9247) Grad: 70.9206  LR: 0.004966  
EVAL: [0/15] Elapsed 0m 2s (remain 0m 37s) Loss: 5.4311(5.4311) 


Epoch 3 - avg_train_loss: 6.9247  avg_val_loss: 5.3527  time: 51s
Epoch 3 - MAE Score (without expiratory phase): 2.2789
Epoch 3 - Save Best Score: 2.2789 Model


EVAL: [14/15] Elapsed 0m 9s (remain 0m 0s) Loss: 5.2467(5.3527) 
Epoch: [4][0/58] Elapsed 0m 3s (remain 3m 4s) Loss: 5.2645(5.2645) Grad: 33.5913  LR: 0.004931  
Epoch: [4][20/58] Elapsed 0m 19s (remain 0m 33s) Loss: 5.0593(5.5350) Grad: 30.1363  LR: 0.004931  
Epoch: [4][40/58] Elapsed 0m 31s (remain 0m 13s) Loss: 5.2972(5.3630) Grad: 71.4632  LR: 0.004931  
Epoch: [4][57/58] Elapsed 0m 40s (remain 0m 0s) Loss: 4.4957(5.1995) Grad: 44.6336  LR: 0.004931  
EVAL: [0/15] Elapsed 0m 2s (remain 0m 37s) Loss: 4.0977(4.0977) 


Epoch 4 - avg_train_loss: 5.1995  avg_val_loss: 4.0875  time: 50s
Epoch 4 - MAE Score (without expiratory phase): 1.6950
Epoch 4 - Save Best Score: 1.6950 Model


EVAL: [14/15] Elapsed 0m 9s (remain 0m 0s) Loss: 4.0775(4.0875) 
Epoch: [5][0/58] Elapsed 0m 3s (remain 3m 16s) Loss: 4.3481(4.3481) Grad: 25.7098  LR: 0.004887  
Epoch: [5][20/58] Elapsed 0m 19s (remain 0m 34s) Loss: 4.4036(4.9157) Grad: 13.2171  LR: 0.004887  
Epoch: [5][40/58] Elapsed 0m 35s (remain 0m 14s) Loss: 4.0189(4.7274) Grad: 35.7716  LR: 0.004887  
Epoch: [5][57/58] Elapsed 0m 49s (remain 0m 0s) Loss: 4.5263(4.5999) Grad: 68.6104  LR: 0.004887  
EVAL: [0/15] Elapsed 0m 2s (remain 0m 39s) Loss: 4.4764(4.4764) 


Epoch 5 - avg_train_loss: 4.5999  avg_val_loss: 4.5136  time: 59s
Epoch 5 - MAE Score (without expiratory phase): 1.9167


EVAL: [14/15] Elapsed 0m 9s (remain 0m 0s) Loss: 4.5257(4.5136) 
Epoch: [6][0/58] Elapsed 0m 2s (remain 2m 39s) Loss: 4.9014(4.9014) Grad: 82.3918  LR: 0.004834  
Epoch: [6][20/58] Elapsed 0m 15s (remain 0m 28s) Loss: 4.0439(4.3004) Grad: 49.2049  LR: 0.004834  
Epoch: [6][40/58] Elapsed 0m 32s (remain 0m 13s) Loss: 3.6890(4.2060) Grad: 22.4363  LR: 0.004834  
Epoch: [6][57/58] Elapsed 0m 45s (remain 0m 0s) Loss: 4.2575(4.1255) Grad: 69.5801  LR: 0.004834  
EVAL: [0/15] Elapsed 0m 2s (remain 0m 39s) Loss: 4.7138(4.7138) 


Epoch 6 - avg_train_loss: 4.1255  avg_val_loss: 4.6020  time: 55s
Epoch 6 - MAE Score (without expiratory phase): 1.9529


EVAL: [14/15] Elapsed 0m 9s (remain 0m 0s) Loss: 4.4845(4.6020) 
Epoch: [7][0/58] Elapsed 0m 3s (remain 3m 4s) Loss: 4.4363(4.4363) Grad: 76.6791  LR: 0.004772  
Epoch: [7][20/58] Elapsed 0m 17s (remain 0m 31s) Loss: 3.8189(4.0958) Grad: 24.3796  LR: 0.004772  
Epoch: [7][40/58] Elapsed 0m 28s (remain 0m 11s) Loss: 3.7391(4.2331) Grad: 31.9782  LR: 0.004772  
Epoch: [7][57/58] Elapsed 0m 39s (remain 0m 0s) Loss: 3.8520(4.1781) Grad: 57.9219  LR: 0.004772  
EVAL: [0/15] Elapsed 0m 2s (remain 0m 38s) Loss: 3.8125(3.8125) 


Epoch 7 - avg_train_loss: 4.1781  avg_val_loss: 3.7199  time: 49s
Epoch 7 - MAE Score (without expiratory phase): 1.5754
Epoch 7 - Save Best Score: 1.5754 Model


EVAL: [14/15] Elapsed 0m 9s (remain 0m 0s) Loss: 3.6897(3.7199) 
Epoch: [8][0/58] Elapsed 0m 3s (remain 3m 16s) Loss: 3.7187(3.7187) Grad: 37.5572  LR: 0.004701  
Epoch: [8][20/58] Elapsed 0m 19s (remain 0m 34s) Loss: 3.8138(3.8030) Grad: 50.8865  LR: 0.004701  
Epoch: [8][40/58] Elapsed 0m 35s (remain 0m 14s) Loss: 3.6159(3.8113) Grad: 21.4637  LR: 0.004701  
Epoch: [8][57/58] Elapsed 0m 46s (remain 0m 0s) Loss: 3.3436(3.7801) Grad: 3.7861  LR: 0.004701  
EVAL: [0/15] Elapsed 0m 2s (remain 0m 35s) Loss: 3.2633(3.2633) 


Epoch 8 - avg_train_loss: 3.7801  avg_val_loss: 3.1892  time: 56s
Epoch 8 - MAE Score (without expiratory phase): 1.3125
Epoch 8 - Save Best Score: 1.3125 Model


EVAL: [14/15] Elapsed 0m 8s (remain 0m 0s) Loss: 3.1250(3.1892) 
Epoch: [9][0/58] Elapsed 0m 3s (remain 3m 14s) Loss: 3.2300(3.2300) Grad: 9.6698  LR: 0.004621  
Epoch: [9][20/58] Elapsed 0m 19s (remain 0m 34s) Loss: 4.0834(3.5019) Grad: 74.9044  LR: 0.004621  
Epoch: [9][40/58] Elapsed 0m 35s (remain 0m 14s) Loss: 4.3971(3.5909) Grad: 81.7885  LR: 0.004621  
Epoch: [9][57/58] Elapsed 0m 49s (remain 0m 0s) Loss: 3.5483(3.5779) Grad: 53.0707  LR: 0.004621  
EVAL: [0/15] Elapsed 0m 2s (remain 0m 36s) Loss: 4.0693(4.0693) 


Epoch 9 - avg_train_loss: 3.5779  avg_val_loss: 3.9387  time: 59s
Epoch 9 - MAE Score (without expiratory phase): 1.6948


EVAL: [14/15] Elapsed 0m 9s (remain 0m 0s) Loss: 3.7992(3.9387) 
Epoch: [10][0/58] Elapsed 0m 3s (remain 3m 6s) Loss: 3.9089(3.9089) Grad: 74.7547  LR: 0.004532  
Epoch: [10][20/58] Elapsed 0m 13s (remain 0m 24s) Loss: 3.3493(3.3749) Grad: 35.4872  LR: 0.004532  
Epoch: [10][40/58] Elapsed 0m 27s (remain 0m 11s) Loss: 3.1334(3.3739) Grad: 31.0231  LR: 0.004532  
Epoch: [10][57/58] Elapsed 0m 41s (remain 0m 0s) Loss: 3.2585(3.4075) Grad: 21.9831  LR: 0.004532  
EVAL: [0/15] Elapsed 0m 2s (remain 0m 37s) Loss: 3.5654(3.5654) 


Epoch 10 - avg_train_loss: 3.4075  avg_val_loss: 3.5654  time: 51s
Epoch 10 - MAE Score (without expiratory phase): 1.5404


EVAL: [14/15] Elapsed 0m 9s (remain 0m 0s) Loss: 3.5625(3.5654) 
Epoch: [11][0/58] Elapsed 0m 3s (remain 3m 12s) Loss: 3.9414(3.9414) Grad: 61.7472  LR: 0.004436  
Epoch: [11][20/58] Elapsed 0m 19s (remain 0m 34s) Loss: 2.9097(3.2631) Grad: 8.4749  LR: 0.004436  
