# Setup

In [None]:
# # for TPU
# !curl https://raw.githubusercontent.com/pytorch/xla/master/contrib/scripts/env-setup.py -o pytorch-xla-env-setup.py
# !python pytorch-xla-env-setup.py --apt-packages libomp5 libopenblas-dev

In [None]:
# # for TPU
# import torch_xla
# import torch_xla.core.xla_model as xm

In [None]:
from tqdm import tqdm
import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler
from sklearn.preprocessing import RobustScaler, normalize
from sklearn.model_selection import train_test_split, GroupKFold, KFold

In [None]:
!pip install wandb -qqq
import wandb
# from wandb.keras import WandbCallback
wandb.login()

In [None]:
# TPU alternative (needs the commented-out torch_xla setup cells):
# device = xm.xla_device()
# torch.set_default_tensor_type('torch.FloatTensor')

# Prefer the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Preprocessing

In [None]:
# Load the three competition CSV files: training rows, test rows and the
# sample submission.
train, test, submission = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/ventilator-pressure-prediction/train.csv',
        '../input/ventilator-pressure-prediction/test.csv',
        '../input/ventilator-pressure-prediction/sample_submission.csv',
    )
)

In [None]:
def add_features(df):
    """Add engineered per-breath features and one-hot encode R, C and R+C.

    The new columns are written onto ``df`` itself (the caller's frame is
    modified); the returned frame is the result of ``pd.get_dummies`` on it.

    Columns added, in this order:
      area, cross, cross2, u_in_cumsum, one, count, u_in_cummean,
      breath_id_lag, breath_id_lag2, breath_id_lagsame, breath_id_lag2same,
      u_in_lag, u_in_lag2, u_out_lag2, RC

    ``R``, ``C`` and ``RC`` are converted to strings so that ``get_dummies``
    replaces them with indicator columns.

    Lag features are built by shifting the whole column and zeroing rows whose
    shifted ``breath_id`` differs from the current one.
    NOTE(review): this presumably expects the rows of one breath to be
    contiguous and in time order - confirm against the input files.
    """
    breath = df['breath_id']

    # Running per-breath sum of time_step * u_in.
    df['area'] = (df['time_step'] * df['u_in']).groupby(breath).cumsum()

    # Interactions with the u_out flag.
    df['cross'] = df['u_in'] * df['u_out']
    df['cross2'] = df['time_step'] * df['u_out']

    # Running sum and running mean of u_in inside each breath. `one` and
    # `count` are helper columns: `count` is the 1-based row position.
    df['u_in_cumsum'] = df['u_in'].groupby(breath).cumsum()
    df['one'] = 1
    df['count'] = df['one'].groupby(breath).cumsum()
    df['u_in_cummean'] = df['u_in_cumsum'] / df['count']

    # breath_id of the row one / two positions earlier (0 where there is none).
    df['breath_id_lag'] = breath.shift(1).fillna(0)
    df['breath_id_lag2'] = breath.shift(2).fillna(0)

    # 1 where the earlier row belongs to the same breath, else 0.
    df['breath_id_lagsame'] = np.where(df['breath_id_lag'] == breath, 1, 0)
    df['breath_id_lag2same'] = np.where(df['breath_id_lag2'] == breath, 1, 0)

    # Lagged signals, masked so that no value leaks across a breath boundary.
    df['u_in_lag'] = df['u_in'].shift(1).fillna(0) * df['breath_id_lagsame']
    df['u_in_lag2'] = df['u_in'].shift(2).fillna(0) * df['breath_id_lag2same']
    df['u_out_lag2'] = df['u_out'].shift(2).fillna(0) * df['breath_id_lag2same']

    # Treat R and C (and their concatenation) as categorical labels.
    for categorical in ('R', 'C'):
        df[categorical] = df[categorical].astype(str)
    df['RC'] = df['R'] + df['C']

    return pd.get_dummies(df)

# Build the engineered features for both splits; the returned frames (with the
# one-hot columns) replace the raw ones under the same names.
train = add_features(train)
test = add_features(test)

In [None]:
# Replace any remaining missing values with 0, then display both shapes.
train=train.fillna(0)
test=test.fillna(0)
train.shape,test.shape

In [None]:
# Pressure targets regrouped into rows of 80 values.
# NOTE(review): assumes every breath contributes exactly 80 consecutive rows - confirm.
targets = train[['pressure']].to_numpy().reshape(-1, 80)

# Identifier and intermediate helper columns that must not reach the model.
# One shared list keeps the train and test feature sets from drifting apart.
non_feature_cols = ['id', 'breath_id', 'one', 'count', 'breath_id_lag', 'breath_id_lag2',
                    'breath_id_lagsame', 'breath_id_lag2same', 'u_out_lag2']
train = train.drop(columns=['pressure'] + non_feature_cols)
test = test.drop(columns=non_feature_cols)

# The scaler fitted on `train` is applied to `test`, so both frames must expose
# the same features in the same order.
assert list(train.columns) == list(test.columns), "train/test feature columns differ"

In [None]:
# Shapes after removing the target, id and helper columns.
train.shape, test.shape

In [None]:
# Fit the robust scaler on the training features only and reuse the fitted
# statistics for the test features.
# NOTE: `train` and `test` are rebound to the scaler's outputs here; the reshape
# cell that follows treats them as arrays rather than DataFrames.
RS = RobustScaler()
train = RS.fit_transform(train)
test = RS.transform(test)

In [None]:
# Shapes after scaling.
train.shape, test.shape

In [None]:
# Regroup the flat rows into (n_sequences, 80, n_features); 80 is the fixed
# sequence length used throughout this notebook.
train = train.reshape(-1, 80, train.shape[-1])
# `train` is already 3-D at this point, but its last axis is still the feature count.
test = test.reshape(-1, 80, train.shape[-1])

In [None]:
# Shapes after grouping rows into 80-step sequences.
train.shape, test.shape

# Dataset

In [None]:
class WaveNetBiLSTMDataset(Dataset):
    """Index-based dataset over pre-built sequences and (optionally) their targets.

    Parameters
    ----------
    df : sequence or array
        Samples, addressed by their leading index (``df[idx]``).
    mode : {"train", "test"}
        ``"train"`` yields ``(sample, target)`` pairs, ``"test"`` yields samples only.
    targets : sequence or array, optional
        Targets aligned one-to-one with ``df``. Required when ``mode == "train"``.

    Raises
    ------
    AssertionError
        If ``mode`` is not ``"train"`` or ``"test"``.
    ValueError
        If ``mode == "train"`` and ``targets`` is missing or has a different
        length than ``df``.
    """

    def __init__(self, df, mode="test", targets=None):
        super().__init__()
        assert mode in ["train", "test"]
        if mode == "train":
            if targets is None:
                raise ValueError('targets must be provided when mode="train"')
            if len(targets) != len(df):
                raise ValueError(
                    "df and targets must have the same length, got {} and {}".format(
                        len(df), len(targets)))
        self.mode = mode
        self.df = df
        # Keep the targets that were passed in. Reading a module-level `targets`
        # instead would pair a fold subset with labels of unrelated rows.
        self.targets = targets

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        if self.mode == "train":
            return self.df[idx], self.targets[idx]
        return self.df[idx]

# WaveNet BiLSTM (the recurrent layers are bidirectional GRUs)

In [None]:
def mish(x):
    """Mish activation, ``x * tanh(softplus(x))``, applied element-wise."""
    squashed = F.softplus(x).tanh()
    return x * squashed

class Wave_Block(nn.Module):
    """Stack of gated, dilated 1-D convolutions whose outputs are summed.

    A 1x1 convolution first maps ``in_channels`` to ``out_channels``. Then, for
    each dilation ``2**0 .. 2**(dilation_rates - 1)``, the current signal goes
    through ``tanh(filter_conv) * sigmoid(gate_conv)`` followed by a 1x1
    convolution, and that result is added to a running sum seeded with the
    output of the first 1x1 convolution.

    Parameters
    ----------
    in_channels, out_channels : int
        Channel counts of the input and of every layer after the first 1x1 conv.
    dilation_rates : int
        Number of gated layers (not a list of rates).
    kernel_size : int
        Kernel size of the filter/gate convolutions. Padding is
        ``dilation * (kernel_size - 1) // 2``, which keeps the sequence length
        unchanged when ``kernel_size`` is odd.
    """

    def __init__(self, in_channels, out_channels, dilation_rates, kernel_size):
        super().__init__()
        self.num_rates = dilation_rates
        self.convs = nn.ModuleList()
        self.filter_convs = nn.ModuleList()
        self.gate_convs = nn.ModuleList()

        # Input projection; convs[k] for k >= 1 are the per-layer 1x1 mixers.
        self.convs.append(nn.Conv1d(in_channels, out_channels, kernel_size=1))

        for level in range(dilation_rates):
            dilation = 2 ** level
            pad = (dilation * (kernel_size - 1)) // 2
            # Creation order (filter, gate, 1x1) is kept so seeded initialisation
            # yields the same weights as before.
            self.filter_convs.append(
                nn.Conv1d(out_channels, out_channels, kernel_size=kernel_size,
                          padding=pad, dilation=dilation))
            self.gate_convs.append(
                nn.Conv1d(out_channels, out_channels, kernel_size=kernel_size,
                          padding=pad, dilation=dilation))
            self.convs.append(nn.Conv1d(out_channels, out_channels, kernel_size=1))

    def forward(self, x):
        """Map ``(batch, in_channels, length)`` to ``(batch, out_channels, length)``."""
        x = self.convs[0](x)
        res = x
        gated_layers = zip(self.filter_convs, self.gate_convs)
        for k, (filter_conv, gate_conv) in enumerate(gated_layers, start=1):
            gated = torch.tanh(filter_conv(x)) * torch.sigmoid(gate_conv(x))
            x = self.convs[k](gated)
            res = res + x
        return res

class WaveNet(nn.Module):
    """Four Wave_Blocks followed by four bidirectional GRU stacks and a 2-layer head.

    The attributes are called ``LSTM_block*`` but hold ``nn.GRU`` modules; the
    names are kept so that saved ``state_dict`` keys stay valid.

    Parameters
    ----------
    inch : int
        Number of input features per time step.
    kernel_size : int
        Kernel size passed to every Wave_Block.
    """

    def __init__(self, inch=8, kernel_size=3):
        super().__init__()

        # Convolutional front end: channels widen 16 -> 32 -> 64 -> 128 while the
        # number of dilated layers shrinks 12 -> 8 -> 4 -> 1.
        self.wave_block1 = Wave_Block(inch, 16, 12, kernel_size)
        self.wave_block2 = Wave_Block(16, 32, 8, kernel_size)
        self.wave_block3 = Wave_Block(32, 64, 4, kernel_size)
        self.wave_block4 = Wave_Block(64, 128, 1, kernel_size)

        # Settings shared by all recurrent stacks. Each stack is bidirectional, so
        # its output width is 2 * hidden_size, which is the next stack's input_size.
        shared = dict(num_layers=2, batch_first=True, bidirectional=True, dropout=0.3)
        self.LSTM_block1 = nn.GRU(input_size=128, hidden_size=500, **shared)
        self.LSTM_block2 = nn.GRU(input_size=1000, hidden_size=375, **shared)
        self.LSTM_block3 = nn.GRU(input_size=750, hidden_size=100, **shared)
        self.LSTM_block4 = nn.GRU(input_size=200, hidden_size=64, **shared)

        # Per-time-step regression head.
        self.fc1 = nn.Linear(128, 64)
        self.fc2 = nn.Linear(64, 1)

    def forward(self, x):  # (B, T, n_features)
        """Return one prediction per time step, shape ``(B, T)``."""
        # Conv1d expects channels first: (B, n_features, T).
        x = x.permute(0, 2, 1)
        wave_blocks = (self.wave_block1, self.wave_block2,
                       self.wave_block3, self.wave_block4)
        for wave_block in wave_blocks:
            x = mish(wave_block(x))  # channels: 16, 32, 64, 128

        # Back to (B, T, 128) for the batch_first recurrent stacks.
        x = x.permute(0, 2, 1)
        recurrent_blocks = (self.LSTM_block1, self.LSTM_block2,
                            self.LSTM_block3, self.LSTM_block4)
        for recurrent in recurrent_blocks:
            x, _ = recurrent(x)  # widths: 1000, 750, 200, 128
            x = F.relu(mish(x))

        x = F.relu(self.fc1(x))  # (B, T, 64)
        x = self.fc2(x)  # (B, T, 1)
        return x.squeeze(-1)  # (B, T)

# Training

In [None]:
# K-fold training of the WaveNet model: mixed-precision optimisation with Adam,
# cosine learning-rate decay, per-epoch validation, best-checkpoint saving and
# Weights & Biases logging.
ckpt_dir = "."
model_name = "WaveNet_BiLSTM"
project_name = f"G-Vent-{model_name}"
# Read the channel count from the prepared (sequences, steps, features) array
# instead of hard-coding it, so the model always matches the engineered features.
n_features = train.shape[-1]
n_splits = 5
n_epochs = 300
init_lr = 1e-3
b_size = 512
num_workers = 16

kf = KFold(n_splits=n_splits, shuffle=True, random_state=2021)

models = {}

for fold, (train_idx, test_idx) in enumerate(kf.split(train, targets)):

    CHECKPOINT = '{}/{}_{}.pth'.format(ckpt_dir, model_name, fold)

    # One W&B run per fold.
    run = wandb.init(project=project_name, name=f"fold{fold}")

    print('-'*15, '>', f'Fold {fold+1}', '<', '-'*15)
    X_train, X_valid = train[train_idx], train[test_idx]
    y_train, y_valid = targets[train_idx], targets[test_idx]

    X_train_ds = WaveNetBiLSTMDataset(X_train, mode="train", targets=y_train)
    X_valid_ds = WaveNetBiLSTMDataset(X_valid, mode="train", targets=y_valid)

    X_train_loader = DataLoader(X_train_ds, batch_size=b_size, sampler=RandomSampler(X_train_ds), num_workers=num_workers)
    X_valid_loader = DataLoader(X_valid_ds, batch_size=b_size, num_workers=num_workers)

    # Move the model to the target device once, before the optimizer is built.
    model = WaveNet(inch=n_features).to(device)

    criterion = nn.L1Loss()
    scaler = torch.cuda.amp.GradScaler()
    optimizer = torch.optim.Adam(model.parameters(), lr=init_lr)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, n_epochs - 1)

    # `np.inf` is the spelling that exists in every NumPy version (`np.Inf` was
    # removed in NumPy 2.0).
    val_loss_min = np.inf

    for epoch in range(1, n_epochs + 1):
        # ---- training pass ----
        model.train()
        train_loss = []

        print('Epoch: {:02d}/{:02d}'.format(epoch, n_epochs))
        print("TRAIN")

        loop = tqdm(X_train_loader)
        for X, y in loop:
            X = X.to(device).float()
            y = y.to(device).float()

            optimizer.zero_grad()

            with torch.cuda.amp.autocast():
                output = model(X)
                loss = criterion(output, y)

            # Scale the loss so reduced-precision gradients do not underflow;
            # the scaler unscales them before the optimizer step.
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            # xm.optimizer_step(optimizer, barrier=True)

            batch_loss = loss.item()
            train_loss.append(batch_loss)
            loop.set_description('current_loss: {:.5f} | LR: {:.5f}'.format(batch_loss, optimizer.param_groups[0]['lr']))
            loop.set_postfix(loss=np.mean(train_loss))
        train_loss = np.mean(train_loss)

        # ---- validation pass ----
        model.eval()

        val_loss = []

        print("VAL")
        loop = tqdm(X_valid_loader)
        for X, y in loop:
            X = X.to(device).float()
            y = y.to(device).float()

            with torch.cuda.amp.autocast(), torch.no_grad():
                outputs = model(X)
                loss = criterion(outputs.float(), y)

            batch_loss = loss.item()
            val_loss.append(batch_loss)
            loop.set_description('current_loss: {:.5f}'.format(batch_loss))
            loop.set_postfix(loss=np.mean(val_loss))
        val_loss = np.mean(val_loss)

        wandb.log({"epoch": epoch,
                "loss": train_loss,
                "val_loss": val_loss,
                })

        # Keep (and upload) the weights only when validation loss improves.
        if val_loss < val_loss_min:
            print('Valid loss improved from {:.5f} to {:.5f} saving model to {}'.format(val_loss_min, val_loss, CHECKPOINT))
            val_loss_min = val_loss
            torch.save(model.state_dict(), CHECKPOINT)
            artifact = wandb.Artifact(model_name, type='model')
            artifact.add_file(CHECKPOINT, name=f"fold{fold}_epoch{epoch}.pt")
            run.log_artifact(artifact)

        # Advance the LR schedule only after this epoch's optimizer steps.
        # Stepping the scheduler first skips the initial learning rate.
        scheduler.step()

    # Close this fold's W&B run explicitly so it is marked as finished.
    run.finish()

In [None]:
# Smoke test: wrap the first 8 *training* sequences of the last fold (not the
# competition test set) in an unlabeled dataset and loader.
X_test_ds = WaveNetBiLSTMDataset(X_train[:8], mode="test")
X_test_loader = DataLoader(X_test_ds, batch_size=512, num_workers=16)

In [None]:
# Run `model` (whatever the training cell bound last) over the loader without
# tracking gradients, collecting one NumPy array of predictions per batch.
model.eval()
preds = []

with torch.no_grad():
    for X in X_test_loader:
        X = X.to(device).float()
        output = model(X)
        preds.append(output.detach().cpu().numpy())

In [None]:
# Shape of the first batch of predictions.
preds[0].shape

In [None]:
import matplotlib.pyplot as plt

# Predicted pressure for the sample at index 2 of the first prediction batch.
# Title and axis labels let the figure stand on its own; the trailing `;`
# suppresses the repr of the last call in the cell output.
fig, ax = plt.subplots()
ax.plot(range(80), preds[0][2])
ax.set(title="Predicted pressure (sample 2)", xlabel="time step index", ylabel="pressure");

In [None]:
# Target pressure for the same sample (index 2 of the last fold's training
# split), for visual comparison with the prediction plot.
fig, ax = plt.subplots()
ax.plot(range(80), y_train[2])
ax.set(title="Target pressure (sample 2)", xlabel="time step index", ylabel="pressure");