In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file found.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import torch
from torch import nn
from torch.nn import functional as F

def mish(x):
    """Apply the Mish activation, ``x * tanh(softplus(x))``, element-wise.

    Args:
        x: Input tensor.

    Returns:
        Tensor of the same shape as ``x``.
    """
    gate = torch.tanh(F.softplus(x))
    return x * gate

class CNN(nn.Module):
    """Small convolutional regressor: two conv layers followed by four linear layers.

    The first linear layer expects 3744 flattened features and the network
    emits 80 values per sample.  That feature count is consistent with a
    ``(batch, 1, 80, 5)`` input (48 channels x 39 x 2 after the strided
    convolution) -- TODO confirm against the dataset.

    NOTE(review): the output layer is passed through Mish, which is bounded
    below (roughly -0.31), so the model cannot emit values smaller than that.
    Confirm this is intended for the regression target.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: 1 -> 24 channels (size-preserving), then 24 -> 48 with stride 2.
        self.conv_l1 = nn.Conv2d(1, 24, kernel_size=3, stride=1, padding=1)
        self.conv_l2 = nn.Conv2d(24, 48, kernel_size=3, stride=2, padding=0)
        # Fully connected regression head.
        self.linear_l1 = nn.Linear(in_features=3744, out_features=4192)
        self.linear_l2 = nn.Linear(in_features=4192, out_features=512)
        self.linear_l3 = nn.Linear(in_features=512, out_features=512)
        self.output = nn.Linear(in_features=512, out_features=80)

    def forward(self, x):
        """Run the network on ``x`` and return one 80-value row per sample."""
        feats = mish(self.conv_l2(F.relu(self.conv_l1(x))))
        # Collapse everything except the batch axis before the linear layers.
        flat = feats.reshape(feats.size(0), -1)
        hidden = mish(self.linear_l1(flat))
        for layer in (self.linear_l2, self.linear_l3):
            hidden = F.relu(layer(hidden))
        return mish(self.output(hidden))

In [3]:
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

class TrainDset(Dataset):
    """Dataset that yields one training sample per CSV file listed in ``paths``."""

    def __init__(self, paths):
        # Sequence of CSV file paths; each file is read lazily in __getitem__.
        self.paths = paths

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, idx):
        """Read the ``idx``-th CSV and return its features, targets and ``u_out``.

        Returns:
            dict with
              * ``"tensor"``  -- every column except ``breath_id`` and ``pressure``
                (``id`` is used as the index), with a leading singleton axis,
              * ``"targets"`` -- the ``pressure`` column as a NumPy array,
              * ``"u_out"``   -- the ``u_out`` column as a NumPy array.
        """
        frame = pd.read_csv(self.paths[idx], index_col='id')
        features = frame.drop(columns=['breath_id', 'pressure']).values
        # (rows, cols) -> (1, rows, cols): add the channel axis.
        features = features.reshape((1,) + features.shape)
        return {
            "tensor": torch.tensor(features),
            "targets": frame.pressure.values,
            "u_out": frame.u_out.values,
        }

In [4]:
import os
from tqdm import tqdm

def split_by_breath_id(input_dir="../input/ventilator-pressure-prediction", output_dir="."):
    """Split the train and test CSVs into one CSV file per ``breath_id``.

    For each of ``train`` and ``test`` this reads ``{input_dir}/{split}.csv`` and
    writes the rows of every breath to ``{output_dir}/{split}/{breath_id}.csv``
    (without the pandas index).

    Args:
        input_dir: Directory containing ``train.csv`` and ``test.csv``.
        output_dir: Directory under which the ``train/`` and ``test/`` folders
            are created.

    Returns:
        Tuple ``(train_paths, test_paths)``: lists of the written file paths,
        ordered by each breath's first appearance in its source CSV.
    """
    splits = ("train", "test")
    paths = {split: [] for split in splits}

    for split in splits:
        os.makedirs(f"{output_dir}/{split}/", exist_ok=True)
        df = pd.read_csv(f"{input_dir}/{split}.csv")

        # One groupby pass replaces a full boolean scan of the frame for every
        # breath id. sort=False keeps groups in first-appearance order, which is
        # the order df.breath_id.unique() would produce.
        breaths = df.groupby("breath_id", sort=False)
        for breath_id, breath_df in tqdm(breaths, total=breaths.ngroups):
            p = f"{output_dir}/{split}/{breath_id}.csv"
            breath_df.to_csv(p, index=False)
            paths[split].append(p)

    return paths["train"], paths["test"]

In [5]:
# Write the per-breath CSV files and keep the lists of their paths.
train_paths, test_paths = split_by_breath_id()

In [6]:
# Number of per-breath files written for train and test.
len(train_paths), len(test_paths)

In [7]:
# Dataset over all training breaths, used below for a quick shape check.
train_ds = TrainDset(train_paths)

In [8]:
# Shape of the target (pressure) array of the first sample.
train_ds[0]["targets"].shape

In [9]:
from sklearn.model_selection import train_test_split

# Hold out 15% of the breath files for validation. The fixed seed keeps the split
# identical across kernel restarts, so validation losses are comparable between runs.
train_paths_, val_paths = train_test_split(train_paths, train_size=0.85, random_state=42)

In [None]:
from torch.utils.data.sampler import SequentialSampler, RandomSampler

# ---- Training configuration -------------------------------------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Mixed precision (autocast + GradScaler) only applies on CUDA; with enabled=False
# both become no-ops, so the same loop also runs cleanly on a CPU-only host.
use_amp = device.type == "cuda"
num_workers = 16
b_size = 512
init_lr = 1e-3
n_epochs = 300

X_train_ds = TrainDset(train_paths_)
X_valid_ds = TrainDset(val_paths)

X_train_loader = DataLoader(X_train_ds, batch_size=b_size, sampler=RandomSampler(X_train_ds), num_workers=num_workers)
X_valid_loader = DataLoader(X_valid_ds, batch_size=b_size, num_workers=num_workers)

# Move the model to its device once, before the optimizer is constructed.
model = CNN().to(device)

criterion = nn.L1Loss()
scaler = torch.cuda.amp.GradScaler(enabled=use_amp)
optimizer = torch.optim.Adam(model.parameters(), lr=init_lr)
# The scheduler is stepped once at the END of each epoch, so epochs 1..n_epochs
# see schedule positions 0..n_epochs-1. T_max=n_epochs keeps the learning rate
# above zero for every epoch, including the last one.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, n_epochs)

# Best (lowest) mean validation loss seen so far.
val_loss_min = np.inf

for epoch in range(1, n_epochs + 1):
    # ---- Train for one epoch ------------------------------------------------
    model.train()
    train_loss = []

    print('Epoch: {:02d}/{:02d}'.format(epoch, n_epochs))
    print("TRAIN")

    loop = tqdm(X_train_loader)
    for batch in loop:
        X = batch["tensor"].to(device).float()
        y = batch["targets"].to(device).float()

        optimizer.zero_grad()

        with torch.cuda.amp.autocast(enabled=use_amp):
            output = model(X)
            loss = criterion(output, y)

        # Scaled backward pass; scaler.step() calls optimizer.step() internally.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        train_loss.append(loss.item())
        loop.set_description('current_loss: {:.5f} | LR: {:.5f}'.format(loss.item(), optimizer.param_groups[0]['lr']))
        loop.set_postfix(loss=np.mean(train_loss))
    train_loss = np.mean(train_loss)

    # ---- Validate -----------------------------------------------------------
    model.eval()

    val_loss = []

    print("VAL")
    loop = tqdm(X_valid_loader)
    for batch in loop:
        X = batch["tensor"].to(device).float()
        y = batch["targets"].to(device).float()

        with torch.no_grad(), torch.cuda.amp.autocast(enabled=use_amp):
            outputs = model(X)
            loss = criterion(outputs.float(), y)

        val_loss.append(loss.item())
        loop.set_description('current_loss: {:.5f}'.format(loss.item()))
        loop.set_postfix(loss=np.mean(val_loss))
    val_loss = np.mean(val_loss)

    # Track the best validation loss and report a one-line epoch summary.
    if val_loss < val_loss_min:
        val_loss_min = val_loss
    print('train_loss: {:.5f} | val_loss: {:.5f} | best val_loss: {:.5f}'.format(train_loss, val_loss, val_loss_min))

    # Advance the LR schedule only after this epoch's optimizer steps.
    scheduler.step()

In [None]:
# Persist the trained parameters (state_dict only) to model_weights.pth.
torch.save(model.state_dict(), 'model_weights.pth')

# Inference

In [27]:
class TestDset(Dataset):
    """Dataset that yields one inference sample per CSV file listed in ``paths``."""

    def __init__(self, paths):
        # Sequence of CSV file paths; each file is read lazily in __getitem__.
        self.paths = paths

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, idx):
        """Read the ``idx``-th CSV and return its features and row ids.

        Returns:
            dict with
              * ``"tensor"`` -- every column except ``breath_id`` (``id`` is used
                as the index), with a leading singleton axis,
              * ``"ids"``    -- the ``id`` index values as a NumPy array.
        """
        frame = pd.read_csv(self.paths[idx], index_col='id')
        features = frame.drop(columns=['breath_id']).values
        # (rows, cols) -> (1, rows, cols): add the channel axis.
        features = features.reshape((1,) + features.shape)
        row_ids = np.array(frame.index)
        return {"tensor": torch.tensor(features), "ids": row_ids}

In [28]:
test_ds = TestDset(test_paths)
test_loader = DataLoader(test_ds, batch_size=512, num_workers=16)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# autocast is only useful on CUDA; disabling it elsewhere keeps CPU runs warning-free.
use_amp = device.type == "cuda"

model = CNN()
# map_location lets a checkpoint that was saved from a GPU run load on a CPU-only host.
# NOTE: torch.load unpickles the file, so only load checkpoints from a trusted source.
model.load_state_dict(torch.load(r"../input/gventcnnsimple/model_weights.pth", map_location=device))
model.eval().to(device)

# Per-batch predictions and the matching row ids, collected in loader order.
preds, ids_list = [], []
loop = tqdm(test_loader)
for batch in loop:
    X = batch["tensor"].to(device).float()
    ids = batch["ids"]
    ids_list.append(ids)

    with torch.no_grad(), torch.cuda.amp.autocast(enabled=use_amp):
        outputs = model(X)
    # Cast to float32 so predictions have one dtype regardless of autocast.
    preds.append(outputs.float().cpu().numpy())

# Submission

In [46]:
# Sanity check: total number of predicted values after flattening all batches.
np.concatenate(preds).flatten().shape

In [47]:
# Sanity check: total number of row ids; should match the prediction count above.
np.concatenate(ids_list).flatten().shape

In [49]:
# Flatten the per-batch ids and predictions into two aligned columns and write
# them out as the submission file.
submission = pd.DataFrame(
    dict(
        id=np.concatenate(ids_list).flatten(),
        pressure=np.concatenate(preds).flatten(),
    )
)
submission.to_csv("submission.csv", index=False)