In [None]:
import warnings
warnings.filterwarnings("ignore")

import os
import pandas as pd
import numpy as np
import itertools
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import Dataset, DataLoader
import pytorch_lightning as pl
from torch import nn
from torch.nn import functional as F

Create a custom dataset that segments the sensor time series into chunks of length 256

In [None]:
class SegmentedDataset(Dataset):
    """Dataset of fixed-length windows cut from per-recording sensor CSV files.

    Every file is read with pandas and split, in row order, into consecutive
    windows of ``segment_length`` rows. A trailing partial window is padded to
    full length: features with zeros, timestamps with ``-1`` and, in training
    mode, labels with the last ("no event") class.

    Args:
        folder_path: Directory that contains the CSV files.
        files: File names (relative to ``folder_path``) to load.
        is_train: When True the event columns are read and ``__getitem__``
            returns ``(features, labels)``; otherwise it returns
            ``(features, file_id, time)``.
        segment_length: Number of rows per window.
    """

    FEATURE_COLUMNS = ['AccV', 'AccML', 'AccAP']
    EVENT_COLUMNS = ['StartHesitation', 'Turn', 'Walking']
    # Marker written into padded timestamp positions so they can be filtered out later.
    TIME_PADDING_VALUE = -1

    def __init__(self, folder_path, files, is_train=True, segment_length=256):
        self.folder_path = folder_path
        self.is_train = is_train
        self.segment_length = segment_length
        self.data, self.time, self.ids, self.targets = self.load_data(files)

    def load_data(self, files):
        """Read every file and fill the per-segment lists.

        Returns:
            The tuple ``(data, time, ids, targets)`` of per-segment lists, which
            are also stored on the instance. ``targets`` stays empty when
            ``is_train`` is False.
        """
        # The accumulators live on the instance because append_to_lists writes
        # into them; they must exist before the first segment is appended.
        self.data, self.time, self.ids, self.targets = [], [], [], []
        self.segment_indices, self.segment_padding = [], []

        for file_name in files:
            file_path = os.path.join(self.folder_path, file_name)
            file_id = file_name.replace('.csv', '')

            # Per-file values get their own names so they never shadow the accumulators.
            frame = pd.read_csv(file_path)
            timestamps = frame['Time'].values
            features = frame[self.FEATURE_COLUMNS].values

            labels = None
            if self.is_train:
                events = frame[self.EVENT_COLUMNS].values
                # Fourth column: 1 where none of the three events is active.
                background = (1 - events.sum(axis=1)).reshape(-1, 1)
                labels = np.concatenate([events, background], axis=1)

            for start in range(0, len(features), self.segment_length):
                stop = start + self.segment_length
                feature_segment = features[start:stop]
                time_segment = timestamps[start:stop]
                target_segment = labels[start:stop] if self.is_train else None

                # Only the last window of a file can be short.
                padding_length = self.segment_length - len(feature_segment)
                if padding_length > 0:
                    feature_segment = np.pad(
                        feature_segment, ((0, padding_length), (0, 0)), mode='constant'
                    )
                    time_segment = np.pad(
                        time_segment, (0, padding_length), mode='constant',
                        constant_values=self.TIME_PADDING_VALUE,
                    )
                    if self.is_train:
                        # All-zero rows would become class 0 after argmax, so
                        # padded rows are explicitly marked as the last class.
                        pad_rows = np.zeros(
                            (padding_length, target_segment.shape[1]),
                            dtype=target_segment.dtype,
                        )
                        pad_rows[:, -1] = 1
                        target_segment = np.concatenate([target_segment, pad_rows], axis=0)

                self.append_to_lists(
                    feature_segment, time_segment, target_segment, file_id,
                    start_index=start, padding_length=padding_length,
                )

        return self.data, self.time, self.ids, self.targets

    def append_to_lists(self, feature_segment, time_segment, target_segment, file_id,
                        start_index=0, padding_length=0):
        """Append one segment and its bookkeeping to the instance lists.

        Args:
            feature_segment: Array of sensor rows for the segment.
            time_segment: Array of timestamps for the segment.
            target_segment: One-hot label rows; ignored when ``is_train`` is False.
            file_id: Identifier of the file the segment came from.
            start_index: Row offset of the segment inside its source file.
            padding_length: Number of padded rows at the end of the segment.
        """
        self.data.append(feature_segment)
        self.time.append(time_segment)
        self.ids.append(file_id)
        self.segment_indices.append(start_index)
        self.segment_padding.append(padding_length)

        if self.is_train:
            self.targets.append(target_segment)

    def get_segment_indices(self):
        """Return, per segment, the row offset inside its source file."""
        return self.segment_indices

    def get_segment_padding(self):
        """Return, per segment, how many trailing rows are padding."""
        return self.segment_padding

    def __len__(self):
        """Number of segments across all loaded files."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return one segment.

        Training mode: ``(float32 features, int64 class index per row)``.
        Otherwise: ``(float32 features, file_id, time array)``.
        """
        features = torch.as_tensor(self.data[idx], dtype=torch.float32)
        if self.is_train:
            labels = torch.as_tensor(np.argmax(self.targets[idx], axis=1), dtype=torch.int64)
            return features, labels
        return features, self.ids[idx], self.time[idx]


Define model with 1D Conv block and training routine

In [None]:
class ConvBlock(nn.Module):
    """Parallel dilated 1D convolutions with a residual path.

    One ``nn.Conv1d`` branch is created for every (kernel size, dilation)
    pair. Branch outputs are concatenated along the channel axis, added to a
    skip projection of the input, then batch-normalised, passed through ReLU
    and dropout.

    Args:
        in_channels: Channels of the incoming sequence.
        out_channels: Channels produced by each individual branch.
        kernel_sizes: Kernel sizes to combine with every dilation.
        dropout_rate: Probability passed to ``nn.Dropout``.
        dilations: Dilation factors to combine with every kernel size.
    """

    def __init__(self, in_channels, out_channels, kernel_sizes, dropout_rate, dilations):
        super().__init__()

        branches = []
        for kernel_size, dilation in itertools.product(kernel_sizes, dilations):
            # Algebraically the same as (k + (k - 1) * (d - 1)) // 2.
            padding = (dilation * (kernel_size - 1) + 1) // 2
            branches.append(
                nn.Conv1d(in_channels, out_channels, kernel_size,
                          padding=padding, dilation=dilation)
            )
        self.convs = nn.ModuleList(branches)

        # Channel count after concatenating all branches.
        total_channels = out_channels * len(kernel_sizes) * len(dilations)
        self.batch_norm = nn.BatchNorm1d(total_channels)
        self.dropout = nn.Dropout(dropout_rate)

        # A 1x1 convolution is only needed when the residual must change width.
        if in_channels != total_channels:
            self.skip_connection = nn.Conv1d(in_channels, total_channels, kernel_size=1)
        else:
            self.skip_connection = nn.Identity()

    def forward(self, x):
        """Apply the block; dims 1 and 2 are swapped on entry and swapped back on exit."""
        channels_first = x.transpose(1, 2)
        residual = self.skip_connection(channels_first)
        branch_outputs = [conv(channels_first) for conv in self.convs]
        merged = torch.cat(branch_outputs, dim=1) + residual
        activated = F.relu(self.batch_norm(merged))
        return self.dropout(activated).transpose(1, 2)

class Model(pl.LightningModule):
    """Per-timestep 4-class classifier built from stacked ``ConvBlock`` layers.

    The blocks are followed by a linear layer applied to the last dimension,
    producing one logit vector per timestep. Training and validation use a
    class-weighted negative log-likelihood on log-softmax outputs; the last
    class carries weight 0.1, the others 1.0.

    Args:
        in_channels: Channels of the input sequence.
        out_channels: Channels per branch inside each ``ConvBlock``.
        kernel_sizes: Kernel sizes forwarded to each ``ConvBlock``.
        dilations: Dilations forwarded to each ``ConvBlock``.
        dropout_rate: Dropout probability forwarded to each ``ConvBlock``.
        num_blocks: Number of ``ConvBlock`` layers to stack.
        lr: Learning rate for Adam.
    """

    def __init__(self, in_channels, out_channels, kernel_sizes, dilations, dropout_rate, num_blocks, lr=0.001):
        super().__init__()
        self.train_loss_history = []
        self.val_loss_history = []
        # Nothing in this class writes to val_score_history; kept for callers.
        self.val_score_history = []
        self.lr = lr

        # Width of every block's output (all branches concatenated).
        block_width = out_channels * len(kernel_sizes) * len(dilations)
        self.blocks = nn.Sequential(*[
            ConvBlock(in_channels if i == 0 else block_width,
                      out_channels,
                      kernel_sizes,
                      dropout_rate,
                      dilations)
            for i in range(num_blocks)
        ])

        num_classes = 4
        self.linear = nn.Linear(block_width, num_classes)
        weights = torch.tensor([1.0, 1.0, 1.0, 0.1])
        self.loss = torch.nn.NLLLoss(weight=weights)
        self.logsoftmax = nn.LogSoftmax(dim=2)
        # Filled by test_step with (probabilities, file_id, time) tuples.
        self.output = []

    def forward(self, x):
        """Return raw per-timestep logits (no softmax applied)."""
        x = self.blocks(x)
        x = self.linear(x)
        return x

    def _weighted_nll(self, logits, y):
        """Log-softmax the logits and compute the weighted NLL over all timesteps."""
        log_probs = self.logsoftmax(logits)
        # NLLLoss expects (N, C) log-probabilities and (N,) class indices.
        return self.loss(log_probs.view(-1, log_probs.size(-1)), y.view(-1))

    def training_step(self, batch, batch_idx):
        """Compute, log and record the training loss for one batch."""
        x, y = batch
        loss = self._weighted_nll(self(x), y)
        self.log('train_loss', loss, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.train_loss_history.append(loss.item())
        return loss

    def validation_step(self, batch, batch_idx):
        """Compute, log and record the validation loss for one batch.

        Uses the same log-softmax + NLL path as training so that ``val_loss``
        is comparable with ``train_loss``.
        """
        x, y = batch
        loss = self._weighted_nll(self(x), y)
        self.log('val_loss', loss)
        self.val_loss_history.append(loss.item())
        return loss

    def on_test_epoch_start(self):
        """Drop predictions from earlier test runs so they are not duplicated."""
        self.output = []

    def test_step(self, batch, batch_idx):
        """Store softmax probabilities with the batch's file ids and timestamps."""
        x, file_id, time = batch
        y_hat = self.forward(x)
        y_hat = F.softmax(y_hat, dim=2)
        self.output.append((y_hat, file_id, time))

    def configure_optimizers(self):
        """Adam over all parameters with the configured learning rate."""
        optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)
        return optimizer

In [None]:
folder_path = '/kaggle/input/tlvmc-parkinsons-freezing-gait-prediction/train/tdcsfog/'
# os.listdir returns entries in arbitrary order; sorting makes the seeded split
# below reproducible. Only CSV files are kept because the dataset reads them
# with pandas.read_csv.
all_files = sorted(file for file in os.listdir(folder_path) if file.endswith('.csv'))
# Split by file so segments of one recording never land on both sides.
train_files, val_files = train_test_split(all_files, random_state=13)


train_dataset = SegmentedDataset(folder_path, train_files)
val_dataset = SegmentedDataset(folder_path, val_files)

# Segments are stored file by file; shuffling keeps a training batch from being
# made up of consecutive windows of a single recording.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64)

Train the model

In [None]:
# Seed Python, NumPy and torch before the model is built so that weight
# initialisation and training are repeatable across kernel restarts.
pl.seed_everything(13)

model = Model(
    in_channels=3,
    out_channels=3,
    kernel_sizes=[3,5,7],
    dilations=[2,4,8],
    dropout_rate=0.1,
    num_blocks=1,
)

trainer = pl.Trainer(max_epochs=10)
trainer.fit(model, train_loader, val_loader)

In [None]:
# Template frame that is later filled with the predicted probabilities.
submission = pd.read_csv(
    filepath_or_buffer="/kaggle/input/tlvmc-parkinsons-freezing-gait-prediction/sample_submission.csv"
)

In [None]:
# Build the inference dataset (no labels) and run the model's test loop over it.
test_folder_path = '/kaggle/input/tlvmc-parkinsons-freezing-gait-prediction/test/tdcsfog/'
test_files = list(os.listdir(test_folder_path))
test_dataset = SegmentedDataset(
    folder_path=test_folder_path,
    files=test_files,
    is_train=False,
)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

trainer.test(model, test_loader)

In [None]:
event_columns = ['StartHesitation', 'Turn', 'Walking']

# Collect one small frame per segment and concatenate once at the end;
# concatenating inside the loop copies the growing result on every iteration.
segment_frames = []
for probabilities, file_ids, time_batch in model.output:
    probabilities = probabilities.cpu().numpy()
    time_batch = time_batch.cpu().numpy()
    for file_id, time_segment, segment_probs in zip(file_ids, time_batch, probabilities):
        # Timestamps equal to -1 mark padded rows; they have no submission row.
        is_real = time_segment != -1
        row_ids = [f'{file_id}_{t}' for t in time_segment[is_real]]
        # Only the first three class probabilities are submitted.
        segment_frames.append(
            pd.DataFrame(segment_probs[is_real, :3], index=row_ids, columns=event_columns)
        )

# pd.concat raises on an empty list, so fall back to an empty, correctly-labelled frame.
result = pd.concat(segment_frames) if segment_frames else pd.DataFrame(columns=event_columns)

# Guarded so the cell can be re-run after 'Id' has already become the index.
if 'Id' in submission.columns:
    submission = submission.set_index('Id')
# Make sure the target columns can hold fractional probabilities whatever dtype
# was inferred from the sample file.
submission[event_columns] = submission[event_columns].astype(float)
submission.update(result)
submission.reset_index().to_csv('submission.csv', index=False)