## Imports

In [76]:
import os
import torch

import numpy as np
import pandas as pd
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt

from tqdm import tqdm
from functools import partial
from collections import OrderedDict
from dateutil.rrule import *
from torch.utils.data import TensorDataset, DataLoader
from torchsummary import summary

## Load test data

In [77]:
# Accelerometer series to run inference on. The later cells read the series_id, step, anglez and enmo columns.
# NOTE(review): this section is titled "Load test data" but the file read here is train_series.parquet —
# confirm this is the intended input.
test_series = pd.read_parquet('./sleep_institute_data/train_series.parquet')

## Define model architecture

In [78]:
# Define model
class Conv1dAutoPad(nn.Conv1d):
    """
    1-D convolution that derives its own zero padding from the kernel size and dilation it was built with:
    padding = dilation * (kernel_size // 2). For odd kernel sizes and stride 1 this keeps the output as long
    as the input. Any ``padding`` argument given by the caller is overwritten.
    """
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # nn.Conv1d stores kernel_size and dilation as 1-tuples
        kernel, = self.kernel_size
        rate, = self.dilation
        self.padding = (rate * (kernel // 2),)


# Default convolution factory used by the WaveNet layers: kernel size 3, dilation 1, no bias.
# Keyword arguments supplied at call time (e.g. dilation=4 or kernel_size=1) override these defaults.
autoconv = partial(Conv1dAutoPad, kernel_size=3, dilation=1, bias=False)


class WaveNetLayerInit(nn.Module):
    """
    Base WaveNet layer: a convolution followed by batch norm and dropout, a tanh * sigmoid gate, and identity
    modules for the skip and residual (shortcut) paths. Subclasses replace ``skip`` and ``shortcut`` with
    parameterized versions.
    """
    def __init__(self, in_channels, out_channels, conv=autoconv, *args, **kwargs):
        """
        :param in_channels: number of channels of the input
        :param out_channels: number of channels produced by the convolution
        :param conv: callable building the convolution; called as conv(in_channels, out_channels, *args, **kwargs)
        :param args: extra positional arguments forwarded to conv
        :param kwargs: extra keyword arguments forwarded to conv (e.g. dilation)
        """
        super().__init__()
        self.in_channels, self.out_channels, self.conv = in_channels, out_channels, conv

        # Named sub-modules: the names ('conv', 'bn', 'drop') become part of the state_dict keys
        self.dilated_conv = nn.Sequential(OrderedDict(
            {
                'conv' : conv(self.in_channels, self.out_channels, *args, **kwargs),

                'bn' : nn.BatchNorm1d(self.out_channels),

                'drop': nn.Dropout(0.1)
            }))

        self.tanh = nn.Tanh()
        self.sig = nn.Sigmoid()

        # Pass-through skip path; overridden by subclasses
        self.skip = nn.Identity()

        # Pass-through residual path; overridden by subclasses
        self.shortcut = nn.Identity()

    def forward(self, x):
        """
        Forward propagation for a single WaveNet layer. The gate applies tanh and sigmoid to the same feature
        maps and multiplies the results (the feature maps are not split into two halves).
        :param x: input data
        :return: tuple (x, skip) — the layer output after the residual addition, and the skip tensor
        """
        residual = x

        # The shortcut is only consulted when the channel count changes; otherwise the raw input is the residual
        if self.should_apply_mapping:
            residual = self.shortcut(x)

        x = self.dilated_conv(x)

        # Gating activation
        x = self.tanh(x) * self.sig(x)

        x = self.skip(x)
        skip = x

        # NOTE(review): `+=` adds in place and `skip` refers to the same tensor as `x`, so the returned skip
        # already contains the residual and equals the returned output. A conventional WaveNet skip would be
        # taken before the residual addition; changing this alters the model's numerics, so confirm against
        # how the saved checkpoints were trained before touching it.
        x += residual
        return x, skip

    @property
    def should_apply_mapping(self):
        # True when the residual needs a projection because input and output channel counts differ
        return self.in_channels != self.out_channels


class addResidualConnection(WaveNetLayerInit):
    def __init__(self, in_channels, out_channels, expansion=1, conv=autoconv, *args, **kwargs):
        """
        Replaces the identity shortcut with a 1X1 convolution + batch norm + dropout projection when
        in_channels != out_channels. When the channel counts match, the shortcut is set to None and the
        layer's forward pass uses the raw input as the residual.
        :param in_channels: number of input channels
        :param out_channels: number of output channels of the layer
        :param expansion: multiplier applied to out_channels to get the projection's output channels
        :param conv: callable building the projection convolution (not forwarded to the parent layer)
        :param args: extra positional arguments forwarded to the parent layer
        :param kwargs: extra keyword arguments forwarded to the parent layer
        """
        super().__init__(in_channels, out_channels, *args, **kwargs)
        self.expansion = expansion

        if self.should_apply_mapping:
            # Same sub-module names and construction order as the parent's dilated_conv stack
            projection = OrderedDict()
            projection['conv'] = conv(self.in_channels, self.map_channels, kernel_size=1)
            projection['bn'] = nn.BatchNorm1d(self.map_channels)
            projection['drop'] = nn.Dropout(0.1)
            self.shortcut = nn.Sequential(projection)
        else:
            self.shortcut = None


    @property
    def map_channels(self):
        # Channel count produced by the shortcut projection
        return self.expansion * self.out_channels

    @property
    def should_apply_mapping(self):
        # A projection is only needed when the layer changes the number of channels
        return self.out_channels != self.in_channels


class addSkipConnection(addResidualConnection):
    def __init__(self, in_channels, out_channels, conv=autoconv, *args, **kwargs):
        """
        Adds the 1X1 convolution applied after the gating of the dilated convolution. The convolution maps
        out_channels to out_channels, so the number of feature maps is unchanged. When in_channels == 1 no
        convolution is added and the skip path passes the gated features through untouched.
        :param in_channels: number of input channels
        :param out_channels: number of output channels of the layer
        :param conv: callable building the 1X1 skip convolution (not forwarded to the parent layer)
        :param args: extra positional arguments forwarded to the parent layer
        :param kwargs: extra keyword arguments forwarded to the parent layer (e.g. dilation)
        """
        super().__init__(in_channels, out_channels, *args, **kwargs)

        if in_channels != 1:
            self.skip = conv(self.out_channels, self.out_channels, kernel_size=1, dilation=1)
        else:
            # forward() calls self.skip(x) unconditionally, so the "no skip convolution" case must still be
            # a callable module; None here raised a TypeError on the first forward pass
            self.skip = nn.Identity()


class WaveNetLayer(nn.Module):
    """
    Thin wrapper that builds one layer from the given layer class and exposes the class-level ``expansion``
    factor read by WaveNetBlock when chaining layers.
    """
    expansion = 1
    def __init__(self, in_channels, out_channels, layer=addSkipConnection, *args, **kwargs):
        """
        :param in_channels: number of input channels
        :param out_channels: number of output channels
        :param layer: class (or callable) used to build the wrapped layer
        :param args: extra positional arguments forwarded to layer
        :param kwargs: extra keyword arguments forwarded to layer
        """
        super().__init__()

        self.layer = layer(in_channels, out_channels, *args, **kwargs)

    def forward(self, x):
        """
        :param x: input data
        :return: the (output, skip) pair produced by the wrapped layer
        """
        residual_out, skip_out = self.layer(x)
        return residual_out, skip_out


class WaveNetBlock(nn.Module):
    def __init__(self, in_channels, out_channels, layer=WaveNetLayer, block_size=1, *args, **kwargs):
        """
        Constructs a block of WaveNet layers of size block_size. The first layer uses dilation 1 and every
        following layer doubles it (2, 4, 8, ...).
        :param in_channels: number of channels entering the first layer
        :param out_channels: number of channels produced by every layer in the block
        :param layer: layer class; its ``expansion`` attribute scales the input channels of follow-up layers
        :param block_size: number of WaveNet layers to include in the block
        :param args: extra positional arguments forwarded to every layer
        :param kwargs: extra keyword arguments forwarded to every layer
        """
        super().__init__()

        stacked = [layer(in_channels, out_channels, dilation=1, *args, **kwargs)]
        for exponent in range(1, block_size):
            stacked.append(
                layer(out_channels * layer.expansion, out_channels, dilation=2**exponent, *args, **kwargs)
            )
        self.block = nn.Sequential(*stacked)

    def forward(self, x):
        """
        Runs the layers in order, feeding each layer's output into the next.
        :param x: input data
        :return: tuple (x, skips) — the last layer's output and every layer's skip tensor concatenated
                 along dim 0
        """
        collected = []
        for wavenet_layer in self.block:
            x, skip_out = wavenet_layer(x)
            collected.append(skip_out)
        return x, torch.cat(collected, 0)


class WaveNetConvs(nn.Module):
    def __init__(self, in_channels, feat_sizes=(16,32), depths=(2, 3), block=WaveNetBlock, *args, **kwargs):
        """
        Constructs the convolutional layers of the WaveNet: an initial convolution stem followed by one block
        per entry in depths. The first block keeps feat_sizes[0] channels; each later block maps
        feat_sizes[i] to feat_sizes[i+1].
        :param in_channels: number of channels of the raw input
        :param feat_sizes: tuple of the number of feature maps per layer in each block
        :param depths: number of layers in every block of the WaveNet
        :param block: class (or callable) used to build each block
        :param args: extra positional arguments forwarded to every block
        :param kwargs: extra keyword arguments forwarded to every block
        """
        super().__init__()

        self.feat_sizes = feat_sizes
        self.depths = depths

        stem_channels = feat_sizes[0]
        self.init_conv = nn.Sequential(
            nn.Conv1d(in_channels, stem_channels, kernel_size=3, padding=1, stride=1),
            nn.BatchNorm1d(stem_channels),
            nn.Dropout(0.1),
            nn.GELU()
        )

        # Consecutive (input, output) channel counts for every block after the first
        self.in_out_pairs = list(zip(feat_sizes, feat_sizes[1:]))

        stages = [block(stem_channels, stem_channels, block_size=depths[0], *args, **kwargs)]
        for (stage_in, stage_out), depth in zip(self.in_out_pairs, depths[1:]):
            stages.append(block(stage_in, stage_out, block_size=depth, *args, **kwargs))
        self.blocks = nn.ModuleList(stages)


    def forward(self, x):
        """
        Applies the stem and then every block in order.
        :param x: input data
        :return: tuple (x, skips) — the last block's output and all block skips concatenated along dim 0.
                 torch.cat along dim 0 requires the remaining dimensions to match, so blocks whose skips have
                 different channel counts cannot be combined here.
        """
        gathered = []
        x = self.init_conv(x)
        for stage in self.blocks:
            x, stage_skips = stage(x)
            gathered.append(stage_skips)
        return x, torch.cat(gathered, 0)


class WaveNetTail(nn.Module):
    def __init__(self, in_channels):
        """
        Output head of the WaveNet: two 1X1 convolutions (each followed by batch norm, dropout and GELU) that
        reduce in_channels -> in_channels // 2 -> 3, then a final kernel-size-7 convolution over the 3 output
        channels.
        :param in_channels: number of channels of the incoming feature maps
        """
        super().__init__()

        bottleneck = in_channels // 2
        num_outputs = 3

        # Order matters: the position of each module is part of its state_dict key
        head = [
            nn.Conv1d(in_channels, bottleneck, kernel_size=1, padding=0),
            nn.BatchNorm1d(bottleneck),
            nn.Dropout(0.1),
            nn.GELU(),
            nn.Conv1d(bottleneck, num_outputs, kernel_size=1, padding=0),
            nn.BatchNorm1d(num_outputs),
            nn.Dropout(0.1),
            nn.GELU(),
            nn.Conv1d(num_outputs, num_outputs, kernel_size=7, padding=3),
        ]
        self.tail = nn.Sequential(*head)

    def forward(self, x):
        """
        :param x: feature maps with in_channels channels
        :return: output of the head, with 3 channels
        """
        return self.tail(x)


class WaveNet(nn.Module):
    def __init__(self, in_channels, *args, **kwargs):
        """
        The completed WaveNet model: the convolutional feature extractor followed by the output tail.
        :param in_channels: number of channels of the raw input
        :param args: extra positional arguments forwarded to WaveNetConvs
        :param kwargs: extra keyword arguments forwarded to WaveNetConvs (e.g. feat_sizes, depths)
        """
        super().__init__()

        self.feature_extraction = WaveNetConvs(in_channels, *args, **kwargs)
        self.tail = WaveNetTail(self.feature_extraction.feat_sizes[-1])

    def forward(self, x):
        """
        Extracts features, adds the rectified sum of all layer skips to them and applies the tail.
        :param x: input data
        :return: output of the tail
        """
        x, skips = self.feature_extraction(x)
        # The feature extractor concatenates one skip tensor per layer along dim 0, i.e. along the batch
        # axis. Summing dim 0 directly would therefore also add up different samples of the batch, making each
        # sample's output depend on the others. Regroup as (layers, *x.shape) so only layers are summed.
        # For a batch of one this is numerically the same as the plain sum over dim 0.
        skip_sum = skips.reshape(-1, *x.shape).sum(dim=0)
        # Out-of-place add: x is the same tensor object as the last layer's skip
        x = x + torch.relu(skip_sum)
        x = self.tail(x)
        return x

## Evaluation helper functions

In [81]:
def evaluate(model, accel_data, mode='hard'):
    """
    Runs a single model over accel_data in evaluation mode and returns its predictions.

    :param model: module to evaluate; it is switched to eval mode as a side effect
    :param accel_data: model input; the model output is treated as (batch, # classes, time)
    :param mode: 'hard' returns the argmax class index per step, any other value returns the softmax
                 probabilities
    :return: list holding one tensor. torch.squeeze removes every size-1 dimension, so for a batch of one
             the tensor is (time,) in 'hard' mode and (# classes, time) otherwise.
    """
    model.eval()
    predictions = []

    # Inference only: skip autograd bookkeeping so no graph is kept alive for every chunk
    with torch.no_grad():
        output = model(accel_data)  # (batch, # classes, time)
        output = F.softmax(output, dim=1)

    if mode == 'hard':
        predictions.append(torch.squeeze(torch.argmax(output, dim=1)))
    else:
        predictions.append(torch.squeeze(output))

    return predictions

def ensemble_predict(models, data, mode='hard'):
    """
    Evaluates the given data with every model of the ensemble. The models must already be loaded; voting
    on the collected predictions is done separately.

    Returns a list with one entry per model, in the order the models were given. Each entry is whatever
    evaluate() returns for that model with the requested mode.
    """
    return [evaluate(member, data, mode=mode) for member in models]

def ensemble_voting(predictions, mode='hard'):
    """
    Takes a nested list of model predictions and computes voting results depending on
    mode selection.

    :param predictions: list with one entry per model; each entry is a list of equally shaped tensors (one
                        per day). In 'hard' mode the tensors hold class indices per step, in 'soft' mode
                        they hold class scores shaped (num classes, num steps).
    :param mode: 'hard' for majority voting over class indices, 'soft' to sum the class scores of all
                 models and take the best class
    :return: tensor (num days, num steps) of voted class indices; ties go to the lowest class index
    :raises ValueError: if mode is neither 'hard' nor 'soft'
    """
    if mode not in ('hard', 'soft'):
        raise ValueError("mode must be 'hard' or 'soft', got " + repr(mode))

    # Stack the per-day tensors of each model, then stack all models
    # (model num, num days, num steps) when mode = hard
    # (model num, num days, num classes, num steps) when mode = soft
    stacked_preds = torch.stack(
        [torch.stack(model_preds, dim=0) for model_preds in predictions], dim=0
    ).to('cpu')

    if mode == 'hard':
        num_classes = 3  # 0 = awake, 1 = asleep, 2 = not wearing

        # Count the models voting for each class. An indexed `+= 1` cannot be used for this: repeated
        # indices are written once, which turns the counts into 0/1 flags and makes the lowest voted class
        # win regardless of the majority.
        num_votes = torch.stack(
            [(stacked_preds == label).sum(dim=0) for label in range(num_classes)], dim=0
        )  # (num classes, num days, num steps)

        return torch.argmax(num_votes, dim=0)  # (num days, num steps)

    summed_preds = torch.sum(stacked_preds, dim=0)  # (num days, num classes, num steps)
    # The class axis is dim 1 here; dim 0 is the day axis
    return torch.argmax(summed_preds, dim=1)  # (num days, num steps)

## Evaluation

In [83]:
def _load_models(model_dir, device):
    """Rebuilds a WaveNet on device for every checkpoint file found in model_dir."""
    models = []
    # Sorted for a reproducible ensemble order; hidden files (e.g. .DS_Store) and sub-directories are not
    # checkpoints and would make torch.load fail
    for file_name in sorted(os.listdir(model_dir)):
        checkpoint_path = os.path.join(model_dir, file_name)
        if file_name.startswith('.') or not os.path.isfile(checkpoint_path):
            continue
        checkpoint = torch.load(checkpoint_path, map_location=device)
        hparams = checkpoint['model_hparams']
        model = WaveNet(2, feat_sizes=hparams['feat sizes'], depths=hparams['depths']).to(device)
        model.load_state_dict(checkpoint['model_state_dict'])
        models.append(model)
    return models


def submission_evaluation(series_data, model_dir='./trained_models'):
    """
    Formats test data into 24 hour segments and generates list of predictions voted on by
    ensemble of models.

    :param series_data: dataframe with the columns series_id, step, anglez and enmo
    :param model_dir: directory holding the trained checkpoints; each checkpoint is a dict with the keys
                      'model_hparams' ('feat sizes', 'depths') and 'model_state_dict'
    :return: three parallel lists with one entry per 24 hour chunk: the voted class per step (tensor of
             length day_length; an incomplete last chunk is zero padded before prediction, so its trailing
             predictions refer to padding), the series id of the chunk, and the step value of the chunk's
             first row
    """
    day_length = 86400//5  # One sample every 5 seconds

    use_mps = torch.backends.mps.is_available()
    device = torch.device('mps' if use_mps else 'cpu')
    print('Predicting with ' + str(device))

    models = _load_models(model_dir, device)

    prediction_list = []
    series_id_list = []
    first_steps = []

    # One pass over the frame instead of several full-frame membership scans per series; sort=False keeps
    # the series in order of first appearance
    grouped = series_data.groupby('series_id', sort=False, observed=True)
    for series_id, series_rows in tqdm(grouped):
        features = np.stack([series_rows.anglez.to_numpy(), series_rows.enmo.to_numpy()])  # (2, num rows)
        series_step = series_rows.step.to_numpy()

        # Stepping by day_length never yields an empty chunk, so a series whose length is an exact multiple
        # of a day no longer produces an extra chunk made only of padding
        for start in range(0, series_step.shape[0], day_length):
            chunk = features[:, start:start + day_length]

            # Pad incomplete days
            pad_length = day_length - chunk.shape[1]
            if pad_length > 0:
                chunk = np.pad(chunk, ((0, 0), (0, pad_length)), constant_values=0)

            data = torch.as_tensor(chunk[np.newaxis], dtype=torch.float32).to(device)  # (1, 2, day_length)

            with torch.no_grad():
                pred_list = ensemble_predict(models, data, mode='hard')
            voted_preds = ensemble_voting(pred_list)

            prediction_list.append(voted_preds[0])
            series_id_list.append(series_id)
            first_steps.append(series_step[start])

    return prediction_list, series_id_list, first_steps

# Run the ensemble over every series; the three results are parallel lists with one entry per 24 hour chunk
preds, id_list, first_steps = submission_evaluation(test_series)

Predicting with mps


100%|██████████| 277/277 [1:18:43<00:00, 17.05s/it]


## Post-processing predictions

In [89]:
def get_longer_list(lists):
    """
    Returns the longest of the given sequences. When several share the maximum length the first of
    them is returned.
    """
    return max(lists, key=len)

def _contiguous_runs(indices):
    """Splits a sorted 1-D index array into arrays of consecutive indices (empty list for empty input)."""
    if indices.size == 0:
        return []
    run_starts = np.where(np.diff(indices) != 1)[0] + 1
    return np.split(indices, run_starts)


def process_prediction(prediction):
    """
    Processes a single 24 hour period of predictions to output the step numbers of 'onset', 'wakeup', or nan
    within that day.

    :param prediction: 1-D numpy array of class labels per step (0 = awake, 1 = asleep, 2 = not wearing)
    :return: tuple (onset_step, wakeup_step) of indices into prediction, or (nan, nan) when the day has no
             usable sleep window
    """
    if prediction.shape[0] == 0:
        return np.nan, np.nan

    # If significant portion of predictions have nan label return nans. The ratio is the fraction of steps
    # with label 2; summing the label values instead would double it and trip the threshold at 25%.
    nan_ratio = np.mean(prediction == 2)
    if nan_ratio >= 0.5:
        return np.nan, np.nan

    # Get indices where subject is predicted to be asleep and awake
    sleep_indxs = np.where(prediction == 1)[0]
    awake_indxs = np.where(prediction == 0)[0]

    # Only windows longer than 30 minutes count; either index array may be empty
    half_hour_length = 360  # Number of steps for half an hour assuming 5 seconds per step
    valid_sleep_windows = [window for window in _contiguous_runs(sleep_indxs)
                           if window.shape[0] > half_hour_length]
    valid_awake_windows = [window for window in _contiguous_runs(awake_indxs)
                           if window.shape[0] > half_hour_length]

    # Return nans if no valid sleep windows are found (this includes being awake for 24 hours)
    if not valid_sleep_windows:
        return np.nan, np.nan

    # Check if any valid awake windows lie between neighbouring valid sleep windows
    chosen_windows = []
    for earlier, later in zip(valid_sleep_windows, valid_sleep_windows[1:]):
        for awake in valid_awake_windows:
            if awake[0] > earlier[-1] and awake[-1] < later[0]:
                # Choose the longer of the two sleep windows (the earlier one on a tie)
                chosen_windows.append(max((earlier, later), key=len))

    if not chosen_windows:
        # No valid waking window separates the sleep windows: treat them as one span
        onset_step = valid_sleep_windows[0][0]
        wakeup_step = valid_sleep_windows[-1][-1]
    else:
        # Valid wake windows found: keep the longest of the chosen sleep windows
        largest_window = max(chosen_windows, key=len)
        onset_step = largest_window[0]
        wakeup_step = largest_window[-1]

    return onset_step, wakeup_step

def postprocessing(pred_list, id_list, first_steps):
    """
    Turns the per-chunk sleep state predictions into a submission table. Every chunk is reduced to an
    'onset' and a 'wakeup' step by process_prediction; the chunk's entry of first_steps is added to both so
    the steps are relative to the whole series rather than to the 24 hour chunk. Chunks without a detected
    onset (nan) contribute no rows.

    Returns a pandas dataframe with the columns row_id, series_id, step, event and score (always 1.0).
    """
    series_ids = []
    event_steps = []
    event_list = []

    for chunk_pred, series_id, offset in zip(pred_list, id_list, first_steps):
        # Get day-relative steps
        onset, wakeup = process_prediction(chunk_pred.numpy())

        # Adjust for 24 hour chunking
        onset += offset
        wakeup += offset

        if np.isnan(onset):
            continue

        event_steps.extend((onset, wakeup))
        event_list.extend(('onset', 'wakeup'))
        series_ids.extend((series_id, series_id))

    num_events = len(event_list)

    # Create submission dataframe
    submission_df = pd.DataFrame({'row_id': list(range(num_events)),
                                  'series_id': series_ids,
                                  'step': event_steps,
                                  'event': event_list,
                                  'score': [1.0] * num_events})

    return submission_df

# Build the submission table from the voted predictions, write it to ./submission.csv (without the
# dataframe index) and preview the first rows
sub_df = postprocessing(preds, id_list, first_steps)
sub_df.to_csv('./submission.csv', index=False)
sub_df.head()