# Child Mind Sleep States

The following code is adapted from/based on a script by Daniel Phalen available at: 
https://www.kaggle.com/code/danielphalen/cmss-grunet-train 

The architecture is a mix of GRU cells and a U-Net. We predict the critical points, i.e. where the onset and wakeup events happen. The target is a Gaussian of tunable width centered on each actual event.

Modifications were made to fit the specific requirements of this project.

The loss function is PyTorch's cross-entropy loss (`torch.nn.CrossEntropyLoss`).


References:

https://www.kaggle.com/code/werus23/sleep-critical-point-train/notebook

https://www.kaggle.com/code/werus23/sleep-critical-point-infer?scriptVersionId=147143158

https://www.kaggle.com/competitions/child-mind-institute-detect-sleep-states/discussion/441470

In [None]:
# Standard library imports
import ctypes
import copy
import gc
import math
import platform
import random
from math import exp, pi, sqrt

# Third-party imports
import dateutil.relativedelta as rd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pyarrow as pa
import sklearn
import torch
import torch.nn.functional as F
from IPython.display import Markdown
from plotly import __version__
from plotly.graph_objs import *
from plotly.offline import init_notebook_mode, iplot
from plotly.tools import FigureFactory as FF
from pyarrow.parquet import ParquetFile
from scipy.interpolate import interp1d
from sklearn.metrics import average_precision_score
from timm.scheduler import CosineLRScheduler
from torch import nn, Tensor
from torch.utils.data import DataLoader, Dataset, SubsetRandomSampler
from tqdm.auto import tqdm

# Local module imports
import event_detection_ap as mapmetric

# Setup and configurations
# Apply the "ggplot" matplotlib style to every figure drawn in this notebook.
plt.style.use("ggplot")
# Initialise plotly's offline notebook mode before any iplot(...) call below.
init_notebook_mode()


In [None]:
# Tolerance values handed to the event-detection AP metric; the same list
# object is shared by both event types.
tolerance_intervals = [12, 36, 60, 90, 120, 150, 180, 240, 300, 360]
tolerances = dict.fromkeys(('wakeup', 'onset'), tolerance_intervals)

# Tell the metric module which column names this notebook uses and switch
# its scoring-interval handling off.
mapmetric.use_scoring_intervals = False
mapmetric.score_column_name = 'score'
mapmetric.event_column_name = 'event'
mapmetric.time_column_name = 'step'
mapmetric.series_id_column_name = 'series_id'

In [None]:
class PATHS:
    """
    Central registry of the file-system locations used by this notebook.

    Flip ``USE_KAGGLE_PATHS`` to True to read from the Kaggle input
    directories; leave it False to read from the local ``../data/`` tree.
    """
    USE_KAGGLE_PATHS = False

    # Root folder holding the competition files
    MAIN_DIR = (
        "/kaggle/input/child-mind-institute-detect-sleep-states/"
        if USE_KAGGLE_PATHS
        else "../data/"
    )
    # Folder holding one parquet file per series
    SPLIT_DIR = (
        "/kaggle/input/child-sleep-mind-split-train/"
        if USE_KAGGLE_PATHS
        else f"{MAIN_DIR}split-train/"
    )

    # Files that live directly under MAIN_DIR in both environments
    SUBMISSION = MAIN_DIR + "sample_submission.csv"
    TRAIN_EVENTS = MAIN_DIR + "train_events.csv"
    TRAIN_SERIES = MAIN_DIR + "train_series.parquet"
    TEST_SERIES = MAIN_DIR + "test_series.parquet"

    @staticmethod
    def get_series_filename(series_id):
        """
        Builds the path of the per-series parquet file inside SPLIT_DIR.

        Parameters:
            series_id: Identifier of the series; it is formatted into the file name.

        Returns:
            str: ``<SPLIT_DIR><series_id>_test_series.parquet``.
        """
        return PATHS.SPLIT_DIR + f"{series_id}_test_series.parquet"


In [None]:
class CFG:
    """Notebook-wide switches: run mode, verbosity, RNG seed and torch device."""
    SEED = 42
    VERBOSE = True
    DEMO_MODE = False

    # Fall back to the CPU unless a CUDA device is actually usable.
    DEVICE = 'cpu'
    if torch.cuda.is_available():
        DEVICE = 'cuda'

In [None]:
def torch_fix_seed(seed=42):
    """
    Seeds every random number generator this notebook relies on.

    Python's `random`, NumPy and PyTorch are seeded with the same value;
    when CUDA is available the CUDA generator is seeded as well.

    Parameters:
        seed (int): The seed value for random number generators. Default is 42.

    Returns:
        None
    """
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    if not torch.cuda.is_available():
        return

    torch.cuda.manual_seed(seed)
    # Fully deterministic behaviour is intentionally left off because it can
    # cost performance. To enable it, additionally set:
    #   torch.backends.cudnn.deterministic = True
    #   torch.use_deterministic_algorithms(True)
    #   torch.backends.cudnn.benchmark = False

torch_fix_seed(CFG.SEED)  # seed all RNGs once, up front, with the value configured in CFG

In [None]:
def clean_memory():
    """
    Releases as much memory as possible back to the system.

    Runs Python's garbage collector, empties PyTorch's CUDA cache when CUDA
    is available and, on Linux only, asks libc to trim the malloc heap.
    A failure of the libc call is reported with a message and not raised.
    """
    gc.collect()

    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    # malloc_trim is only attempted on Linux
    if platform.system() != "Linux":
        return

    try:
        ctypes.CDLL("libc.so.6").malloc_trim(0)
    except Exception as e:
        print(f"Error during malloc_trim: {e}")

## Explore Train Events data

In [None]:
# Read the labelled events, then show the row count and the first rows.
train_events = pd.read_csv(PATHS.TRAIN_EVENTS)
print(len(train_events), train_events.head(), sep="\n")

## Data Pre-processing

In [None]:
def find_longest_continuous_event_sequence(gdf):
    """
    Finds the longest run of consecutive nights that each have a complete
    pair of events.

    A night counts as complete when, after dropping rows that contain any
    NaN, it has both an 'onset' and a 'wakeup' row. Runs are compared by
    ``end - start``, so a run must span at least two nights to be reported;
    on ties the earliest run wins.

    Args:
    gdf (DataFrame): Event rows of one series; must provide the 'event' and
        'night' columns.

    Returns:
    tuple: (start_night, end_night) of the longest run, or (-1, -1) when no
        run of two or more consecutive complete nights exists.
    """
    onset_nights = set(gdf[gdf['event'] == 'onset'].dropna().night.unique())
    wakeup_nights = set(gdf[gdf['event'] == 'wakeup'].dropna().night.unique())
    complete_nights = sorted(onset_nights & wakeup_nights)

    best_start, best_end = -1, -1
    best_span = 0

    # None (rather than -1) marks "no run open yet", so that a series whose
    # first complete night is 0 is not mistaken for a continuation of a
    # fictitious night -1.
    run_start = run_end = None

    for night in complete_nights:
        if run_end is not None and night == run_end + 1:
            # Consecutive night: extend the current run
            run_end = night
            continue

        # The run (if any) just ended: keep it if it is the longest so far
        if run_start is not None and run_end - run_start > best_span:
            best_start, best_end = run_start, run_end
            best_span = run_end - run_start

        # Open a new run at this night
        run_start = run_end = night

    # The last run is never closed inside the loop
    if run_start is not None and run_end - run_start > best_span:
        best_start, best_end = run_start, run_end

    return best_start, best_end


In [None]:
def filter_continuous_series(train_events):
    """
    Splits the series in 'train_events' into usable and unusable ones.

    A series is unusable when it has no row without NaN, or when
    find_longest_continuous_event_sequence reports a range whose start and
    end coincide. The number of dropped series is printed.

    Parameters:
        train_events (DataFrame): The DataFrame containing the event data.

    Returns:
        drop_series (list): List of series_ids to be dropped.
        continuous (dict): Maps each kept series_id to its (start, end) range.
    """
    continuous = {}
    drop_series = []

    for series_id, series_events in train_events.groupby('series_id'):
        if not series_events.dropna().empty:
            first_night, last_night = find_longest_continuous_event_sequence(series_events)
            if last_night - first_night != 0:
                continuous[series_id] = (first_night, last_night)
                continue

        # Either every row had a NaN or no continuous range was found
        drop_series.append(series_id)

    print(f'Drop {len(drop_series)} series.')
    return drop_series, continuous


In [None]:
# drop_series: ids to exclude; continuous: series_id -> (start, end) range to keep
drop_series, continuous = filter_continuous_series(train_events)

In [None]:
# Every distinct series id present in the events table (before any filtering)
series_ids = train_events.series_id.unique()
len(series_ids)

In [None]:
# Display the ids themselves as the cell output
series_ids

## Utility Functions

In [None]:
def create_predictions(test_ds, index, model, max_chunk_size=1024):
    """
    Runs the model over one item of an unlabelled dataset, chunk by chunk.

    The item is fed to the model in slices of at most `max_chunk_size` rows;
    the hidden state returned for one slice is detached and passed to the
    next. The raw outputs are then turned into probabilities with a softmax
    over dim=1, i.e. across the two output columns of each row.

    NOTE(review): the training cell normalises with LogSoftmax(dim=0) while
    this function uses softmax over dim=1 -- confirm which axis is intended.

    Parameters:
        test_ds (Dataset): Dataset whose items are feature tensors only.
        index (int): The index of the item to predict.
        model (torch.nn.Module): Called as ``model(chunk, hidden_state)`` and
            expected to return ``(output, hidden_state)``.
        max_chunk_size (int, optional): Maximum number of rows per chunk. Default is 1024.

    Returns:
        pd.DataFrame: One row per input row, columns 'wakeup_val' and 'onset_val'.
    """
    model.eval()

    with torch.no_grad():
        features = test_ds[index]
        n_rows = features.shape[0]

        # Buffer for the raw model outputs, same shape as the input
        logits = torch.zeros(features.shape).to(CFG.DEVICE, non_blocking=True)

        state = None
        for lo in range(0, n_rows, max_chunk_size):
            hi = min(lo + max_chunk_size, n_rows)

            chunk = features[lo:hi].float().to(CFG.DEVICE, non_blocking=True)
            chunk_out, state = model(chunk, state)

            # Carry the state forward without its autograd history
            state = [s.detach() for s in state]
            logits[lo:hi, :] = chunk_out

            del chunk, chunk_out

        clean_memory()

    probabilities = torch.softmax(logits.cpu(), dim=1).numpy()
    return pd.DataFrame(probabilities, columns=['wakeup_val', 'onset_val'])


In [None]:
def get_predictions(res_df, target, SIGMA, *, threshold_ratio=0.1, min_gap=5 * 5):
    """
    Extracts one event per cluster of above-threshold predictions.

    Rows whose `target` value exceeds ``threshold_ratio * max(target)`` are
    grouped into clusters: a new cluster starts whenever two consecutive
    above-threshold rows are more than `min_gap` steps apart. The series is
    cut at the first step of every cluster after the first one, and within
    each resulting segment the row with the highest `target` value becomes an
    event. Its score is the sum of `target` over the open window
    ``(step - 3 * SIGMA, step + 3 * SIGMA)``.

    Segments are half-open, ``[segment_start, next_segment_start)``, and the
    first and last segment extend to the ends of the data. Every row
    therefore belongs to exactly one segment, including the first row of a
    cluster and the very first and last step.

    Parameters:
        res_df (pd.DataFrame): Predictions; must contain 'step' and `target`.
        target (str): The prediction column to analyse.
        SIGMA (float): Width parameter; the scoring window is 3 * SIGMA on each side.
        threshold_ratio (float, optional): Fraction of the column maximum used
            as the detection threshold. Default is 0.1.
        min_gap (int, optional): Minimum distance in steps between two
            above-threshold rows for them to belong to different clusters.
            Default is 25.

    Returns:
        list: A list of events, each represented as [step, target, score].
    """
    steps = res_df['step']
    values = res_df[target]

    # Detection threshold, relative to the strongest prediction
    q = values.max() * threshold_ratio

    # Steps of the rows above the threshold, and the first step of each
    # cluster that is separated from the previous one by more than min_gap
    above = res_df.loc[values > q, 'step']
    cluster_starts = above[above.diff() > min_gap].tolist()

    # Infinite outer edges make the first/last segment reach the data ends
    edges = [float('-inf')] + cluster_starts + [float('inf')]
    span = 3 * SIGMA

    res = []
    for lower, upper in zip(edges[:-1], edges[1:]):
        segment = res_df.loc[(steps >= lower) & (steps < upper)]

        # An empty segment has a NaN maximum, which fails this comparison
        if segment[target].max() > q:
            # Only look at the target column so that unrelated (possibly
            # non-numeric) columns cannot interfere with idxmax
            peak_idx = segment[target].idxmax()
            step = segment.loc[peak_idx, 'step']

            in_window = (steps > step - span) & (steps < step + span)
            score = res_df.loc[in_window, target].sum()

            res.append([step, target, score])

    return res


In [None]:
def compare_predictions(valid_ds, index, model, max_chunk_size=1024):
    """
    Produces predicted and actual target tables for one labelled dataset item.

    The features are fed to the model in slices of at most `max_chunk_size`
    rows, with the detached hidden state of each slice passed on to the next.
    Predictions are converted with a softmax over dim=1 (across the two
    output columns of each row); the targets are returned unchanged.

    NOTE(review): the training cell normalises with LogSoftmax(dim=0) while
    this function uses softmax over dim=1 -- confirm which axis is intended.

    Parameters:
        valid_ds (Dataset): Dataset whose items are (features, targets) pairs.
        index (int): The index of the item to evaluate.
        model (torch.nn.Module): Called as ``model(chunk, hidden_state)`` and
            expected to return ``(output, hidden_state)``.
        max_chunk_size (int, optional): Maximum number of rows per chunk. Default is 1024.

    Returns:
        Tuple[pd.DataFrame, pd.DataFrame]: (predicted, actual), both with
        columns 'wakeup_val' and 'onset_val'.
    """
    columns = ['wakeup_val', 'onset_val']
    model.eval()

    with torch.no_grad():
        features, targets = valid_ds[index]
        targets = targets.to(CFG.DEVICE, non_blocking=True)

        # Buffer for the raw model outputs, shaped like the targets
        logits = torch.zeros(targets.shape).to(CFG.DEVICE, non_blocking=True)

        state = None
        n_rows = features.shape[0]
        for lo in range(0, n_rows, max_chunk_size):
            hi = min(lo + max_chunk_size, n_rows)

            chunk = features[lo:hi].float().to(CFG.DEVICE, non_blocking=True)
            chunk_out, state = model(chunk, state)

            # Carry the state forward without its autograd history
            state = [s.detach() for s in state]
            logits[lo:hi, :] = chunk_out

            del chunk, chunk_out

        clean_memory()

    predicted_df = pd.DataFrame(torch.softmax(logits.cpu(), dim=1).numpy(), columns=columns)
    actual_df = pd.DataFrame(targets.cpu().numpy(), columns=columns)
    return predicted_df, actual_df


In [None]:
class SleepDatasetTrain(Dataset):
    """
    A PyTorch Dataset for Child Mind Sleep States, focusing on raw anglez and enmo variables.

    Each item is one whole series, read from its parquet file on access.
    When an events table is supplied, the series is cropped around the
    selected events and two target columns ('wakeup_val', 'onset_val') are
    built as sums of Gaussians centred on the events, each normalised to sum
    to 1 over the series.

    Attributes:
        series_ids (list): List of series ids in the dataset.
        events (pd.DataFrame): DataFrame containing event data, or None.
        len_mult (int): Returned sequences are truncated to a multiple of this value.
        continuous (dict): Maps series_id to the (start, end) nights to keep, or None.
        sigma (float): Width of the Gaussians; forced to None when `events` is None.
    """
    def __init__(self, series_ids, events, len_mult, continuous=None, sigma=None):
        """
        Initializes the SleepDatasetTrain dataset.

        Parameters:
            series_ids (list): List of series ids in the dataset.
            events (pd.DataFrame): DataFrame containing event data, or None for unlabelled data.
            len_mult (int): Length multiplier to ensure sequence length is a multiple of this value.
            continuous (dict, optional): Dictionary for trimming series. Default is None.
            sigma (float, optional): Width of the distribution for output. Default is None.
        """
        self.series_ids = series_ids
        self.continuous = continuous
        self.len_mult = len_mult
        self.events = events
        # Without events there is nothing to build targets from
        self.sigma = sigma if events is not None else None

    @staticmethod
    def _event_density(steps, event_steps, sigma):
        """
        Builds a normalised sum of Gaussians over `steps`.

        Parameters:
            steps (array-like): Step value of every row.
            event_steps (array-like): Step values at which a Gaussian is centred.
            sigma (float): Standard deviation of each Gaussian, in steps.

        Returns:
            np.ndarray: float64 array of len(steps) that sums to 1, or all
            zeros when there are no events (or all of them underflow).
        """
        # Signed 64-bit arithmetic so that `step - centre` can go negative
        # whatever integer dtype the inputs use.
        steps = np.asarray(steps, dtype=np.int64)
        event_steps = np.asarray(event_steps, dtype=np.int64)

        density = np.zeros(len(steps), dtype=np.float64)
        for centre in event_steps:
            density += np.exp(-0.5 * ((steps - centre) / sigma) ** 2)

        # The Gaussian prefactor is omitted on purpose: it is a constant that
        # this normalisation would cancel. Dividing only when the total is
        # positive keeps the column at 0 instead of NaN (0 / 0).
        total = density.sum()
        if total > 0:
            density /= total
        return density

    def load_data(self, series_id):
        """
        Loads and preprocesses data for a given series_id.

        Parameters:
            series_id: The series id for which to load the data.

        Returns:
            pd.DataFrame: The series, truncated to a multiple of `len_mult`
            rows. With events it is additionally cropped and carries the
            'event', 'night', 'wakeup_val' and 'onset_val' columns.
        """
        # Load the data from a file
        filename = PATHS.get_series_filename(series_id)
        data = pd.read_parquet(filename)

        # If events data is provided
        if self.events is not None:
            if self.continuous is not None:
                start, end = self.continuous[series_id]
            else:
                # No trimming information: keep every night
                start, end = 0, 1000000

            # Margin, in steps, kept before the first and after the last
            # selected event (6 hours if one step is 5 seconds -- TODO confirm)
            gap = 6*60*12

            # Filter events data
            tmp = self.events[(self.events.series_id == series_id) & (self.events.night >= start) & (self.events.night <= end)]
            data = data[(data.step > (tmp.step.min() - gap)) & (data.step < (tmp.step.max() + gap))]

            # Join events data with the main data
            data = data.set_index(['series_id', 'step']).join(tmp.set_index(['series_id', 'step'])[['event', 'night']]).reset_index()

            # A zero width would divide by zero; fall back to 1 step. A local
            # is used so that loading data never changes the dataset's state.
            sigma = 1 if self.sigma == 0 else self.sigma

            steps = data.step.astype(np.int64).to_numpy()
            for evt in ['wakeup', 'onset']:
                event_steps = data.loc[data.event == evt, 'step'].to_numpy()
                data[f'{evt}_val'] = self._event_density(steps, event_steps, sigma)

        # Trim the data to be a multiple of len_mult
        n = int((len(data) // self.len_mult) * self.len_mult)
        return data.iloc[:n]

    def __len__(self):
        """
        Returns the length of the dataset.

        Returns:
            int: The number of series ids in the dataset.
        """
        return len(self.series_ids)

    def __getitem__(self, index):
        """
        Gets the item at the specified index.

        Parameters:
            index (int): The index of the item.

        Returns:
            Tuple[torch.Tensor, torch.Tensor]: (X, Y) with X holding the
            'anglez'/'enmo' features and Y the 'wakeup_val'/'onset_val'
            targets, both float32. Only X is returned when sigma is None.
        """
        # Load the data for the given series id
        series_id = self.series_ids[index]
        data = self.load_data(series_id)

        # Extract features and convert them to a tensor
        X = data[['anglez', 'enmo']].values.astype(np.float32)
        X = torch.from_numpy(X)

        # If sigma is provided, also prepare the target tensor
        if self.sigma is not None:
            Y = data[['wakeup_val', 'onset_val']].values.astype(np.float32)
            Y = torch.from_numpy(Y)
            return X, Y
        else:
            return X


## Model Architecture

In [None]:
class ResidualBiLSTM(nn.Module):
    """
    Bi-directional LSTM with residual connections.

    The LSTM output goes through Linear -> LayerNorm -> ReLU twice, the
    second time projecting back to `hidden_size`, and the block input is
    then added to the result. Input and output feature sizes are therefore
    both `hidden_size`.
    """
    def __init__(self, hidden_size, n_layers=1, bidir=True):
        """
        Parameters:
            hidden_size (int): Feature size of the input, the LSTM state and the output.
            n_layers (int, optional): Number of stacked LSTM layers. Default is 1.
            bidir (bool, optional): Whether the LSTM is bidirectional. Default is True.
        """
        super(ResidualBiLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers

        self.lstm = nn.LSTM(hidden_size, hidden_size, n_layers,
                            batch_first=True, bidirectional=bidir)
        dir_factor = 2 if bidir else 1
        self.fc1 = nn.Linear(hidden_size * dir_factor, hidden_size * dir_factor * 2)
        self.ln1 = nn.LayerNorm(hidden_size * dir_factor * 2)
        self.fc2 = nn.Linear(hidden_size * dir_factor * 2, hidden_size)
        self.ln2 = nn.LayerNorm(hidden_size)

    def forward(self, x, h=None, c=None):
        """
        Parameters:
            x (Tensor): Input whose last dimension is `hidden_size`.
            h (Tensor, optional): Previous hidden state.
            c (Tensor, optional): Previous cell state.

        Returns:
            Tuple[Tensor, Tensor, Tensor]: (output, new hidden state, new cell state).
        """
        # nn.LSTM takes either a (h, c) tuple of tensors or None; None makes
        # it start from zero states. If either half is missing, start fresh.
        hx = None if h is None or c is None else (h, c)
        res, (new_h, new_c) = self.lstm(x, hx)
        res = self.fc1(res)
        res = self.ln1(res)
        res = nn.functional.relu(res)
        res = self.fc2(res)
        res = self.ln2(res)
        res = nn.functional.relu(res)
        res = res + x  # skip connection
        return res, new_h, new_c


class MultiResidualBiLSTM(nn.Module):
    """
    Stack of ResidualBiLSTM blocks between an input and an output projection.

    The input is projected to `hidden_size` (Linear -> LayerNorm -> ReLU),
    passed through `n_layers` single-layer ResidualBiLSTM blocks and finally
    projected to `out_size`.
    """
    def __init__(self, input_size, hidden_size, out_size, n_layers, bidir=True):
        super().__init__()

        self.input_size, self.hidden_size = input_size, hidden_size
        self.out_size, self.n_layers = out_size, n_layers

        self.fc_in = nn.Linear(input_size, hidden_size)
        self.ln = nn.LayerNorm(hidden_size)

        blocks = []
        for _ in range(n_layers):
            blocks.append(ResidualBiLSTM(hidden_size, n_layers=1, bidir=bidir))
        self.res_bilstms = nn.ModuleList(blocks)

        self.fc_out = nn.Linear(hidden_size, out_size)

    def forward(self, x, h=None, c=None):
        """
        Parameters:
            x (Tensor): Input whose last dimension is `input_size`.
            h (list, optional): One hidden state per block.
            c (list, optional): One cell state per block.

        Returns:
            Tuple[Tensor, list, list]: Output plus the per-block hidden and cell states.
        """
        # Without a complete (h, c) pair every block starts from scratch
        if h is None or c is None:
            h = [None] * self.n_layers
            c = [None] * self.n_layers

        x = F.relu(self.ln(self.fc_in(x)))

        new_h = []
        new_c = []
        for idx, block in enumerate(self.res_bilstms):
            x, h_out, c_out = block(x, h[idx], c[idx])
            new_h.append(h_out)
            new_c.append(c_out)

        return self.fc_out(x), new_h, new_c

    

class EncoderLayer(nn.Module):
    """
    One encoder level: Conv1d -> optional LayerNorm -> ReLU.

    Input and output are channels-last (``(..., length, channels)``); the
    tensor is transposed around the convolution, which works channels-first.
    """
    def __init__(self, in_channels, hidden_size, kernel_size, stride, padding, dilation, use_layernorm, print_shape):
        """
        Parameters:
            in_channels (int): Number of input channels.
            hidden_size (int): Number of output channels.
            kernel_size (int): Convolution kernel size.
            stride (int): Convolution stride.
            padding (int): Convolution padding.
            dilation (int): Convolution dilation.
            use_layernorm (bool): Whether to apply LayerNorm over the channels.
            print_shape (bool): Whether to print tensor shapes during forward.
        """
        super(EncoderLayer, self).__init__()
        # The dilation argument is forwarded (it used to be ignored in
        # favour of a literal 1).
        self.conv = nn.Conv1d(in_channels, hidden_size, kernel_size, stride, padding=padding, dilation=dilation)
        self.ln = nn.LayerNorm(hidden_size) if use_layernorm else None
        self.print_shape = print_shape

    def forward(self, x):
        x = self.conv(x.transpose(-1,-2))
        if self.print_shape:
            print('After Conv', x.shape)
        # Back to channels-last, normalising over the channels if requested
        if self.ln is not None:
            x = self.ln(x.transpose(-1, -2))
        else:
            x = x.transpose(-1,-2)
        if self.print_shape:
            print('After Layernorm', x.shape)
        x = nn.functional.relu(x)
        return x


class ResidualBiGRU(nn.Module):
    """
    Bi-directional GRU with a residual connection.

    GRUNET references this class, but the visible notebook defines only the
    LSTM variant (ResidualBiLSTM). It is reconstructed here as the GRU
    counterpart of that block: GRU -> (Linear -> LayerNorm -> ReLU) x 2 ->
    add input.
    NOTE(review): attribute names (gru, fc1, ln1, fc2, ln2) are assumed;
    confirm them against any previously saved checkpoint.
    """
    def __init__(self, hidden_size, n_layers=1, bidir=True):
        super(ResidualBiGRU, self).__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers

        self.gru = nn.GRU(hidden_size, hidden_size, n_layers,
                          batch_first=True, bidirectional=bidir)
        dir_factor = 2 if bidir else 1
        self.fc1 = nn.Linear(hidden_size * dir_factor, hidden_size * dir_factor * 2)
        self.ln1 = nn.LayerNorm(hidden_size * dir_factor * 2)
        self.fc2 = nn.Linear(hidden_size * dir_factor * 2, hidden_size)
        self.ln2 = nn.LayerNorm(hidden_size)

    def forward(self, x, h=None):
        """Returns (output, new hidden state); h=None starts from a zero state."""
        res, new_h = self.gru(x, h)
        res = self.fc1(res)
        res = self.ln1(res)
        res = nn.functional.relu(res)
        res = self.fc2(res)
        res = self.ln2(res)
        res = nn.functional.relu(res)
        res = res + x  # skip connection
        return res, new_h


class GRUNET(nn.Module):
    """
    U-Net-like model with a stack of residual bidirectional GRUs at the bottleneck.

    A chain of EncoderLayers down-samples the sequence, the GRU stack
    processes the shortened sequence, and transposed convolutions up-sample
    it again. A final 1x1 convolution produces the output, which has the
    same shape as the input when the sequence length is a multiple of the
    product of the strides and the kernel sizes are odd.
    """
    def __init__(self, arch, in_channels, kernel_size, stride, dconv_padding, hidden_size, n_layers, bidir=True, print_shape=False):
        """
        Initialize the GRUNET model.

        Args:
            arch (list of tuples): One (in_channels, out_channels, stride,
                kernel_size) tuple per encoder level; the decoder mirrors it.
            in_channels (int): Number of input channels (stored only; the
                layer sizes come from `arch`).
            kernel_size (int): Stored, and used to derive `self.padding`; the
                layers use the kernel sizes from `arch`.
            stride (int): Stored only; the layers use the strides from `arch`.
            dconv_padding (int): Accepted for interface compatibility; unused.
            hidden_size (int): Size of the GRU state; must equal the output
                channels of the last `arch` entry.
            n_layers (int): Number of residual GRU blocks.
            bidir (bool): Whether to use bidirectional GRU.
            print_shape (bool): Whether to print the shape of tensors during forward pass.
        """
        super(GRUNET, self).__init__()

        self.input_size = in_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.hidden_size = hidden_size
        # self.out_size = out_size
        self.n_layers = n_layers
        self.padding = kernel_size//2
        self.print_shape = print_shape
        self.arch = arch
        self.dilation = 1
        assert arch[-1][1] == hidden_size

        # Encoder: one strided convolution per arch entry
        self.conv = nn.Sequential(*[
            EncoderLayer(in_chan, out_chan, ksize, stride=layer_stride, padding=ksize//2,
                         dilation=self.dilation, use_layernorm=True, print_shape=print_shape)
            for in_chan, out_chan, layer_stride, ksize in self.arch
        ])

        # Bottleneck: residual GRU blocks
        self.res_bigrus = nn.ModuleList(
            [
                ResidualBiGRU(hidden_size, n_layers=1, bidir=bidir)
                for _ in range(n_layers)
            ]
        )

        # Decoder: mirror of the encoder. output_padding = stride - 1 makes
        # each transposed convolution multiply the length by exactly its
        # stride (for odd kernels); for stride 2 this is the previous
        # hard-coded value 1.
        decoder = []
        for in_chan, out_chan, layer_stride, ksize in reversed(arch):
            decoder += [
                nn.ConvTranspose1d(out_chan, in_chan, ksize, stride=layer_stride, padding=ksize//2,
                                   dilation=self.dilation, output_padding=layer_stride - 1),
                nn.Conv1d(in_chan, in_chan, ksize, stride=1, padding=ksize//2, dilation=self.dilation), nn.ReLU(),
                nn.Conv1d(in_chan, in_chan, ksize, stride=1, padding=ksize//2, dilation=self.dilation), nn.ReLU(),
            ]
        self.dconv = nn.Sequential(*decoder)

        # The decoder ends with arch[0][0] channels (2 in this notebook)
        self.output_layer = nn.Conv1d(arch[0][0], arch[0][0], kernel_size=1, stride=1)

    def forward(self, x, h=None):
        """
        Args:
            x (Tensor): Channels-last input, ``(..., length, channels)``.
            h (list, optional): One hidden state per GRU block; None starts
                every block from a zero state.

        Returns:
            Tuple[Tensor, list]: Channels-last output and the new hidden states.
        """
        # if we are at the beginning of a sequence (no hidden state)
        if h is None:
            # (re)initialize the hidden state
            h = [None for _ in range(self.n_layers)]

        if self.print_shape:
            print('In', x.shape)
        x = self.conv(x)
        if self.print_shape:
            print('After EncoderLayer', x.shape)
        new_h = []
        for i, res_bigru in enumerate(self.res_bigrus):
            x, new_hi = res_bigru(x, h[i])
            new_h.append(new_hi)
        if self.print_shape:
            print('After GRU', x.shape)
        # The decoder convolutions work channels-first
        x = self.dconv(x.transpose(-1, -2))
        if self.print_shape:
            print('After DConv', x.shape)

        x = self.output_layer(x)
        x = x.transpose(-1,-2)

        if self.print_shape:
            print('After SmoothConv', x.shape)

        return x, new_h  # raw outputs (no softmax applied here) + hidden states

In [None]:
# Define the architecture: one (in_channels, out_channels, stride, kernel_size)
# tuple per encoder level (this is the order in which GRUNET unpacks them).
arch = [
    (2, 8, 2, 3),
    (8, 32, 2, 3),
    (32, 64, 2, 3)
]

in_channels = 2
hidden_size = arch[-1][1]
kernel_size = 20
# Index 2 of an arch tuple is the stride (index 0, used previously, is the
# input channel count).
stride = arch[-1][2]
dilation = 1
n_layers = 3
dconv_padding = 3
# Sequences must be a multiple of the total down-sampling factor, i.e. the
# product of the per-level strides (8 here, same as the former 2**len(arch)).
len_mult = int(np.prod([layer_stride for _, _, layer_stride, _ in arch]))

# Create the GRUNET model (print_shape=True traces the tensor shapes once)
net = GRUNET(
    arch=arch,
    in_channels=in_channels,
    kernel_size=kernel_size,
    stride=stride,
    dconv_padding=dconv_padding,
    hidden_size=hidden_size,
    n_layers=n_layers,
    bidir=True,
    print_shape=True
)

# Generate random input data
X = torch.randn(512, 2).float()

# Forward pass through the network
Z, h = net(X)

# Check if input and output shapes match
assert X.shape == Z.shape


## Training

In [None]:
# Keep only the series that were not marked for dropping
useable_series_ids = [sid for sid in series_ids if sid not in drop_series]

# DEMO_MODE keeps at most the first 75 series for quicker runs
if CFG.DEMO_MODE:
    del useable_series_ids[75:]

# Randomise the order in place before the ids are split into folds
np.random.shuffle(useable_series_ids)

# Show how many series remain
len(useable_series_ids)

In [None]:
def get_sigma(epoch):
    """
    Returns the sigma to use for the given training epoch.

    The value shrinks in steps as training progresses: 90 while epoch < 4,
    60 while epoch < 7, and 36 from epoch 7 onwards.

    Parameters:
        epoch (int): The current epoch in the training process.

    Returns:
        int: The sigma value corresponding to the given epoch.
    """
    # (exclusive upper epoch bound, sigma), checked in order
    schedule = ((4, 90), (7, 60))
    for upper_bound, sigma in schedule:
        if epoch < upper_bound:
            return sigma
    return 36


In [None]:
# Constants and Configuration
NUM_FOLDS = 5
LEARNING_RATE = .05
CLIP_VALUE = 5.0
WEIGHT_DECAY = 0.0
WARMUP_PROP = 0.1
EPOCHS = 10
MAX_CHUNK_SIZE = 24 * 60 * 12
SIGMA = 0  # placeholder; replaced by get_sigma(epoch) at the start of every epoch
valid_ds = None  # module-level so the visualization cell can reuse the last validation set

# NOTE(review): the predictions are passed through LogSoftmax(dim=0) below and
# then into CrossEntropyLoss, which applies its own log-softmax over dim=1,
# while the targets are normalised along dim 0. Confirm that this combination
# is intended (the plot title previously said "KLDivLoss").
loss_fct = nn.CrossEntropyLoss()


# Splitting the entire dataset into NUM_FOLDS folds for cross-validation.
for fold, valid_series_ids in enumerate(np.array_split(useable_series_ids, NUM_FOLDS)):

    print(f'Fold {fold}')

    # Set lookup instead of scanning the validation array for every id
    valid_id_set = set(valid_series_ids)
    train_series_ids = [s for s in useable_series_ids if s not in valid_id_set]

    net = GRUNET(arch=arch,in_channels=2, hidden_size=hidden_size, kernel_size=kernel_size, stride=stride,
                 dconv_padding=dconv_padding, n_layers=n_layers, bidir=True, print_shape=False).to(CFG.DEVICE)

    optimizer = torch.optim.Adam(net.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)


    # One scheduler step per training series, over all epochs
    train_size = len(train_series_ids)
    steps = train_size * EPOCHS
    warmup_steps = int(steps * WARMUP_PROP)
    scheduler = CosineLRScheduler(optimizer,t_initial= steps,warmup_t=warmup_steps, warmup_lr_init=1e-5,lr_min=1e-6,)
    m = nn.LogSoftmax(dim=0)

    train_loss_history = []
    valid_loss_history = []
    learning_rate_history = []
    mAP_history = []

    # with torch.autograd.detect_anomaly(check_nan=True):
    for epoch in range(EPOCHS):
        SIGMA = get_sigma(epoch)

        np.random.shuffle(train_series_ids)
        # The datasets are rebuilt every epoch because sigma changes with the epoch
        train_ds = SleepDatasetTrain(train_series_ids, events=train_events, len_mult=len_mult, continuous=continuous, sigma=SIGMA)
        valid_ds = SleepDatasetTrain(valid_series_ids, events=train_events, len_mult=len_mult, sigma=SIGMA)
        print(f'Epoch {epoch}, sigma = {SIGMA}')
        net.train()
        train_loss = 0

        for i in tqdm(range(len(train_ds))):
            X, Y = train_ds[i]
            Y = Y.to(CFG.DEVICE, non_blocking=True)
            if not np.isfinite(Y.sum().cpu()):
                print(f'Nan Target {i}')

            pred = torch.zeros(Y.shape).to(CFG.DEVICE, non_blocking=True)
            optimizer.zero_grad()
            scheduler.step(i+train_size*epoch)
            h = None

            # Feed the series chunk by chunk, carrying the detached hidden state
            seq_len = X.shape[0]
            for j in range(0, seq_len, MAX_CHUNK_SIZE):
                X_chunk = X[j: j + MAX_CHUNK_SIZE].float().to(CFG.DEVICE, non_blocking=True)
                y_pred, h = net(X_chunk, h)
                h = [hi.detach() for hi in h]
                pred[j: j+MAX_CHUNK_SIZE, :] = y_pred

                del X_chunk, y_pred

            if not np.isfinite(pred.sum().cpu().detach()):
                print(f'Nan Pred before logsoftmax {i}')
            pred = m(pred.float())
            if not np.isfinite(pred.sum().cpu().detach()):
                print(f'Nan Pred after logsoftmax {i}')
            loss = loss_fct(pred.float(), Y.float())
            loss.backward()
            train_loss += loss.item()

            nn.utils.clip_grad_norm_(net.parameters(), max_norm=CLIP_VALUE)
            optimizer.step()

            del pred, loss, Y, X, h
            clean_memory()

        # Calculating average training loss for the epoch.
        train_loss /= len(train_ds)
        print(f'Epoch {epoch} train loss = {train_loss}')
        train_loss_history.append(train_loss)
        print(f'Learning Rate = {optimizer.param_groups[0]["lr"]}')
        learning_rate_history.append(optimizer.param_groups[0]["lr"])
        print('Evaluate Validation Loss and mAP')
        net.eval()
        val_loss = 0
        with torch.no_grad():
            for i in tqdm(range(len(valid_ds))):
                X, Y = valid_ds[i]
                Y = Y.to(CFG.DEVICE, non_blocking=True)
                pred = torch.zeros(Y.shape).to(CFG.DEVICE, non_blocking=True)

                h = None

                seq_len = X.shape[0]
                for j in range(0, seq_len, MAX_CHUNK_SIZE):
                    X_chunk = X[j: j + MAX_CHUNK_SIZE].float().to(CFG.DEVICE, non_blocking=True)
                    y_pred, h = net(X_chunk, h)
                    h = [hi.detach() for hi in h]
                    pred[j: j+MAX_CHUNK_SIZE, :] = y_pred

                    del X_chunk, y_pred
                pred = m(pred.float())
                loss = loss_fct(pred.float(), Y.float())
                val_loss += loss.item()
                del pred, loss, Y, X, h
                clean_memory()
            val_loss /= len(valid_ds)

            # mAP is skipped for the very first epoch
            if epoch >= 1:
                all_df = []
                all_truth_df = []
                for i in tqdm(range(len(valid_ds))):
                    series_id = valid_ds.series_ids[i]
                    # print(series_id)
                    data = valid_ds.load_data(series_id)
                    # NOTE(review): compare_predictions runs with its default
                    # max_chunk_size (1024), not MAX_CHUNK_SIZE as used for the
                    # losses above -- confirm this is intended.
                    res_df, act_df = compare_predictions(valid_ds, i, net)
                    res_df['step'] = data['step']
                    onset_pred = get_predictions(res_df, target='onset_val', SIGMA=SIGMA)
                    wakeup_pred = get_predictions(res_df, target='wakeup_val', SIGMA=SIGMA)
                    pred_df = pd.DataFrame(wakeup_pred + onset_pred, columns=['step', 'event', 'score'])
                    pred_df['series_id'] = series_id
                    pred_df['row_id'] = pred_df.index
                    pred_df = pred_df.sort_values(by='step').drop_duplicates(subset='step').reset_index(drop=True)

                    all_df.append(pred_df)
                    # Ground truth restricted to the step range that was actually predicted
                    all_truth_df.append(train_events[(train_events.series_id == series_id) & (train_events.step <= data.step.max()) & (train_events.step >= data.step.min())])

                pred_df = pd.concat(all_df).reset_index(drop=True)
                pred_df['row_id'] = pred_df.index
                pred_df = pred_df[['row_id', 'series_id', 'step', 'event', 'score']]
                pred_df = pred_df.sort_values(by=['series_id', 'step'])
                # 'onset_val' / 'wakeup_val' -> 'onset' / 'wakeup'
                pred_df.event = pred_df.event.map(lambda x: x.replace('_val', ''))
                #pred_df = renormalize(pred_df)
                truth_df = pd.concat(all_truth_df).reset_index(drop=True)
                if len(pred_df) > 0:
                    map_val = mapmetric.event_detection_ap(solution=truth_df, submission=pred_df[['series_id', 'step', 'event', 'score']], tolerances=tolerances)
                else:
                    print('Empty pred dataframe')
                    map_val = 0

                # Save whenever this is the best mAP so far. The first computed
                # mAP has nothing to beat and is saved too; otherwise a run
                # whose best epoch is the first scored one leaves no checkpoint.
                tmp = [x for x in mAP_history if not np.isnan(x)]
                if not tmp or map_val > np.max(tmp):
                    torch.save(net.state_dict(), f'model_best_mAP{fold}.pth')
            else:
                map_val = np.nan

        print(f'Epoch {epoch} validation loss = {val_loss}, mAP = {map_val}')
        valid_loss_history.append(val_loss)
        mAP_history.append(map_val)


    torch.save(net.state_dict(), f'model_resid_bigru_fold{fold}.pth')
    # The title names the loss that is actually used (loss_fct above)
    iplot({'data': [Scatter(y=train_loss_history, name='train'), Scatter(y=valid_loss_history, name='valid')], 'layout': Layout(title=f'CrossEntropyLoss {fold}')})
    iplot({'data': [Scatter(y=learning_rate_history, name='lr')], 'layout': Layout(title=f'Learning Rate {fold}')})
    iplot({'data': [Scatter(y=mAP_history, name='mAP')], 'layout': Layout(title=f'Event mAP {fold}')})
    # Only the first fold is trained; remove the break to run all folds
    print('Break after 1 to save GPU!')
    break

## Visualization

In [None]:
# Plot predicted against actual targets for one validation series.
i = 0
series_id = valid_ds.series_ids[i]
print(series_id)
data = valid_ds.load_data(series_id)
res_df, act_df = compare_predictions(valid_ds, i, net)

# Both tables get the step column so it can serve as the x axis
for frame in (res_df, act_df):
    frame['step'] = data['step']

# One figure per event type, wakeup first
for event_name in ('wakeup', 'onset'):
    column = f'{event_name}_val'
    traces = [
        Scatter(x=res_df['step'], y=res_df[column], name='pred'),
        Scatter(x=act_df['step'], y=act_df[column], name='act'),
    ]
    iplot({'data': traces, 'layout': Layout(title=event_name)})