# Sleep Classification Algorithms 

In this notebook we define some of the most common sleep classification algorithms used to date (excluding deep learning approaches, which are presented in a separate file). This is an exploratory notebook, not the finalized one we use for our batch processing.

In [3]:
# General Libraries
import pandas as pd
import numpy as np
import matplotlib as plt
import seaborn as sns
import pampro 
from datetime import datetime, date, time, timedelta
from pampro import data_loading, Time_Series, Channel, channel_inference, Bout , triaxial_calibration
from io import StringIO # required by pd.read_csv. It expects an object with a .read() method.
from glob import glob
import re 

# Machine Learning Libraries
from sklearn import metrics
from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier
from sklearn.linear_model import LinearRegression, LogisticRegression, SGDClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import ShuffleSplit, train_test_split, cross_val_predict, KFold, LeaveOneOut
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

%matplotlib inline

The first step on our journey to classify sleep-wake cycles is to define functions that are able to input all sorts of accelerometer and heart rate data.
The current version of this software focuses on accelerometer data but next iterations will integrate HR signals (TO BE ADDED)

Additionally, we will add definition functions for our ground truths (Polysomnography (PSG) gold standard method, sleep diaries, etc).

** Potentially add an attribute or feature for age group to make sure we are using the best classifier and that we are not misinterpreting the data.

## Accelerometer Data Preparation

For our Accelerometer Data Preparation section we rely on PAMPRO: https://github.com/Thomite/pampro, which was developed by Tom White (@Thomite on Github) from our lab. We focus on Axivity, Actigraph, Geneactiv and ActivPAL data preprocessing here as they are the most common accelerometers used for research purposes these days but feel free to add any commits. The Preprocessing Steps done with PAMPRO are defined in sleep_data_preprocessing.py in this repository.

In [None]:
def load_acc_data(filename):
    """Load pre-processed accelerometer data and add the activity helper columns.

    The reader is chosen from the file extension: ``.csv`` files are read with
    ``pd.read_csv``; anything else is read with ``pd.read_hdf`` (the original
    behaviour).

    Parameters
    ----------
    filename : str or path-like
        Path to the data file. The file must contain an ``activity`` column.

    Returns
    -------
    pandas.DataFrame
        The loaded frame with two extra columns:
        ``ActivityValue`` (a copy of ``activity``) and
        ``NonActive`` (True where ``activity`` is exactly 0).

    Raises
    ------
    KeyError
        If the file has no ``activity`` column.
    """
    # The example path used in this notebook is a CSV file, which read_hdf
    # cannot open; dispatch on the extension so both formats work.
    if str(filename).lower().endswith(".csv"):
        acc_df = pd.read_csv(filename)  # index_col="line"?
    else:
        acc_df = pd.read_hdf(filename)

    # Activity columns expected by the heuristic classifiers
    acc_df["ActivityValue"] = acc_df["activity"]
    acc_df["NonActive"] = acc_df["activity"] == 0

    # TODO: get timestamps from Pampro here
    # TODO: add other potential features of use here
    # TODO: delete unnecessary features here

    return acc_df

# Example path to an actigraphy file; replace it with a real recording
filename = "sleepdata/actigraphy/examplefilename001.csv"

acc_df = load_acc_data(filename)

# Preview the first rows to check how the loaded frame looks
acc_df.head()


In [None]:
# Get time periods for the Cole-Kripke, Sadeh and Sazonov algorithms

def get_start_period(header):
    """Extract the recording start time and start date from the file header.

    Parameters
    ----------
    header : sequence of str
        Header lines of the export. Line index 2 must contain the text
        "Start Time" and line index 3 the text "Start Date".

    Returns
    -------
    tuple of (str, str)
        ``(start_time, start_date)`` -- note the order: time first, date second.
    """
    def _text_after(line, label):
        # Keep whatever follows the label on that line, without surrounding whitespace
        return line.split(label)[1].strip()

    start_date = _text_after(header[3], "Start Date")
    start_time = _text_after(header[2], "Start Time")

    return (start_time, start_date)

def get_timestamp(start_date, start_time):
    """Combine a date string (MM/DD/YYYY) and a time string (HH:MM:SS) into a datetime."""
    stamp_text = " ".join((start_date, start_time))
    stamp_format = '%m/%d/%Y %H:%M:%S'
    return datetime.strptime(stamp_text, stamp_format)

def get_time_interval(n):
    """Format the elapsed time of the n-th 30-second interval as "HH:MM:SS".

    Parameters
    ----------
    n : int
        Number of elapsed 30-second intervals (two intervals per minute).

    Returns
    -------
    str
        Elapsed time; the seconds field is "30" for odd ``n`` and "00" otherwise.
    """
    # Integer division is required here: with true division (`/`) the
    # minutes-within-hour term always cancelled to 0 under Python 3.
    minutes = n // 2
    hours, rest_minutes = divmod(minutes, 60)
    rest_seconds = "30" if n % 2 == 1 else "00"
    return "%02d:%02d:%s" % (hours, rest_minutes, rest_seconds)

In [None]:
# Prepare data from accelerometer for algorithm use
# Adapted from Aarti's and Actigraph code

def from_file_to_acc_df(filename):
    """Read a raw accelerometer CSV export and aggregate it to one row per minute.

    The first 10 lines of the file are treated as a header from which the
    recording start date/time are parsed; the remaining lines are parsed as CSV
    and must contain the columns ``X``, ``Y`` and ``Z``.

    Parameters
    ----------
    filename : str or path-like
        Path to the raw export.

    Returns
    -------
    pandas.DataFrame
        One row per minute with a ``time`` column, the per-minute sums of the
        (clipped) axis values, and the derived columns ``NonActive``,
        ``actValue`` and ``ActivityValue``.
    """
    with open(filename) as f:
        lines = f.readlines()

    header = lines[:10]
    start_time, start_date = get_start_period(header)

    # `StringIO` is imported by name at the top of the notebook; the `io`
    # module itself is not in scope, so `io.StringIO` raised a NameError.
    csv = StringIO("".join(lines[10:]))
    acc_df = pd.read_csv(csv)

    # Annotate data with time information: one row per second from the start time
    ts = get_timestamp(start_date, start_time)
    pts = pd.Timestamp(ts)
    acc_df["time"] = pd.date_range(pts, periods=acc_df.shape[0], freq="s")

    # https://actigraph.desk.com/customer/en/portal/articles/2515585-where-can-i-find-documentation-for-the-sadeh-and-cole-kripke-algorithms-
    # The maximal value for each axis is forced to be 300
    # NOTE(review): exports that name the axes Axis1/Axis2/Axis3 need the
    # column list below adjusted -- confirm against the device export format.
    axis_columns = ["X", "Y", "Z"]
    acc_df[axis_columns] = acc_df[axis_columns].clip(upper=300)

    # Group rows by minute (sum of the per-second values)
    acc_df = acc_df.resample("1min", on="time").sum().reset_index()

    # A minute is non-active when all three axes sum to zero
    acc_df["NonActive"] = acc_df[axis_columns].sum(axis=1) == 0

    # The X axis is used as the activity value. It is exposed under both names
    # because the classifiers further down read "ActivityValue".
    acc_df["actValue"] = acc_df["X"]
    acc_df["ActivityValue"] = acc_df["X"]

    return acc_df

# Example path to a raw accelerometer CSV export; replace it with a real recording
filename = "sleepdata/accelerometer/exampleacc001.csv"

# Rebinds acc_df to the frame built from the raw export
acc_df = from_file_to_acc_df(filename)

# Load and deal with training data
This data comes from either PSG, reduced array or diary

In [None]:
# PSG loading Adapted from Aarti's project:


def load_PSG(filename):
    """Load a PSG CSV file and derive the columns used for training/evaluation.

    The CSV is indexed by its ``line`` column and must contain ``activity``,
    ``linetime`` and ``stage``.

    Returns
    -------
    pandas.DataFrame
        The file contents without ``linetime`` and with four added columns:
        ``NonActive`` (activity == 0), ``ActivityValue`` (copy of activity),
        ``time`` (``linetime`` parsed as datetime) and ``gt`` (stage > 0).
    """
    raw = pd.read_csv(filename, index_col="line")

    annotated = raw.assign(
        NonActive=raw["activity"].eq(0),
        ActivityValue=raw["activity"],
        time=pd.to_datetime(raw["linetime"]),
        gt=raw["stage"].gt(0),
    )

    # The raw text timestamp is superseded by the parsed "time" column
    return annotated.drop(columns="linetime")

# Example path to a PSG night; replace it with a real recording
filename = "sleepdata/PSGdata/PSGnight_001.csv"
PSG_df = load_PSG(filename)

# Drop the rows marked as excluded. The explicit copy makes the filtered frame
# independent of the original, so the column assignment below does not trigger
# pandas' SettingWithCopyWarning.
PSG_df = PSG_df[PSG_df["interval"] != "EXCLUDED"].copy()
# 1 where the interval is labelled "ACTIVE", 0 otherwise
PSG_df["active"] = (PSG_df["interval"] == "ACTIVE").astype(int)


In [None]:
# Reduced Array loading

def load_RedArray(filename):
    """Load a reduced-array CSV file and derive the columns used for training/evaluation.

    The CSV is indexed by its ``line`` column and must contain ``activity``,
    ``linetime`` and ``stage``.

    Parameters
    ----------
    filename : str, path-like or file-like
        Anything accepted by ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The file contents without ``linetime`` and with four added columns:
        ``NonActive`` (activity == 0), ``ActivityValue`` (copy of activity),
        ``time`` (``linetime`` parsed as datetime) and ``gt`` (stage > 0).
    """
    RedArray_df = pd.read_csv(filename, index_col="line")

    RedArray_df["NonActive"] = RedArray_df["activity"] == 0
    RedArray_df["ActivityValue"] = RedArray_df["activity"]
    # Timestamps must come from this file's own "linetime" column; the previous
    # version read them from the unrelated global PSG_df.
    RedArray_df["time"] = pd.to_datetime(RedArray_df["linetime"])
    RedArray_df["gt"] = RedArray_df["stage"] > 0

    del RedArray_df["linetime"]

    return RedArray_df

# Example path to a reduced-array recording; replace it with a real one
filename = "sleepdata/ReducedArraydata/RedArray_001.csv"
RedArray_df = load_RedArray(filename)

# Drop the rows marked as excluded. The explicit copy makes the filtered frame
# independent of the original, so the column assignment below does not trigger
# pandas' SettingWithCopyWarning.
RedArray_df = RedArray_df[RedArray_df["interval"] != "EXCLUDED"].copy()
# 1 where the interval is labelled "ACTIVE", 0 otherwise
RedArray_df["active"] = (RedArray_df["interval"] == "ACTIVE").astype(int)

In [None]:
# Implement Data Loading from Diary (Complexity here is different diary types will include some fields and some will not). 
# Use conditional statements for those cases (only real required fields are time in bed, time awake, all other should be optional)


#//////////////////////////////////////////////////ADD DIARY PROCESSING HERE//////////////////


# Heuristic Approaches

Our first set of algorithms includes some of the most-cited algorithms for actigraphy sleep-wake classification, the so-called heuristic approaches: the Sadeh, Cole-Kripke, Sazonov and Van Hees algorithms.

(Potentially add others in the near future).

In [None]:
# Implementation of Sadeh's algorithm using an 11-epoch window centered on the current epoch (mean and NAT terms) and a trailing 6-epoch window (standard deviation term). We follow: #https://actigraph.desk.com/customer/en/portal/articles/2515585-where-can-i-find-documentation-for-the-sadeh-and-cole-kripke-algorithms-

def sadeh(acc_df, min_value=0):
    """Score each epoch with the Sadeh formula and store the result in ``acc_df["sadeh"]``.

    The score is
    ``7.601 - 0.065*MEAN - 0.056*SD - 0.703*LOG - 1.08*NAT`` where, per epoch:

    * MEAN -- mean activity over an 11-epoch window centered on the epoch,
    * SD   -- standard deviation over the epoch and the 5 preceding epochs,
    * NAT  -- number of epochs in the centered 11-epoch window with
      50 <= activity < 100,
    * LOG  -- natural log of (activity + 1).

    Parameters
    ----------
    acc_df : pandas.DataFrame
        Must contain an ``ActivityValue`` column. Modified in place.
    min_value : float, default 0
        An epoch gets 1 when its score is strictly greater than this value,
        0 otherwise.
        NOTE(review): ActiGraph's write-up appears to use -4 as the cut-off --
        confirm which threshold is intended for this data.

    Returns
    -------
    None
        The result is written to the ``sadeh`` column (int, 0 or 1).
    """
    window_past = 6
    window_nat = 11
    window_centered = 11

    activity = acc_df["ActivityValue"]

    mean_activity = activity.rolling(window=window_centered, center=True, min_periods=1).mean()

    # The std of a single observation is NaN, which would silently force the
    # first epoch to 0; treat "no spread measurable" as a spread of 0 instead.
    std_activity = activity.rolling(window=window_past, min_periods=1).std().fillna(0.0)

    # Cast the boolean mask to int so the rolling sum works on a numeric dtype.
    in_nat_band = ((activity >= 50) & (activity < 100)).astype(int)
    nat = in_nat_band.rolling(window=window_nat, center=True, min_periods=1).sum()

    log_activity = np.log(activity + 1.0)

    # The log term carries a coefficient of 0.703 in the Sadeh formula
    # (the previous 0.0703 was a typo that under-weighted it tenfold).
    score = (7.601
             - 0.065 * mean_activity
             - 0.056 * std_activity
             - 0.703 * log_activity
             - 1.08 * nat)

    acc_df["sadeh"] = (score > min_value).astype(int)

# Run the Sadeh scoring on the loaded frame; the function writes its result into acc_df
sadeh(acc_df)

In [None]:
# Cole-Kripke algorithm, again following ActiGraph's site instructions
def cole(acc_df):
    """Score each epoch with the Cole-Kripke weighted sum and store it in ``acc_df["cole"]``.

    The score of an epoch is ``0.00001 * sum(weight * activity)`` taken over the
    4 preceding epochs, the epoch itself and the 2 following epochs. Epochs
    that fall outside the recording contribute 0.

    Parameters
    ----------
    acc_df : pandas.DataFrame
        Must contain an ``ActivityValue`` column. Modified in place; only the
        ``cole`` column is added (no temporary columns are left behind).

    Returns
    -------
    None
        The result is written to the ``cole`` column: 1 where the score is
        below 1.0, 0 otherwise.
    """
    activity = acc_df["ActivityValue"]

    # (lag in epochs, weight); a positive lag looks back in time, a negative
    # lag looks ahead. Order matches the original left-to-right summation.
    lagged_weights = [(4, 404), (3, 598), (2, 326), (1, 441), (0, 1408), (-1, 508), (-2, 350)]
    p = 0.00001

    terms = []
    for lag, weight in lagged_weights:
        # Only shifted series get their out-of-range edges filled with 0;
        # the current epoch is used as-is.
        lagged = activity if lag == 0 else activity.shift(lag).fillna(0.0)
        terms.append(weight * lagged)

    score = p * sum(terms)
    acc_df["cole"] = (score < 1.0).astype(int)
        
# Run the Cole-Kripke scoring on the loaded frame; the function writes its result into acc_df
cole(acc_df)

In [None]:
# Sazonov sleep classification algorithm (DERIVED IN INFANTS), adapted from the Tilmanne et al. paper: https://onlinelibrary.wiley.com/doi/pdf/10.1111/j.1365-2869.2008.00706.x
# which reflects on :https://www.semanticscholar.org/paper/Activity-based-sleep-wake-identification-in-Sazonov-Sazonova/1b152434c886024a8a2b20b31121abd9af953e0f

def saznov(acc_df):
    """Score each epoch with the Sazonov linear model and store it in ``acc_df["saznov"]``.

    For k = 0..8 the feature ``_w<k>`` is the maximum of ``ActivityValue`` over
    the trailing window of k+1 epochs. The score is the intercept minus the
    weighted sum of these features; the ``saznov`` column is 1 where the score
    is greater than 0 and 0 otherwise.

    ``acc_df`` is modified in place (the temporary ``_w*`` columns are removed
    before returning); nothing is returned.
    """
    intercept = 1.99604
    weights = [0.1945, 0.09746, 0.09975, 0.10194, 0.08917,
               0.08108, 0.07494, 0.07300, 0.10207]
    feature_names = ["_w%d" % k for k in range(len(weights))]

    # Trailing maxima over windows of 1, 2, ..., 9 epochs
    for k, feature in enumerate(feature_names):
        trailing = acc_df["ActivityValue"].rolling(window=k + 1, min_periods=1)
        acc_df[feature] = trailing.max()

    # Subtract the weighted features one by one, left to right
    score = intercept
    for weight, feature in zip(weights, feature_names):
        score = score - weight * acc_df[feature]
    acc_df["saznov"] = score

    acc_df.drop(columns=feature_names, inplace=True)

    acc_df["saznov"] = (acc_df["saznov"] > 0.0).astype(int)
# Run the Sazonov scoring on the loaded frame; the function writes its result into acc_df
saznov(acc_df)

In [5]:
# Implement Van Hees Algorithm (2018, SPT based) in Python here. Adapt from R code

## Statistical Learning Approaches

Multiple methods have shown different levels of success; we will explore Naive Bayes, Regularized Logistic Regression, Random Forest, AdaBoost and Extreme Gradient Boosting (potentially including only XGBoost).

## Deep Learning Approaches

During the last 5 years, improved computing capabilities through the use of GPUs have driven a resurgence of artificial neural networks and popularized these approaches for time-series inference. We present a separate pipeline for deep learning approaches in our repository under sleep_classification_deep. Please make sure you have installed the appropriate libraries and packages and have set up either a GPU rig or cloud computing services for these purposes.