# **aha-sleep-fe-cnn**
**sleep event** transforms - read event csv with converters
1. remove rows with NANs
2. transform timestamp -> uint32 minutes past 2017-01-01
3. transform event from text to enum (NAN, **ONSET**, SLEEP, **WAKEUP**, WAKE)
4. drop "step"
**series parquet** transforms - read series parquet with transforms
1. remove rows with NANs
2. transform timestamp -> uint32 minutes past 2017-01-01
3. transform zangle -> uint16 zangle
4. transform enmo -> uint16 enmo * 1000
**feature label** generation
<br>generate labels for series - X_train series, Y labels
```
for each series_id 
    set event onset time
    set event wakeup time
    for each series row
        if series time < event wakeup time
            series row label = SLEEP
        else if series time = event wakeup time
            series row label = WAKEUP
        else if series time > event wakeup time AND series time < event onset time
            series row label = WAKE
        else if series time = event onset time
            series row label = ONSET
```

**model** 
CNN using X_train, Y labels

 

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/child-mind-institute-detect-sleep-states/train_series.parquet
/kaggle/input/child-mind-institute-detect-sleep-states/sample_submission.csv
/kaggle/input/child-mind-institute-detect-sleep-states/train_events.csv
/kaggle/input/child-mind-institute-detect-sleep-states/test_series.parquet


In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
import seaborn as sns
import pyarrow.parquet as pq

In [5]:
# column labels
SERIES_ID_COLUMN = 'series_id'
NIGHT_COLUMN = 'night'
EVENT_COLUMN = 'event'
STEP_COLUMN = 'step'
TIME_COLUMN = 'timestamp'

NAN_TIME = 0

ANGLEZ_COLUMN = 'anglez'
ENMO_COLUMN = 'enmo'

# event labels
ONSET_EVENT_LABEL = 'onset'
WAKEUP_EVENT_LABEL = 'wakeup'

# event enumeration
NAN_EVENT = 0
ONSET_EVENT = 1
SLEEP_EVENT = 2
WAKEUP_EVENT = 3
WAKE_EVENT = 4


In [49]:
import datetime

def remove_rows_with_nan(row):
  """Removes rows with NaN.

  Args:
    row: A Pandas Series object representing the current row of the CSV file.

  Returns:
    None if the row contains NaN, otherwise the row.
  """

  if row.isna().any():
    return None
  else:
    return row

def convert_to_minutes(date_string):
    """Converts a date string to minutes past 2017-01-01.

    Args:
    date_string: A string in the format YYYY-MM-DDTHH:MM:SS-TZ.

    Returns:
    An integer representing the number of minutes since 2017-01-01.
    """
    #print(f"date_string->,{date_string}")
    if len(date_string) == 0:
        return None # NAN_TIME
    
    # 2018-08-14T22:26:00-0400
    date_time = datetime.datetime.strptime(date_string, "%Y-%m-%dT%H:%M:%S-%f")
    #return (date_time - datetime.datetime(2018, 1, 1)).total_seconds() // 60
    #return np.int32((date_time - datetime.datetime(2017, 1, 1)).total_seconds() // 60)
    time_in_minutes = np.int32((date_time - datetime.datetime(2017, 1, 1)).total_seconds() // 60)
    #print(time_in_minutes)
    return time_in_minutes 


def convert_event_enumeration(event_string):
    if event_string == ONSET_EVENT_LABEL:
        return ONSET_EVENT
    elif event_string == WAKEUP_EVENT_LABEL:
        return WAKEUP_EVENT
    return EVENT_COLUMN  # skip column label line #1

In [50]:
print(convert_to_minutes('2018-08-14T22:26:00-0400'))
print(convert_event_enumeration('onset'))
print(convert_event_enumeration('wakeup'))
print(convert_event_enumeration('dunno'))

850946
1
3
event


In [51]:
#converters = {'remove_rows_with_nan': {'column_1': remove_rows_with_nan, 'column_2': remove_rows_with_nan}}

#converters = {'remove_rows_with_nan': {STEP_COLUMN: remove_rows_with_nan, TIME_COLUMN: remove_rows_with_nan}, TIME_COLUMN: convert_to_minutes, EVENT_COLUMN: convert_event_enumeration}
converters = {TIME_COLUMN: convert_to_minutes, EVENT_COLUMN: convert_event_enumeration}
# converters = {'timestamp': convert_to_minutes, 'event': convert_event_enumeration}

In [55]:
train_event = pd.read_csv('/kaggle/input/child-mind-institute-detect-sleep-states/train_events.csv', converters=converters)
print(len(train_events))
# drop NAN rows
train_event = train_event.dropna(axis=0)
print(len(train_event))
# drop night, step columns
train_event = train_event.drop(NIGHT_COLUMN, axis=1)
train_event = train_event.drop(STEP_COLUMN, axis=1)

train_event[TIME_COLUMN] = train_event[TIME_COLUMN].astype('uint32')
print(train_event.iloc[0])
print(train_event[TIME_COLUMN].dtype)

pd.set_option('display.max_rows', None)
train_event

9585
9585
series_id    038441c925bb
event                   1
timestamp          850946
Name: 0, dtype: object
uint32


Unnamed: 0,series_id,event,timestamp
0,038441c925bb,1,850946
1,038441c925bb,3,851441
2,038441c925bb,1,852217
3,038441c925bb,3,852821
4,038441c925bb,1,853863
5,038441c925bb,3,854230
6,038441c925bb,1,855300
7,038441c925bb,3,855768
10,038441c925bb,1,858138
11,038441c925bb,3,858685


In [43]:
train_events = pd.read_csv('/kaggle/input/child-mind-institute-detect-sleep-states/train_events.csv')
# drop NAN rows
#train_events = train_events.dropna(axis=0)
pd.set_option('display.max_rows', None)
train_events

Unnamed: 0,series_id,night,event,step,timestamp
0,038441c925bb,1,onset,4992.0,2018-08-14T22:26:00-0400
1,038441c925bb,1,wakeup,10932.0,2018-08-15T06:41:00-0400
2,038441c925bb,2,onset,20244.0,2018-08-15T19:37:00-0400
3,038441c925bb,2,wakeup,27492.0,2018-08-16T05:41:00-0400
4,038441c925bb,3,onset,39996.0,2018-08-16T23:03:00-0400
5,038441c925bb,3,wakeup,44400.0,2018-08-17T05:10:00-0400
6,038441c925bb,4,onset,57240.0,2018-08-17T23:00:00-0400
7,038441c925bb,4,wakeup,62856.0,2018-08-18T06:48:00-0400
8,038441c925bb,5,onset,,
9,038441c925bb,5,wakeup,,
