# **aha-sleep-fe-cnn**
**sleep event** transforms - read event csv with converters
1. remove rows with NaNs
2. transform timestamp -> uint32 seconds past 2017-01-01
3. transform event from text to enum (NAN, **ONSET**, SLEEP, **WAKEUP**, WAKE)
4. drop "step"
5. drop "night"
**series parquet** transforms - read series parquet with transforms
1. remove rows with NaNs
2. transform timestamp -> integer seconds past 2017-01-01
3. transform anglez -> int16 anglez
4. transform enmo -> uint16 enmo * 1000
**feature label** generation
<br>generate labels for series - X_train series, Y labels
```
for each series_id 
    set event onset time
    set event wakeup time
    for each series row
        if series time < event wakeup time
            series row label = SLEEP
        else if series time = event wakeup time
            series row label = WAKEUP
        else if series time > event wakeup time AND series time < event onset time
            series row label = WAKE
        else if series time = event onset time
            series row label = ONSET
```

**model** 
1. train CNN using X_train, Y labels
2. optimize learning rate
3. forecast
4. evaluate


 

In [None]:
# icecream supplies the ic() debug-print helper that a later cell imports.
!pip install icecream



In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only Kaggle input tree and print the full path of every file.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
import seaborn as sns
import pyarrow.parquet as pq
import tensorflow as tf

In [None]:
from icecream import ic
# Report which TensorFlow version this session is running.
ic(tf.__version__)

In [None]:
####################################################################
import wandb
# Manual toggle: swap which of the two assignments below is commented out.
#wandb_enabled = True       # on -> interactive
wandb_enabled = False     # off -> submission

# wandb.login() is only called when the toggle is on.
if wandb_enabled:
    wandb.login()

In [None]:
# read raw csv & print all rows
# train_events = pd.read_csv('/kaggle/input/child-mind-institute-detect-sleep-states/train_events.csv')
# pd.set_option('display.max_rows', None)
# train_events

In [None]:
####################################################################
from types import SimpleNamespace
# Notebook-wide constants: column names, raw event labels and the integer
# codes used to enumerate events.
tuner = SimpleNamespace(
    # --- column names shared by the events CSV and the series parquet ---
    SERIES_ID_COLUMN='series_id',
    NIGHT_COLUMN='night',
    EVENT_COLUMN='event',
    STEP_COLUMN='step',
    TIME_COLUMN='timestamp',

    # placeholder value for a missing timestamp
    NAN_TIME=0,

    # --- sensor columns ---
    ANGLEZ_COLUMN='anglez',
    ENMO_COLUMN='enmo',

    # --- event labels as they appear in the raw text ---
    ONSET_EVENT_LABEL='onset',
    WAKEUP_EVENT_LABEL='wakeup',

    # --- integer codes for events ---
    NAN_EVENT=0,
    ONSET_EVENT=1,
    SLEEP_EVENT=2,
    WAKEUP_EVENT=3,
    WAKE_EVENT=4,
)


# converters
Converters
==========
## remove_rows_with_nan
## convert_to_seconds
## convert_to_minutes
## convert_event_enumeration
## convert_zangle
## convert_enmo

In [None]:
import datetime

def remove_rows_with_nan(row):
    """Pass a row through unless it has a missing value.

    Args:
        row: A pandas Series holding the values of one row.

    Returns:
        The same ``row`` object when none of its values is NaN,
        otherwise ``None``.
    """
    has_missing = row.isna().any()
    return None if has_missing else row

def convert_to_seconds(date_string):
    """Convert a timestamp string to whole seconds past 2017-01-01.

    The result is measured on the timestamp's own wall clock: the UTC
    offset is parsed (so both ``-0400`` and ``+0100`` style offsets are
    accepted) and then discarded. Discarding it keeps the returned values
    identical to what this function produced before for ``-HHMM`` inputs.

    Args:
        date_string: A string such as ``2018-08-14T22:26:00-0400``
            (``YYYY-MM-DDTHH:MM:SS`` followed by a UTC offset), or an
            empty string for a missing timestamp.

    Returns:
        ``None`` when ``date_string`` is empty, otherwise an ``np.int32``
        count of seconds since 2017-01-01 00:00:00 wall-clock time.

    Raises:
        ValueError: If ``date_string`` is non-empty but does not match the
            expected format.
    """
    if not date_string:
        return None  # missing timestamp

    # %z parses the UTC offset properly. The earlier "-%f" directive read
    # the offset digits as fractional seconds and rejected "+HHMM" offsets.
    wall_clock = datetime.datetime.strptime(
        date_string, "%Y-%m-%dT%H:%M:%S%z"
    ).replace(tzinfo=None)
    elapsed = wall_clock - datetime.datetime(2017, 1, 1)
    return np.int32(elapsed.total_seconds())

def convert_to_minutes(date_string):
    """Convert a timestamp string to whole minutes past 2017-01-01.

    The result is measured on the timestamp's own wall clock: the UTC
    offset is parsed (so both ``-0400`` and ``+0100`` style offsets are
    accepted) and then discarded. Discarding it keeps the returned values
    identical to what this function produced before for ``-HHMM`` inputs.

    Args:
        date_string: A string such as ``2018-08-14T22:26:00-0400``
            (``YYYY-MM-DDTHH:MM:SS`` followed by a UTC offset), or an
            empty string for a missing timestamp.

    Returns:
        ``None`` when ``date_string`` is empty, otherwise an ``np.int32``
        count of complete minutes since 2017-01-01 00:00:00 wall-clock
        time (partial minutes are floored).

    Raises:
        ValueError: If ``date_string`` is non-empty but does not match the
            expected format.
    """
    if not date_string:
        return None  # missing timestamp

    # %z parses the UTC offset properly. The earlier "-%f" directive read
    # the offset digits as fractional seconds and rejected "+HHMM" offsets.
    wall_clock = datetime.datetime.strptime(
        date_string, "%Y-%m-%dT%H:%M:%S%z"
    ).replace(tzinfo=None)
    elapsed = wall_clock - datetime.datetime(2017, 1, 1)
    return np.int32(elapsed.total_seconds() // 60)


def convert_event_enumeration(event_string):
    """Map a raw event label to its integer event code.

    Args:
        event_string: The event text to translate.

    Returns:
        ``tuner.ONSET_EVENT`` for the onset label, ``tuner.WAKEUP_EVENT``
        for the wakeup label, and ``tuner.NAN_EVENT`` for anything else.
    """
    label_codes = (
        (tuner.ONSET_EVENT_LABEL, tuner.ONSET_EVENT),
        (tuner.WAKEUP_EVENT_LABEL, tuner.WAKEUP_EVENT),
    )
    for label, code in label_codes:
        if event_string == label:
            return code
    # unrecognised label
    return tuner.NAN_EVENT

def convert_zangle(zangle_string):
    """Convert a z-angle value to a 16-bit signed integer.

    Args:
        zangle_string: The angle as text (or any value ``np.float32``
            accepts).

    Returns:
        The angle as ``np.int16``; the fractional part is discarded
        (truncation toward zero, not rounding).
    """
    # go through float32 first, then narrow to int16
    return np.int16(np.float32(zangle_string))

def convert_enmo(enmo_string):
    """Convert an ENMO value to a 16-bit unsigned integer in thousandths.

    Args:
        enmo_string: The ENMO reading as text (or any value ``np.float32``
            accepts).

    Returns:
        ``enmo * 1000`` as ``np.uint16``; the fractional part of the scaled
        value is discarded.
    """
    scaled = np.float32(enmo_string) * 1000
    return np.uint16(scaled)


In [None]:
# Smoke-test the converters; results are printed for manual inspection.
trial_begin = '2018-08-14T22:26:00-0400'
trial_end = '2018-09-06T04:59:55-0400'

# each time converter on a real timestamp, then on an empty one
for to_elapsed in (convert_to_seconds, convert_to_minutes):
    for sample in (trial_begin, ''):
        print(to_elapsed(sample))

print(convert_to_seconds(trial_end))
duration = convert_to_seconds(trial_end) - convert_to_seconds(trial_begin)
print(f"begin-end-trial duration (seconds)->{duration}")

# known labels plus one unknown label
for event_text in ('onset', 'wakeup', 'dunno'):
    print(convert_event_enumeration(event_text))

# positive and negative angles
for angle_text in ('2.636700', '-90.636700'):
    print(convert_zangle(angle_text))

print(convert_enmo('0.0216'))

In [None]:
# Per-column converters passed to pd.read_csv when loading the events CSV.
event_converters = {
    tuner.TIME_COLUMN: convert_to_seconds,
    tuner.EVENT_COLUMN: convert_event_enumeration,
}
#event_converters = {tuner.TIME_COLUMN: convert_to_minutes, tuner.EVENT_COLUMN: convert_event_enumeration}
# converters = {'timestamp': convert_to_minutes, 'event': convert_event_enumeration}
#converters = {'remove_rows_with_nan': {STEP_COLUMN: remove_rows_with_nan, TIME_COLUMN: remove_rows_with_nan}, TIME_COLUMN: convert_to_minutes, EVENT_COLUMN: convert_event_enumeration}


In [None]:
# Load the events CSV, converting the timestamp and event columns on the fly.
train_event = pd.read_csv(
    '/kaggle/input/child-mind-institute-detect-sleep-states/train_events.csv',
    converters=event_converters,
)
print(f"raw # event rows - > {len(train_event)}")

# discard events that have any missing field
train_event = train_event.dropna()
print(f"drop NAN # event rows - > {len(train_event)}")

# remove the night and step columns in a single call
train_event = train_event.drop(columns=[tuner.NIGHT_COLUMN, tuner.STEP_COLUMN])

# store the converted timestamp as uint32
train_event[tuner.TIME_COLUMN] = train_event[tuner.TIME_COLUMN].astype('uint32')
print(train_event.iloc[0])
print(train_event[tuner.TIME_COLUMN].dtype)

#pd.set_option('display.max_rows', None)
train_event

In [None]:
# Take the series of the first remaining event. Positional .iloc[0] is used
# because dropna() keeps the original index labels, so label 0 may no longer
# exist and .loc[0] would raise KeyError.
series_id = train_event.iloc[0][tuner.SERIES_ID_COLUMN]
series_id

# Train Series
* metrics at 5 sec intervals
* ~17,280 rows per day (86,400 s / 5 s; NaN rows will be deleted)


In [None]:
parquet_train_series = '/kaggle/input/child-mind-institute-detect-sleep-states/train_series.parquet'
#train_series = pq.read_table(parquet_train_series).to_pandas()

# Load only the rows whose series_id equals the one selected above; the
# filter is handed to read_table instead of being applied after loading.
train_series = pq.read_table(parquet_train_series,
                             filters=[[('series_id', '=', series_id)],]).to_pandas()
train_series

In [None]:
print(f"raw # train_series rows - > {len(train_series)}")

# discard samples that have any missing value
train_series = train_series.dropna()
print(f"drop NAN # train_series rows - > {len(train_series)}")

# show the available columns, then remove the step column
print(train_series.columns)
train_series = train_series.drop(columns=tuner.STEP_COLUMN)
train_series

In [None]:
# train_series_x = train_series
# train_series_x[tuner.TIME_COLUMN] = train_series_x[tuner.TIME_COLUMN].apply(convert_to_seconds)
# train_series_x

In [None]:
# Work on an independent copy. A plain `train_series_x = train_series` only
# aliases the frame, so the conversions below would also overwrite
# train_series, and a later convert_to_seconds pass over train_series would
# fail on the already-converted integer timestamps.
train_series_x = train_series.copy()
train_series_x[tuner.TIME_COLUMN] = train_series_x[tuner.TIME_COLUMN].apply(convert_to_seconds)
train_series_x[tuner.ANGLEZ_COLUMN] = train_series_x[tuner.ANGLEZ_COLUMN].apply(convert_zangle)
train_series_x[tuner.ENMO_COLUMN] = train_series_x[tuner.ENMO_COLUMN].apply(convert_enmo)
train_series_x

In [None]:
# train_series = train_series[tuner.TIME_COLUMN].apply(convert_to_minutes)
# train_series = train_series[tuner.ANGLEZ_COLUMN].apply(convert_zangle)
# train_series = train_series[tuner.ENMO_COLUMN].apply(convert_enmo)

# train_series

In [None]:
# train_series_time_column = train_series[tuner.TIME_COLUMN].apply(convert_to_minutes)
# train_series_zangle_column = train_series[tuner.ANGLEZ_COLUMN].apply(convert_zangle)
# train_series_enmo_column = train_series[tuner.ENMO_COLUMN].apply(convert_enmo)

# train_series_x = train_series.assign('tuner.TIME_COLUMN'=train_series_time_column)

In [None]:
# Convert the timestamp and both sensor columns of train_series in place,
# one column at a time, each with its own converter.
for column_name, convert in (
    (tuner.TIME_COLUMN, convert_to_seconds),
    (tuner.ANGLEZ_COLUMN, convert_zangle),
    (tuner.ENMO_COLUMN, convert_enmo),
):
    train_series[column_name] = train_series[column_name].apply(convert)

train_series