# YASA feature extraction
This notebook runs the out-of-the-box YASA sleep-staging model on one of our seal EEG recordings and compares its predictions with the manually scored hypnogram, to check whether this capstone project is needed at all.

In [1]:
import yasa
import mne
import pandas as pd
import numpy as np
import pytz
import datetime
from sklearn.metrics import confusion_matrix

In [2]:
# Open the processed EDF recording, keeping only the EEG_ICA5 channel.
# preload=False: the samples are not read into memory at this point.
path_to_edf = '../data/raw/01_edf_data/test12_Wednesday_05_ALL_PROCESSED.edf'
raw = mne.io.read_raw_edf(
    path_to_edf,
    include=['EEG_ICA5'],
    preload=False,
)

Extracting EDF parameters from /Users/michael/Desktop/capstone-seal-sleep/jessie-workshop/ecophys-ecoviz/data/raw/01_edf_data/test12_Wednesday_05_ALL_PROCESSED.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


In [3]:
# Pull the sampling frequency and the recording start time out of the EDF header,
# then express the start time as a timezone-aware datetime in US Pacific time so it
# can be subtracted from the (Pacific-localized) label timestamps.
info = raw.info
sfreq = info['sfreq']
edf_start_time = info['meas_date']
# Define the Pacific timezone
pst_timezone = pytz.timezone('America/Los_Angeles')
if isinstance(edf_start_time, datetime.datetime):
    # Keep the wall-clock time and re-label it as Pacific time.
    # - localize() is used rather than naive.astimezone(...): astimezone() on a naive
    #   datetime assumes the *machine's* local timezone, so the result would only be
    #   correct when the notebook runs on a computer set to Pacific time.
    # - .replace(tzinfo=pst_timezone) is avoided because attaching a pytz zone that way
    #   yields an offset based on LMT instead of the proper PST/PDT offset.
    recording_start_datetime = pst_timezone.localize(edf_start_time.replace(tzinfo=None))
elif isinstance(edf_start_time, (int, float)):
    # POSIX timestamp: convert the instant straight into Pacific time, independent of
    # the timezone configured on the machine running the notebook.
    recording_start_datetime = datetime.datetime.fromtimestamp(edf_start_time, tz=pst_timezone)
else:
    # Fail here with a clear message instead of a NameError in a later cell.
    raise TypeError(
        f"Unsupported meas_date {edf_start_time!r} ({type(edf_start_time).__name__}); "
        "expected a datetime or a numeric timestamp"
    )

# Load Labels

In [4]:
# Read the manually scored hypnogram and turn its 'R.Time' column into
# timezone-aware timestamps (the values are tagged as America/Los_Angeles time).
labels_df = pd.read_csv(
    '../data/raw/02_hypnogram_data/test12_Wednesday_06_Hypnogram_JKB_1Hz.csv'
).assign(**{
    'R.Time': lambda df: pd.to_datetime(df['R.Time']).dt.tz_localize('America/Los_Angeles'),
})


In [5]:
# Offset (whole seconds, truncated by int()) from the start of the EDF recording
# to the first labelled timestamp.
start_seconds = int(
    (labels_df['R.Time'].iat[0] - recording_start_datetime).total_seconds()
)
# The analysis window covers one day from that offset.
end_seconds = start_seconds + 60 * 60 * 24

In [6]:
# Restrict the recording to the one-day window that starts at the first label.
# NOTE: crop() modifies `raw` in place (its return value is not needed), so re-running
# this cell crops the already-cropped recording again; re-run the EDF loading cell first.
raw.crop(start_seconds, end_seconds)
# Sanity check: duration of the cropped recording in seconds vs. number of label rows.
# n_times gives the sample count without reading a full day of data into memory, and
# the sampling rate comes from the file header instead of a hard-coded 500.
print(raw.n_times / raw.info['sfreq'])
print(len(labels_df))

86400.002
294692


# Sleep Staging

In [7]:
# Set up YASA's pretrained sleep stager on the cropped recording, using the
# single EEG_ICA5 channel as the EEG input.
sleep_stage = yasa.SleepStaging(
    raw,
    eeg_name='EEG_ICA5',
)

NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).


In [8]:
# Run the stager: fit() first, then predict() to obtain the predicted hypnogram.
# `hypno` holds one stage label per epoch; the later cells treat each label as
# covering 30 label rows and translate it through `labelmap`
# (keys 'W', 'N1', 'N2', 'N3', 'R').
sleep_stage.fit()
hypno = sleep_stage.predict()

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


### Map our labels to theirs

In [10]:
# Translation from the stage codes predicted by YASA to the label names used in
# our hypnogram. N2 and N3 are both folded into 'SWS'.
# NOTE(review): treating N1 as 'Quiet Waking' is a project choice — confirm it is intended.
labelmap = dict(
    W='Active Waking',
    N1='Quiet Waking',
    N2='SWS',
    N3='SWS',
    R='REM',
)

In [11]:
# Keep the first (end_seconds - start_seconds) label rows as an independent copy.
# NOTE(review): this matches the cropped recording only if there is one label row
# per second (the file name says 1Hz) — confirm.
labels_df_subset = labels_df.head(end_seconds - start_seconds).copy()

In [12]:
# Expand the per-epoch predictions to one value per label row by repeating each
# predicted stage 30 times in a row.
# NOTE(review): 30 presumably is the epoch length in seconds with 1 Hz labels — confirm.
labels_df_subset['Yasa_predicted_label'] = np.repeat(list(hypno), 30)

In [13]:
# Translate each predicted YASA stage code into our label vocabulary.
# A stage code that is missing from `labelmap` raises KeyError.
labels_df_subset['Yasa_mapped_label'] = (
    labels_df_subset['Yasa_predicted_label'].map(lambda stage: labelmap[stage])
)

In [14]:
# Confusion matrix of manual labels (rows) against mapped YASA predictions (columns),
# restricted to and ordered by the four classes listed in `labels`.
labels = ['Active Waking', 'Quiet Waking', 'SWS', 'REM']
conf_matr = pd.DataFrame(
    confusion_matrix(
        labels_df_subset['Simple.Sleep.Code'],
        labels_df_subset['Yasa_mapped_label'],
        labels=labels,
    ),
    index=[f'True_{label}' for label in labels],
    columns=[f'Predicted_{label}' for label in labels],
)
conf_matr

Unnamed: 0,Predicted_Active Waking,Predicted_Quiet Waking,Predicted_SWS,Predicted_REM
True_Active Waking,24990,1168,321,372
True_Quiet Waking,12356,1782,169,465
True_SWS,8119,1491,12395,476
True_REM,12323,110,51,300


In [15]:
# Fraction of label rows where the mapped YASA prediction equals the manual label.
true_labels = labels_df_subset['Simple.Sleep.Code']
predicted_labels = labels_df_subset['Yasa_mapped_label']
is_correct = true_labels == predicted_labels

# Accuracy over every row of the subset. Rows whose manual label is not one of the
# classes in `labels` can never match a prediction, so they count as errors here.
print('YASA accuracy:',
      np.mean(is_correct))

# Accuracy over only the rows whose manual label is one of `labels`, i.e. the same
# rows the confusion matrix (built with labels=labels) is computed on. This makes
# the figure directly comparable with the matrix diagonal.
in_matrix = true_labels.isin(labels)
print('YASA accuracy (rows with a true label in `labels`):',
      np.mean(is_correct[in_matrix]))

YASA accuracy: 0.4567939814814815
