In [None]:
!pip install mne

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting mne
  Downloading mne-1.2.2-py3-none-any.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 12.4 MB/s 
Installing collected packages: mne
Successfully installed mne-1.2.2


In [None]:
# Colab only: mount Google Drive at /content/drive so the paths under
# /content/drive/MyDrive used by the later cells resolve.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
import numpy as np
import pandas as pd
import mne
import glob
import os

In [None]:
# Folder searched for the *PSG.edf recordings and their *Hypnogram.edf files.
# Keep the trailing slash: make_epochs_df builds the hypnogram glob pattern by
# plain string concatenation onto this value.
path_telemetry = "/content/drive/MyDrive/BU Projects/CS777/EEG_Project/sleep-edf-database-expanded-1.0.0/sleep-telemetry/"

In [None]:
def make_stage_map(stages):
  """Return a {description: class id} dict for the known sleep stages in `stages`.

  Stages 3 and 4 share class id 3. Any entry of `stages` that is not one of
  the six known sleep-stage descriptions is left out. Keys appear in the order
  they are first seen in `stages`.
  """
  stage_ids = {
      'Sleep stage W': 0,
      'Sleep stage 1': 1,
      'Sleep stage 2': 2,
      'Sleep stage 3': 3,
      'Sleep stage 4': 3,
      'Sleep stage R': 4,
  }
  return {name: stage_ids[name] for name in stages if name in stage_ids}

In [None]:
# All PSG recordings in the telemetry folder, sorted by path and stored as a
# NumPy array; make_epochs_df picks a recording by integer position.
files_psg = np.array(sorted(glob.glob(os.path.join(path_telemetry, "*PSG.edf"))))

# EEG/EOG channel names. No cell shown here reads this list - TODO confirm it is still needed.
channels = ["EEG Fpz-Cz", "EEG Pz-Oz", "EOG horizontal"]

In [None]:
def make_epochs_df(f_num):
  """Build the signal and label tables for one telemetry recording.

  Reads the PSG file at position `f_num` of the module-level `files_psg`,
  attaches the matching hypnogram annotations, cuts the annotated stages into
  30-second epochs and flattens the epochs into DataFrames.

  Args:
    f_num: Integer position of the recording in `files_psg`.

  Returns:
    A `(data, labels)` tuple of DataFrames that share the row order of
    `Epochs.to_data_frame()`. `data` has the columns patient, timestep, time
    and the four signal channels; `labels` has patient, timestep, time and
    condition.

  Raises:
    IndexError: if `f_num` is out of range, or if no hypnogram file matches
      the recording's name prefix.
  """
  psg_fname = files_psg[f_num]

  # Fixed-position slices of the path. Assumes 16-character file names such as
  # "ST7011J0-PSG.edf" - TODO confirm for every file in the folder.
  hypnogram_prefix = psg_fname[-16:-9]  # looks up the hypnogram file
  patient_id = psg_fname[-16:-8]        # stored in the `patient` column

  with mne.use_log_level(verbose=False):
    # The recording is NOT converted to a DataFrame here: only the epoched
    # data further down is needed, and a whole-recording frame is costly.
    raw = mne.io.read_raw_edf(psg_fname, stim_channel='Event marker', misc=['Temp rectal'], verbose=False)
    annot = mne.read_annotations(glob.glob(path_telemetry + hypnogram_prefix + '*' + 'Hypnogram.edf')[0])
    raw.set_annotations(annot, emit_warning=False, verbose=False)

    # Only map the stage descriptions that actually occur in this recording.
    descriptions = raw.annotations.to_data_frame().description.unique().tolist()
    stage_map = make_stage_map(descriptions)

    # Crop window: from 10 minutes before the onset of the second annotation
    # to 10 minutes after the onset of the last annotation, or of the
    # second-to-last one when the last description does not end in 'W'.
    last_w_idx = -1 if annot[-1]['description'][-1] == 'W' else -2
    annot.crop(annot[1]['onset'] - 10 * 60, annot[last_w_idx]['onset'] + 10 * 60, verbose=False)
    raw.set_annotations(annot, emit_warning=False, verbose=False)

    events_train, _ = mne.events_from_annotations(raw, event_id=stage_map, chunk_duration=30, verbose=False)

    # Each epoch ends one sample period short of 30 s.
    tmax = 30. - 1. / raw.info['sfreq']
    epochs_train = mne.Epochs(raw=raw, events=events_train, event_id=stage_map, tmin=0., tmax=tmax, baseline=None, verbose=False)
    epochs_df = epochs_train.to_data_frame().drop(columns=['Marker'])
    epochs_df['patient'] = patient_id

    # Expose the running row number as an explicit `timestep` column.
    epochs_df = epochs_df.reset_index().rename(columns={"index":"timestep"})

    labels = epochs_df[["patient", "timestep", "time", "condition"]]
    data = epochs_df[["patient", "timestep", "time", "EEG Fpz-Cz", "EEG Pz-Oz", "EOG horizontal", "EMG submental"]]

  return data, labels

In [None]:
def optimize_dtypes(df):
    """Return a copy of `df` with its numeric columns downcast to smaller dtypes.

    int64/int32 columns are passed through `pd.to_numeric(downcast='integer')`
    and float64/float32 columns through `pd.to_numeric(downcast='float')`.
    Columns of any other dtype are carried over unchanged.

    The input frame is not modified. This matters when `df` is a column
    selection of another frame: assigning into such a selection raises
    SettingWithCopyWarning and silently changes the caller's object.

    Args:
        df: DataFrame to shrink.

    Returns:
        A new DataFrame with the same columns, index and values as `df`.
    """
    optimized = df.copy()
    int_cols = optimized.select_dtypes(include=['int64', 'int32']).columns.tolist()
    float_cols = optimized.select_dtypes(include=['float64', 'float32']).columns.tolist()

    # Column-by-column assignment replaces each column outright, and is a
    # no-op when a dtype group is empty.
    for col in int_cols:
        optimized[col] = pd.to_numeric(optimized[col], downcast='integer')
    for col in float_cols:
        optimized[col] = pd.to_numeric(optimized[col], downcast='float')

    return optimized

In [None]:
# Process every recording. The per-file frames are collected in lists and
# concatenated once at the end: calling pd.concat inside the loop would recopy
# all previously accumulated rows on every iteration.
per_file_data = []
per_file_labels = []
for i in range(len(files_psg)):
  df_data, df_labels = make_epochs_df(i)
  df_data = optimize_dtypes(df_data)

  per_file_data.append(df_data)
  per_file_labels.append(df_labels)

# pd.concat raises on an empty list, so fall back to an empty frame when no
# recordings were found.
df_full_data = pd.concat(per_file_data) if per_file_data else pd.DataFrame()
df_full_labels = pd.concat(per_file_labels) if per_file_labels else pd.DataFrame()

# Drop the lists so the per-file frames can be garbage-collected.
del per_file_data, per_file_labels

In [None]:
# Write the concatenated signal table to Drive as CSV, without the index column.
df_full_data.to_csv("/content/drive/MyDrive/BU Projects/CS777/EEG_Project/telemetry/telemetry_data_full.csv", index=False)

In [None]:
# Write the concatenated label table to Drive as CSV, without the index column.
df_full_labels.to_csv("/content/drive/MyDrive/BU Projects/CS777/EEG_Project/telemetry/telemetry_label_full.csv", index=False)

In [None]:
# Colab only: authenticate the current user with Google.
# NOTE(review): the gsutil uploads that follow presumably rely on these credentials - confirm.
from google.colab import auth
auth.authenticate_user()

In [None]:
# Copy the signal CSV from Drive into the GCS bucket. The space in
# "BU Projects" is backslash-escaped because this line is run by the shell.
!gsutil -m cp -r /content/drive/MyDrive/BU\ Projects/CS777/EEG_Project/telemetry/telemetry_data_full.csv gs://bu-cs777-rj/term-project-sleep/data/

Copying file:///content/drive/MyDrive/BU Projects/CS777/EEG_Project/telemetry/telemetry_data_full.csv [Content-Type=text/csv]...
==> NOTE: You are uploading one or more large file(s), which would run
significantly faster if you enable parallel composite uploads. This
feature can be enabled by editing the
"parallel_composite_upload_threshold" value in your .boto
configuration file. However, note that if you do this large files will
be uploaded as `composite objects
<https://cloud.google.com/storage/docs/composite-objects>`_,which
means that any user who downloads such objects will need to have a
compiled crcmod installed (see "gsutil help crcmod"). This is because
without a compiled crcmod, computing checksums on composite objects is
so slow that gsutil disables downloads of composite objects.

-
Operation completed over 1 objects/7.4 GiB.                                      


In [None]:
# Copy the label CSV from Drive into the GCS bucket. The space in
# "BU Projects" is backslash-escaped because this line is run by the shell.
!gsutil -m cp -r /content/drive/MyDrive/BU\ Projects/CS777/EEG_Project/telemetry/telemetry_label_full.csv gs://bu-cs777-rj/term-project-sleep/data/

Copying file:///content/drive/MyDrive/BU Projects/CS777/EEG_Project/telemetry/telemetry_label_full.csv [Content-Type=text/csv]...
/ [0/1 files][    0.0 B/  4.2 GiB]   0% Done                                    ==> NOTE: You are uploading one or more large file(s), which would run
significantly faster if you enable parallel composite uploads. This
feature can be enabled by editing the
"parallel_composite_upload_threshold" value in your .boto
configuration file. However, note that if you do this large files will
be uploaded as `composite objects
<https://cloud.google.com/storage/docs/composite-objects>`_,which
means that any user who downloads such objects will need to have a
compiled crcmod installed (see "gsutil help crcmod"). This is because
without a compiled crcmod, computing checksums on composite objects is
so slow that gsutil disables downloads of composite objects.

/ [1/1 files][  4.2 GiB/  4.2 GiB] 100% Done  83.3 MiB/s ETA 00:00:00           
Operation completed over 1 objec