
# EEG Dataset with ML Notebook Example


## Downloads 

In [None]:
import os
import requests
import zipfile
import io

# Local folder the dataset archive is extracted into and that is searched for sub-1.
dataset_dir = "ds005555"

def find_sub1(base):
    """Return the first non-empty ``sub-1`` directory found under ``base``.

    The candidate locations are checked in this order:
    ``base/sub-1``, ``base/ds005555-main/sub-1`` and ``base/ds005555/sub-1``.

    Parameters
    ----------
    base : str
        Directory under which the dataset may have been extracted.

    Returns
    -------
    str or None
        Path of the first candidate that is an existing directory containing
        at least one entry, or ``None`` when no candidate qualifies.
    """
    # check common extraction locations
    candidates = [
        os.path.join(base, 'sub-1'),
        os.path.join(base, 'ds005555-main', 'sub-1'),
        os.path.join(base, 'ds005555', 'sub-1')
    ]
    for p in candidates:
        if not os.path.isdir(p):
            continue
        # any() stops at the first entry, leaving the scandir iterator open;
        # the with-block closes it so the directory handle is not leaked.
        with os.scandir(p) as entries:
            if any(entries):
                return p
    return None

# If sub-1 already exists, skip download
existing = find_sub1(dataset_dir)
if existing:
    print(f"Found existing dataset at: {existing}")
    # print a few files to verify
    for entry in sorted(os.listdir(existing))[:20]:
        print(entry)
else:
    print('Dataset not found locally — downloading...')
    url = "https://github.com/OpenNeuroDatasets/ds005555/archive/refs/heads/main.zip"
    # (connect, read) timeouts so a stalled connection fails instead of hanging
    # the cell forever. The body is read into memory in full via r.content, so
    # no streaming mode is requested.
    r = requests.get(url, timeout=(10, 60))
    r.raise_for_status()
    # Close the archive as soon as extraction is done.
    with zipfile.ZipFile(io.BytesIO(r.content)) as z:
        z.extractall(dataset_dir)
    print(f"Extracted files to ./{dataset_dir}/")
    # try locating sub-1 after extraction
    extracted = find_sub1(dataset_dir)
    if extracted:
        print(f"Located sub-1 at: {extracted}")
        for entry in sorted(os.listdir(extracted))[:20]:
            print(entry)
    else:
        print('Warning: sub-1 not found after extraction. Check repo contents.')

Found existing dataset at: ds005555\ds005555-main\sub-1
eeg
sub-1_scans.tsv


## Example of opening a dataset

In [None]:
%pip install pandas
import pandas as pd
import json

path = os.path.join('ds005555', 'ds005555-main', 'sub-1', 'eeg')
data = pd.read_csv(os.path.join(path, 'sub-1_task-Sleep_acq-psg_events.tsv'), sep = '\t')
print(data)

with open(os.path.join(path, 'sub-1_task-Sleep_acq-psg_events.json')) as json_data:
    d = json.load(json_data)
    json_data.close()
    print('\n', d)

     onset  duration  begsample  endsample  offset  stage_hum  stage_ai
0        0        30          1       7680       0          3         3
1       30        30       7681      15360       0          3         3
2       60        30      15361      23040       0          3         3
3       90        30      23041      30720       0          3         3
4      120        30      30721      38400       0          3         3
..     ...       ...        ...        ...     ...        ...       ...
910  27300        30    6988801    6996480       0          0         0
911  27330        30    6996481    7004160       0          0         0
912  27360        30    7004161    7011840       0          0         0
913  27390        30    7011841    7019520       0          0         0
914  27420        30    7019521    7027200       0          0         0

[915 rows x 7 columns]

 {'onset': {'Description': 'Start of epoch in seconds', 'HED': '(Label/onset, ID/#)'}, 'duration': {'Descriptio

## Building and training the ML model

In [None]:
# Install TensorFlow into the environment used by this notebook's kernel.
# NOTE: after a fresh install the kernel may need to be restarted before the
# import below picks up the newly installed package.
%pip install tensorflow
import tensorflow as tf

Collecting tensorflowNote: you may need to restart the kernel to use updated packages.

  Downloading tensorflow-2.20.0-cp313-cp313-win_amd64.whl.metadata (4.6 kB)
Collecting absl-py>=1.0.0 (from tensorflow)
  Downloading absl_py-2.3.1-py3-none-any.whl.metadata (3.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow)
  Downloading flatbuffers-25.9.23-py2.py3-none-any.whl.metadata (875 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow)
  Downloading gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)
Collecting google_pasta>=0.1.1 (from tensorflow)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from tensorflow)
  Downloading libclang-18.1.1-py2.py3-none-win_amd64.whl.metadata (5.3 kB)
Collecting opt_einsum>=2.3.2 (from tensorflow)
  Downloading opt_einsum-3.4.0-py3-none-any.whl.metadata (6.3 kB)
Collect