# A. Importing Libraries

In [23]:
from scipy.io import loadmat
import mne
import os
import glob
import h5py

import numpy as np

# B. Importing Data

## 1. Data File Structure

- `data/`
  - `openendedloosely_cleaned/`
    - `Data_Design_Sub_x.mat`
  - `openendedloosely_raw/`
    - `Participant_x/`
      - `filename.vhdr` — header (metadata)
      - `filename.eeg` — EEG signal (binary samples)
      - `filename.vmrk` — event markers (timestamps)


## 2. Loading Raw Data

In [18]:
# Root folder of the raw BrainVision recordings, resolved against the working directory.
mne_data_path = os.path.abspath("../../data/openendedloosely_raw/")
# glob.glob returns matches in arbitrary order; sorting keeps the load order
# (and therefore the dictionary order) identical from run to run.
vhdr_files = sorted(glob.glob(os.path.join(mne_data_path, "Participant_*", "*.vhdr")))

# Maps participant folder name -> loaded MNE Raw object.
eeg_raw_data = {}

for vhdr in vhdr_files:
    # The participant label is the name of the folder that holds the header file.
    participant_name = os.path.basename(os.path.dirname(vhdr))
    print(f"Loading {participant_name}: {vhdr}")

    # preload=True reads the whole recording into memory right away.
    # If one folder holds several .vhdr files, the last one in sorted order is kept.
    raw = mne.io.read_raw_brainvision(vhdr, preload=True)
    eeg_raw_data[participant_name] = raw

# MNE Raw objects
print("\nLoaded EEG datasets:", list(eeg_raw_data.keys()))
print("Total loaded:", len(eeg_raw_data))

Loading Participant_1: c:\Users\Aryo\PersonalMade\Programming\GAN\repo\data\openendedloosely_raw\Participant_1\Feb_07(1)_2014.vhdr
Extracting parameters from c:\Users\Aryo\PersonalMade\Programming\GAN\repo\data\openendedloosely_raw\Participant_1\Feb_07(1)_2014.vhdr...
Setting channel info structure...
Reading 0 ... 2612146  =      0.000 ...  5224.292 secs...
Loading Participant_10: c:\Users\Aryo\PersonalMade\Programming\GAN\repo\data\openendedloosely_raw\Participant_10\april_2(1).vhdr
Extracting parameters from c:\Users\Aryo\PersonalMade\Programming\GAN\repo\data\openendedloosely_raw\Participant_10\april_2(1).vhdr...
Setting channel info structure...
Reading 0 ... 1562175  =      0.000 ...  3124.350 secs...
Loading Participant_15: c:\Users\Aryo\PersonalMade\Programming\GAN\repo\data\openendedloosely_raw\Participant_15\april_16(1).vhdr
Extracting parameters from c:\Users\Aryo\PersonalMade\Programming\GAN\repo\data\openendedloosely_raw\Participant_15\april_16(1).vhdr...
Setting channel i

## 3. Loading Cleaned Data

### Inspect MATLAB Version

In [14]:
def inspect_signature(path):
    """Return two leading byte prefixes of a file for format sniffing.

    Parameters
    ----------
    path : str
        File to open in binary mode.

    Returns
    -------
    tuple of bytes
        ``(first 4 bytes, first 20 bytes)`` of the file. Shorter files yield
        correspondingly shorter prefixes.
    """
    with open(path, 'rb') as handle:
        header = handle.read(128)
    magic = header[:4]
    banner = header[:20]
    return magic, banner

# Build the file list in this cell so it runs on a fresh kernel without
# relying on variables created by cells further down the notebook.
# Sorted because glob.glob returns matches in arbitrary order.
mat_files = sorted(
    glob.glob(
        os.path.join(
            os.path.abspath("../../data/openendedloosely_cleaned/"),
            "Data_Design_Sub_*.mat",
        )
    )
)

# Print the leading signature bytes of every cleaned MAT file.
for f in mat_files:
    filename = os.path.basename(f)
    print(f"\nFile: {filename}")
    print(inspect_signature(f))



File: Data_Design_Sub_1.mat
(b'MATL', b'MATLAB 5.0 MAT-file,')

File: Data_Design_Sub_10.mat
(b'MATL', b'MATLAB 7.3 MAT-file,')

File: Data_Design_Sub_15.mat
(b'MATL', b'MATLAB 7.3 MAT-file,')

File: Data_Design_Sub_20.mat
(b'MATL', b'MATLAB 7.3 MAT-file,')

File: Data_Design_Sub_25.mat
(b'MATL', b'MATLAB 7.3 MAT-file,')

File: Data_Design_Sub_5.mat
(b'MATL', b'MATLAB 7.3 MAT-file,')


### Import data

In [19]:
# Folder containing the cleaned per-subject MATLAB files.
cleaned_path = os.path.abspath("../../data/openendedloosely_cleaned/")
# glob.glob returns matches in arbitrary order; sort so every run sees the
# files in the same order.
mat_files = sorted(glob.glob(os.path.join(cleaned_path, "Data_Design_Sub_*.mat")))

# Filled by the loading loop in this cell.
eeg_cleaned_data = {}

def is_v73(path):
    """Tell whether a MAT file announces itself as MATLAB v7.3.

    The data contains two MATLAB file versions (v5.0 and v7.3) and each one
    needs its own loader, so the first 20 bytes of the file are searched for
    the text "MATLAB 7.3".

    Parameters
    ----------
    path : str
        MAT file to check.

    Returns
    -------
    bool
        True if the signature text is present, False otherwise.
    """
    with open(path, "rb") as handle:
        header_bytes = handle.read(20)
    # Bytes that are not valid UTF-8 are dropped instead of raising.
    header_text = header_bytes.decode("utf-8", errors="ignore")
    return header_text.find("MATLAB 7.3") >= 0

for mat_file in mat_files:
    # The dictionary key is the file name without directory or extension.
    mat_basename = os.path.basename(mat_file)
    key = os.path.splitext(mat_basename)[0]
    print(f"Loading {key}: {mat_file}")

    # v7.3 files are opened with h5py; every other file is read by scipy.io.loadmat.
    # NOTE: the h5py.File handles are left open so their contents can be read
    # later; nothing in this cell closes them.
    eeg_cleaned_data[key] = (
        h5py.File(mat_file, "r") if is_v73(mat_file) else loadmat(mat_file)
    )

# Values are Python dictionaries for v5.0 files
# and HDF5 file objects for v7.3 files.
print("\nLoaded MAT datasets:", list(eeg_cleaned_data.keys()))
print("Total:", len(eeg_cleaned_data))

Loading Data_Design_Sub_1: c:\Users\Aryo\PersonalMade\Programming\GAN\repo\data\openendedloosely_cleaned\Data_Design_Sub_1.mat
Loading Data_Design_Sub_10: c:\Users\Aryo\PersonalMade\Programming\GAN\repo\data\openendedloosely_cleaned\Data_Design_Sub_10.mat
Loading Data_Design_Sub_15: c:\Users\Aryo\PersonalMade\Programming\GAN\repo\data\openendedloosely_cleaned\Data_Design_Sub_15.mat
Loading Data_Design_Sub_20: c:\Users\Aryo\PersonalMade\Programming\GAN\repo\data\openendedloosely_cleaned\Data_Design_Sub_20.mat
Loading Data_Design_Sub_25: c:\Users\Aryo\PersonalMade\Programming\GAN\repo\data\openendedloosely_cleaned\Data_Design_Sub_25.mat
Loading Data_Design_Sub_5: c:\Users\Aryo\PersonalMade\Programming\GAN\repo\data\openendedloosely_cleaned\Data_Design_Sub_5.mat

Loaded MAT datasets: ['Data_Design_Sub_1', 'Data_Design_Sub_10', 'Data_Design_Sub_15', 'Data_Design_Sub_20', 'Data_Design_Sub_25', 'Data_Design_Sub_5']
Total: 6


# C. Exploratory Data Analysis

## 1. Raw Data Description

### Raw EEG

In [20]:
def inspect_raw_eeg(raw):
    """Print a short summary of a raw EEG recording.

    Shows the object's repr, the first ten channel names, the data shape
    (channels x samples) and the first ten samples of the first channel.

    Only the previewed samples are requested from ``raw``; the shape is taken
    from the recording's metadata, so the full data array is never copied.

    Parameters
    ----------
    raw : object
        Recording exposing ``info['ch_names']``, ``n_times`` and
        ``get_data(picks=..., start=..., stop=...)``, such as an MNE Raw object.

    Returns
    -------
    None
    """
    print("=== EEG RAW INFO ===")
    print(raw)
    print("\n=== CHANNELS ===")
    channel_names = raw.info['ch_names']
    print(channel_names[:10], "...")

    print("\n=== DATA SHAPE (channels x samples) ===")
    # Same tuple that get_data().shape would report, without materialising the array.
    print((len(channel_names), raw.n_times))

    print("\n=== SAMPLE PREVIEW (first channel, first 10 samples) ===")
    # Fetch just the first channel's first ten samples; result has shape (1, <=10).
    preview = raw.get_data(picks=[0], start=0, stop=10)
    print(preview[0])

# Summarise one loaded recording; "Participant_1" must be a key of eeg_raw_data.
inspect_raw_eeg(eeg_raw_data["Participant_1"])

=== EEG RAW INFO ===
<RawBrainVision | Feb_07(1)_2014.eeg, 63 x 2612147 (5224.3 s), ~1.23 GiB, data loaded>

=== CHANNELS ===
['Fp1', 'Fz', 'F3', 'F7', 'FT9', 'FC5', 'FC1', 'C3', 'T7', 'TP9'] ...

=== DATA SHAPE (channels x samples) ===
(63, 2612147)

=== SAMPLE PREVIEW (first channel, first 10 samples) ===
[-0.00796543 -0.00796101 -0.00796801 -0.00797028 -0.00797124 -0.00797133
 -0.00797287 -0.00797358 -0.00796958 -0.00796489]


### Cleaned EEG

In [None]:
def inspect_mat_data(mat, n_preview=10):
    """Print the variables held by a loaded MATLAB file with a short preview of each.

    Two container types are understood:

    * a ``dict`` as returned by ``scipy.io.loadmat`` (keys starting with
      ``"__"`` are skipped), and
    * an ``h5py.File`` or any other ``h5py.Group``.

    For array-like entries the shape and the first ``n_preview`` values in
    flattened order are printed, so both container types give a preview of the
    same form. Any other container type is reported instead of being ignored.

    Parameters
    ----------
    mat : dict or h5py.Group
        Loaded file contents.
    n_preview : int, optional
        Maximum number of values shown per entry (default 10).

    Returns
    -------
    None
    """
    print("\n=== MAT CONTENTS ===")

    # Case 1: SciPy dict
    if isinstance(mat, dict):
        # Entries whose names start with "__" are not data variables; skip them.
        keys = [k for k in mat.keys() if not k.startswith("__")]
        print("Keys:", keys)

        for k in keys:
            v = mat[k]
            print(f"\n-- {k} --")
            if isinstance(v, np.ndarray):
                print("shape:", v.shape)
                print("preview:", v.flat[:n_preview])
            else:
                print("type:", type(v))

    # Case 2: h5py file (h5py.File is itself a Group, so sub-groups work too)
    elif isinstance(mat, h5py.Group):
        keys = list(mat.keys())
        print("Keys:", keys)

        for k in keys:
            v = mat[k]
            print(f"\n-- {k} --")
            if isinstance(v, h5py.Dataset):
                print("shape:", v.shape)
                if not v.shape:
                    # Scalar dataset: slicing with [:] is rejected, [()] reads the value.
                    preview = v[()]
                else:
                    # Read only as many leading rows as are needed to collect
                    # n_preview values, then flatten so the preview is a flat
                    # run of values rather than whole rows.
                    row_size = max(int(np.prod(v.shape[1:], dtype=np.int64)), 1)
                    n_rows = -(-n_preview // row_size)  # ceiling division
                    preview = np.asarray(v[:n_rows]).ravel()[:n_preview]
                print("preview:", preview)
            elif isinstance(v, h5py.Group):
                print("Group with subkeys:", list(v.keys()))

    else:
        # Make an unexpected input visible instead of printing nothing.
        print("Unsupported container type:", type(mat))

# Inspect one file of each format: per the signature check output,
# Data_Design_Sub_1 is a v5.0 file and Data_Design_Sub_10 is a v7.3 file.
inspect_mat_data(eeg_cleaned_data["Data_Design_Sub_1"])
inspect_mat_data(eeg_cleaned_data["Data_Design_Sub_10"])

=== MAT CONTENTS ===
Keys: ['Design_1_1_IE', 'Design_1_1_IG', 'Design_1_1_PU', 'Design_1_1_RIE', 'Design_1_1_RIG', 'Design_1_2_IE', 'Design_1_2_IG', 'Design_1_2_PU', 'Design_1_2_RIE', 'Design_1_2_RIG', 'Design_1_3_IE', 'Design_1_3_IG', 'Design_1_3_PU', 'Design_1_3_RIE', 'Design_1_3_RIG', 'Design_1_4_IE', 'Design_1_4_IG', 'Design_1_4_PU', 'Design_1_4_RIE', 'Design_1_4_RIG', 'Design_1_5_IE', 'Design_1_5_IG', 'Design_1_5_PU', 'Design_1_5_RIE', 'Design_1_5_RIG', 'Design_1_6_IE', 'Design_1_6_IG', 'Design_1_6_PU', 'Design_1_6_RIE', 'Design_1_6_RIG', 'Design_1_RST1', 'Design_1_RST2']

-- Design_1_1_IE --
shape: (63, 18000)
preview: [9.02585853e-06 1.60095187e-05 1.89526859e-05 1.58640779e-05
 1.19633963e-05 1.06680568e-05 1.42827485e-05 2.01476532e-05
 2.50142040e-05 2.66213462e-05]

-- Design_1_1_IG --
shape: (63, 41500)
preview: [-5.91825580e-07 -2.38090595e-06 -3.32150893e-06 -3.28518556e-06
 -3.50044570e-06 -4.19427790e-06 -5.18841580e-06 -5.44808762e-06
 -4.49535263e-06 -2.59368025e-06]


## 2. Raw and Cleaned EEG 