# After the valid file names have been saved in a list called valid_time_files and the corresponding metadata in metadata_time_filtered.csv (both done in saving_metadata.ipynb), this code is run to convert the .mat files to .set files

This code preprocesses the .mat files in the same way Christian did and then converts each file into a .set file, saving them to a folder called "Preprocessed_setfiles". This is only done for the valid subjects, i.e. those whose metadata is in the CSV file metadata_time_filtered.csv.

In [None]:
import numpy as np
import pandas as pd
import scipy.io
import mne
import warnings
import shutil
from IPython.display import clear_output
import os
import mne

In [None]:
# Input/output locations. Raw strings are used so that Windows backslashes are
# never interpreted as escape sequences (e.g. "\C" is an invalid escape that
# triggers a SyntaxWarning on recent Python versions).
mat_dir = r"E:\ChristianMusaeus\Data\eAsMat_1968-14999"
output_dir = r"G:\ChristianMusaeus\Preprocessed_setfiles"  # Change this to your target drive/folder
valid_csv = "metadata_time_filtered.csv"

# Make sure the destination folder exists before anything is written to it.
os.makedirs(output_dir, exist_ok=True)

# SUPPRESS VERBOSE OUTPUT
# Only MNE errors are logged, and all Python warnings are silenced for the
# remainder of the session.
mne.set_log_level('ERROR')
warnings.filterwarnings('ignore')

# processing function
def preprocess_and_export_set(mat_path: str, output_path: str) -> None:
    """Preprocess one .mat recording and export it as epoched EEGLAB .set.

    The .mat file must contain an ``EEG`` struct exposing ``data`` (a 2-D
    array, treated as channels x samples) and ``sr`` (the sampling rate).
    The pipeline is:

    1. keep at most the first 19 channels,
    2. resample to 200 Hz,
    3. band-pass filter 1-70 Hz,
    4. notch filter at 50 Hz,
    5. cut into consecutive, non-overlapping 1-second epochs,
    6. export the epochs in EEGLAB format.

    Args:
        mat_path: Path of the .mat file to read.
        output_path: Path of the .set file to write.

    Raises:
        KeyError: If the .mat file has no ``EEG`` variable.
        ValueError: If ``EEG.data`` is not 2-dimensional.

    Any error raised by SciPy or MNE (missing input file, already existing
    output file, ...) is propagated to the caller unchanged.
    """
    mat = scipy.io.loadmat(mat_path, struct_as_record=False, squeeze_me=True)
    eeg = mat['EEG']
    # Depending on how the struct was stored, loadmat may wrap it in a
    # 0-d object array; unwrap it so attribute access works either way.
    if isinstance(eeg, np.ndarray):
        eeg = eeg.item()

    data = eeg.data  # expected shape: (n_channels, n_samples)
    sfreq = float(eeg.sr)

    if data.ndim != 2:
        raise ValueError(f"Unexpected data shape: {data.shape}")

    # Step 1: Reduce to the first 19 channels. This is done up front because
    # resampling and filtering act on each channel independently, so dropping
    # the surplus channels first gives the same result with less work.
    data = data[:19]

    n_channels = data.shape[0]
    ch_names = [f"Ch{i+1}" for i in range(n_channels)]
    info = mne.create_info(ch_names=ch_names, sfreq=sfreq, ch_types="eeg")
    # Assumes the stored values are in microvolts; MNE works in volts.
    raw = mne.io.RawArray(data * 1e-6, info)  # Convert µV to V

    # Step 2: Resample to 200 Hz
    raw = raw.resample(200)

    # Step 3: Band-pass filter from 1–70 Hz
    raw = raw.filter(l_freq=1., h_freq=70.)

    # Step 4: Notch filter at 50 Hz
    raw = raw.notch_filter(freqs=50)

    # Step 5: Create 1-second fixed-length epochs.
    # MNE includes the sample at tmax, so tmax must stop one sample short of
    # the epoch duration. With tmax=1.0 every epoch would hold 201 samples,
    # overlap its neighbour by one sample, and the final full second of the
    # recording would be dropped for being too short.
    epoch_duration = 1.0
    events = mne.make_fixed_length_events(raw, duration=epoch_duration)
    tmax = epoch_duration - 1.0 / raw.info['sfreq']
    epochs = mne.Epochs(raw, events, tmin=0.0, tmax=tmax,
                        baseline=None, preload=True, reject_by_annotation=True)

    # Step 6: Export to .set (EEGLAB)
    epochs.export(output_path, fmt='eeglab')

# Main loop 
# Convert every subject listed in the metadata CSV.
valid_df = pd.read_csv(valid_csv)
total_files = len(valid_df)
processed_count = 0
skipped_count = 0
# (subject_id, error text) for every file that could not be converted. The
# per-file error line is wiped by the next clear_output() call, so failures
# are collected here and reported once the loop has finished.
failed = []

# Stop converting when less than this much free space is left on the drive.
MIN_FREE_BYTES = 100 * 1024 * 1024  # 100 MB

for i, subject_id in enumerate(valid_df["subject_id"], 1):
    mat_path = os.path.join(mat_dir, f"{subject_id}.mat")
    output_path = os.path.join(output_dir, f"{subject_id}_epoched.set")

    clear_output(wait=True)
    print(f"[{i}/{total_files}] Processing subject {subject_id}...")

    try:
        # We had some issues with the computer not having enough space for all
        # the files. Therefore the free space is checked before every file,
        # and the conversion stops if the drive is (nearly) full. Luckily
        # there was enough space on the drive for all 5778 files.
        free_space_bytes = shutil.disk_usage(output_dir).free
        if free_space_bytes < MIN_FREE_BYTES:
            print(" Drive is full or nearly full. Stopping processing.")
            break

        # An existing output is never overwritten by the export, so skip it
        # right away instead of preprocessing the recording for nothing.
        # This also makes an interrupted run cheap to resume.
        if os.path.exists(output_path):
            skipped_count += 1
            print(" Output already exists, skipping.")
            continue

        preprocess_and_export_set(mat_path, output_path)
        processed_count += 1
        print(f" Saved ({processed_count} processed so far)")

    except Exception as e:
        # Keep going with the remaining subjects, but remember the failure.
        failed.append((subject_id, f"{type(e).__name__}: {e}"))
        print(f" Error: {e}")

print(f"\n Total files successfully processed: {processed_count}")
if skipped_count:
    print(f" Files skipped because the output already existed: {skipped_count}")
if failed:
    print(f" Files that failed: {len(failed)}")
    for failed_id, message in failed:
        print(f"   {failed_id}: {message}")


### Sanity check: print the number of epochs in each file. With 1-second epochs this should be between 600 (10 minutes) and 3600 (60 minutes).

In [None]:
# Raw string so the Windows backslashes are not treated as escape sequences.
set_dir = r"G:\ChristianMusaeus\Preprocessed_setfiles"  # or your actual directory
# Sorted so the report comes out in the same order on every run.
set_files = sorted(f for f in os.listdir(set_dir) if f.endswith('.set'))
n_files = len(set_files)

# Expected number of 1-second epochs: 10 to 60 minutes of recording.
MIN_EPOCHS, MAX_EPOCHS = 600, 3600
out_of_range = []  # (file name, epoch count) for files outside that range

# Loop through files and print epoch counts
for i, file in enumerate(set_files, 1):
    file_path = os.path.join(set_dir, file)
    try:
        n_epochs = len(mne.read_epochs_eeglab(file_path, verbose=False))
    except Exception as e:
        # A file that cannot be read is reported and the check moves on.
        print(f"[{i}/{n_files}] {file} — Error: {e}")
        continue

    print(f"[{i}/{n_files}] {file} — Epochs: {n_epochs}")
    if not MIN_EPOCHS <= n_epochs <= MAX_EPOCHS:
        out_of_range.append((file, n_epochs))

# Summarise the offenders so nobody has to scan thousands of lines by eye.
if out_of_range:
    print(f"\n{len(out_of_range)} file(s) outside the expected "
          f"{MIN_EPOCHS}-{MAX_EPOCHS} epoch range:")
    for file, n_epochs in out_of_range:
        print(f"  {file}: {n_epochs}")
