In [1]:
import mat73
import os
import numpy as np
import glob

# Channels to extract and the electrode label stored for each one.
# The indices are used directly as row indices into data_dict['erp'] below.
# NOTE(review): if these numbers were copied from 1-based MATLAB channel
# numbering they are off by one when used as 0-based NumPy indices — confirm.
channels_to_extract = [37, 30, 47, 12, 49, 14, 51]
channel_names = {'37': 'Fz', '30': 'Pz', '47': 'Cz', '12': 'C3', '49': 'C4', '14': 'T7', '51': 'T8'}

# Base folder containing one sub-folder per subject
base_folder = r"C:\Users\Aryan\Documents\MATLAB\Kato_et_al - Copy\Kato_et_al\ProcData\EEG"

# One dictionary per trial, accumulated over all subjects
all_data = []

# Visit the subject folders in sorted order: glob makes no ordering guarantee,
# and the row order of the saved arrays follows the iteration order.
for subject_folder in sorted(glob.glob(os.path.join(base_folder, "*"))):
    # glob returns full paths, so the hidden-entry test has to look at the
    # last path component rather than the whole path.
    subject_id = os.path.basename(subject_folder)
    if subject_id.startswith('.'):
        continue  # Skip hidden entries
    if not os.path.isdir(subject_folder):
        continue  # Skip stray files sitting next to the subject folders
    subject_path = subject_folder

    # Check if "erp_v1.mat" exists before loading
    data_file = os.path.join(subject_path, "erp_v1.mat")
    if not os.path.isfile(data_file):
        print(f"erp_v1.mat not found in {subject_path}")
        continue

    # Load the MATLAB data for the subject
    data_dict = mat73.loadmat(data_file)

    # 'erp' is indexed as [channel, <second axis>, trial]; the trial count is
    # taken from its third axis (second axis is presumably time samples).
    for trial_no in range(data_dict['erp'].shape[2]):
        # Column 1 of 'log' is read as the smell identifier of the trial
        smell_id = data_dict['log'][trial_no, 1]
        trial_data = {
            'Subject': subject_id,  # Subject folder name doubles as subject ID
            'Smell_ID': smell_id,
            'Trial_No': trial_no,
        }

        # Pull the selected channel rows for this trial, one array per label
        trial_channels = data_dict['erp'][channels_to_extract, :, trial_no]
        for row, channel_index in enumerate(channels_to_extract):
            trial_data[channel_names[str(channel_index)]] = trial_channels[row, :]

        all_data.append(trial_data)

# Fail with a clear message instead of an IndexError on all_data[0], and avoid
# leaving a stale eeg_data.npz in place for later cells to load by mistake.
if not all_data:
    raise RuntimeError(
        f"No trials were loaded from {base_folder!r}; nothing to save to eeg_data.npz"
    )

# Convert the list of per-trial dictionaries to a dictionary of stacked arrays
# (one entry per key, first axis runs over trials).
data_arrays = {
    key: np.array([trial[key] for trial in all_data])
    for key in all_data[0]
}

# Save the data arrays to a .npz file
np.savez("eeg_data.npz", **data_arrays)


erp_v1.mat not found in C:\Users\Aryan\Documents\MATLAB\Kato_et_al - Copy\Kato_et_al\ProcData\EEG\processed_csv.csv


In [2]:
import numpy as np

# Re-open the archive of per-trial arrays from disk.
# NOTE(review): the object returned by np.load for a .npz stays open until it
# is closed; consider closing it (or using `with`) once no later cell needs `data`.
data = np.load("eeg_data.npz")

# Per-trial metadata arrays
subject_ids = data['Subject']
smell_ids = data['Smell_ID']
trial_numbers = data['Trial_No']

# Every remaining entry in the archive is treated as a channel signal
metadata_keys = ('Subject', 'Smell_ID', 'Trial_No')
channel_data = {}
for name in data.files:
    if name in metadata_keys:
        continue
    channel_data[name] = data[name]

# Report what was loaded
for label, array in (
    ("Subject IDs:", subject_ids),
    ("Smell IDs:", smell_ids),
    ("Trial Numbers:", trial_numbers),
):
    print(label, array)
print("Channel Data:")
for channel in channel_data:
    values = channel_data[channel]
    print(f"{channel}: {values}")


Subject IDs: ['KM26_s01' 'KM26_s01' 'KM26_s01' ... 'KM32_s17' 'KM32_s17' 'KM32_s17']
Smell IDs: [ 6. 21. 22. ...  6.  7. 21.]
Trial Numbers: [  0   1   2 ... 291 292 293]
Channel Data:
Fz: [[ -3.56386176  -3.66912336  -3.75025153 ... -14.10950881 -13.97500514
  -13.84228872]
 [ -1.57613316  -0.8783137    0.09223287 ...   5.21065636   5.23893697
    5.26426615]
 [  4.80512885   5.06062827   5.31397297 ...   4.36332127   4.34509158
    4.43269142]
 ...
 [ -4.37325446  -3.86890168  -3.21744967 ... -11.6675289  -11.414944
  -11.31764076]
 [ -2.9649201   -2.9136782   -2.91147216 ...   6.15359672   5.5206109
    4.97618254]
 [  5.35352616   5.12333014   4.79357221 ...  -6.28264792  -6.20795287
   -6.17643739]]
Pz: [[-1.11447194 -0.91301939 -0.69446394 ...  2.67664397  3.17556622
   3.57881299]
 [-4.03986665 -3.95000004 -3.87667886 ... -7.4686358  -7.55743531
  -7.62469126]
 [ 1.06809218  1.22664554  1.39637115 ...  0.88507438  1.19849056
   1.3622879 ]
 ...
 [12.43266023 12.85124657 13.44459