In [1]:
import os
import scipy.io as sio
import numpy as np


In [2]:
from loguru import logger

In [3]:
def load_mat_file(path):
    try: 
        return sio.loadmat(path)
    except Exception as e:
        logger.error(f"Error loading mat file: {e}")
        raise e

In [None]:
def load_files(path):
    files = [f for f in os.listdir(path) if f.endswith('.mat')]
    subjects = []



In [4]:
one_file = load_mat_file(os.path.join('/Users/alemalvarez/code-workspace/TFG/DATA', 'AD_001.mat'))


In [12]:
one_file.keys()
# Everything seems ot be in the 'data' key

dict_keys(['__header__', '__version__', '__globals__', 'data'])

In [19]:
one_file_data = one_file['data']
# Let's explore the structure of the data
print("Type of one_file_data:", type(one_file_data))
print("Shape of one_file_data:", one_file_data.shape)

# If it's a structured array, let's see the field names
if hasattr(one_file_data.dtype, 'names') and one_file_data.dtype.names is not None:
    print("\nField names:", one_file_data.dtype.names)
    
    # Display a sample of each field
    print("\nSample values for each field:")
    for field in one_file_data.dtype.names:
        print(f"\n{field}:")
        field_data = one_file_data[field][0]
        
        # Handle different types of data
        if isinstance(field_data, np.ndarray):
            print(f"  Shape: {field_data.shape}")
            if field_data.size > 0:
                if field_data.dtype.kind in ['U', 'S']:  # String data
                    print("  Sample (first few items):", field_data.flatten()[:5])
                else:
                    print("  Sample (first few items):", field_data.flatten()[:5])
        else:
            print("  Value:", field_data)


Type of one_file_data: <class 'numpy.ndarray'>
Shape of one_file_data: (1, 1)

Field names: ('signal', 'cfg')

Sample values for each field:

signal:
  Shape: (1,)
  Sample (first few items): [array([[[-1.01718020e-18, -8.49316897e-19, -4.52297914e-19, ...,
          -3.17103060e-18, -2.30442605e-19, -6.12338491e-19],
         [ 2.39003668e-13,  1.65808151e-12,  1.50401367e-12, ...,
          -9.59281942e-12,  1.73457152e-12, -3.34729577e-12],
         [ 5.11605867e-13,  3.09761234e-12,  2.99660186e-12, ...,
          -1.81818484e-11,  3.53864492e-12, -6.52254114e-12],
         ...,
         [ 4.85277000e-12,  1.92164971e-11, -3.47822490e-12, ...,
          -8.09977470e-11,  3.41700653e-11,  2.55781296e-11],
         [ 6.19115201e-12,  1.94772372e-11, -3.57473223e-12, ...,
          -7.54479289e-11,  3.06850603e-11,  2.47484107e-11],
         [ 7.29729071e-12,  1.98390423e-11, -3.67402241e-12, ...,
          -6.89384111e-11,  2.68191232e-11,  2.39426153e-11]],

        [[ 8.19220654e-1

In [27]:
# This is a structured array with fields:
# signal
# cfg
# Let's explore the structure of the signal field
signal_data = one_file_data['signal']
print("Type of signal_data:", type(signal_data))
print("Shape of signal_data:", signal_data.shape)

# Based on the output we saw earlier, it seems to be a nested array structure
# Let's try to understand its dimensions and contents
if signal_data.size > 0:
    print("\nFirst element type:", type(signal_data[0, 0]))
    print("First element shape:", signal_data[0, 0].shape)
    
    # Let's see if there are any other fields in the structure
    if hasattr(signal_data.dtype, 'names') and signal_data.dtype.names is not None:
        print("\nSignal field names:", signal_data.dtype.names)
    
    # Let's check the dimensions of the actual EEG data
    if isinstance(signal_data[0, 0], np.ndarray):
        print("\nDimensions of the first signal array:")
        print(f"  Number of dimensions: {signal_data[0, 0].ndim}")
        print(f"  Shape: {signal_data[0, 0].shape}")
        
        if signal_data[0, 0].ndim == 3:
            print(f" {signal_data[0, 0].shape[0]} samples, {signal_data[0, 0].shape[1]} time points, {signal_data[0, 0].shape[2]} features/electrodes")



Type of signal_data: <class 'numpy.ndarray'>
Shape of signal_data: (1, 1)

First element type: <class 'numpy.ndarray'>
First element shape: (57, 5000, 68)

Dimensions of the first signal array:
  Number of dimensions: 3
  Shape: (57, 5000, 68)
 57 samples, 5000 time points, 68 features/electrodes


In [31]:
# Data array is understood. Now let's explore the cfg array.
cfg_data = one_file_data['cfg']
print("Type of cfg_data:", type(cfg_data))
print("Shape of cfg_data:", cfg_data.shape)

# Let's see if there are any other fields in the structure
if hasattr(cfg_data.dtype, 'names') and cfg_data.dtype.names is not None:
    print("\nCfg field names:", cfg_data.dtype.names)
cfg_data


Type of cfg_data: <class 'numpy.ndarray'>
Shape of cfg_data: (1, 1)


array([[array([[(array([[1000]], dtype=uint16), array([[(array(['BandPass'], dtype='<U8'), array([[ 1, 70]], dtype=uint8), array([[3000]], dtype=uint16)),
                        (array(['BandStop'], dtype='<U8'), array([[49, 51]], dtype=uint8), array([[5000]], dtype=uint16))]],
                      dtype=[('type', 'O'), ('band', 'O'), ('order', 'O')]), array([[(array([[5]], dtype=uint8), array([[1],
                               [1],
                               [1],
                               [1],
                               [1],
                               [1],
                               [0],
                               [2],
                               [2],
                               [2],
                               [2],
                               [2],
                               [2],
                               [2],
                               [2],
                               [2],
                               [2],
                   

In [32]:
def parse_cfg(cfg_data):
    # Get the first element since it's nested in arrays
    cfg = cfg_data[0, 0][0]
    
    # Create a dictionary to store the parsed configuration
    parsed_cfg = {
        'sampling_rate': cfg['fs'][0][0][0],  # Get the sampling frequency
        'filters': []
    }
    
    # Parse the filtering information
    filter_info = cfg['filtering'][0]
    for filt in filter_info[0]:
        parsed_cfg['filters'].append({
            'type': filt['type'][0],
            'band': filt['band'][0].tolist(),
            'order': filt['order'][0][0]
        })
    
    return parsed_cfg

# Usage:
parsed = parse_cfg(cfg_data)
print(parsed)

{'sampling_rate': np.uint16(1000), 'filters': [{'type': np.str_('BandPass'), 'band': [1, 70], 'order': np.uint16(3000)}, {'type': np.str_('BandStop'), 'band': [49, 51], 'order': np.uint16(5000)}]}


In [43]:
def explore_array(arr, prefix=''):
    """
    Recursively explore a nested array/structure
    """
    if isinstance(arr, np.ndarray):
        print(f"{prefix}Shape: {arr.shape}, Type: {arr.dtype}")
        
        # If it's a structured array, explore its fields
        if arr.dtype.fields is not None:
            print(f"{prefix}Fields: {arr.dtype.names}")
            for field in arr.dtype.names:
                print(f"\n{prefix}=== Field: {field} ===")
                explore_array(arr[field], prefix + '  ')
        
        # If it's a regular array and not empty, show first element
        elif arr.size > 0:
            print(f"{prefix}First element: {arr.flat[0]}")
            
            # Recursively explore first element if it's also an array
            if isinstance(arr.flat[0], np.ndarray):
                explore_array(arr.flat[0], prefix + '  ')

# Let's explore the cfg_data
print("Exploring cfg_data structure:")
explore_array(cfg_data)

Exploring cfg_data structure:
Shape: (1, 1), Type: object
First element: [[(array([[1000]], dtype=uint16), array([[(array(['BandPass'], dtype='<U8'), array([[ 1, 70]], dtype=uint8), array([[3000]], dtype=uint16)),
          (array(['BandStop'], dtype='<U8'), array([[49, 51]], dtype=uint8), array([[5000]], dtype=uint16))]],
        dtype=[('type', 'O'), ('band', 'O'), ('order', 'O')]), array([[(array([[5]], dtype=uint8), array([[1],
                 [1],
                 [1],
                 [1],
                 [1],
                 [1],
                 [0],
                 [2],
                 [2],
                 [2],
                 [2],
                 [2],
                 [2],
                 [2],
                 [2],
                 [2],
                 [2],
                 [2],
                 [2],
                 [2],
                 [2],
                 [2],
                 [2],
                 [2],
                 [2],
                 [2],
              

In [41]:
cfg_data[0,0][0]['head_model']

array([array(['3-layer ICMB152: brain, skull, scalp'], dtype='<U36')],
      dtype=object)

In [44]:
# Important ones might be trial_length_secs, head_model, source_orientation, atlas, N_discarded_ICA, filtering, fs, 
# lets put them in a dictionary 
def extract_important_params(cfg_data):
    # Get the first element where actual data starts
    cfg = cfg_data[0,0][0]
    
    params = {
        'fs': int(cfg['fs'][0][0][0]),  # Sampling rate
        
        # Filtering info
        'filtering': [
            {
                'type': f['type'][0],
                'band': f['band'][0].tolist(),
                'order': int(f['order'][0][0])
            }
            for f in cfg['filtering'][0][0]
        ],
        
        # Trial length in seconds
        'trial_length_secs': float(cfg['trial_length_secs'][0][0][0]),
        
        # Head model info
        'head_model': str(cfg['head_model'][0][0]),

        
        # Source orientation
        'source_orientation': str(cfg['source_orientation'][0][0][0][0]),
        
        # Atlas information
        'atlas': str(cfg['ROIs'][0][0]['Atlas'][0][0][0]),
        
        # Number of discarded ICA components
        'N_discarded_ICA': int(cfg['N_discarded_ICA'][0][0][0])
    }
    
    return params

# Use the function
important_params = extract_important_params(cfg_data)

# Print in a readable format
import json
print(json.dumps(important_params, indent=2))



{
  "fs": 1000,
  "filtering": [
    {
      "type": "BandPass",
      "band": [
        1,
        70
      ],
      "order": 3000
    },
    {
      "type": "BandStop",
      "band": [
        49,
        51
      ],
      "order": 5000
    }
  ],
  "trial_length_secs": 5.0,
  "head_model": "3-layer ICMB152: brain, skull, scalp",
  "source_orientation": "N",
  "atlas": "D",
  "N_discarded_ICA": 0
}


In [46]:
one_sample = {
    'file_name': 'AD_001.mat',
    'params': important_params,
    'signal': one_file_data['signal'][0,0],
}

one_sample



{'file_name': 'AD_001.mat',
 'params': {'fs': 1000,
  'filtering': [{'type': np.str_('BandPass'), 'band': [1, 70], 'order': 3000},
   {'type': np.str_('BandStop'), 'band': [49, 51], 'order': 5000}],
  'trial_length_secs': 5.0,
  'head_model': '3-layer ICMB152: brain, skull, scalp',
  'source_orientation': 'N',
  'atlas': 'D',
  'N_discarded_ICA': 0},
 'signal': array([[[-1.01718020e-18, -8.49316897e-19, -4.52297914e-19, ...,
          -3.17103060e-18, -2.30442605e-19, -6.12338491e-19],
         [ 2.39003668e-13,  1.65808151e-12,  1.50401367e-12, ...,
          -9.59281942e-12,  1.73457152e-12, -3.34729577e-12],
         [ 5.11605867e-13,  3.09761234e-12,  2.99660186e-12, ...,
          -1.81818484e-11,  3.53864492e-12, -6.52254114e-12],
         ...,
         [ 4.85277000e-12,  1.92164971e-11, -3.47822490e-12, ...,
          -8.09977470e-11,  3.41700653e-11,  2.55781296e-11],
         [ 6.19115201e-12,  1.94772372e-11, -3.57473223e-12, ...,
          -7.54479289e-11,  3.06850603e-11,  