In [1]:
import os
import scipy.io as sio
import numpy as np
from loguru import logger

In [2]:
def load_mat_file(path):
    try: 
        return sio.loadmat(path)
    except Exception as e:
        logger.error(f"Error loading mat file: {e}")
        raise e

In [4]:
one_hurh_file = load_mat_file(os.path.join('/Users/alemalvarez/code-workspace/TFG/DATA/BBDDs/HURH', 'ADMIL_092.mat'))
one_poctep_file = load_mat_file(os.path.join('/Users/alemalvarez/code-workspace/TFG/DATA/BBDDs/POCTEP', 'ADMIL_001_ES.mat'))
one_classical_file = load_mat_file(os.path.join('/Users/alemalvarez/code-workspace/TFG/DATA', 'AD_001.mat'))


In [6]:
print("HURH file keys:", one_hurh_file.keys())
print("POCTEP file keys:", one_poctep_file.keys())
print("Classical file keys:", one_classical_file.keys())

# Literally the same


HURH file keys: dict_keys(['__header__', '__version__', '__globals__', 'data'])
POCTEP file keys: dict_keys(['__header__', '__version__', '__globals__', 'data'])
Classical file keys: dict_keys(['__header__', '__version__', '__globals__', 'data'])


In [10]:
def print_data_info(data: np.ndarray, name: str) -> None:
    print(f"{name} data shape:", data.shape)
    
    if hasattr(data.dtype, 'names') and data.dtype.names is not None:
        print(f"\n{name} Field names:", data.dtype.names)
        
        print(f"\n{name} Sample values for each field:")
        for field in data.dtype.names:
            print(f"\n{field}:")
            field_data = data[field][0]
            
            if isinstance(field_data, np.ndarray):
                print(f"  Shape: {field_data.shape}")
                if field_data.size > 0:
                    if field_data.dtype.kind in ['U', 'S']:  # String data
                        print("  Sample (first few items):", field_data.flatten()[:1])
                    else:
                        print("  Sample (first few items):", field_data.flatten()[:1])
            else:
                print("  Value:", field_data)

# Extract data from files
data_dict = {
    'HURH': one_hurh_file['data'],
    'POCTEP': one_poctep_file['data'],
    'Classical': one_classical_file['data']
}

# Process each dataset
for name, data in data_dict.items():
    print_data_info(data, name)


HURH data shape: (1, 1)

HURH Field names: ('signal', 'cfg')

HURH Sample values for each field:

signal:
  Shape: (1,)
  Sample (first few items): [array([[ 1.25710435e-03,  3.15745855e-03, -6.66594234e-04, ...,
         -5.64261754e-04,  4.07264638e-04, -2.72627505e-03],
        [ 8.25721709e-04,  4.52803869e-03, -7.59822101e-04, ...,
         -5.57389364e-05, -2.80797851e-03, -3.99958724e-03],
        [ 5.36669940e-04,  2.32617620e-03, -2.02806301e-03, ...,
         -2.71926699e-03, -3.06211447e-03, -1.77855989e-03],
        ...,
        [ 2.06814143e-03, -6.17739656e-04, -2.06111172e-04, ...,
         -7.25497528e-04, -2.41203134e-03, -4.80048450e-04],
        [ 2.20475610e-03,  1.73705623e-03, -3.36707820e-04, ...,
         -1.32982550e-03, -2.43750675e-03, -2.07842807e-03],
        [-6.80697962e-04,  2.33029584e-04, -1.26930935e-04, ...,
         -1.16184431e-03,  9.03567162e-04, -5.83043413e-04]],
       shape=(51000, 68))                                       ]

cfg:
  Shape: (

In [12]:
signal_data_dict = {
    'HURH': data_dict['HURH']['signal'],
    'POCTEP': data_dict['POCTEP']['signal'],
    'Classical': data_dict['Classical']['signal']
}
# Process each signal dataset
for name, signal_data in signal_data_dict.items():
    print(f"\nAnalyzing {name} signal data:")
    print("Type of signal_data:", type(signal_data))
    print("Shape of signal_data:", signal_data.shape)

    # Check if we have data to analyze
    if signal_data.size > 0:
        print("\nFirst element type:", type(signal_data[0, 0]))
        print("First element shape:", signal_data[0, 0].shape)
        
        # Check for structured array fields
        if hasattr(signal_data.dtype, 'names') and signal_data.dtype.names is not None:
            print("\nSignal field names:", signal_data.dtype.names)
        
        # Analyze the actual EEG data structure
        if isinstance(signal_data[0, 0], np.ndarray):
            print("\nDimensions of the first signal array:")
            print(f"  Number of dimensions: {signal_data[0, 0].ndim}")
            print(f"  Shape: {signal_data[0, 0].shape}")
            
            if signal_data[0, 0].ndim == 3:
                print(f"  {signal_data[0, 0].shape[0]} samples, {signal_data[0, 0].shape[1]} time points, {signal_data[0, 0].shape[2]} features/electrodes")

# It's important to notice that the classical data was already separated by samples.
# In POCTEP and HURH, the data is not separated by samples, so we need to do it manually.


Analyzing HURH signal data:
Type of signal_data: <class 'numpy.ndarray'>
Shape of signal_data: (1, 1)

First element type: <class 'numpy.ndarray'>
First element shape: (51000, 68)

Dimensions of the first signal array:
  Number of dimensions: 2
  Shape: (51000, 68)

Analyzing POCTEP signal data:
Type of signal_data: <class 'numpy.ndarray'>
Shape of signal_data: (1, 1)

First element type: <class 'numpy.ndarray'>
First element shape: (132500, 68)

Dimensions of the first signal array:
  Number of dimensions: 2
  Shape: (132500, 68)

Analyzing Classical signal data:
Type of signal_data: <class 'numpy.ndarray'>
Shape of signal_data: (1, 1)

First element type: <class 'numpy.ndarray'>
First element shape: (57, 5000, 68)

Dimensions of the first signal array:
  Number of dimensions: 3
  Shape: (57, 5000, 68)
  57 samples, 5000 time points, 68 features/electrodes


In [15]:
# Create a dictionary for cfg data from all files
cfg_dict = {
    'HURH': one_hurh_file['data']['cfg'][0][0],
    'POCTEP': one_poctep_file['data']['cfg'][0][0],
    'Classical': one_classical_file['data']['cfg'][0][0]
}

# Process each cfg dataset
for name, cfg_data in cfg_dict.items():
    print(f"\nAnalyzing {name} cfg data:")
    print("Type of cfg_data:", type(cfg_data))
    print("Shape of cfg_data:", cfg_data.shape)
    
    # Let's see if there are any other fields in the structure
    if hasattr(cfg_data.dtype, 'names') and cfg_data.dtype.names is not None:
        print("Cfg field names:", cfg_data.dtype.names)

# God damn it, classical data was more flattened than the other ones........



Analyzing HURH cfg data:
Type of cfg_data: <class 'numpy.ndarray'>
Shape of cfg_data: (1, 1)
Cfg field names: ('filtering', 'fs', 'N_discarded_ICA', 'artifacts', 'channels', 'comments')

Analyzing POCTEP cfg data:
Type of cfg_data: <class 'numpy.ndarray'>
Shape of cfg_data: (1, 1)
Cfg field names: ('filtering', 'fs', 'N_discarded_ICA', 'artifacts', 'channels', 'comments')

Analyzing Classical cfg data:
Type of cfg_data: <class 'numpy.ndarray'>
Shape of cfg_data: (1, 1)
Cfg field names: ('fs', 'filtering', 'artifacts', 'N_discarded_ICA', 'ROIs', 'head_model', 'source_orientation', 'source_method', 'trial_length_secs')


In [34]:
cfg_dict['POCTEP']['artifacts'][0][0][0][0]

np.void((array([[0],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [0],
       [0],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [0],
       [1],
       [1],
       [0],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [0],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [0]], dtype=uint8), array([[5]], dtype=uint8), array([[0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
 

In [25]:
cfg_dict['POCTEP']['artifacts'][0][0][0][0]['trial_length_secs'] # wtf dude

array([[5]], dtype=uint8)

In [32]:
one_poctep_file['data']['cfg'][0][0]['artifacts'][0][0][0][0]['trial_length_secs']

array([[5]], dtype=uint8)

In [84]:
filters = one_hurh_file['data']['cfg'][0][0]['filtering'][0][0][0]
for filter in filters:
    print(filter['type'])

['Band-pass']
['Notch']


In [91]:
def extract_important_params_bd(cfg_data):
    # Get the first element where actual data starts
    cfg = cfg_data[0][0]
    
    params = {
        'fs': int(cfg['fs'][0][0][0]),  # Sampling rate
        
        # Filtering info
        'filtering': [
            {
                'type': f['type'][0][0],
                'band': f['band'][0].tolist(),
                'order': int(f['order'][0][0])
            }
            for f in cfg['filtering'][0][0][0]
        ],
        
        # Trial length in seconds
        'trial_length_secs': float(cfg['artifacts'][0][0][0][0]['trial_length_secs'][0]),

        
        # Number of discarded ICA components
        'N_discarded_ICA': int(cfg['N_discarded_ICA'][0][0][0])
    }
    
    return params

important_params = extract_important_params_bd(one_hurh_file['data']['cfg'])

  'fs': int(cfg['fs'][0][0][0]),  # Sampling rate
  'trial_length_secs': float(cfg['artifacts'][0][0][0][0]['trial_length_secs'][0]),
  'N_discarded_ICA': int(cfg['N_discarded_ICA'][0][0][0])


In [92]:
important_params

{'fs': 200,
 'filtering': [{'type': 'B', 'band': [0.5, 70.0], 'order': 2000},
  {'type': 'N', 'band': [49.8, 50.2], 'order': 2000}],
 'trial_length_secs': 5.0,
 'N_discarded_ICA': 4}

In [63]:
one_hurh_file['data']['signal'][0, 0]


array([[ 1.25710435e-03,  3.15745855e-03, -6.66594234e-04, ...,
        -5.64261754e-04,  4.07264638e-04, -2.72627505e-03],
       [ 8.25721709e-04,  4.52803869e-03, -7.59822101e-04, ...,
        -5.57389364e-05, -2.80797851e-03, -3.99958724e-03],
       [ 5.36669940e-04,  2.32617620e-03, -2.02806301e-03, ...,
        -2.71926699e-03, -3.06211447e-03, -1.77855989e-03],
       ...,
       [ 2.06814143e-03, -6.17739656e-04, -2.06111172e-04, ...,
        -7.25497528e-04, -2.41203134e-03, -4.80048450e-04],
       [ 2.20475610e-03,  1.73705623e-03, -3.36707820e-04, ...,
        -1.32982550e-03, -2.43750675e-03, -2.07842807e-03],
       [-6.80697962e-04,  2.33029584e-04, -1.26930935e-04, ...,
        -1.16184431e-03,  9.03567162e-04, -5.83043413e-04]],
      shape=(51000, 68))

In [66]:
def flatten_data(data: np.ndarray, cfg: dict) -> np.ndarray:
    """Reshape the data into segments of specified length.
    
    Args:
        data: Input data array of shape (n_total_samples, n_channels)
        cfg: Configuration dict containing 'trial_length_secs' and 'fs'
        
    Returns:
        Reshaped array of shape (n_segments, n_samples_per_segment, n_channels)
    """
    data = data[0, 0]  # Extract actual data from nested structure
    
    n_samples_per_segment = int(cfg['trial_length_secs'] * cfg['fs'])
    n_channels = data.shape[1]  # Should be 68
    
    # Calculate number of complete segments
    n_total_samples = data.shape[0]
    n_segments = n_total_samples // n_samples_per_segment
    
    # Reshape into segments, truncating any incomplete segment
    return data[:n_segments * n_samples_per_segment].reshape(n_segments, n_samples_per_segment, n_channels)

flatten_data(one_poctep_file['data']['signal'], important_params)



array([[[ 1.08830140e-03,  2.13312808e-04,  2.89282062e-05, ...,
         -2.63363905e-03, -1.74410612e-03, -1.43025114e-03],
        [ 1.00836757e-03, -7.72744575e-05, -2.78727105e-04, ...,
         -2.73792743e-03, -1.97156149e-03, -1.31883443e-03],
        [ 1.05093507e-03, -1.90292666e-04, -5.81450056e-04, ...,
         -3.05801294e-03, -2.23454650e-03, -1.43334988e-03],
        ...,
        [ 1.16311728e-03,  2.26909965e-03, -1.86276751e-04, ...,
         -1.38308078e-03, -4.04432920e-04, -1.15383507e-03],
        [ 1.26011592e-03,  2.29392844e-03,  1.08459286e-04, ...,
         -8.01951730e-04, -1.64547558e-04, -1.02884729e-03],
        [ 1.30695774e-03,  2.34079149e-03,  5.07238462e-04, ...,
         -6.52676668e-05, -4.73710800e-05, -8.10253493e-04]],

       [[ 1.26741291e-03,  2.43265690e-03,  8.86017092e-04, ...,
          3.72468146e-04, -1.10556533e-05, -6.50733759e-04],
        [ 1.14806058e-03,  2.56160959e-03,  1.14153869e-03, ...,
          2.35598759e-04,  3.49951383e