# Clonamos el repositorio con los modelos y herramientas

In [1]:
!git clone https://github.com/dannasalazar11/Msc_thesis.git

Cloning into 'Msc_thesis'...
remote: Enumerating objects: 341, done.[K
remote: Counting objects: 100% (169/169), done.[K
remote: Compressing objects: 100% (143/143), done.[K
remote: Total 341 (delta 115), reused 50 (delta 26), pack-reused 172 (from 1)[K
Receiving objects: 100% (341/341), 49.94 MiB | 41.28 MiB/s, done.
Resolving deltas: 100% (206/206), done.


In [2]:
import sys
# Add the cloned repository to the import path so that `gmrrnet_adhd` can be imported.
# NOTE(review): assumes the clone was created under /kaggle/working — confirm when running elsewhere.
sys.path.append('/kaggle/working/Msc_thesis')

from gmrrnet_adhd.utils import get_segmented_data, train_L24O_cv
from tensorflow.keras.mixed_precision import set_global_policy
# Set the global Keras dtype policy to 'mixed_float16' before any model is built.
set_global_policy('mixed_float16')


import tensorflow as tf
import numpy as np
import random
import os

# Set the seed
seed = 42

# Seed the main random number generators (NumPy, Python's `random`, TensorFlow)
np.random.seed(seed)
random.seed(seed)
tf.random.set_seed(seed)

2025-05-02 20:57:35.911944: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746219456.085594      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746219456.132964      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


# Importar base de datos segmentada (segmentos de 4 s con traslape del 50%, es decir, de 2 s)

In [3]:
from gmrrnet_adhd.models.spatio_temporal import prepare_streams_4s

# Load the segmented data: segment array, label array and subject identifiers.
# NOTE(review): presumably `sbjs` holds one subject id per segment — confirm in get_segmented_data.
X, y, sbjs = get_segmented_data()

# Sanity check: the subject-wise cross-validation further down indexes segments, labels
# and subject ids together, so they must all describe the same number of segments.
assert X.shape[0] == y.shape[0] == len(sbjs), (
    "segments, labels and subject ids must have the same length"
)

# Show the array shapes and the number of subject entries.
X.shape, y.shape, len(sbjs)

((8213, 19, 512), (8213, 2), 8213)

## Preprocesamiento de los datos mencionado por la propuesta

| Variable   | Forma resultante | Cálculo exacto                                                                                                                                                                                                                |
| ---------- | ---------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| **`freq`** | `(N, 20, 1)`     | - PSD con `welch(signal, fs=128, nperseg=512)`.<br>- Potencia media en **20 bandas log‑espaciadas** entre 1 Hz y 64 Hz.<br>- Promedio sobre canales → vector de 20.<br>- Se añade un eje final de tamaño 1.                   |
| **`temp`** | `(N, 10, 1)`     | - Se recorta cada segmento a 510 muestras (de 512).<br>- Se dividen en **10 ventanas** consecutivas de 51 muestras (≈ 400 ms).<br>- **Media aritmética** dentro de cada ventana, promediando sobre canales.<br>- Se añade un eje final de tamaño 1. |
| **`spat`** | `(N, C, 1)`      | - Para cada canal: **RMS** del segmento `sqrt(mean(x**2))`.<br>- Se añade un eje final de tamaño 1.                                                                                                                           |

In [4]:
# Sampling rate in Hz passed to the stream extraction (512-sample segments of 4 s).
SAMPLING_RATE_HZ = 128

# Derive the frequency, temporal and spatial feature streams from the raw segments.
freq, temp, spat = prepare_streams_4s(X, fs=SAMPLING_RATE_HZ)

# Show the shape of each stream.
(freq.shape, temp.shape, spat.shape)

((8213, 20, 1), (8213, 10, 1), (8213, 19, 1))

In [5]:
from sklearn.preprocessing import StandardScaler


def _fit_and_standardize(stream):
    """Fit a StandardScaler on one feature stream and return it with the scaled stream.

    The stream is flattened to 2-D as ``(-1, stream.shape[1])`` for scikit-learn, so the
    feature width is taken from the array itself instead of being hard-coded. The scaled
    values are reshaped back to the stream's original shape.

    Returns:
        A ``(scaler, scaled_stream)`` tuple.
    """
    flat = stream.reshape(-1, stream.shape[1])
    scaler = StandardScaler().fit(flat)
    return scaler, scaler.transform(flat).reshape(stream.shape)


# NOTE(review): the scalers are fit on ALL segments, including those of subjects that are
# later held out as test folds by the leave-subjects-out cross-validation. Unless
# train_L24O_cv re-scales per fold (TODO confirm), test-set statistics leak into the
# preprocessing; fitting the scalers on each training fold only would avoid this.
scaler_f, freq = _fit_and_standardize(freq)
scaler_t, temp = _fit_and_standardize(temp)
scaler_s, spat = _fit_and_standardize(spat)

# Importamos el modelo y definimos los hiperparámetros

In [6]:
from gmrrnet_adhd.models.spatio_temporal import build_eeg_attention_model
from tensorflow.keras.optimizers import Adam

model_name = "spatio_temporal"

# Per-sample input shape of each stream (leading sample axis dropped).
model_args = dict(
    freq_shape=freq.shape[1:],  # (20,1)
    temp_shape=temp.shape[1:],  # (10,1)
    spat_shape=spat.shape[1:],  # (19,1)
)

# The optimizer is given as a zero-argument factory rather than as an instance.
# NOTE(review): presumably so that every CV fold gets a fresh optimizer — confirm in train_L24O_cv.
compile_args = dict(
    optimizer=lambda: Adam(1e-4, clipnorm=1.0),
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# Build one model instance here to print its architecture.
model = build_eeg_attention_model(**model_args)
model.summary()

I0000 00:00:1746219562.014694      19 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13942 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5
I0000 00:00:1746219562.015308      19 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13942 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability: 7.5


# Resultados - Leave 24 Subjects Out

In [7]:
import os

import pickle

# Location of the pickled fold definition inside the Kaggle input dataset.
FOLDS_PATH = "/kaggle/input/ieee-tdah-control-database/folds.pkl"

# NOTE(review): pickle.load can execute arbitrary code while deserializing; only load
# this file from a dataset you trust.
with open(FOLDS_PATH, "rb") as folds_file:
    folds = pickle.load(folds_file)

In [8]:
# The model is configured with three input shapes, so the data is passed as a list
# with one array per stream: [freq, temp, spat].
X_total = [freq, temp, spat]            # each array shape (N, …)

# Run the cross-validation over the preloaded folds. The model builder (not a built
# model) is passed together with its build and compile arguments. `model_name` reuses
# the variable defined with the hyperparameters instead of repeating the literal.
results = train_L24O_cv(
    build_eeg_attention_model,
    X_total,
    y,
    sbjs,
    model_args,
    compile_args,
    folds,
    model_name=model_name,
)

Fold 1/5. Test subjects: ['v28p', 'v274', 'v1p', 'v231', 'v22p', 'v29p', 'v206', 'v238', 'v31p', 'v35p', 'v177', 'v200', 'v112', 'v113', 'v48p', 'v140', 'v131', 'v125', 'v55p', 'v143', 'v43p', 'v305', 'v134', 'v114']


I0000 00:00:1746219595.757939      72 service.cc:148] XLA service 0x7f0618014690 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1746219595.758463      72 service.cc:156]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1746219595.758485      72 service.cc:156]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5
I0000 00:00:1746219600.106381      72 cuda_dnn.cc:529] Loaded cuDNN version 90300
I0000 00:00:1746219617.191155      72 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 92ms/step
Fold metrics: {'accuracy': 0.7254632807137955, 'recall': 0.7219091259232407, 'precision': 0.7288098473374578, 'kappa': 0.44650322619859406, 'auc': 0.7219091259232406}
Fold 2/5. Test subjects: ['v18p', 'v39p', 'v234', 'v32p', 'v190', 'v6p', 'v254', 'v204', 'v24p', 'v183', 'v246', 'v219', 'v298', 'v41p', 'v47p', 'v308', 'v52p', 'v300', 'v59p', 'v299', 'v302', 'v51p', 'v109', 'v127']
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 55ms/step
Fold metrics: {'accuracy': 0.6300959232613909, 'recall': 0.5985167901338926, 'precision': 0.6429801257265659, 'kappa': 0.20806375159286639, 'auc': 0.5985167901338926}
Fold 3/5. Test subjects: ['v215', 'v3p', 'v209', 'v37p', 'v213', 'v15p', 'v284', 'v181', 'v19p', 'v34p', 'v263', 'v244', 'v138', 'v121', 'v46p', 'v54p', 'v120', 'v310', 'v147', 'v50p', 'v56p', 'v107', 'v297', 'v108']
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 56ms/step
Fold metri