# Modelos Varios

En este notebook están los modelos:

+ CNN (Convolutional Neural Network)
+ Transformer
+ TCN (Temporal Convolutional Network)
+ GRU (Gated Recurrent Unit)
+ Wavenet
+ Tanmet
+ Attention-Only

In [1]:
# Install required packages
%pip install --upgrade pip
%pip install polars numpy scikit-learn matplotlib joblib openpyxl fastexcel tensorflow tensorflow.keras

# For TensorFlow on Mac, you need to install tensorflow-macos
%pip install tensorflow-macos tensorflow-metal

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
[31mERROR: Could not find a version that satisfies the requirement tensorflow-macos (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for tensorflow-macos[0m[31m
[0mNote: you may need to restart the kernel to use updated packages.


In [2]:
# %%
import polars as pl
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import (
    Dense, Dropout, Input, Concatenate, BatchNormalization,
    Conv1D, MaxPooling1D, LayerNormalization, MultiHeadAttention,
    Add, GlobalAveragePooling1D, GRU, Activation, SimpleRNN, Bidirectional
)
import matplotlib.pyplot as plt
import os
from joblib import Parallel, delayed
from datetime import timedelta
import openpyxl

# Configuración de Matplotlib para evitar errores con Tkinter
import matplotlib
matplotlib.use('TkAgg')

2025-03-18 02:15:56.135851: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Constantes

In [3]:
# Definición de la ruta del proyecto
PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), "..", ".."))
SUBJECTS_RELATIVE_PATH = "data/Subjects"
SUBJECTS_PATH = os.path.join(PROJECT_ROOT, SUBJECTS_RELATIVE_PATH)

# Crear directorios para resultados
FIGURES_DIR = os.path.join(PROJECT_ROOT, "figures", "various_models")
os.makedirs(FIGURES_DIR, exist_ok=True)
MODELS_DIR = os.path.join(PROJECT_ROOT, "models")
os.makedirs(MODELS_DIR, exist_ok=True)

subject_files = [f for f in os.listdir(SUBJECTS_PATH) if f.startswith("Subject") and f.endswith(".xlsx")]
print(f"Total sujetos: {len(subject_files)}")

Total sujetos: 54


## Preprocesamiento y Procesamiento de Datos

In [4]:
def get_cgm_window(bolus_time, cgm_df: pl.DataFrame, window_hours: int = 2) -> np.ndarray:
    """
    Obtiene la ventana de datos CGM para un tiempo de bolo específico.

    Parámetros:
    -----------
    bolus_time : datetime
        Tiempo del bolo de insulina
    cgm_df : pl.DataFrame
        DataFrame con datos CGM
    window_hours : int, opcional
        Horas de la ventana de datos (default: 2)

    Retorna:
    --------
    np.ndarray
        Ventana de datos CGM o None si no hay suficientes datos
    """
    window_start = bolus_time - timedelta(hours=window_hours)
    window = cgm_df.filter(
        (pl.col("date") >= window_start) & (pl.col("date") <= bolus_time)
    ).sort("date").tail(24)
    
    if window.height < 24:
        return None
    return window.get_column("mg/dl").to_numpy()

def calculate_iob(bolus_time, basal_df: pl.DataFrame, half_life_hours: float = 4.0) -> float:
    """
    Calcula la insulina activa en el cuerpo (IOB).

    Parámetros:
    -----------
    bolus_time : datetime
        Tiempo del bolo de insulina
    basal_df : pl.DataFrame
        DataFrame con datos de insulina basal
    half_life_hours : float, opcional
        Vida media de la insulina en horas (default: 4.0)

    Retorna:
    --------
    float
        Cantidad de insulina activa
    """
    if basal_df is None or basal_df.is_empty():
        return 0.0
    
    iob = 0.0
    for row in basal_df.iter_rows(named=True):
        start_time = row["date"]
        duration_hours = row["duration"] / (1000 * 3600)
        end_time = start_time + timedelta(hours=duration_hours)
        rate = row["rate"] if row["rate"] is not None else 0.9
        
        if start_time <= bolus_time <= end_time:
            time_since_start = (bolus_time - start_time).total_seconds() / 3600
            remaining = rate * (1 - (time_since_start / half_life_hours))
            iob += max(0.0, remaining)
    return iob

def process_subject(subject_path: str, idx: int) -> list:
    """
    Procesa los datos de un sujeto.

    Parámetros:
    -----------
    subject_path : str
        Ruta al archivo del sujeto
    idx : int
        Índice del sujeto

    Retorna:
    --------
    list
        Lista de diccionarios con características procesadas
    """
    print(f"Procesando {os.path.basename(subject_path)} ({idx+1}/{len(subject_files)})...")
    
    try:
        cgm_df = pl.read_excel(subject_path, sheet_name="CGM")
        bolus_df = pl.read_excel(subject_path, sheet_name="Bolus")
        try:
            basal_df = pl.read_excel(subject_path, sheet_name="Basal")
        except Exception:
            basal_df = None
    except Exception as e:
        print(f"Error al cargar {os.path.basename(subject_path)}: {e}")
        return []

    # Conversión de fechas
    cgm_df = cgm_df.with_columns(pl.col("date").cast(pl.Datetime))
    bolus_df = bolus_df.with_columns(pl.col("date").cast(pl.Datetime))
    if basal_df is not None:
        basal_df = basal_df.with_columns(pl.col("date").cast(pl.Datetime))
    
    cgm_df = cgm_df.sort("date")

    processed_data = []
    for row in bolus_df.iter_rows(named=True):
        bolus_time = row["date"]
        cgm_window = get_cgm_window(bolus_time, cgm_df)
        
        if cgm_window is not None:
            iob = calculate_iob(bolus_time, basal_df)
            hour_of_day = bolus_time.hour / 23.0
            bg_input = row["bgInput"] if row["bgInput"] is not None else cgm_window[-1]
            normal = row["normal"] if row["normal"] is not None else 0.0
            
            # Cálculo del factor de sensibilidad personalizado
            isf_custom = 50.0
            if normal > 0 and bg_input > 100:
                isf_custom = (bg_input - 100) / normal
            
            features = {
                'subject_id': idx,
                'cgm_window': cgm_window,
                'carbInput': row["carbInput"] if row["carbInput"] is not None else 0.0,
                'bgInput': bg_input,
                'insulinCarbRatio': row["insulinCarbRatio"] if row["insulinCarbRatio"] is not None else 10.0,
                'insulinSensitivityFactor': isf_custom,
                'insulinOnBoard': iob,
                'hour_of_day': hour_of_day,
                'normal': normal
            }
            processed_data.append(features)
    
    return processed_data

# Ejecución en paralelo
all_processed_data = Parallel(n_jobs=-1)(
    delayed(process_subject)(
        os.path.join(SUBJECTS_PATH, f), 
        idx
    ) for idx, f in enumerate(subject_files)
)

all_processed_data = [item for sublist in all_processed_data for item in sublist]

# Conversión a DataFrame
df_processed = pl.DataFrame(all_processed_data)
print("Muestra de datos procesados combinados:")
print(df_processed.head())
print(f"Total muestras: {len(df_processed)}")

Procesando Subject17.xlsx (3/54)...
Procesando Subject37.xlsx (2/54)...
Procesando Subject21.xlsx (1/54)...
Procesando Subject7.xlsx (6/54)...
Procesando Subject40.xlsx (4/54)...
Procesando Subject6.xlsx (5/54)...
Procesando Subject41.xlsx (7/54)...
Procesando Subject16.xlsx (8/54)...
Procesando Subject36.xlsx (9/54)...
Procesando Subject20.xlsx (10/54)...
Procesando Subject11.xlsx (11/54)...
Procesando Subject46.xlsx (12/54)...
Procesando Subject50.xlsx (13/54)...
Procesando Subject27.xlsx (14/54)...
Procesando Subject31.xlsx (15/54)...
Procesando Subject30.xlsx (16/54)...
Procesando Subject26.xlsx (17/54)...
Procesando Subject1.xlsx (18/54)...
Procesando Subject51.xlsx (19/54)...
Procesando Subject47.xlsx (20/54)...
Procesando Subject10.xlsx (21/54)...
Procesando Subject29.xlsx (22/54)...
Procesando Subject2.xlsx (23/54)...
Procesando Subject52.xlsx (24/54)...
Procesando Subject44.xlsx (25/54)...
Procesando Subject13.xlsx (26/54)...


Could not determine dtype for column 5, falling back to string


Procesando Subject33.xlsx (27/54)...
Procesando Subject25.xlsx (28/54)...
Procesando Subject48.xlsx (29/54)...
Procesando Subject49.xlsx (30/54)...
Procesando Subject24.xlsx (31/54)...
Procesando Subject32.xlsx (32/54)...
Procesando Subject12.xlsx (33/54)...
Procesando Subject45.xlsx (34/54)...
Procesando Subject53.xlsx (35/54)...
Procesando Subject3.xlsx (36/54)...
Procesando Subject28.xlsx (37/54)...
Procesando Subject35.xlsx (38/54)...
Procesando Subject23.xlsx (39/54)...
Procesando Subject8.xlsx (40/54)...
Procesando Subject19.xlsx (41/54)...
Procesando Subject39.xlsx (42/54)...
Procesando Subject4.xlsx (43/54)...
Procesando Subject54.xlsx (44/54)...
Procesando Subject42.xlsx (45/54)...
Procesando Subject15.xlsx (46/54)...
Procesando Subject14.xlsx (47/54)...
Procesando Subject43.xlsx (48/54)...
Procesando Subject5.xlsx (49/54)...
Procesando Subject38.xlsx (50/54)...
Procesando Subject18.xlsx (51/54)...
Procesando Subject9.xlsx (52/54)...
Procesando Subject22.xlsx (53/54)...
Proces

### División de Ventana CGM y Valores Nulos

In [5]:
# Dividir ventana CGM y otras características
cgm_columns = [f'cgm_{i}' for i in range(24)]
df_cgm = pl.DataFrame({
    col: [row['cgm_window'][i] for row in all_processed_data]
    for i, col in enumerate(cgm_columns)
}, schema={col: pl.Float64 for col in cgm_columns})

# Combinar con otras características
df_processed = pl.concat([
    df_cgm,
    df_processed.drop('cgm_window')
], how="horizontal")

# Verificar valores nulos
print("Verificación de valores nulos en df_processed:")
print(df_processed.null_count())
df_processed = df_processed.drop_nulls()

Verificación de valores nulos en df_processed:
shape: (1, 32)
┌───────┬───────┬───────┬───────┬───┬──────────────────────┬────────────────┬─────────────┬────────┐
│ cgm_0 ┆ cgm_1 ┆ cgm_2 ┆ cgm_3 ┆ … ┆ insulinSensitivityFa ┆ insulinOnBoard ┆ hour_of_day ┆ normal │
│ ---   ┆ ---   ┆ ---   ┆ ---   ┆   ┆ ctor                 ┆ ---            ┆ ---         ┆ ---    │
│ u32   ┆ u32   ┆ u32   ┆ u32   ┆   ┆ ---                  ┆ u32            ┆ u32         ┆ u32    │
│       ┆       ┆       ┆       ┆   ┆ u32                  ┆                ┆             ┆        │
╞═══════╪═══════╪═══════╪═══════╪═══╪══════════════════════╪════════════════╪═════════════╪════════╡
│ 0     ┆ 0     ┆ 0     ┆ 0     ┆ … ┆ 0                    ┆ 0              ┆ 0           ┆ 0      │
└───────┴───────┴───────┴───────┴───┴──────────────────────┴────────────────┴─────────────┴────────┘


### Normalización de Datos

In [6]:
# Normalizar características
scaler_cgm = MinMaxScaler(feature_range=(0, 1))
scaler_other = StandardScaler()

# Normalizar CGM
X_cgm = scaler_cgm.fit_transform(df_processed.select(cgm_columns).to_numpy())
X_cgm = X_cgm.reshape(X_cgm.shape[0], X_cgm.shape[1], 1)

# Normalizar otras características (incluyendo hour_of_day)
other_features = ['carbInput', 'bgInput', 'insulinOnBoard', 'insulinCarbRatio', 
                  'insulinSensitivityFactor', 'subject_id', 'hour_of_day']
X_other = scaler_other.fit_transform(df_processed.select(other_features).to_numpy())

# Etiquetas
y = df_processed.get_column('normal').to_numpy()

# Verificar NaN
print("NaN en X_cgm:", np.isnan(X_cgm).sum())
print("NaN en X_other:", np.isnan(X_other).sum())
print("NaN en y:", np.isnan(y).sum())
if np.isnan(X_cgm).sum() > 0 or np.isnan(X_other).sum() > 0 or np.isnan(y).sum() > 0:
    raise ValueError("Valores NaN detectados en X_cgm, X_other o y")

NaN en X_cgm: 0
NaN en X_other: 0
NaN en y: 0


### División por Sujeto de los Datos

In [7]:
# División por sujeto
subject_ids = df_processed.get_column('subject_id').unique().to_numpy()
train_subjects, temp_subjects = train_test_split(subject_ids, test_size=0.2, random_state=42)
val_subjects, test_subjects = train_test_split(temp_subjects, test_size=0.5, random_state=42)

### Creación de Máscaras

In [8]:
# Crear máscaras
train_mask = df_processed.get_column('subject_id').is_in(train_subjects).to_numpy()
val_mask = df_processed.get_column('subject_id').is_in(val_subjects).to_numpy()
test_mask = df_processed.get_column('subject_id').is_in(test_subjects).to_numpy()

X_cgm_train, X_cgm_val, X_cgm_test = X_cgm[train_mask], X_cgm[val_mask], X_cgm[test_mask]
X_other_train, X_other_val, X_other_test = X_other[train_mask], X_other[val_mask], X_other[test_mask]
y_train, y_val, y_test = y[train_mask], y[val_mask], y[test_mask]
subject_test = df_processed.filter(pl.col('subject_id').is_in(test_subjects)).get_column('subject_id').to_numpy()

print(f"Entrenamiento CGM: {X_cgm_train.shape}, Validación CGM: {X_cgm_val.shape}, Prueba CGM: {X_cgm_test.shape}")
print(f"Entrenamiento Otros: {X_other_train.shape}, Validación Otros: {X_other_val.shape}, Prueba Otros: {X_other_test.shape}")
print(f"Sujetos de prueba: {test_subjects}")

Entrenamiento CGM: (33272, 24, 1), Validación CGM: (2743, 24, 1), Prueba CGM: (8636, 24, 1)
Entrenamiento Otros: (33272, 7), Validación Otros: (2743, 7), Prueba Otros: (8636, 7)
Sujetos de prueba: [ 5 19 32 13 48 49]


## Modelos

### Constantes

In [9]:
TCN_CONFIG = {
    'filters': 64,
    'kernel_size': 3,
    'dilations': [2**i for i in range(4)],
    'dropout_rate': [0.3, 0.2],
    'epsilon': 1e-6
}

TRANSFORMER_CONFIG = {
    'num_heads': 4,
    'key_dim': 32,
    'ff_dim': 128,
    'dropout_rate': 0.2,
    'epsilon': 1e-6
}

WAVENET_CONFIG = {
    'filters': [32, 64, 128],
    'kernel_size': 2,
    'dilations': [2**i for i in range(8)],  # [1, 2, 4, 8, 16, 32, 64, 128]
    'dropout_rate': 0.2
}

TABNET_CONFIG = {
    'feature_dim': 64,
    'output_dim': 32,
    'num_decision_steps': 5,
    'relaxation_factor': 1.5,
    'sparsity_coefficient': 1e-5,
    'batch_momentum': 0.98
}

ATTENTION_CONFIG = {
    'num_heads': 8,
    'key_dim': 64,
    'num_layers': 4,
    'ff_dim': 256,
    'dropout_rate': 0.1
}

GRU_CONFIG = {
    'hidden_units': [128, 64],
    'dropout_rate': 0.2
}

CNN_CONFIG = {
    'filters': [32, 64, 128, 256],
    'kernel_size': 3,
    'pool_size': 2,
    'dropout_rate': 0.2
}

RNN_CONFIG = {
    'hidden_units': [128, 64, 32],
    'dropout_rate': 0.3,
    'recurrent_dropout': 0.2,
    'bidirectional': True,
    'epsilon': 1e-6
}

### Attention-Only Model

In [10]:
def create_attention_block(x: tf.Tensor, num_heads: int, key_dim: int, ff_dim: int, dropout_rate: float) -> tf.Tensor:
    """
    Crea un bloque de atención con feed-forward network.

    Parámetros:
    -----------
    x : tf.Tensor
        Tensor de entrada
    num_heads : int
        Número de cabezas de atención
    key_dim : int
        Dimensión de la clave
    ff_dim : int
        Dimensión de la red feed-forward
    dropout_rate : float
        Tasa de dropout
    
    Retorna:
    --------
    tf.Tensor
        Tensor de salida del bloque de atención
    """
    # Multi-head attention
    attention_output = MultiHeadAttention(
        num_heads=num_heads,
        key_dim=key_dim
    )(x, x)
    attention_output = Dropout(dropout_rate)(attention_output)
    x = LayerNormalization(epsilon=1e-6)(x + attention_output)
    
    # Feed-forward network
    ffn = Dense(ff_dim, activation='relu')(x)
    ffn = Dense(x.shape[-1])(ffn)
    ffn = Dropout(dropout_rate)(ffn)
    
    return LayerNormalization(epsilon=1e-6)(x + ffn)

def create_attention_model(cgm_shape: tuple, other_features_shape: tuple) -> Model:
    """
    Crea un modelo basado únicamente en mecanismos de atención.

    Parámetros:
    -----------
    cgm_shape : tuple
        Forma de los datos CGM (samples, timesteps, features)
    other_features_shape : tuple
        Forma de otras características (samples, features)

    Retorna:
    --------
    Model
        Modelo de atención compilado
    """
    cgm_input = Input(shape=cgm_shape[1:])
    other_input = Input(shape=(other_features_shape[1],))
    
    x = cgm_input
    
    # Stack attention blocks
    for _ in range(ATTENTION_CONFIG['num_layers']):
        x = create_attention_block(
            x,
            ATTENTION_CONFIG['num_heads'],
            ATTENTION_CONFIG['key_dim'],
            ATTENTION_CONFIG['ff_dim'],
            ATTENTION_CONFIG['dropout_rate']
        )
    
    x = GlobalAveragePooling1D()(x)
    x = Concatenate()([x, other_input])
    
    x = Dense(128, activation='relu')(x)
    x = Dropout(ATTENTION_CONFIG['dropout_rate'])(x)
    
    output = Dense(1)(x)
    
    return Model(inputs=[cgm_input, other_input], outputs=output)

### Convolutional Neural Network (CNN)

In [11]:
def create_cnn_model(cgm_shape: tuple, other_features_shape: tuple) -> Model:
    """
    Crea un modelo CNN (Convolutional Neural Network) con entrada dual para datos CGM y otras características.
    
    Parámetros:
    -----------
    cgm_shape : tuple
        Forma de los datos CGM (samples, timesteps, features)
    other_features_shape : tuple
        Forma de otras características (samples, features)
        
    Retorna:
    --------
    Model
        Modelo CNN compilado
    """
    # Entrada CGM
    cgm_input = Input(shape=cgm_shape[1:], name='cgm_input')
    
    # Capas CNN
    conv = Conv1D(filters=CNN_CONFIG['filters'][1], kernel_size=CNN_CONFIG['kernel_size'], activation='relu')(cgm_input)
    conv = BatchNormalization()(conv)
    conv = MaxPooling1D(pool_size=2)(conv)
    
    conv = Conv1D(filters=CNN_CONFIG['filters'][0], kernel_size=CNN_CONFIG['kernel_size'], activation='relu')(conv)
    conv = BatchNormalization()(conv)
    conv = GlobalAveragePooling1D()(conv)
    
    # Entrada de otras características
    other_input = Input(shape=(other_features_shape[1],), name='other_input')
    
    # Combinar características
    combined = Concatenate()([conv, other_input])
    
    # Capas densas
    dense = Dense(64, activation='relu')(combined)
    dense = BatchNormalization()(dense)
    dense = Dropout(CNN_CONFIG['dropout_rate'])(dense)
    
    output = Dense(1, activation='linear')(dense)
    
    return Model(inputs=[cgm_input, other_input], outputs=output)

### Gated Recurrent Unit (GRU)

In [12]:
def create_gru_model(cgm_shape: tuple, other_features_shape: tuple) -> Model:
    '''
    Crea un modelo GRU (Gated Recurrent Unit) con entrada dual para datos CGM y otras características.

    Parámetros:
    -----------
    cgm_shape : tuple
        Forma de los datos CGM (samples, timesteps, features)
    other_features_shape : tuple
        Forma de otras características (samples, features)

    Retorna:
    --------
    Model
        Modelo GRU compilado
    '''
    cgm_input = Input(shape=cgm_shape[1:])
    other_input = Input(shape=(other_features_shape[1],))
    
    x = GRU(GRU_CONFIG['hidden_units'][0], return_sequences=True)(cgm_input)
    x = GRU(GRU_CONFIG['hidden_units'][1])(x)
    x = BatchNormalization()(x)
    
    x = Concatenate()([x, other_input])
    x = Dense(GRU_CONFIG['hidden_units'][1], activation='relu')(x)
    x = Dropout(GRU_CONFIG['dropout_rate'])(x)
    output = Dense(1)(x)
    
    return Model(inputs=[cgm_input, other_input], outputs=output)

### Recurrent Neural Network (RNN)

In [13]:
def create_rnn_model(cgm_shape: tuple, other_features_shape: tuple) -> Model:
    """
    Crea un modelo RNN con capas bidireccionales y skip connections.
    
    Parámetros:
    -----------
    cgm_shape : tuple
        Forma de los datos CGM (samples, timesteps, features)
    other_features_shape : tuple
        Forma de otras características (samples, features)
        
    Retorna:
    --------
    Model
        Modelo RNN compilado
    """
    # Entradas
    cgm_input = Input(shape=cgm_shape[1:])
    other_input = Input(shape=(other_features_shape[1],))
    
    # Capas RNN
    x = cgm_input
    skip_connections = []
    
    for units in RNN_CONFIG['hidden_units']:
        rnn_layer = SimpleRNN(
            units,
            dropout=RNN_CONFIG['dropout_rate'],
            recurrent_dropout=RNN_CONFIG['recurrent_dropout'],
            return_sequences=True
        )
        
        if RNN_CONFIG['bidirectional']:
            x = Bidirectional(rnn_layer)(x)
        else:
            x = rnn_layer(x)
            
        x = BatchNormalization(epsilon=RNN_CONFIG['epsilon'])(x)
        skip_connections.append(x)
    
    # Último RNN sin return_sequences
    final_rnn = SimpleRNN(
        RNN_CONFIG['hidden_units'][-1],
        dropout=RNN_CONFIG['dropout_rate'],
        recurrent_dropout=RNN_CONFIG['recurrent_dropout']
    )
    
    if RNN_CONFIG['bidirectional']:
        x = Bidirectional(final_rnn)(x)
    else:
        x = final_rnn(x)
    
    # Combinar con otras características
    x = Concatenate()([x, other_input])
    
    # Capas densas finales
    x = Dense(64, activation='relu')(x)
    x = BatchNormalization(epsilon=RNN_CONFIG['epsilon'])(x)
    x = Dropout(RNN_CONFIG['dropout_rate'])(x)
    
    output = Dense(1)(x)
    
    return Model(inputs=[cgm_input, other_input], outputs=output)

### TabNet

In [35]:
class GLU(tf.keras.layers.Layer):
    """
    Gated Linear Unit como capa personalizada.
    """
    def __init__(self, units, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.dense = Dense(units * 2)

    def call(self, inputs):
        x = self.dense(inputs)
        return x[:, :self.units] * tf.nn.sigmoid(x[:, self.units:])

class FeatureTransformer(tf.keras.layers.Layer):
    """
    Transformador de características como capa personalizada.
    """
    def __init__(self, feature_dim, batch_momentum=0.98, **kwargs):
        super().__init__(**kwargs)
        self.glu = GLU(feature_dim)
        self.bn = BatchNormalization(momentum=batch_momentum)

    def call(self, inputs):
        x = self.glu(inputs)
        return self.bn(x)

def custom_softmax(x: tf.Tensor, axis: int=-1) -> tf.Tensor:
    """
    Implementación de softmax con estabilidad numérica.

    Parámetros:
    -----------
    x : tf.Tensor
        Tensor de entrada
    axis : int
        Eje de normalización
    
    Retorna:
    --------
    tf.Tensor
        Tensor normal
    """
    exp_x = tf.exp(x - tf.reduce_max(x, axis=axis, keepdims=True))
    return exp_x / tf.reduce_sum(exp_x, axis=axis, keepdims=True)

def glu(x: tf.Tensor, n_units: int) -> tf.Tensor:
    """
    Gated Linear Unit.
    
    Parámetros:
    -----------
    x : tf.Tensor
        Tensor de entrada
    n_units : int
        Número de unidades

    Retorna:
    --------
    tf.Tensor
        Tensor GLU
    """
    return x[:, :n_units] * tf.nn.sigmoid(x[:, n_units:])

def feature_transformer(x: tf.Tensor, feature_dim: int, batch_momentum: float=0.98) -> tf.Tensor:
    """
    Transformador de características.

    Parámetros:
    -----------
    x : tf.Tensor
        Tensor de entrada
    feature_dim : int
        Dimensión de las características
    batch_momentum : float
        Momento de la normalización por lotes
    
    Retorna:
    --------
    tf.Tensor
        Tensor transform
    """
    transform = Dense(feature_dim * 2)(x)
    transform = glu(transform, feature_dim)
    return BatchNormalization(momentum=batch_momentum)(transform)

def create_tabnet_model(cgm_shape: tuple, other_features_shape: tuple) -> Model:
    """
    Crea un modelo TabNet modificado para procesamiento de datos tabulares.
    
    Parámetros:
    -----------
    cgm_shape : tuple
        Forma de los datos CGM
    other_features_shape : tuple
        Forma de otras características
        
    Retorna:
    --------
    Model
        Modelo TabNet compilado
    """
    cgm_input = Input(shape=cgm_shape[1:])
    other_input = Input(shape=(other_features_shape[1],))
    
    cgm_flat = tf.keras.layers.Flatten()(cgm_input)
    x = Concatenate()([cgm_flat, other_input])
    
    for _ in range(TABNET_CONFIG['num_decision_steps']):
        transformer = FeatureTransformer(
            TABNET_CONFIG['feature_dim'],
            TABNET_CONFIG['batch_momentum']
        )
        x = transformer(x)
        
        mask = Dense(x.shape[-1], activation='softmax')(x)
        x = tf.keras.layers.Multiply()([x, mask])
    
    # Final layers
    x = Dense(TABNET_CONFIG['output_dim'], activation='relu')(x)
    x = BatchNormalization()(x)
    output = Dense(1)(x)
    
    return Model(inputs=[cgm_input, other_input], outputs=output)

### Temporal Convolutional Network (TCN)

In [64]:
class CausalPadding(tf.keras.layers.Layer):
    """
    Capa personalizada para padding causal.
    """
    def __init__(self, padding_size, **kwargs):
        super().__init__(**kwargs)
        self.padding_size = padding_size

    def call(self, inputs):
        return tf.pad(inputs, [[0, 0], [self.padding_size, 0], [0, 0]])

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[1] + self.padding_size, input_shape[2])

def create_tcn_block(input_layer: tf.Tensor, filters: int, kernel_size: int, 
                    dilation_rate: int, dropout_rate: float) -> tf.Tensor:
    """
    Crea un bloque TCN (Temporal Convolutional Network).
    
    Parámetros:
    -----------
    input_layer : tf.Tensor
        Capa de entrada
    filters : int
        Número de filtros
    kernel_size : int
        Tamaño del kernel
    dilation_rate : int
        Tasa de dilatación
    dropout_rate : float
        Tasa de dropout
    
    Retorna:
    --------
    tf.Tensor
        Salida del bloque TCN
    """
    # Padding causal para mantener causalidad temporal
    padding_size = (kernel_size - 1) * dilation_rate
    padded_input = CausalPadding(padding_size)(input_layer)
    
    # Convolución dilatada
    conv = Conv1D(
        filters=filters,
        kernel_size=kernel_size,
        dilation_rate=dilation_rate,
        padding='valid',
        activation='relu'
    )(padded_input)
    
    # Normalización y regularización
    conv = LayerNormalization(epsilon=TCN_CONFIG['epsilon'])(conv)
    conv = Dropout(dropout_rate)(conv)
    
    # Conexión residual si las dimensiones coinciden
    if input_layer.shape[-1] == filters:
        cropped_input = input_layer[:, -conv.shape[1]:, :]
        return Add()([conv, cropped_input])
    return conv

def create_tcn_model(input_shape: tuple, other_features_shape: tuple) -> Model:
    """
    Crea un modelo TCN completo.
    
    Parámetros:
    -----------
    input_shape : tuple
        Forma de los datos CGM
    other_features_shape : tuple
        Forma de otras características
    
    Retorna:
    --------
    Model
        Modelo TCN compilado
    """
    # Entradas
    cgm_input = Input(shape=input_shape[1:], name='cgm_input')
    other_input = Input(shape=(other_features_shape[1],), name='other_input')
    
    # Bloques TCN
    x = cgm_input
    skip_connections = []
    
    for dilation_rate in TCN_CONFIG['dilations']:
        tcn_out = create_tcn_block(
            x,
            filters=TCN_CONFIG['filters'],
            kernel_size=TCN_CONFIG['kernel_size'],
            dilation_rate=dilation_rate,
            dropout_rate=TCN_CONFIG['dropout_rate'][0]  # Using first dropout rate for TCN blocks
        )
        skip_connections.append(tcn_out)
        x = tcn_out
    
    # Combinar skip connections
    if skip_connections:
        target_len = skip_connections[-1].shape[1]
        aligned_skips = [
            skip[:, -target_len:, :] for skip in skip_connections
        ]
        x = Add()(aligned_skips)
    
    # Global pooling
    x = GlobalAveragePooling1D()(x)
    
    # Combinar con otras características
    x = tf.keras.layers.Concatenate()([x, other_input])
    
    # Capas densas finales
    x = Dense(128, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(TCN_CONFIG['dropout_rate'][0])(x)  # First dropout rate
    x = Dense(64, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(TCN_CONFIG['dropout_rate'][1])(x)  # Second dropout rate
    
    output = Dense(1, activation='linear')(x)
    
    return Model(inputs=[cgm_input, other_input], outputs=output)

### Transformer

In [65]:
def create_transformer_model(cgm_shape: tuple, other_features_shape: tuple) -> Model:
    """
    Crea un modelo Transformer con entrada dual para datos CGM y otras características.
    
    Parámetros:
    -----------
    cgm_shape : tuple
        Forma de los datos CGM (samples, timesteps, features)
    other_features_shape : tuple
        Forma de otras características (samples, features)
        
    Retorna:
    --------
    Model
        Modelo Transformer compilado
    """
    # Entrada CGM
    cgm_input = Input(shape=cgm_shape[1:], name='cgm_input')
    
    # Transformer block
    attention = MultiHeadAttention(num_heads=TRANSFORMER_CONFIG['num_heads'], key_dim=TRANSFORMER_CONFIG['key_dim'])(cgm_input, cgm_input)
    attention = LayerNormalization(epsilon=TRANSFORMER_CONFIG['epsilon'])(attention + cgm_input)
    
    # Feed-forward network
    ff = Dense(128, activation='relu')(attention)
    ff = Dense(cgm_shape[-1])(ff)
    ff = LayerNormalization(epsilon=TRANSFORMER_CONFIG['epsilon'])(ff + attention)
    
    # Global pooling
    pooled = GlobalAveragePooling1D()(ff)
    
    # Entrada de otras características
    other_input = Input(shape=(other_features_shape[1],), name='other_input')
    
    # Combinar características
    combined = Concatenate()([pooled, other_input])
    
    # Capas densas finales
    dense = Dense(64, activation='relu')(combined)
    dense = LayerNormalization(epsilon=TRANSFORMER_CONFIG['epsilon'])(dense)
    dense = Dropout(TRANSFORMER_CONFIG['dropout_rate'])(dense)
    
    output = Dense(1, activation='linear')(dense)
    
    return Model(inputs=[cgm_input, other_input], outputs=output)

### Wavenet

In [78]:
class WaveNetBlock(tf.keras.layers.Layer):
    """
    Bloque WaveNet personalizado.
    """
    def __init__(self, filters, kernel_size, dilation_rate, dropout_rate, **kwargs):
        super().__init__(**kwargs)
        self.filters = filters
        self.conv = Conv1D(
            filters=filters,
            kernel_size=kernel_size,
            dilation_rate=dilation_rate,
            padding='causal'
        )
        self.bn = BatchNormalization()
        self.activation = Activation('relu')
        self.dropout = Dropout(dropout_rate)
        self.add = Add()
        self.residual_proj = Conv1D(filters, 1, padding='same')

    def call(self, inputs):
        x = self.conv(inputs)
        x = self.bn(x)
        x = self.activation(x)
        x = self.dropout(x)
        
        # Ensure residual has same number of filters
        residual = self.residual_proj(inputs)
        
        # Match temporal dimension
        residual = residual[:, -x.shape[1]:, :]
        return self.add([x, residual])

def create_wavenet_block(x, filters, kernel_size, dilation_rate, dropout_rate):
    """
    Crea un bloque WaveNet con conexiones residuales y skip connections.

    Parámetros:
    -----------
    x : tf.Tensor
        Tensor de entrada
    filters : int
        Número de filtros de la capa convolucional
    kernel_size : int
        Tamaño del kernel de la capa convolucional
    dilation_rate : int
        Tasa de dilatación de la capa convolucional
    dropout_rate : float
        Tasa de dropout

    Retorna:
    --------
    tf.Tensor
        Tensor de salida del bloque WaveNet
    """
    # Convolución dilatada
    conv = Conv1D(filters=filters, kernel_size=kernel_size,
                 dilation_rate=dilation_rate, padding='causal')(x)
    conv = BatchNormalization()(conv)
    conv = Activation('relu')(conv)
    conv = Dropout(dropout_rate)(conv)
    
    # Conexión residual con proyección 1x1 si es necesario
    if x.shape[-1] != filters:
        x = Conv1D(filters, 1, padding='same')(x)
    
    # Alinear dimensiones temporales
    x = x[:, -conv.shape[1]:, :]
    res = Add()([conv, x])
    
    return res, conv

def create_wavenet_model(cgm_shape: tuple, other_features_shape: tuple) -> Model:
    """
    Crea un modelo WaveNet para predicción de series temporales.

    Parámetros:
    -----------
    cgm_shape : tuple
        Forma de los datos CGM (samples, timesteps, features)
    other_features_shape : tuple
        Forma de otras características (samples, features)

    Retorna:
    --------
    Model
        Modelo WaveNet compilado
    """
    cgm_input = Input(shape=cgm_shape[1:])
    other_input = Input(shape=(other_features_shape[1],))
    
    x = Conv1D(WAVENET_CONFIG['filters'][0], 1, padding='same')(cgm_input)
    current_filters = WAVENET_CONFIG['filters'][0]
    
    skip_outputs = []
    
    for filters in WAVENET_CONFIG['filters']:
        for dilation in WAVENET_CONFIG['dilations']:
            wavenet_block = WaveNetBlock(
                filters=filters,
                kernel_size=WAVENET_CONFIG['kernel_size'],
                dilation_rate=dilation,
                dropout_rate=WAVENET_CONFIG['dropout_rate']
            )
            x = wavenet_block(x)
            
            # Project skip connection to match final filter size
            skip_proj = Conv1D(WAVENET_CONFIG['filters'][-1], 1, padding='same')(x)
            skip_outputs.append(skip_proj)
    
    # Combinar skip connections
    if skip_outputs:
        target_len = skip_outputs[-1].shape[1]
        aligned_skips = [
            skip[:, -target_len:, :] for skip in skip_outputs
        ]
        x = Add()(aligned_skips)
    
    x = Activation('relu')(x)
    x = GlobalAveragePooling1D()(x)
    x = Concatenate()([x, other_input])
    
    # Combinar con otras características
    x = Dense(128, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(WAVENET_CONFIG['dropout_rate'])(x)
    
    output = Dense(1)(x)
    
    return Model(inputs=[cgm_input, other_input], outputs=output)

## Funciones Visualización

In [79]:
def plot_training_history(histories: dict, model_names: list):
    """
    Visualiza el historial de entrenamiento de múltiples modelos.
    
    Parámetros:
    -----------
    histories : dict
        Diccionario con historiales de entrenamiento por modelo
    model_names : list
        Lista de nombres de modelos
    """
    plt.figure(figsize=(12, 6))
    
    for name, history in histories.items():
        plt.plot(history.history['loss'], label=f'{name} (train)')
        plt.plot(history.history['val_loss'], label=f'{name} (val)', linestyle='--')
    
    plt.xlabel('Épocas')
    plt.ylabel('Pérdida MSE')
    plt.title('Comparación de Historiales de Entrenamiento')
    plt.legend()
    plt.savefig(os.path.join(FIGURES_DIR, 'training_comparison.png'), dpi=300, bbox_inches='tight')
    plt.close()

def plot_predictions_comparison(y_test: np.ndarray, predictions: dict):
    """
    Visualiza comparación de predicciones de múltiples modelos.
    
    Parámetros:
    -----------
    y_test : np.ndarray
        Valores reales de prueba
    predictions : dict
        Diccionario con predicciones por modelo
    """
    plt.figure(figsize=(15, 5))
    
    # Scatter plot
    plt.subplot(1, 2, 1)
    for name, y_pred in predictions.items():
        plt.scatter(y_test, y_pred, alpha=0.5, label=name)
    plt.plot([0, 15], [0, 15], 'r--')
    plt.xlabel('Dosis Real (u. de insulina)')
    plt.ylabel('Dosis Predicha (u. de insulina)')
    plt.legend()
    plt.title('Predicción vs. Real (Todos los Modelos)')
    
    # Residuals
    plt.subplot(1, 2, 2)
    for name, y_pred in predictions.items():
        plt.hist(y_test - y_pred, bins=20, alpha=0.5, label=name)
    plt.xlabel('Residuo (u. de insulina)')
    plt.ylabel('Frecuencia')
    plt.legend()
    plt.title('Distribución de Residuos')
    
    plt.tight_layout()
    plt.savefig(os.path.join(FIGURES_DIR, 'predictions_comparison.png'), dpi=300, bbox_inches='tight')
    plt.close()


## Función de Entrenamiento

## Entrenamiento y Evaluación de los Modelos

In [80]:
def train_and_evaluate_model(model: Model, model_name: str, 
                           X_cgm_train: np.ndarray, X_other_train: np.ndarray, 
                           y_train: np.ndarray, X_cgm_val: np.ndarray, 
                           X_other_val: np.ndarray, y_val: np.ndarray,
                           X_cgm_test: np.ndarray, X_other_test: np.ndarray, 
                           y_test: np.ndarray) -> tuple:
    """
    Entrena y evalúa un modelo específico.
    
    Parámetros:
    -----------
    model : Model
        Modelo a entrenar
    model_name : str
        Nombre del modelo para guardado/logging
    X_cgm_train, X_other_train, y_train : np.ndarray
        Datos de entrenamiento
    X_cgm_val, X_other_val, y_val : np.ndarray
        Datos de validación
    X_cgm_test, X_other_test, y_test : np.ndarray
        Datos de prueba
        
    Retorna:
    --------
    tuple
        (history, y_pred, metrics_dict)
    """
    # Compilar modelo
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss='mse'
    )
    
    # Entrenar modelo
    history = model.fit(
        [X_cgm_train, X_other_train],
        y_train,
        validation_data=([X_cgm_val, X_other_val], y_val),
        epochs=100,
        batch_size=32,
        callbacks=[
            tf.keras.callbacks.EarlyStopping(
                monitor='val_loss',
                patience=10,
                restore_best_weights=True
            )
        ],
        verbose=1
    )
    
    # Predecir y evaluar
    y_pred = model.predict([X_cgm_test, X_other_test]).flatten()
    
    # Calcular métricas
    metrics = {
        'mae': mean_absolute_error(y_test, y_pred),
        'rmse': np.sqrt(mean_squared_error(y_test, y_pred)),
        'r2': r2_score(y_test, y_pred)
    }
    
    # Guardar modelo
    model.save(os.path.join(MODELS_DIR, f'{model_name}.keras'))
    
    return history, y_pred, metrics

In [81]:
# Entrenamiento y evaluación de modelos
print("\nCreando y entrenando modelos...")

models = {
    'CNN': create_cnn_model(X_cgm_train.shape, X_other_train.shape),
    'Transformer': create_transformer_model(X_cgm_train.shape, X_other_train.shape),
    'GRU': create_gru_model(X_cgm_train.shape, X_other_train.shape),
    'Attention': create_attention_model(X_cgm_train.shape, X_other_train.shape),
    'RNN': create_rnn_model(X_cgm_train.shape, X_other_train.shape),
    'TabNet': create_tabnet_model(X_cgm_train.shape, X_other_train.shape),
    'TCN': create_tcn_model(X_cgm_train.shape, X_other_train.shape),
    'WaveNet': create_wavenet_model(X_cgm_train.shape, X_other_train.shape),
}


histories = {}
predictions = {}
metrics = {}

for name, model in models.items():
    print(f"\nEntrenando modelo {name}...")
    history, y_pred, model_metrics = train_and_evaluate_model(
        model, name,
        X_cgm_train, X_other_train, y_train,
        X_cgm_val, X_other_val, y_val,
        X_cgm_test, X_other_test, y_test
    )
    
    histories[name] = history
    predictions[name] = y_pred
    metrics[name] = model_metrics

# Evaluación por sujeto
print("\nRendimiento por sujeto:")
for subject_id in test_subjects:
    mask = subject_test == subject_id
    y_test_sub = y_test[mask]
    
    print(f"\nSujeto {subject_id}:")
    print("-" * 40)
    for name, y_pred in predictions.items():
        y_pred_sub = y_pred[mask]
        mae_sub = mean_absolute_error(y_test_sub, y_pred_sub)
        rmse_sub = np.sqrt(mean_squared_error(y_test_sub, y_pred_sub))
        r2_sub = r2_score(y_test_sub, y_pred_sub)
        print(f"{name:<15} MAE={mae_sub:.2f}, RMSE={rmse_sub:.2f}, R²={r2_sub:.2f}")



Creando y entrenando modelos...

Entrenando modelo CNN...
Epoch 1/100
[1m1040/1040[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 12ms/step - loss: 10.6842 - val_loss: 1.5433
Epoch 2/100
[1m1040/1040[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 15ms/step - loss: 4.0211 - val_loss: 0.7095
Epoch 3/100
[1m1040/1040[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 16ms/step - loss: 3.2494 - val_loss: 0.8790
Epoch 4/100
[1m1040/1040[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 18ms/step - loss: 2.8521 - val_loss: 0.8370
Epoch 5/100
[1m1040/1040[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 17ms/step - loss: 2.6801 - val_loss: 1.1820
Epoch 6/100
[1m1040/1040[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 18ms/step - loss: 2.6114 - val_loss: 1.3664
Epoch 7/100
[1m1040/1040[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 27ms/step - loss: 2.4738 - val_loss: 1.1093
Epoch 8/100
[1m1040/1040[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

## Visualización de los Resultados

In [82]:
# Visualización de resultados
plot_training_history(histories, list(models.keys()))
plot_predictions_comparison(y_test, predictions)

## Métricas Comparativas

In [83]:
# Imprimir métricas comparativas
print("\nComparación de métricas:")
print("-" * 50)
print(f"{'Modelo':<15} {'MAE':>8} {'RMSE':>8} {'R²':>8}")
print("-" * 50)
for name, metric in metrics.items():
    print(f"{name:<15} {metric['mae']:8.2f} {metric['rmse']:8.2f} {metric['r2']:8.2f}")


Comparación de métricas:
--------------------------------------------------
Modelo               MAE     RMSE       R²
--------------------------------------------------
CNN                 0.85     1.97     0.32
Transformer         0.64     1.53     0.59
GRU                 0.93     1.56     0.57
Attention           0.55     1.11     0.79
RNN                 1.13     3.07    -0.65
TabNet              0.62     2.68    -0.27
TCN                 0.96     2.52    -0.12
WaveNet             1.60     2.32     0.05
