# Gemelo Digital HVAC - Modelo Híbrido SINDy + TCN-VAE

**Proyecto:** Gemelos Digitales - Cuarto Año

## Objetivo

Crear un gemelo digital para un sistema HVAC combinando:
- **PySINDy**: Descubrir ecuaciones físicas del sistema
- **TCN-VAE**: Red neuronal temporal con autoencoder variacional
- **Modelo Híbrido**: Fusión de física + datos para mejores predicciones

---

## Parte 1: Importar Librerías y Configuración

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Importar módulos del proyecto
from src.data_consolidation import HVACDataConsolidator
from src.physics_discovery import run_physics_discovery_pipeline, HVACPhysicsExtractor
from src.hybrid_sindy_tcnvae import HybridSINDyTCNVAE, HybridConfig, HybridModelTrainer

# Global plotting defaults: seaborn grid theme and a wide default figure size.
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (12, 5)

# Pick the compute device: use CUDA when PyTorch reports it, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"Usando: {device}")

## Parte 2: Consolidar Datos (Verano e Invierno)

In [None]:
# Consolidate the raw CSV files into one DataFrame per season.
from pathlib import Path

consolidator = HVACDataConsolidator(dataset_path="dataset")
summer_df, winter_df = consolidator.save_consolidated_datasets(output_dir="data")

print(f"Verano: {summer_df.shape[0]} muestras, {summer_df.shape[1]} variables")
print(f"Invierno: {winter_df.shape[0]} muestras, {winter_df.shape[1]} variables")

# Plot the first 1000 samples of three key signals (column -> plot title).
preview_signals = [
    ('UCAOT', 'Temp. Salida Aire'),
    ('UCAOH', 'Humedad Salida'),
    ('CPMEP', 'Potencia Compresor'),
]
fig, axes = plt.subplots(1, 3, figsize=(15, 4))
for ax, (column, title) in zip(axes, preview_signals):
    summer_df[column][:1000].plot(ax=ax, title=title)
plt.tight_layout()

# savefig does not create missing directories, so make sure the output
# folder exists before writing the figure.
Path('results').mkdir(parents=True, exist_ok=True)
plt.savefig('results/exploracion_datos.png')
plt.show()

## Parte 3: Descubrir Ecuaciones Físicas con PySINDy

In [None]:
# Use a subset for equation discovery (faster).
from itertools import islice

# The discovered equations are time derivatives (d(var)/dt), so the subset must
# keep consecutive rows in their original order. A random row sample
# (DataFrame.sample) shuffles the series and would also raise when the frame
# has fewer rows than requested; a positional slice does neither.
# NOTE(review): assumes summer_df rows are already in chronological order, as
# the sliding-window code later in this notebook also assumes - confirm.
summer_sample = summer_df.iloc[:5000]

# Discover the governing equations.
print("Descubriendo ecuaciones físicas...")
discovery, equations = run_physics_discovery_pipeline(
    summer_sample,
    season='summer',
    output_dir='models'
)

# Show the first three discovered equations.
print("\nECUACIONES DESCUBIERTAS:")
print("="*60)
for rank, (var, eq) in enumerate(islice(equations.items(), 3), start=1):
    print(f"\n{rank}. d({var})/dt = {eq}")

# Coefficient matrix handed to the hybrid model later on.
sindy_coefficients = discovery.get_coefficients()
print(f"\nMatriz de coeficientes: {sindy_coefficients.shape}")

## Parte 4: Preparar Datos para Modelo Híbrido

In [None]:
# Add derived features to the summer data.
extractor = HVACPhysicsExtractor()
summer_df = extractor.compute_derived_features(summer_df)

# Candidate input (feature) and output (target) columns. Only the ones that
# are actually present in the DataFrame are kept; missing ones are dropped
# silently.
candidate_features = [
    'UCAIT', 'UCAOT', 'UCWIT', 'UCWOT', 'UCAIH', 'UCAOH',
    'UCWF', 'MVWF1', 'UCWDP', 'MVDP', 'T_setpoint', 'T_error',
]
candidate_targets = ['UCAOT', 'UCAOH', 'UCWOT', 'CPMEP']

feature_cols = [name for name in candidate_features if name in summer_df.columns]
target_cols = [name for name in candidate_targets if name in summer_df.columns]

print(f"Entrada: {len(feature_cols)} variables")
print(f"Salida: {len(target_cols)} variables")
print(f"\nVariables entrada: {feature_cols}")
print(f"Variables salida: {target_cols}")

In [None]:
# Crear secuencias temporales
def create_sequences(df, feature_cols, target_cols, seq_length=30):
    """Build sliding-window samples for one-step-ahead prediction.

    Window ``i`` holds rows ``i .. i + seq_length - 1`` of the feature columns,
    transposed to ``(n_features, seq_length)``; its label is the target row at
    position ``i + seq_length`` (the row right after the window). NaN values in
    features and targets are replaced by 0.0 before windowing.

    Args:
        df: DataFrame whose rows are consecutive samples.
        feature_cols: Column names used as model inputs.
        target_cols: Column names used as prediction targets.
        seq_length: Number of rows per window; must be >= 1.

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape
        ``(n_windows, n_features, seq_length)`` and ``y`` has shape
        ``(n_windows, n_targets)``, with
        ``n_windows = max(len(df) - seq_length, 0)``. When there are no
        windows the arrays are empty but keep their rank.

    Raises:
        ValueError: If ``seq_length`` is smaller than 1.
    """
    if seq_length < 1:
        raise ValueError(f"seq_length must be >= 1, got {seq_length}")

    features = np.nan_to_num(df[feature_cols].to_numpy(), nan=0.0)
    targets = np.nan_to_num(df[target_cols].to_numpy(), nan=0.0)

    n_windows = len(features) - seq_length
    if n_windows <= 0:
        # Not enough rows for even one (window, next-row) pair: return empty
        # arrays that still carry the feature/target/time dimensions.
        empty_X = np.empty((0, features.shape[1], seq_length), dtype=features.dtype)
        empty_y = np.empty((0, targets.shape[1]), dtype=targets.dtype)
        return empty_X, empty_y

    # sliding_window_view appends the window axis last, which yields
    # (n_rows - seq_length + 1, n_features, seq_length). The final window has
    # no following row to predict, so only the first n_windows are kept.
    windows = np.lib.stride_tricks.sliding_window_view(features, seq_length, axis=0)
    # np.array copies the read-only strided view into an owned, writable array.
    X_seq = np.array(windows[:n_windows])
    y_seq = np.array(targets[seq_length:])

    return X_seq, y_seq

# Window length (in rows) shared by the data pipeline and the model config.
SEQ_LENGTH = 30
X, y = create_sequences(
    summer_df,
    feature_cols,
    target_cols,
    seq_length=SEQ_LENGTH,
)
print(f"X: {X.shape} (muestras, features, tiempo)")
print(f"y: {y.shape} (muestras, targets)")

In [None]:
# Dataset de PyTorch
class HVACDataset(Dataset):
    """Standardized sliding-window dataset for the hybrid model.

    Fits one ``StandardScaler`` on the flattened inputs and one on the targets,
    stores both scalers (``X_scaler``, ``y_scaler``) and exposes the scaled
    data as float tensors.

    Args:
        X: Array of input windows; the last axis is the window length. The
            notebook passes ``(samples, features, time)``.
        y: 2-D array of targets, one row per window.

    Each item is ``(X[idx], y[idx])`` where the target row is repeated along
    the time axis, so ``y[idx]`` has shape ``(n_targets, window_length)``.
    """

    def __init__(self, X, y):
        self.X_scaler = StandardScaler()
        self.y_scaler = StandardScaler()

        # Take the window length from the data itself rather than from a
        # module-level constant, so the class works for any window size.
        window_length = X.shape[-1]

        # StandardScaler expects 2-D input: flatten each window, scale, then
        # restore the original shape.
        X_flat = X.reshape(X.shape[0], -1)
        X_scaled = self.X_scaler.fit_transform(X_flat).reshape(X.shape)
        y_scaled = self.y_scaler.fit_transform(y)

        self.X = torch.FloatTensor(X_scaled)
        # Repeat each target row once per time step, then move the time axis
        # last: (samples, time, targets) -> (samples, targets, time).
        y_expanded = np.repeat(y_scaled[:, np.newaxis, :], window_length, axis=1)
        self.y = torch.FloatTensor(y_expanded).permute(0, 2, 1)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# Build the dataset and split it into train/validation parts.
from torch.utils.data import Subset

dataset = HVACDataset(X, y)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size

# Chronological split: the first 80% of the windows train the model, the last
# 20% validate it. Neighbouring sliding windows share almost all of their
# rows, so a random split would place near-duplicates of training samples in
# the validation set and make validation metrics look better than they are.
# NOTE(review): the scalers inside HVACDataset are still fitted on all windows
# (train + validation); fit them on the training part only if a strict
# hold-out is required.
train_dataset = Subset(dataset, range(train_size))
val_dataset = Subset(dataset, range(train_size, len(dataset)))

# Shuffling inside the training loader is fine: it only reorders training
# windows between batches.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

print(f"Train: {len(train_dataset)} | Val: {len(val_dataset)}")

## Parte 5: Crear y Entrenar Modelo Híbrido SINDy + TCN-VAE

In [None]:
# Model hyper-parameters, collected in one mapping and expanded into the config.
hybrid_settings = {
    'input_dim': len(feature_cols),
    'output_dim': len(target_cols),
    'sequence_length': SEQ_LENGTH,
    'latent_dim': 32,                    # VAE latent dimension
    'encoder_channels': [32, 64, 128],   # TCN encoder
    'decoder_channels': [128, 64, 32],   # TCN decoder
    'physics_weight': 0.3,               # Weight of the physics term
    'learning_rate': 1e-3,
    'device': device,
}
config = HybridConfig(**hybrid_settings)

# Instantiate the model with the discovered coefficients and move it to the
# selected device.
model = HybridSINDyTCNVAE(config, sindy_coefficients).to(device)

n_parameters = sum(p.numel() for p in model.parameters())
print(f"Modelo creado con {n_parameters:,} parámetros")

summary_lines = (
    f"\nComponentes:",
    f"  - Física (SINDy): {len(equations)} ecuaciones descubiertas",
    f"  - TCN-VAE: Latent dim = {config.latent_dim}",
    f"  - Fusión: Peso física = {config.physics_weight}",
)
for line in summary_lines:
    print(line)

In [None]:
# Train the hybrid model.
EPOCHS = 50  # Tune as needed (50-100 for good results)
trainer = HybridModelTrainer(model, config)

print(f"Entrenando por {EPOCHS} épocas...\n")
trainer.train(
    train_loader,
    val_loader,
    epochs=EPOCHS,
)

## Parte 6: Visualizar Resultados de Entrenamiento

In [None]:
# Plot the per-epoch loss curves recorded by the trainer.
from pathlib import Path

history = trainer.history

fig, axes = plt.subplots(1, 3, figsize=(15, 4))

# Panel 1: total train vs. validation loss.
axes[0].plot(history['train_loss'], label='Train')
axes[0].plot(history['val_loss'], label='Validation')
axes[0].set_title('Training Loss')
axes[0].legend()

# Panels 2 and 3: individual loss components.
axes[1].plot(history['physics_loss'], color='green')
axes[1].set_title('Physics Loss')

axes[2].plot(history['vae_loss'], color='orange')
axes[2].set_title('VAE Loss')

# Formatting shared by all three panels.
for ax in axes:
    ax.set_xlabel('Epoch')
    ax.grid(True, alpha=0.3)

plt.tight_layout()
# savefig does not create missing directories, so make sure the output
# folder exists before writing the figure.
Path('results').mkdir(parents=True, exist_ok=True)
plt.savefig('results/training_history.png', dpi=150)
plt.show()

## Parte 7: Evaluación del Modelo

In [None]:
# Collect hybrid predictions and targets over the whole validation set.
model.eval()
pred_batches, target_batches = [], []

with torch.no_grad():
    for inputs, labels in val_loader:
        outputs = model(inputs.to(device))
        pred_batches.append(outputs['hybrid'].cpu())
        target_batches.append(labels)

all_preds = torch.cat(pred_batches, dim=0).numpy()
all_targets = torch.cat(target_batches, dim=0).numpy()

# Per-target metrics, computed on the values as stored in the dataset
# (i.e. after the dataset's scaling), flattened over samples and time.
# NOTE(review): assumes outputs['hybrid'] is (batch, targets, time), matching
# the target tensors - confirm against the model.
print("\nMÉTRICAS DE EVALUACIÓN")
print("="*60)
for channel, target in enumerate(target_cols):
    predicted = all_preds[:, channel, :].ravel()
    observed = all_targets[:, channel, :].ravel()

    mse_value = mean_squared_error(observed, predicted)
    mae_value = mean_absolute_error(observed, predicted)
    r2_value = r2_score(observed, predicted)

    print(f"\n{target}:")
    print(f"  MSE: {mse_value:.6f}")
    print(f"  MAE: {mae_value:.6f}")
    print(f"  R²:  {r2_value:.6f}")

In [None]:
# Plot predicted vs. real curves for one validation sample.
from pathlib import Path

# Clamp the sample index so a validation set with fewer than 51 samples does
# not raise an IndexError.
sample_idx = min(50, len(all_preds) - 1)
fig, axes = plt.subplots(2, 2, figsize=(14, 8))
axes = axes.flatten()

for i, target in enumerate(target_cols):
    axes[i].plot(all_targets[sample_idx, i, :], label='Real', linewidth=2)
    axes[i].plot(all_preds[sample_idx, i, :], label='Predicción',
                 linestyle='--', linewidth=2)
    axes[i].set_title(f'{target}')
    axes[i].legend()
    axes[i].grid(True, alpha=0.3)

# target_cols is filtered by column availability, so it can hold fewer than
# four entries; hide the panels that received no data.
for unused_ax in axes[len(target_cols):]:
    unused_ax.set_visible(False)

plt.tight_layout()
# savefig does not create missing directories, so make sure the output
# folder exists before writing the figure.
Path('results').mkdir(parents=True, exist_ok=True)
plt.savefig('results/predicciones.png', dpi=150)
plt.show()

## Parte 8: Guardar Modelo

In [None]:
# Save the trained model and the preprocessing artifacts.
import joblib
from pathlib import Path

Path('models').mkdir(parents=True, exist_ok=True)

model_path = 'models/hvac_hybrid_model.pt'
trainer.save_model(model_path)

# Save the scalers together with the column lists they were fitted for.
# `dataset` is the HVACDataset itself (only train_dataset / val_dataset are
# wrappers around it), so the scalers are direct attributes of it;
# `dataset.dataset` would raise AttributeError.
# NOTE: joblib files are pickle-based; only load this file from trusted sources.
joblib.dump({
    'X_scaler': dataset.X_scaler,
    'y_scaler': dataset.y_scaler,
    'feature_cols': feature_cols,
    'target_cols': target_cols
}, 'models/scalers.pkl')

print("✓ Modelo guardado")

## Resumen y Conclusiones

Este notebook implementó un **gemelo digital híbrido** para un sistema HVAC que combina:

### 1. PySINDy (Física)
- Descubrió ecuaciones gobernantes del sistema automáticamente
- Proporciona interpretabilidad física
- Mejora generalización

### 2. TCN-VAE (Datos)
- Captura dinámicas temporales complejas
- Representación latente comprimida
- Maneja incertidumbre (VAE)

### 3. Fusión Híbrida
- Combina lo mejor de física y datos
- Parámetro de fusión aprendible (α)
- Restricciones físicas aplicadas

### Ventajas esperadas del enfoque (este notebook no las compara con modelos de solo física o solo datos):
- ✅ Más preciso que solo física o solo datos
- ✅ Generaliza mejor fuera del dominio de entrenamiento
- ✅ Interpretable (ecuaciones explícitas)
- ✅ Robusto (restricciones físicas)

---

**Proyecto:** Gemelos Digitales - Cuarto Año