In [None]:
# ============================================================
# EXPERIMENTO 5 — ABLACIÓN: RUIDO (T) & CAPACIDAD (Pixel-Space DDPM)
# ============================================================

import os
import time
import numpy as np
import matplotlib.pyplot as plt
import mlflow
import tensorflow as tf
from tensorflow.keras import layers, Model, optimizers
from sklearn.decomposition import PCA

# ============================================================
# 1. CONFIGURACIÓN Y CARGA DE DATOS
# ============================================================

# MLflow configuration. The tracking server is reached through an ngrok URL,
# so this value must be adjusted whenever the ngrok address changes.
MLFLOW_URL = "https://fleecier-rufus-decadently.ngrok-free.dev"  
mlflow.set_tracking_uri(MLFLOW_URL)
mlflow.set_experiment("Exp5-Ablation_Scientific")

# Local directory where sample grids and saved models are written
# (original author's note: kept consistent with Exp1).
ARTIFACTS_DIR = "artifacts_exp5"
os.makedirs(ARTIFACTS_DIR, exist_ok=True)

# Load the dataset from the intermediate .npz archive.
npz_path = "./data/intermediate/pixel_art_data.npz"
data = np.load(npz_path)
images = data["images"]  # (N,16,16,3); per the original note, normalized to [0,1]

# Keep a float32 tensor copy of the images for the tf.data pipeline.
images_tf = tf.convert_to_tensor(images, dtype=tf.float32)
dataset_size = len(images)
print(f"Dataset cargado: {images.shape}")

# ============================================================
# 2. LÓGICA DE DIFUSIÓN (DDPM MATH)
# ============================================================

def make_beta_schedule(T, start=1e-4, end=2e-2):
    """Build a linear noise schedule with ``T`` steps.

    Args:
        T: Number of diffusion steps (length of every returned array).
        start: Value of beta at the first step.
        end: Value of beta at the last step.

    Returns:
        A tuple ``(betas, alphas, alpha_cumprod)`` of float32 NumPy arrays,
        where ``alphas = 1 - betas`` and ``alpha_cumprod`` is the running
        product of ``alphas``.
    """
    noise_levels = np.linspace(start, end, num=T, dtype=np.float32)
    keep_ratios = 1.0 - noise_levels
    return noise_levels, keep_ratios, np.cumprod(keep_ratios)

def extract(a, t, shape):
    """Look up per-sample schedule constants and make them broadcastable.

    Gathers the entries of ``a`` at the indices ``t`` and reshapes the result
    to ``(len(t), 1, ..., 1)``, with ``len(shape) - 1`` trailing singleton
    axes, so it can be multiplied against a tensor of rank ``len(shape)``.
    """
    batch_dim = tf.shape(t)[0]
    singleton_axes = (1,) * (len(shape) - 1)
    picked = tf.gather(a, t)
    return tf.reshape(picked, (batch_dim,) + singleton_axes)

def q_sample(x0, t, noise, sqrt_ac, sqrt_1mac):
    """Forward process: blend clean samples with noise at timestep ``t``.

    Returns ``sqrt_ac[t] * x0 + sqrt_1mac[t] * noise``, where both schedule
    tables are indexed per sample and broadcast over the remaining axes of
    ``x0``.
    """
    signal_scale = extract(sqrt_ac, t, x0.shape)
    noise_scale = extract(sqrt_1mac, t, x0.shape)
    return signal_scale * x0 + noise_scale * noise

# ============================================================
# 3. MODELO U-NET (VARIABLE POR CAPACIDAD)
# ============================================================

def build_unet(base_channels=32, image_size=16):
    """
    Build a two-level U-Net noise predictor whose width is set by ``base_channels``.

    The model takes a batch of square RGB images plus a batch of integer
    timesteps and outputs a 3-channel tensor with the same spatial size as
    the input. The timestep is turned into a 64-dim sinusoidal embedding,
    projected by a Dense layer, and concatenated onto the bottleneck features.

    Both the sinusoidal embedding and the spatial tiling are wrapped in
    ``layers.Lambda`` so that raw TensorFlow ops are never applied directly
    to symbolic Keras tensors.

    Args:
        base_channels: Number of filters at the first encoder level; the
            second level and the bottleneck use 2x and 4x this value.
        image_size: Height/width of the square input images. Must be a
            positive multiple of 4 because the encoder halves the
            resolution twice.

    Returns:
        A Keras ``Model`` named ``UNet_C{base_channels}`` with inputs
        ``[img_input, time_input]`` and a 3-channel output.

    Raises:
        ValueError: If ``image_size`` is not a positive multiple of 4.
    """
    if image_size < 4 or image_size % 4 != 0:
        raise ValueError(
            f"image_size must be a positive multiple of 4, got {image_size}"
        )

    # Spatial side of the bottleneck after the two 2x2 max-pool stages.
    bottleneck_size = image_size // 4

    def conv_block(x, channels):
        x = layers.Conv2D(channels, 3, padding="same", activation="relu")(x)
        x = layers.Conv2D(channels, 3, padding="same", activation="relu")(x)
        return x

    # --- Embedding logic (kept as a pure function so Lambda can wrap it) ---
    def sinusoidal_embedding_fn(t):
        dim = 64  # Fixed embedding dimension
        half = dim // 2
        # Constant, geometrically spaced frequencies
        freqs = tf.exp(tf.range(half, dtype=tf.float32) * -np.log(10000.0)/(half-1))
        # Outer product of timesteps and frequencies
        args = tf.cast(t, tf.float32)[:, None] * freqs[None, :]
        emb = tf.concat([tf.sin(args), tf.cos(args)], axis=-1)
        # Zero-pad one column if dim were odd (inactive for dim = 64)
        if dim % 2 == 1:
            emb = tf.pad(emb, [[0,0],[0,1]])
        return emb

    # --- Model inputs ---
    inp = layers.Input(shape=(image_size, image_size, 3), name="img_input")
    t_in = layers.Input(shape=(), dtype=tf.int32, name="time_input")

    # --- Time embedding through a Lambda layer ---
    # Avoids "KerasTensor cannot be used as input to a TensorFlow function".
    t_emb = layers.Lambda(sinusoidal_embedding_fn, output_shape=(64,), name="time_embedding")(t_in)

    # Project the embedding and give it 1x1 spatial extent
    t_emb = layers.Dense(128, activation="relu")(t_emb)
    t_emb = layers.Reshape((1,1,128))(t_emb)

    # --- Encoder ---
    c1 = conv_block(inp, base_channels)          # Level 1
    x = layers.MaxPool2D()(c1)

    c2 = conv_block(x, base_channels*2)          # Level 2
    x = layers.MaxPool2D()(c2)

    # --- Bottleneck ---
    b = conv_block(x, base_channels*4)

    # Inject time at the bottleneck: tile the 1x1 embedding over the
    # bottleneck grid (derived from image_size, not hard-coded to 4x4) and
    # concatenate it along the channel axis.
    t_emb_tiled = layers.Lambda(
        lambda x: tf.tile(x, [1, bottleneck_size, bottleneck_size, 1])
    )(t_emb)
    b = layers.Concatenate()([b, t_emb_tiled])

    # --- Decoder ---
    x = layers.UpSampling2D()(b)
    x = layers.Concatenate()([x, c2])            # Skip connection 2
    x = conv_block(x, base_channels*2)

    x = layers.UpSampling2D()(x)
    x = layers.Concatenate()([x, c1])            # Skip connection 1
    x = conv_block(x, base_channels)

    # Output: linear 1x1 convolution back to 3 channels
    out = layers.Conv2D(3, 1, padding="same", name="prediction")(x)

    return Model([inp, t_in], out, name=f"UNet_C{base_channels}")

# ============================================================
# 4. SAMPLING Y MÉTRICAS (Feature-FID)
# ============================================================

def sample_ddpm(model, T, betas, alphas, ac, num=64):
    """Generate new samples by running the reverse process from pure noise.

    Starting from Gaussian noise, iterates ``t = T-1, ..., 0``. At each step
    the model predicts the noise, the posterior mean is computed from it, and
    (for ``t > 0``) fresh Gaussian noise scaled by ``sqrt(beta_t)`` is added.

    Args:
        model: Callable Keras model taking ``[images, timesteps]`` and
            returning the predicted noise.
        T: Number of diffusion steps; must match the length of the schedules.
        betas: Per-step beta values.
        alphas: Per-step alpha values (``1 - betas``).
        ac: Per-step cumulative product of ``alphas``.
        num: Number of samples to generate.

    Returns:
        A NumPy array of ``num`` generated images clipped to ``[0, 1]``.
    """
    # Take the per-sample shape from the model's image input so models built
    # with a different image_size work; fall back to the 16x16 RGB default
    # when the shape cannot be read or is not fully defined.
    try:
        sample_shape = tuple(int(d) for d in model.inputs[0].shape[1:])
    except (AttributeError, IndexError, TypeError, ValueError):
        sample_shape = (16, 16, 3)
    if len(sample_shape) != 3:
        sample_shape = (16, 16, 3)

    z = tf.random.normal((num,) + sample_shape)

    # Hold every schedule as a float32 tensor. Indexing `ac` as a NumPy
    # scalar could promote `1 - ac_t` to float64 (NumPy 1.x scalar rules),
    # which would then clash with the float32 model output.
    betas_tf = tf.constant(betas, dtype=tf.float32)
    alphas_tf = tf.constant(alphas, dtype=tf.float32)
    ac_tf = tf.constant(ac, dtype=tf.float32)

    for t in reversed(range(T)):
        t_b = tf.fill((num,), t)
        eps = model([z, t_b], training=False)

        beta_t = betas_tf[t]
        alpha_t = alphas_tf[t]
        ac_t = ac_tf[t]

        # Reverse diffusion mean
        mean = (1 / tf.sqrt(alpha_t)) * (z - beta_t * eps / tf.sqrt(1 - ac_t))

        if t > 0:
            sigma_t = tf.sqrt(beta_t)
            z = mean + sigma_t * tf.random.normal(tf.shape(z))
        else:
            # Last step: no noise is added.
            z = mean

    return tf.clip_by_value(z, 0, 1).numpy()

def compute_feature_fid(real_imgs, gen_imgs, n_components=20):
    """
    Compute an approximate FID-style distance in a PCA feature space.

    Images are flattened, a PCA with ``n_components`` components is fitted on
    the real images only, and both sets are projected into that space. The
    score is the squared distance between the feature means plus the sum of
    squared element-wise differences between the feature covariances.

    This is a cheap proxy meant for relative comparisons between runs. It is
    NOT the true Fréchet distance, which would use the matrix square root of
    the covariance product.

    Returns:
        The score as a Python float.
    """
    projector = PCA(n_components=n_components)

    # Fit on the real images, then reuse the same projection for generated ones.
    real_feats = projector.fit_transform(real_imgs.reshape(len(real_imgs), -1))
    gen_feats = projector.transform(gen_imgs.reshape(len(gen_imgs), -1))

    # First- and second-order statistics of each feature cloud.
    mean_gap = real_feats.mean(axis=0) - gen_feats.mean(axis=0)
    cov_gap = np.cov(real_feats, rowvar=False) - np.cov(gen_feats, rowvar=False)

    mean_term = np.sum(mean_gap**2)
    cov_term = np.sum(cov_gap**2)
    return float(mean_term + cov_term)

# ============================================================
# 5. CORE DE ABLACIÓN (TRAINING LOOP)
# ============================================================

def run_ablation_experiment(T_steps, base_channels, epochs=10, batch_size=128):
    """
    Train and evaluate one DDPM configuration and log everything to MLflow.

    For the given number of diffusion steps and U-Net width, this builds the
    noise schedule and model, trains with a plain MSE noise-prediction loss,
    samples 64 images, scores them with ``compute_feature_fid`` against a
    random subset of up to 1000 real images, and stores the sample grid and
    the saved model both locally (under ``ARTIFACTS_DIR``) and as MLflow
    artifacts.

    Args:
        T_steps: Number of diffusion steps in the schedule.
        base_channels: Base channel count passed to ``build_unet``.
        epochs: Number of passes over the dataset.
        batch_size: Mini-batch size used for training.

    Returns:
        None. Results are reported through MLflow, files on disk and stdout.
    """
    run_name = f"DDPM_T{T_steps}_C{base_channels}"
    print(f"\n>>> INICIANDO RUN: {run_name} (Epochs={epochs})")

    # 1. Noise schedule and the two lookup tables used by q_sample
    betas, alphas, ac = make_beta_schedule(T_steps)
    sqrt_ac_tf = tf.constant(np.sqrt(ac), dtype=tf.float32)
    sqrt_1mac_tf = tf.constant(np.sqrt(1-ac), dtype=tf.float32)

    # 2. Model and optimizer
    model = build_unet(base_channels=base_channels)
    optimizer = optimizers.Adam(learning_rate=2e-4)

    # 3. Input pipeline. The shuffle buffer spans the whole dataset: a buffer
    #    smaller than the dataset only shuffles within a sliding window, so
    #    any ordering present in the stored data would leak into the batches.
    ds = tf.data.Dataset.from_tensor_slices(images_tf)
    ds = ds.shuffle(buffer_size=len(images)).batch(batch_size)

    # Sub-directory inside the run's artifact store. MLflow's `artifact_path`
    # is a location relative to the run's artifact URI, not a local path.
    artifact_subdir = "artifacts_exp5"

    # 4. MLflow run
    with mlflow.start_run(run_name=run_name):
        # Parameters
        mlflow.log_param("T_steps", T_steps)
        mlflow.log_param("base_channels", base_channels)
        mlflow.log_param("epochs", epochs)
        mlflow.log_param("batch_size", batch_size)
        mlflow.log_param("model_params", model.count_params())

        start_time = time.time()

        # --- Custom training loop ---
        for epoch in range(1, epochs + 1):
            epoch_losses = []

            for x0_batch in ds:
                b_size = tf.shape(x0_batch)[0]

                # Sample a timestep per image and the target noise
                t = tf.random.uniform((b_size,), 0, T_steps, dtype=tf.int32)
                noise = tf.random.normal(tf.shape(x0_batch))

                # Forward diffusion
                x_t = q_sample(x0_batch, t, noise, sqrt_ac_tf, sqrt_1mac_tf)

                with tf.GradientTape() as tape:
                    noise_pred = model([x_t, t], training=True)
                    # Loss: plain MSE between true and predicted noise
                    loss = tf.reduce_mean(tf.square(noise - noise_pred))

                grads = tape.gradient(loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads, model.trainable_variables))
                epoch_losses.append(loss.numpy())

            # Per-epoch metric, cast to a plain Python float before logging
            avg_loss = float(np.mean(epoch_losses))
            mlflow.log_metric("train_loss", avg_loss, step=epoch)
            print(f"   [Ep {epoch}/{epochs}] Loss: {avg_loss:.5f}")

        training_time = time.time() - start_time
        mlflow.log_metric("training_time_sec", training_time)

        # --- Evaluation and generation ---
        print("   > Generando muestras y calculando FID...")

        # 1. Generate samples
        generated_imgs = sample_ddpm(model, T_steps, betas, alphas, ac, num=64)

        # 2. FID-like score against a random subset of real images
        idx_real = np.random.choice(len(images), size=min(1000, len(images)), replace=False)
        real_subset = images[idx_real]
        fid_score = compute_feature_fid(real_subset, generated_imgs)
        mlflow.log_metric("feature_fid", fid_score)
        print(f"   > Feature-FID: {fid_score:.4f}")

        # 3. 8x8 sample grid (axes.flat walks the grid row by row)
        fig, axes = plt.subplots(8, 8, figsize=(10, 10))
        for ax, img in zip(axes.flat, generated_imgs):
            ax.imshow(img)
            ax.axis("off")
        fig.tight_layout()

        # Save the grid locally; act on `fig` explicitly rather than on
        # whichever figure happens to be current.
        grid_filename = f"samples_{run_name}.png"
        grid_path = os.path.join(ARTIFACTS_DIR, grid_filename)
        fig.savefig(grid_path)
        plt.close(fig)

        # Save the model locally
        model_filename = f"model_{run_name}.keras"
        model_path = os.path.join(ARTIFACTS_DIR, model_filename)
        model.save(model_path)

        # 4. Upload both files as run artifacts
        mlflow.log_artifact(grid_path, artifact_path=artifact_subdir)
        mlflow.log_artifact(model_path, artifact_path=artifact_subdir)

        print(f"   >>> Run {run_name} finalizado.\n")

# ============================================================
# 6. EJECUCIÓN DE LAS VARIANTES (A y B)
# ============================================================

if __name__ == "__main__":

    # Settings shared by every run of the ablation.
    EPOCHS_PER_RUN = 15  # Slightly raised so differences between runs show up

    # Variant A sweeps the number of diffusion steps at a fixed, medium width.
    T_values = [50, 100, 200, 400]
    fixed_capacity = 32

    # Variant B sweeps the width at a fixed number of steps. The (T=200, C=32)
    # combination is already covered by variant A, so it is not repeated here.
    C_values = [16, 64]
    fixed_T = 200

    print("=== INICIANDO EXPERIMENTO 5: ABLACIÓN ===")

    # --- VARIANT A: sweep over T (diffusion steps) ---
    print("\n--- VARIANTE A: Barrido de T ---")
    for T in T_values:
        run_ablation_experiment(T, fixed_capacity, epochs=EPOCHS_PER_RUN)

    # --- VARIANT B: sweep over capacity ---
    print("\n--- VARIANTE B: Barrido de Capacidad ---")
    for C in C_values:
        run_ablation_experiment(fixed_T, C, epochs=EPOCHS_PER_RUN)

    print("=== TODOS LOS EXPERIMENTOS COMPLETADOS ===")
    print(f"Revisar resultados en MLflow: {MLFLOW_URL}")

Dataset cargado: (89400, 16, 16, 3)
=== INICIANDO EXPERIMENTO 5: ABLACIÓN ===

--- VARIANTE A: Barrido de T ---

>>> INICIANDO RUN: DDPM_T50_C32 (Epochs=15)

   [Ep 1/15] Loss: 0.45128
