In [22]:
# ==========================================
# 0. IMPORTS
# ==========================================

#probando si esto se commitea Segunda prueba desde otro pc
#Nota de intiti: si van a trabajar desde un entorno local (Visual), 
# aseg√∫rense de tener instaladas las librer√≠as necesarias.
#tutorial: ctrl + √± para abrir el terminal y luego pegar los siguientes comandos:
#comando para instalar torch: pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121 -> En caso que quieran usar GPU.
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from ortools.constraint_solver import pywrapcp, routing_enums_pb2
import concurrent.futures # LIBRER√çA MAGICA PARA PARALELISMO
import multiprocessing
import numpy as np
import os
import glob
import math
from tqdm import tqdm
import os
import requests 
import gc # Garbage Collector para gesti√≥n de memoria

In [23]:
# ==========================================
# 1. CONFIGURACI√ìN, GPU Y DESCARGA DE DATOS
# ==========================================


# --- A. CONFIGURACI√ìN DEL HARDWARE (DEVICE) ---
# Esto es vital para que el Bloque de entrenamiento sepa qu√© usar
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
    print(f"‚úÖ GPU DETECTADA: {torch.cuda.get_device_name(0)}")
    print(f"   (Memoria disponible: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB)")
else:
    DEVICE = torch.device("cpu")
    print("‚ö†Ô∏è GPU NO DETECTADA: Entrenando en CPU (ser√° lento).")

# --- B. CONFIGURACI√ìN DEL REPOSITORIO ---
REPO_USER = "felipe-astudillo-s"
REPO_NAME = "TransformerTSP"
BRANCH = "main" # ‚ö†Ô∏è IMPORTANTE: Si tus datos no est√°n en 'main', cambia esto por el nombre de tu rama o commit.

REPO_FOLDERS = {
    "EASY":   "Data/Easy",
    "MEDIUM": "Data/Medium",
    "HARD":   "Data/Hard"
}

BASE_LOCAL_DIR = os.path.join(os.getcwd(), "data_repo")

def download_folder_from_github(user, repo, repo_folder_path, local_output_dir, branch="main"):
    """Descarga todos los .npz de una carpeta de GitHub usando la API."""
    api_url = f"https://api.github.com/repos/{user}/{repo}/contents/{repo_folder_path}?ref={branch}"
    
    print(f"üîç Consultando API para: {repo_folder_path}...")
    try:
        response = requests.get(api_url)
        if response.status_code == 404:
            print(f"‚ùå Error 404: No existe la carpeta '{repo_folder_path}' en la rama '{branch}'.")
            return local_output_dir
        if response.status_code != 200:
            print(f"‚ùå Error API ({response.status_code}): {response.text}")
            return local_output_dir

        files_list = response.json()
        
        if not os.path.exists(local_output_dir):
            os.makedirs(local_output_dir)

        if isinstance(files_list, dict) and 'message' in files_list:
            print("‚ùå Error: La ruta parece no ser una carpeta v√°lida.")
            return local_output_dir

        count = 0
        for item in files_list:
            if item['type'] == 'file' and item['name'].endswith('.npz'):
                local_path = os.path.join(local_output_dir, item['name'])
                if not os.path.exists(local_path):
                    try:
                        r = requests.get(item['download_url'])
                        with open(local_path, 'wb') as f:
                            f.write(r.content)
                        count += 1
                    except Exception as e:
                        print(f"  ‚ùå Fall√≥ {item['name']}: {e}")
                else:
                    count += 1 # Ya exist√≠a
        
        print(f"‚úÖ Fase {repo_folder_path}: {count} archivos listos en {local_output_dir}")
        return local_output_dir

    except Exception as e:
        print(f"‚ùå Error de conexi√≥n: {e}")
        return local_output_dir

# --- C. EJECUCI√ìN DE DESCARGA ---
PATHS = {}
print(f"\n‚öôÔ∏è Sincronizando con GitHub ({REPO_USER}/{REPO_NAME})...")

for phase_name, repo_path in REPO_FOLDERS.items():
    local_target = os.path.join(BASE_LOCAL_DIR, phase_name)
    final_path = download_folder_from_github(REPO_USER, REPO_NAME, repo_path, local_target, BRANCH)
    PATHS[phase_name] = final_path

# --- D. CURRICULUM ---
CURRICULUM = [
    {"phase": "EASY",   "epochs": 20, "lr": 1e-3, "bs": 128},
    {"phase": "MEDIUM", "epochs": 15, "lr": 1e-4, "bs": 64},
    {"phase": "HARD",   "epochs": 30, "lr": 1e-4, "bs": 32}
]

print(f"\nüìÇ Rutas configuradas correctamente.")
print(f"üöÄ Listo para ejecutar el Bloque de Entrenamiento.")

‚úÖ GPU DETECTADA: NVIDIA GeForce RTX 3050 Laptop GPU
   (Memoria disponible: 4.29 GB)

‚öôÔ∏è Sincronizando con GitHub (felipe-astudillo-s/TransformerTSP)...
üîç Consultando API para: Data/Easy...
‚úÖ Fase Data/Easy: 20 archivos listos en d:\VISUAL\gith\TransformerTSP\data_repo\EASY
üîç Consultando API para: Data/Medium...
‚úÖ Fase Data/Medium: 20 archivos listos en d:\VISUAL\gith\TransformerTSP\data_repo\MEDIUM
üîç Consultando API para: Data/Hard...
‚úÖ Fase Data/Hard: 10 archivos listos en d:\VISUAL\gith\TransformerTSP\data_repo\HARD

üìÇ Rutas configuradas correctamente.
üöÄ Listo para ejecutar el Bloque de Entrenamiento.


In [24]:
"""ARQUITECTURA DE INTITI (Encoder-Only)
===================================================================

[ ENTRADA: Coordenadas (N ciudades) ]
             ‚îÇ
             ‚ñº
+-----------------------------------------+
|           CAPA DE EMBEDDING             |  ‚ûî Convierte (x,y) en
+-----------------------------------------+     vectores de alta dimensi√≥n.
             ‚îÇ
             ‚ñº
+-----------------------------------------+
|     TRANSFORMER ENCODER (x capas)       |  ‚ûî El "Cerebro".
|   [ Self-Attention + FeedForward ]      |  ‚ûî Las ciudades "hablan" entre s√≠
|                                         |     para entender el mapa global.
+-----------------------------------------+
             ‚îÇ
             ‚ñº
    [ VECTORES DE CIUDAD ENRIQUECIDOS ]      ‚ûî Ahora cada nodo tiene contexto.
             ‚îÇ
             ‚ñº
+-----------------------------------------+
|       CAPA DE PROYECCI√ìN (MLP)          |  ‚ûî La "Calculadora de Afinidad".
|    (Compara cada par de vectores)       |  ‚ûî Calcula qu√© tanto se atraen
+-----------------------------------------+     la Ciudad A y la Ciudad B.
             ‚îÇ
             ‚ñº
 [ SALIDA: MATRIZ DE PROBABILIDAD NxN ]      ‚ûî Un mapa de calor de conexiones."""



In [25]:
# ==========================================
# 2. ARQUITECTURA DEL MODELO (ENCODER-ONLY / INTITI ARCH)
# ==========================================


class IntitiEncoderModel(nn.Module):
    def __init__(self, input_dim=2, d_model=128, nhead=8, num_layers=4, dim_feedforward=512, dropout=0.1):
        super().__init__()
        
        # 1. CAPA DE EMBEDDING
        # Convierte coordenadas (x,y) en vectores de alta dimensi√≥n
        self.embedding = nn.Linear(input_dim, d_model)
        
        # 2. TRANSFORMER ENCODER (El "Cerebro")
        # Permite que las ciudades "hablen" entre s√≠ (Self-Attention)
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            batch_first=True, # Importante: Batch en la primera dimensi√≥n
            norm_first=True   # Estabilidad de entrenamiento (Pre-Norm)
        )
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        
        # 3. CAPA DE PROYECCI√ìN (MLP / Calculadora de Afinidad)
        # Esta red neuronal peque√±a toma DOS ciudades y decide si se conectan.
        # Entrada: d_model * 2 (Ciudad A + Ciudad B)
        # Salida: 1 (Score de conexi√≥n)
        self.edge_mlp = nn.Sequential(
            nn.Linear(d_model * 2, d_model),
            nn.ReLU(),
            nn.Linear(d_model, 1) # Salida escalar (score)
        )
        
        self.d_model = d_model

    def forward(self, x, return_probs=False):
        """
        x: [Batch, N_ciudades, 2]
        Retorna: [Batch, N_ciudades, N_ciudades] -> Matriz de Probabilidad
        """
        B, N, _ = x.size()
        
        # --- PASO 1: EMBEDDING ---
        h = self.embedding(x)  # [B, N, d_model]
        
        # --- PASO 2: ENCODER (Contexto Global) ---
        # Ahora 'h' contiene informaci√≥n rica de cada ciudad y sus vecinos
        h = self.encoder(h)    # [B, N, d_model]
        
        # --- PASO 3: PROYECCI√ìN DE PARES (Broadcasting) ---
        # Queremos comparar TODAS las ciudades contra TODAS las ciudades.
        # Creamos dos vistas de los datos para combinarlos:
        
        # Vista Filas: Repetimos N veces hacia abajo
        h_src = h.unsqueeze(2).expand(-1, -1, N, -1) # [B, N, N, d_model]
        
        # Vista Columnas: Repetimos N veces hacia el lado
        h_tgt = h.unsqueeze(1).expand(-1, N, -1, -1) # [B, N, N, d_model]
        
        # Concatenamos: Ahora cada celda (i, j) tiene el vector de la ciudad i Y la ciudad j
        h_pairs = torch.cat([h_src, h_tgt], dim=-1)  # [B, N, N, d_model * 2]
        
        # --- PASO 4: CALCULAR SCORES ---
        # Pasamos cada par por la MLP
        edge_scores = self.edge_mlp(h_pairs)         # [B, N, N, 1]
        
        # Quitamos la √∫ltima dimensi√≥n para que quede una matriz cuadrada
        edge_scores = edge_scores.squeeze(-1)        # [B, N, N]
        
        # Mascarar la diagonal (No queremos conexiones Ciudad A -> Ciudad A)
        # Llenamos la diagonal con un n√∫mero muy negativo
        mask_diag = torch.eye(N, device=x.device).bool().unsqueeze(0).expand(B, -1, -1)
        edge_scores = edge_scores.masked_fill(mask_diag, float('-1e9'))

        if return_probs:
            # Si queremos probabilidades (0 a 1), aplicamos Sigmoid
            return torch.sigmoid(edge_scores)
        
        # Retornamos Logits (scores crudos) para usar con BCEWithLogitsLoss
        return edge_scores

In [26]:

# ==========================================
# 3. UTILIDADES DE EVALUACI√ìN
# ==========================================
def calculate_gap(model, loader, device):
    """Calcula el Optimality GAP (%) usando Greedy Decoding en un batch."""
    model.eval()
    try:
        # Tomamos solo el primer batch para no demorar el entrenamiento
        batch_x, batch_y = next(iter(loader))
    except StopIteration:
        return 0.0 # Loader vac√≠o

    batch_x, batch_y = batch_x.to(device), batch_y.to(device)
    batch_size, n_nodes, _ = batch_x.size()

    with torch.no_grad():
        # Inferencia Greedy (Teacher Forcing = False)
        # El modelo genera la secuencia de √≠ndices autom√°ticamente
        logits = model(batch_x, teacher_forcing=False)
        # logits: [Batch, N, N_nodes]

        pred_indices = logits.argmax(dim=2) # [Batch, N]

        # Stackear para formar tour
        pred_tour = pred_indices

    # --- C√°lculo de Distancias ---
    def get_dist(pts, idx):
        # pts: [B, N, 2], idx: [B, N]
        gathered = torch.gather(pts, 1, idx.unsqueeze(-1).expand(-1, -1, 2))
        next_pts = torch.roll(gathered, -1, dims=1)
        return torch.norm(gathered - next_pts, dim=2).sum(dim=1)

    cost_model = get_dist(batch_x, pred_tour)
    cost_oracle = get_dist(batch_x, batch_y)

    gap = ((cost_model - cost_oracle) / cost_oracle).mean().item() * 100
    return gap

In [33]:
# ==========================================
# 4. BUCLE DE ENTRENAMIENTO (INTITI ARCH)
# ==========================================


# --- UTILIDADES COMPACTAS ---
def tours_to_adjacency(tours, n_nodes):
    """Genera matriz target [B, N, N] con 1s en las aristas de la ruta."""
    B = tours.size(0)
    targets = torch.zeros(B, n_nodes, n_nodes, device=tours.device)
    # Scatter pone 1s conectando nodo i -> nodo i+1
    targets.scatter_(2, torch.roll(tours, -1, dims=1).unsqueeze(2), 1)
    # Ajuste fino para asegurar alineaci√≥n batch/nodo
    for b in range(B): targets[b, tours[b], torch.roll(tours, -1, dims=1)[b]] = 1
    return targets

def decode_matrix_greedy(scores):
    """Greedy decoding simple: elige el vecino con mayor score no visitado."""
    B, N, _ = scores.size()
    tours = torch.zeros(B, N, dtype=torch.long, device=scores.device)
    visited = torch.zeros(B, N, dtype=torch.bool, device=scores.device)
    visited[:, 0] = True # Empezamos en 0
    curr = torch.zeros(B, dtype=torch.long, device=scores.device)
    
    for t in range(1, N):
        # Scores de los vecinos del nodo actual
        row = scores.gather(1, curr.view(B,1,1).expand(-1,1,N)).squeeze(1)
        row.masked_fill_(visited, float('-inf')) # Bloquear visitados
        next_node = row.argmax(dim=1)
        
        tours[:, t] = next_node
        visited.scatter_(1, next_node.unsqueeze(1), True)
        curr = next_node
    return tours

def get_gap(model, loader, device):
    """Calcula GAP en el primer batch del loader."""
    model.eval()
    bx, by = next(iter(loader))
    bx, by = bx.to(device), by.to(device)
    with torch.no_grad():
        tour_pred = decode_matrix_greedy(model(bx))
    
    # Funci√≥n lambda local para distancia
    dist = lambda p, t: torch.norm(torch.gather(p,1,t.unsqueeze(-1).expand(-1,-1,2)) - torch.roll(torch.gather(p,1,t.unsqueeze(-1).expand(-1,-1,2)),-1,dims=1), dim=2).sum(1)
    return ((dist(bx, tour_pred) - dist(bx, by)) / dist(bx, by)).mean().item() * 100

# ---------------------------------------------------------
# ENTRENAMIENTO
# ---------------------------------------------------------
model = IntitiEncoderModel(input_dim=2, d_model=128, nhead=8, num_layers=4, dim_feedforward=512).to(DEVICE)
criterion = nn.BCEWithLogitsLoss()
optimizer = None # Se define en el bucle por fase

print("\nüöÄ ENTRENAMIENTO INTITI (ENCODER-ONLY) OPTIMIZADO")

for stage in CURRICULUM:
    phase, folder = stage['phase'], PATHS[stage['phase']]
    files = glob.glob(os.path.join(folder, "*.npz"))
    if not files: print(f"‚ö†Ô∏è Salto {phase}: Sin datos."); continue
    
    print(f"\n{'='*40}\nüéì FASE: {phase} | Epochs: {stage['epochs']}\n{'='*40}")
    
    optimizer = optim.Adam(model.parameters(), lr=stage['lr'])
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3, factor=0.5)

    for epoch in range(stage['epochs']):
        model.train()
        loss_acc = 0
        gap_val = 0
        
        # Iteramos archivos (Lazy Loading)
        for i, f_path in enumerate(files):
            try:
                # Carga y Normalizaci√≥n r√°pida
                data = np.load(f_path)
                pts = torch.from_numpy(data['points']).float()
                sols = torch.from_numpy(data['solutions']).long()
                if pts.max() > 1.0: pts /= pts.max()
                
                loader = DataLoader(TensorDataset(pts, sols), batch_size=stage['bs'], shuffle=True)
                
                # Bucle de Batches
                for bx, by in tqdm(loader, desc=f"Ep {epoch+1} | {os.path.basename(f_path)}", leave=False):
                    bx, by = bx.to(DEVICE), by.to(DEVICE)
                    
                    optimizer.zero_grad()
                    preds = model(bx) # [B, N, N]
                    target = tours_to_adjacency(by, bx.size(1)) # [B, N, N]
                    
                    loss = criterion(preds, target)
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                    optimizer.step()
                    loss_acc += loss.item()

                # Calcular GAP solo en el √∫ltimo archivo para validar
                if i == len(files) - 1: gap_val = get_gap(model, loader, DEVICE)

                # Limpieza
                del data, pts, sols, loader; gc.collect(); torch.cuda.empty_cache()

            except Exception as e: print(f"‚ùå Error en {f_path}: {e}")

        avg_loss = loss_acc / (len(files) * (10000 // stage['bs'])) # Aprox
        print(f"   üìâ Epoca {epoch+1} | Loss: {avg_loss:.2f} | üìä GAP: {gap_val:.2f}%")
        
        scheduler.step(avg_loss)
        torch.save(model.state_dict(), os.path.join(folder, f"checkpoint_{phase}_best.pth"))

print("\nüèÜ FIN.")


üöÄ ENTRENAMIENTO INTITI (ENCODER-ONLY) OPTIMIZADO

üéì FASE: EASY | Epochs: 20


                                                                          

   üìâ Epoca 1 | Loss: 120653.40 | üìä GAP: 54.53%


                                                                          

   üìâ Epoca 2 | Loss: 120672.83 | üìä GAP: 12.08%


                                                                          

   üìâ Epoca 3 | Loss: 120583.46 | üìä GAP: 10.83%


                                                                          

   üìâ Epoca 4 | Loss: 120557.55 | üìä GAP: 13.49%


                                                                          

   üìâ Epoca 5 | Loss: 120648.22 | üìä GAP: 12.44%


                                                                          

KeyboardInterrupt: 

In [30]:
# ==========================================
# 5. VALIDACI√ìN FINAL COMPLETA (MULTI-PART)
# ==========================================

# --- CONFIGURACI√ìN DE RUTAS ---
PATHS_CONFIG = {
    "EASY": {
        "ckpt": "data_repo/EASY/checkpoint_EASY_best.pth",
        "val_folder": "Data/Validation/Easy",
        "val_prefix": "tsp_easy"
    },
    "MEDIUM": {
        "ckpt": "data_repo/MEDIUM/checkpoint_MEDIUM_best.pth",
        "val_folder": "Data/Validation/Medium",
        "val_prefix": "tsp_medium"
    },
    "HARD": {
        "ckpt": "data_repo/HARD/checkpoint_HARD_best.pth",
        "val_folder": "Data/Validation/Hard",
        "val_prefix": "tsp_hard"
    }
}

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def get_tour_distance(points, tour_indices):
    """Calcula distancia total de la ruta."""
    gathered = torch.gather(points, 1, tour_indices.unsqueeze(-1).expand(-1, -1, 2))
    next_pts = torch.roll(gathered, -1, dims=1)
    dist = torch.norm(gathered - next_pts, dim=2).sum(dim=1)
    return dist

def load_all_validation_parts(folder, prefix):
    """
    Busca TODAS las partes (part_0, part_1...) y las une en un solo dataset gigante.
    """
    if not os.path.exists(folder):
        print(f"‚ùå Carpeta no existe: {folder}")
        return None, None
    
    # Buscar todos los archivos que coincidan
    search_pattern = os.path.join(folder, f"{prefix}*.npz")
    all_files = sorted(glob.glob(search_pattern))
    
    if not all_files:
        print(f"‚ùå No encontr√© archivos {prefix}*.npz en {folder}")
        return None, None
    
    print(f"üìö Uniendo {len(all_files)} archivos de validaci√≥n encontrados...")
    
    all_points = []
    all_solutions = []
    
    for f_path in all_files:
        try:
            data = np.load(f_path, allow_pickle=True)
            all_points.append(data['points'])
            
            # Conversi√≥n m√°gica de lista de objetos a matriz int64
            raw_sols = data['solutions']
            # Verificamos si ya es matriz o lista de listas
            if raw_sols.dtype == np.object_:
                sols_mat = np.vstack(raw_sols).astype(np.int64)
            else:
                sols_mat = raw_sols.astype(np.int64)
                
            all_solutions.append(sols_mat)
            
        except Exception as e:
            print(f"‚ö†Ô∏è Error leyendo {os.path.basename(f_path)}: {e}")
            continue

    if not all_points:
        return None, None

    # Pegamos todo en arrays gigantes
    # np.concatenate une los arrays uno detr√°s de otro
    final_points = np.concatenate(all_points) 
    final_solutions = np.concatenate(all_solutions)
    
    return torch.FloatTensor(final_points), torch.from_numpy(final_solutions)

def validate_phase(phase_name, config):
    print(f"\n{'='*60}")
    print(f"üìä VALIDANDO FASE: {phase_name} (MODO COMPLETO)")
    print(f"{'='*60}")

    # 1. Cargar Checkpoint
    if not os.path.exists(config["ckpt"]):
        print(f"‚ö†Ô∏è Salto Fase: No existe checkpoint en {config['ckpt']}")
        return

    model = EncoderPointerModel(input_dim=2, d_model=128, nhead=8, enc_layers=3, dec_layers=2, max_seq_len=150).to(DEVICE)
    
    try:
        model.load_state_dict(torch.load(config["ckpt"], map_location=DEVICE, weights_only=False))
        model.eval()
        print(f"üß† Modelo cargado: {os.path.basename(config['ckpt'])}")
    except Exception as e:
        print(f"‚ùå Error cargando modelo: {e}")
        return

    # 2. Cargar TODA la data
    points, solutions = load_all_validation_parts(config["val_folder"], config["val_prefix"])
    
    if points is None:
        return
    
    print(f"üìÇ Total muestras cargadas: {len(points)}")
    
    # Normalizaci√≥n
    if points.max() > 1.0: points /= points.max()

    # Dataset completo
    dataset = TensorDataset(points, solutions)
    loader = DataLoader(dataset, batch_size=64, shuffle=False) # Batch grande para ir r√°pido

    # 3. Inferencia
    gap_accum = 0
    total_samples = 0
    
    pbar = tqdm(loader, desc="Benchmarking")
    
    with torch.no_grad():
        for bx, by in pbar:
            bx, by = bx.to(DEVICE), by.to(DEVICE)
            
            logits = model(bx, teacher_forcing=False)
            pred_tour = logits.argmax(dim=2) 

            cost_model = get_tour_distance(bx, pred_tour)
            cost_ortools = get_tour_distance(bx, by)

            gap = ((cost_model - cost_ortools) / cost_ortools)
            gap_accum += gap.sum().item()
            total_samples += bx.size(0)
            
            pbar.set_postfix({'GAP Acum': f"{(gap_accum/total_samples)*100:.2f}%"})

    final_gap = (gap_accum / total_samples) * 100
    print(f"\nüèÜ RESULTADO FINAL {phase_name}: GAP GLOBAL {final_gap:.2f}%")

# --- EJECUTAR ---
# Nota: Como detuviste el entrenamiento en MEDIUM, probablemente solo EASY funcione bien.
for phase in ["EASY", "MEDIUM", "HARD"]:
    validate_phase(phase, PATHS_CONFIG[phase])


üìä VALIDANDO FASE: EASY (MODO COMPLETO)
‚ùå Error cargando modelo: Error(s) in loading state_dict for EncoderPointerModel:
	Missing key(s) in state_dict: "encoder.input_proj.weight", "encoder.input_proj.bias", "encoder.encoder.layers.0.self_attn.in_proj_weight", "encoder.encoder.layers.0.self_attn.in_proj_bias", "encoder.encoder.layers.0.self_attn.out_proj.weight", "encoder.encoder.layers.0.self_attn.out_proj.bias", "encoder.encoder.layers.0.linear1.weight", "encoder.encoder.layers.0.linear1.bias", "encoder.encoder.layers.0.linear2.weight", "encoder.encoder.layers.0.linear2.bias", "encoder.encoder.layers.0.norm1.weight", "encoder.encoder.layers.0.norm1.bias", "encoder.encoder.layers.0.norm2.weight", "encoder.encoder.layers.0.norm2.bias", "encoder.encoder.layers.1.self_attn.in_proj_weight", "encoder.encoder.layers.1.self_attn.in_proj_bias", "encoder.encoder.layers.1.self_attn.out_proj.weight", "encoder.encoder.layers.1.self_attn.out_proj.bias", "encoder.encoder.layers.1.linear1.weigh

In [31]:
# ==========================================
# üß™ PRUEBA DE GENERALIZACI√ìN (EASY -> MEDIUM)
# ==========================================

# Definimos una configuraci√≥n h√≠brida:
# üß† CEREBRO: Checkpoint de EASY (Entrenado con 20 nodos)
# üìù EXAMEN: Datos de MEDIUM (Problemas de 50 nodos)

CROSS_TEST_CONFIG = {
    "ckpt": "data_repo/EASY/checkpoint_EASY_best.pth",   # Usamos el modelo peque√±o
    "val_folder": "Data/Validation/Medium",              # Usamos la data mediana
    "val_prefix": "tsp_medium"
}

print(f"\n{'#'*60}")
print("üß™ EXPERIMENTO: ¬øPuede un modelo de 20 ciudades resolver uno de 50?")
print(f"{'#'*60}")

# Llamamos a tu funci√≥n de validaci√≥n existente
validate_phase("GENERALIZATION_TEST", CROSS_TEST_CONFIG)


############################################################
üß™ EXPERIMENTO: ¬øPuede un modelo de 20 ciudades resolver uno de 50?
############################################################

üìä VALIDANDO FASE: GENERALIZATION_TEST (MODO COMPLETO)
‚ùå Error cargando modelo: Error(s) in loading state_dict for EncoderPointerModel:
	Missing key(s) in state_dict: "encoder.input_proj.weight", "encoder.input_proj.bias", "encoder.encoder.layers.0.self_attn.in_proj_weight", "encoder.encoder.layers.0.self_attn.in_proj_bias", "encoder.encoder.layers.0.self_attn.out_proj.weight", "encoder.encoder.layers.0.self_attn.out_proj.bias", "encoder.encoder.layers.0.linear1.weight", "encoder.encoder.layers.0.linear1.bias", "encoder.encoder.layers.0.linear2.weight", "encoder.encoder.layers.0.linear2.bias", "encoder.encoder.layers.0.norm1.weight", "encoder.encoder.layers.0.norm1.bias", "encoder.encoder.layers.0.norm2.weight", "encoder.encoder.layers.0.norm2.bias", "encoder.encoder.layers.1.self_attn.in_p

In [32]:
# ==========================================
# 6. VISUALIZACI√ìN COMPARATIVA (VISUALIZER)
# ==========================================


# --- CONFIGURACI√ìN ---
# Usamos la misma configuraci√≥n de rutas que antes
PATHS_CONFIG = {
    "EASY":   {"ckpt": "data_repo/EASY/checkpoint_EASY_best.pth",   "val_folder": "Data/Validation/Easy",   "val_prefix": "tsp_easy"},
    "MEDIUM": {"ckpt": "data_repo/MEDIUM/checkpoint_MEDIUM_best.pth", "val_folder": "Data/Validation/Medium", "val_prefix": "tsp_medium"},
    "HARD":   {"ckpt": "data_repo/HARD/checkpoint_HARD_best.pth",   "val_folder": "Data/Validation/Hard",   "val_prefix": "tsp_hard"}
}

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def plot_route(ax, points, tour, title, color):
    """Dibuja una ruta en el subplot dado."""
    # points: array numpy [N, 2]
    # tour: array numpy [N] (indices)
    
    # Reordenamos los puntos seg√∫n el tour
    route_points = points[tour]
    # Cerramos el ciclo (a√±adimos el primer punto al final)
    route_points = np.vstack([route_points, route_points[0]])
    
    # Dibujar l√≠neas
    ax.plot(route_points[:, 0], route_points[:, 1], c=color, linewidth=1.5, linestyle='-')
    # Dibujar nodos
    ax.scatter(points[:, 0], points[:, 1], c='black', s=15, zorder=5)
    # Marcar inicio (rojo)
    ax.scatter(route_points[0, 0], route_points[0, 1], c='red', s=40, zorder=6, label='Inicio')
    
    ax.set_title(title)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_aspect('equal')

def visualize_comparison(phase_name, config):
    print(f"\nüé® GENERANDO VISUALIZACIONES PARA: {phase_name}")
    
    if not os.path.exists(config["ckpt"]):
        print(f"‚ö†Ô∏è No hay modelo para {phase_name}, saltando...")
        return

    # 1. Cargar Modelo
    model = EncoderPointerModel(input_dim=2, d_model=128, nhead=8, enc_layers=3, dec_layers=2, max_seq_len=150).to(DEVICE)
    try:
        model.load_state_dict(torch.load(config["ckpt"], map_location=DEVICE, weights_only=False))
        model.eval()
    except Exception as e:
        print(f"‚ùå Error cargando modelo: {e}")
        return

    # 2. Buscar archivos parciales
    search_pattern = os.path.join(config["val_folder"], f"{config['val_prefix']}*.npz")
    files = sorted(glob.glob(search_pattern))
    
    if not files:
        print("‚ùå No encontr√© archivos de validaci√≥n.")
        return

    # 3. Iterar sobre cada archivo encontrado
    print(f"üì∏ Se encontraron {len(files)} archivos. Generando 1 ejemplo de cada uno...")

    for i, f_path in enumerate(files):
        try:
            # Cargar archivo
            data = np.load(f_path, allow_pickle=True)
            points_all = data['points']
            sols_all = data['solutions']
            
            # --- SELECCIONAR UN EJEMPLO ALEATORIO O EL PRIMERO ---
            idx = 0 # Tomamos el primero de cada archivo (puedes cambiar a np.random.randint)
            
            sample_points = points_all[idx] # [N, 2]
            
            # Fix conversi√≥n object -> int64 para la soluci√≥n real
            raw_sol = sols_all[idx]
            if isinstance(raw_sol, list) or raw_sol.dtype == np.object_:
                 sample_sol_true = np.array(raw_sol).astype(np.int64)
            else:
                 sample_sol_true = raw_sol.astype(np.int64)

            # Normalizar puntos para el modelo (0-1)
            max_val = sample_points.max()
            input_points = torch.tensor(sample_points / max_val, dtype=torch.float32).unsqueeze(0).to(DEVICE)

            # --- INFERENCIA DEL MODELO ---
            with torch.no_grad():
                logits = model(input_points, teacher_forcing=False)
                sample_sol_pred = logits.argmax(dim=2).squeeze(0).cpu().numpy()

            # --- DIBUJAR ---
            fig, axs = plt.subplots(1, 2, figsize=(10, 5))
            
            # Gr√°fica Izquierda: Tu IA
            plot_route(axs[0], sample_points, sample_sol_pred, f"Tu Modelo (IA)\nArchivo: {os.path.basename(f_path)}", 'blue')
            
            # Gr√°fica Derecha: OR-Tools (El Maestro)
            plot_route(axs[1], sample_points, sample_sol_true, "OR-Tools (Ground Truth)", 'green')
            
            plt.tight_layout()
            plt.show()
            
            # Limite de seguridad: Si hay 50 archivos, no queremos 50 popups.
            # Comenta estas dos l√≠neas si quieres verlos TODOS.
            if i >= 2: 
                print("üõë Deteniendo visualizaci√≥n para no saturar la pantalla (3 ejemplos mostrados).")
                break

        except Exception as e:
            print(f"‚ö†Ô∏è Error visualizando {os.path.basename(f_path)}: {e}")
            continue

# --- EJECUTAR ---
visualize_comparison("EASY", PATHS_CONFIG["EASY"])
visualize_comparison("MEDIUM", PATHS_CONFIG["MEDIUM"])


üé® GENERANDO VISUALIZACIONES PARA: EASY
‚ùå Error cargando modelo: Error(s) in loading state_dict for EncoderPointerModel:
	Missing key(s) in state_dict: "encoder.input_proj.weight", "encoder.input_proj.bias", "encoder.encoder.layers.0.self_attn.in_proj_weight", "encoder.encoder.layers.0.self_attn.in_proj_bias", "encoder.encoder.layers.0.self_attn.out_proj.weight", "encoder.encoder.layers.0.self_attn.out_proj.bias", "encoder.encoder.layers.0.linear1.weight", "encoder.encoder.layers.0.linear1.bias", "encoder.encoder.layers.0.linear2.weight", "encoder.encoder.layers.0.linear2.bias", "encoder.encoder.layers.0.norm1.weight", "encoder.encoder.layers.0.norm1.bias", "encoder.encoder.layers.0.norm2.weight", "encoder.encoder.layers.0.norm2.bias", "encoder.encoder.layers.1.self_attn.in_proj_weight", "encoder.encoder.layers.1.self_attn.in_proj_bias", "encoder.encoder.layers.1.self_attn.out_proj.weight", "encoder.encoder.layers.1.self_attn.out_proj.bias", "encoder.encoder.layers.1.linear1.weigh