In [None]:
# ============================================================
# NOTEBOOK 7 — T-GCN: Temporal Graph Convolutional Network
# ============================================================
# En este notebook implementamos un modelo T-GCN, que combina:
#  - GCN para capturar relaciones espaciales entre años consecutivos.
#  - GRU para capturar regulación temporal en la serie 2015–2022.
#
# El objetivo es predecir puntajes nacionales 2023–2026 basados
# en los embeddings generados en el Notebook 3.
# ============================================================


In [None]:
# Importaciones

In [6]:
import pandas as pd
import numpy as np
import torch

In [7]:
# Configuración de rutas

In [8]:
ruta_embeddings = r"C:/Users/john/Desktop/Saber_11_2025/resultados/embeddings"
ruta_puntajes   = r"C:/Users/john/Desktop/Saber_11_2025/resultados/puntajes"

print("Rutas listas.")


Rutas listas.


In [9]:
# Cargar embeddings reales (2015–2022)

In [10]:
import os

file_emb = os.path.join(ruta_embeddings, "embeddings_2015_2022.csv")

print("Buscando archivo:", file_emb)

if not os.path.exists(file_emb):
    raise FileNotFoundError("No se encontró embeddings_2015_2022.csv")

df_emb = pd.read_csv(file_emb, index_col=0)

print("Embeddings cargados:", df_emb.shape)
display(df_emb.head())


Buscando archivo: C:/Users/john/Desktop/Saber_11_2025/resultados/embeddings\embeddings_2015_2022.csv
Embeddings cargados: (8, 16)


Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
anio,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2015,-0.013285,-0.09036,0.014918,-0.193424,-0.085967,0.108265,0.36162,0.111973,-0.0943,-0.068606,-0.195047,-0.181644,-0.096827,0.127936,-0.013351,-0.16177
2016,-0.195073,0.281308,-0.024735,0.117337,0.042593,-0.216274,-0.04827,-0.019949,-0.045212,-0.292472,-0.254604,0.290325,-0.053898,-0.112172,0.069332,-0.137538
2017,-0.084518,-0.202669,-0.163925,0.03446,0.00333,0.24771,0.005441,-0.120514,-0.071859,0.015435,0.019399,-0.03772,-0.173882,0.04821,0.088221,0.077796
2018,0.111962,-0.017034,0.506638,-0.220439,0.359079,-0.125214,0.166344,-0.070628,0.164679,0.043943,0.401484,-0.114283,0.120481,0.054075,0.678628,-0.328001
2019,0.095019,-0.421412,-0.07754,0.175998,-0.088899,-0.119443,0.002129,-0.127941,-0.104979,0.110968,-0.003392,0.242536,-0.329259,-0.234814,0.20809,-0.141206


In [11]:
# Cargar puntajes nacionales (2015–2022)

In [12]:
file_scores = os.path.join(ruta_puntajes, "puntajes_agregados_2015_2022.csv")

print("Buscando:", file_scores)

if not os.path.exists(file_scores):
    raise FileNotFoundError("No se encontró puntajes_agregados_2015_2022.csv")

df_scores = pd.read_csv(file_scores)

print("Puntajes cargados:", df_scores.shape)
display(df_scores.head())


Buscando: C:/Users/john/Desktop/Saber_11_2025/resultados/puntajes\puntajes_agregados_2015_2022.csv
Puntajes cargados: (248, 3)


Unnamed: 0,COLE_DEPTO_UBICACION,PUNTAJE_GLOBAL_PROMEDIO,ANIO
0,AMAZONAS,220.590085,2015
1,ANTIOQUIA,247.540718,2015
2,ARAUCA,246.008498,2015
3,ATLANTICO,245.708802,2015
4,BOGOTA,270.212909,2015


In [13]:
# Convertir puntaje departamental → puntaje nacional por año

In [14]:
df_scores_year = (
    df_scores.groupby("ANIO", as_index=False)
             .agg(PUNTAJE_GLOBAL_PROMEDIO=("PUNTAJE_GLOBAL_PROMEDIO", "mean"))
)

print("Puntaje nacional por año:")
display(df_scores_year)


Puntaje nacional por año:


Unnamed: 0,ANIO,PUNTAJE_GLOBAL_PROMEDIO
0,2015,243.005971
1,2016,250.967215
2,2017,248.747612
3,2018,256.157674
4,2019,241.799973
5,2020,269.514077
6,2021,271.848492
7,2022,242.407903


In [15]:
# Construir grafo temporal (2015–2022)

In [16]:
years = list(df_emb.index)   # 2015–2022
N = len(years)

A = np.zeros((N, N))

for i in range(N-1):
    A[i, i+1] = 1
    A[i+1, i] = 1

A = torch.tensor(A, dtype=torch.float32)

print("Matriz de adyacencia temporal:")
print(A)


Matriz de adyacencia temporal:
tensor([[0., 1., 0., 0., 0., 0., 0., 0.],
        [1., 0., 1., 0., 0., 0., 0., 0.],
        [0., 1., 0., 1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 1., 0., 0., 0.],
        [0., 0., 0., 1., 0., 1., 0., 0.],
        [0., 0., 0., 0., 1., 0., 1., 0.],
        [0., 0., 0., 0., 0., 1., 0., 1.],
        [0., 0., 0., 0., 0., 0., 1., 0.]])


In [None]:
# Preparación de features (X) y targets (y)

In [18]:
# PREPARAR DATOS PARA T-GCN

# X = embeddings reales (8 años × 16 variables)
X = torch.tensor(df_emb.values, dtype=torch.float32)   # shape = (8, 16)

# y = puntaje nacional promedio (8 años)
y = torch.tensor(df_scores_year["PUNTAJE_GLOBAL_PROMEDIO"].values,
                 dtype=torch.float32)

print("Shape X:", X.shape)   # (8, 16)
print("Shape y:", y.shape)   # (8,)
print("\nPrimeros 3 embeddings:\n", X[:3])
print("\nPrimeros valores de y:", y[:5])


Shape X: torch.Size([8, 16])
Shape y: torch.Size([8])

Primeros 3 embeddings:
 tensor([[-0.0133, -0.0904,  0.0149, -0.1934, -0.0860,  0.1083,  0.3616,  0.1120,
         -0.0943, -0.0686, -0.1950, -0.1816, -0.0968,  0.1279, -0.0134, -0.1618],
        [-0.1951,  0.2813, -0.0247,  0.1173,  0.0426, -0.2163, -0.0483, -0.0199,
         -0.0452, -0.2925, -0.2546,  0.2903, -0.0539, -0.1122,  0.0693, -0.1375],
        [-0.0845, -0.2027, -0.1639,  0.0345,  0.0033,  0.2477,  0.0054, -0.1205,
         -0.0719,  0.0154,  0.0194, -0.0377, -0.1739,  0.0482,  0.0882,  0.0778]])

Primeros valores de y: tensor([243.0060, 250.9672, 248.7476, 256.1577, 241.8000])


In [None]:
# Crear secuencias 3×16 y targets

In [19]:
# CREAR SECUENCIAS TEMPORALES PARA ENTRENAMIENTO (WINDOW = 3)

def crear_muestras_TGCN(X, y, window=3):
    X_seq, y_seq = [], []
    for i in range(len(X) - window):
        X_seq.append(X[i:i+window])     # ventana 3×16
        y_seq.append(y[i+window])       # valor objetivo del año siguiente
    return torch.stack(X_seq), torch.tensor(y_seq, dtype=torch.float32)

# Generar secuencias
X_seq, y_seq = crear_muestras_TGCN(X, y, window=3)

print("Shape X_seq:", X_seq.shape)    # Esperado: (5, 3, 16)
print("Shape y_seq:", y_seq.shape)    # Esperado: (5,)
print("\nEjemplo 1 de secuencia:\n", X_seq[0])
print("\nTarget 1:", y_seq[0])


Shape X_seq: torch.Size([5, 3, 16])
Shape y_seq: torch.Size([5])

Ejemplo 1 de secuencia:
 tensor([[-0.0133, -0.0904,  0.0149, -0.1934, -0.0860,  0.1083,  0.3616,  0.1120,
         -0.0943, -0.0686, -0.1950, -0.1816, -0.0968,  0.1279, -0.0134, -0.1618],
        [-0.1951,  0.2813, -0.0247,  0.1173,  0.0426, -0.2163, -0.0483, -0.0199,
         -0.0452, -0.2925, -0.2546,  0.2903, -0.0539, -0.1122,  0.0693, -0.1375],
        [-0.0845, -0.2027, -0.1639,  0.0345,  0.0033,  0.2477,  0.0054, -0.1205,
         -0.0719,  0.0154,  0.0194, -0.0377, -0.1739,  0.0482,  0.0882,  0.0778]])

Target 1: tensor(256.1577)


In [None]:
# Modelo T-GCN (PyTorch)

In [20]:
# MODELO T-GCN (compacto y optimizado para 3×16)

import torch.nn as nn
import torch.nn.functional as F

class TGCN(nn.Module):
    def __init__(self, in_features=16, hidden_gcn=32, hidden_rnn=32):
        super(TGCN, self).__init__()

        # --- Proyección tipo GCN (simple y estable) ---
        self.gcn_linear = nn.Linear(in_features, hidden_gcn)

        # --- GRU para captura temporal (núcleo del T-GCN) ---
        self.gru = nn.GRU(
            input_size=hidden_gcn,
            hidden_size=hidden_rnn,
            batch_first=True
        )

        # --- Capa final ---
        self.out = nn.Linear(hidden_rnn, 1)

    def forward(self, x):
        # x: (batch, seq_len, features) = (5, 3, 16)

        # 1. Proyección GCN-like
        x = self.gcn_linear(x)        # (batch, 3, hidden_gcn)
        x = torch.relu(x)

        # 2. GRU temporal
        out, h = self.gru(x)          # h = hidden state final (1, batch, hidden_rnn)

        # 3. Predicción con último estado
        out = self.out(h[-1])         # (batch, 1)

        return out.squeeze()


In [None]:
# Entrenamiento del T-GCN

In [21]:
# ENTRENAMIENTO T-GCN

model = TGCN()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = nn.MSELoss()

EPOCHS = 1500

for epoch in range(EPOCHS):
    optimizer.zero_grad()

    pred = model(X_seq)      # shape (5)
    loss = criterion(pred, y_seq)

    loss.backward()
    optimizer.step()

    if epoch % 200 == 0:
        print(f"Epoch {epoch} — Loss {loss.item():.4f}")

print("Entrenamiento completado.")


Epoch 0 — Loss 65917.2734
Epoch 200 — Loss 35995.4219
Epoch 400 — Loss 18557.0312
Epoch 600 — Loss 8663.6484
Epoch 800 — Loss 3623.3599
Epoch 1000 — Loss 1378.2194
Epoch 1200 — Loss 524.2823
Epoch 1400 — Loss 252.6726
Entrenamiento completado.


In [None]:
# Predicción autoregresiva 2023–2026

In [22]:
# PREDICCIÓN 2023–2026 — T-GCN AUTORREGRESIVO

model.eval()
preds = []

# Usamos la última ventana real (2020–2022)
ventana = X_seq[-1].clone()   # (3,16)

for year in [2023, 2024, 2025, 2026]:

    with torch.no_grad():
        y_pred = model(ventana.unsqueeze(0)).item()

    preds.append(y_pred)

    # Construcción del nuevo "embedding" temporal generado por el T-GCN
    # Usamos una proyección a 16 valores para mantener la estructura
    emb_pred = torch.tensor(ventana[-1].numpy(), dtype=torch.float32)

    # Actualizar ventana temporal con autoregresión
    ventana = torch.vstack([ventana[1:], emb_pred])


# Exportar DataFrame final
df_pred_tgcn = pd.DataFrame({
    "ANIO": [2023, 2024, 2025, 2026],
    "PUNTAJE_PRED_TGCN": preds
})

print("=== Predicciones T-GCN ===")
display(df_pred_tgcn)


=== Predicciones T-GCN ===


Unnamed: 0,ANIO,PUNTAJE_PRED_TGCN
0,2023,249.955307
1,2024,249.955307
2,2025,249.955307
3,2026,249.955307


In [None]:
# EXPORTACIÓN FINAL — NOTEBOOK 7 (T-GCN)

In [23]:
# EXPORTACIÓN FINAL — NOTEBOOK 7 (T-GCN)

# Ruta de salida para modelos temporales
folder_out = r"C:/Users/john/Desktop/Saber_11_2025/resultados/modelos_temporales"
os.makedirs(folder_out, exist_ok=True)

# 1) Exportar predicciones T-GCN 2023–2026

file_tgcn = os.path.join(folder_out, "predicciones_tgcn_2023_2026.csv")
df_pred_tgcn.to_csv(file_tgcn, index=False)

print("Archivo exportado correctamente:")
print(" →", file_tgcn)


# 2) Exportar predicciones ST-GNN 2023–2026  
#     (valores recuperados de Notebook 6)

df_pred_stgnn = pd.DataFrame({
    "ANIO": [2023, 2024, 2025, 2026],
    "PUNTAJE_PRED_STGNN": [
        247.270844,
        247.270844,
        247.270844,
        247.270844
    ]
})

file_stgnn = os.path.join(folder_out, "predicciones_stgnn_2023_2026.csv")
df_pred_stgnn.to_csv(file_stgnn, index=False)

print("Archivo exportado correctamente:")
print(" →", file_stgnn)



# 3) Confirmación final

print("\n=== EXPORTACIÓN COMPLETADA ===")
print("Archivos generados en:", folder_out)
print(" - predicciones_tgcn_2023_2026.csv")
print(" - predicciones_stgnn_2023_2026.csv")


Archivo exportado correctamente:
 → C:/Users/john/Desktop/Saber_11_2025/resultados/modelos_temporales\predicciones_tgcn_2023_2026.csv
Archivo exportado correctamente:
 → C:/Users/john/Desktop/Saber_11_2025/resultados/modelos_temporales\predicciones_stgnn_2023_2026.csv

=== EXPORTACIÓN COMPLETADA ===
Archivos generados en: C:/Users/john/Desktop/Saber_11_2025/resultados/modelos_temporales
 - predicciones_tgcn_2023_2026.csv
 - predicciones_stgnn_2023_2026.csv
