In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score, roc_curve
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Dropout, Masking
from tensorflow.keras.optimizers import Adam




In [None]:
# -------------------------------------------------------------
# 1. Load the data
# -------------------------------------------------------------
df = pd.read_csv("tu_archivo.csv")

variables = [
    'var1', 'var2', 'var3', 'var4', 'var5',
    # add your ~32 feature columns here
]

target = 'deserto'  # 0 = stays enrolled, 1 = drops out

# -------------------------------------------------------------
# 2. Scale the features
# -------------------------------------------------------------
# NOTE(review): the scaler is fitted on every row, including students that
# end up in the test split further down, so test-set statistics leak into
# training. Ideally split by student first and fit on training rows only.
scaler = StandardScaler()
df[variables] = scaler.fit_transform(df[variables])

# -------------------------------------------------------------
# 3. Convert to the 3D layout expected by the GRU
# -------------------------------------------------------------
# Filler for padded timesteps and missing feature values. After standard
# scaling 0.0 is the column mean, so it is a neutral choice.
PAD_VALUE = 0.0


def build_sequences(frame, feature_cols, target_col,
                    id_col="estudiante_id", time_col="semana",
                    pad_value=PAD_VALUE):
    """Turn a long-format frame into a dense (students, weeks, features) array.

    Parameters
    ----------
    frame : pandas.DataFrame
        One row per (student, week) observation.
    feature_cols : list of str
        Columns used as model inputs.
    target_col : str
        Label column; the value of each student's last week is used.
    id_col, time_col : str
        Columns identifying the student and ordering the weeks.
    pad_value : float
        Value written into padded timesteps and over NaN feature values.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``X`` with shape (n_students, max_weeks, n_features) and ``y`` with
        shape (n_students,). Students keep their order of first appearance
        in ``frame``.

    Raises
    ------
    ValueError
        If ``frame`` contains no students.
    """
    sequences, labels = [], []
    # A single groupby pass replaces re-filtering the whole frame per student.
    # sort=False keeps students in order of first appearance.
    for _, student_rows in frame.groupby(id_col, sort=False):
        student_rows = student_rows.sort_values(time_col)
        sequences.append(student_rows[feature_cols].to_numpy(dtype=float))
        labels.append(student_rows[target_col].to_numpy()[-1])

    if not sequences:
        raise ValueError("frame contains no students; cannot build sequences")

    # Students may have different numbers of weeks; stacking ragged arrays
    # directly does not give a numeric 3D tensor, so pad to the longest one.
    max_len = max(len(seq) for seq in sequences)
    X_out = np.full((len(sequences), max_len, len(feature_cols)),
                    pad_value, dtype=float)
    for row, seq in enumerate(sequences):
        # Left-pad so the last timestep of every student is a real
        # observation (the GRU only returns its final state).
        X_out[row, max_len - len(seq):, :] = seq

    # NaN inputs would propagate through the network and turn the loss
    # into NaN, so replace them with the pad value.
    X_out[np.isnan(X_out)] = pad_value
    return X_out, np.asarray(labels)


X, y = build_sequences(df, variables, target)

print("X shape:", X.shape)  # e.g. (1500, 16 weeks, 32 features)
print("y shape:", y.shape)



In [None]:
# -------------------------------------------------------------
# 4. Train / test split
# -------------------------------------------------------------
# Stratify on the label so both splits keep the same proportion of
# dropouts; with an imbalanced target a plain random split can leave the
# test set with very few positives.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# -------------------------------------------------------------
# 5. GRU model
# -------------------------------------------------------------
# Timesteps whose features ALL equal this value are skipped by the GRU.
# It must match the value used to pad the sequences. NaN cannot be used:
# NaN never compares equal to anything (not even itself), so a NaN mask
# value masks nothing.
MASK_VALUE = 0.0

model = Sequential()
model.add(Masking(mask_value=MASK_VALUE, input_shape=(X.shape[1], X.shape[2])))
model.add(GRU(units=64, return_sequences=False))
model.add(Dropout(0.3))
model.add(Dense(1, activation="sigmoid"))

model.compile(
    loss="binary_crossentropy",
    optimizer=Adam(learning_rate=0.001),
    metrics=["accuracy"]
)

model.summary()

# -------------------------------------------------------------
# 6. Training
# -------------------------------------------------------------
# Example: make class 1 (dropout) count 5 times as much as class 0.
mis_pesos = {0: 1.0, 1: 5.0}

# Fit exactly once. A second fit() on the same model would continue from
# the already-trained weights (here without class weights) and overwrite
# `history`.
history = model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=40,
    batch_size=32,
    verbose=1,
    class_weight=mis_pesos
)



In [None]:
# -------------------------------------------------------------
# 7. Evaluation on the test set
# -------------------------------------------------------------
# model.evaluate returns [loss, accuracy] because the model was compiled
# with metrics=["accuracy"].
loss, acc = model.evaluate(X_test, y_test)
print(f"\n📌 Accuracy en test: {acc:.4f}")
print(f"📌 Loss en test: {loss:.4f}")

# -------------------------------------------------------------
# 8. Predictions
# -------------------------------------------------------------
# Probability cut-off above which a student is predicted to drop out.
DECISION_THRESHOLD = 0.5

# The sigmoid output has shape (n, 1); flatten it to (n,) for the metrics.
y_pred_prob = model.predict(X_test).ravel()
y_pred = (y_pred_prob >= DECISION_THRESHOLD).astype(int)

# -------------------------------------------------------------
# 9. Confusion matrix
# -------------------------------------------------------------
cm = confusion_matrix(y_test, y_pred)
print("\n🔵 MATRIZ DE CONFUSIÓN:")
print(cm)

# -------------------------------------------------------------
# 10. Classification report (precision / recall / F1)
# -------------------------------------------------------------
print("\n🔵 CLASSIFICATION REPORT:")
print(classification_report(y_test, y_pred))



In [None]:
# -------------------------------------------------------------
# 11. ROC AUC
# -------------------------------------------------------------
# Computed from the predicted probabilities, not the thresholded labels,
# so it is independent of the decision cut-off.
roc_auc = roc_auc_score(y_test, y_pred_prob)
print(f"\n🔵 ROC AUC: {roc_auc:.4f}")

# -------------------------------------------------------------
# 12. ROC curve
# -------------------------------------------------------------
fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob)

# Explicit figure/axes objects instead of the implicit pyplot state.
fig, ax = plt.subplots(figsize=(7, 5))
ax.plot(fpr, tpr, linewidth=2, label=f"AUC = {roc_auc:.3f}")
# Diagonal reference line from (0, 0) to (1, 1).
ax.plot([0, 1], [0, 1], linestyle="--")
ax.set_title("Curva ROC - Modelo GRU")
ax.set_xlabel("False Positive Rate")
ax.set_ylabel("True Positive Rate")
ax.legend()
ax.grid(True)
plt.show()