In [12]:
import os

import joblib
import numpy as np
import pandas as pd
from pymongo import MongoClient

# =====================================================
# 0. CONFIGURATION
# =====================================================
# The Atlas connection string embeds the database user and password, so it
# is read from the MONGO_URI environment variable instead of being stored in
# the source. When the variable is unset the value is an empty string, which
# the connection check that follows is expected to reject.
# SECURITY: a real password used to be hard-coded on this line. It should be
# rotated in Atlas, because it is still present in earlier copies of the file.
MONGO_URI = os.environ.get("MONGO_URI", "")

# Location of the serialized model that is loaded with joblib further below.
MODEL_PATH = r"C:\Users\aitor.herran\Desktop\incendios\modelo_rf_riesgo_aemet.joblib"

# =====================================================
# 1. CHECK THE CONNECTION TO MONGO ATLAS
# =====================================================
# Build the client with a 5 s server-selection timeout and send a "ping"
# command to the admin database. Any failure is reported and the run is
# stopped through SystemExit.
try:
    client = MongoClient(MONGO_URI, serverSelectionTimeoutMS=5000)
    client.admin.command("ping")
    print("‚úÖ Conexi√≥n OK a MongoDB Atlas")
except Exception as exc:
    # Header line first, then the exception text on its own line.
    print("‚ùå Error de autenticaci√≥n o conexi√≥n:", exc, sep="\n")
    raise SystemExit(
        "Deteniendo ejecuci√≥n: la URI es incorrecta o la contrase√±a ha cambiado"
    )


# =====================================================
# 2. READ THE aemet_predicciones COLLECTION
# =====================================================
# Resolve the database and the collection by name, then load every document
# of the collection into memory (no filter is applied).
db = client.get_database("incendios_espana")
col_aemet = db.get_collection("aemet_predicciones")

docs = list(col_aemet.find())
print(f"Documentos le√≠dos: {len(docs)}")

def extraer_max(arr, campo=None):
    """Return the largest numeric value found in ``arr``.

    Args:
        arr: A list of raw values, or a list of dicts when ``campo`` is
            given. Anything that is not a non-empty list yields NaN.
        campo: Optional key to read from each dict element. It is only used
            when the first element of ``arr`` is a dict.

    Returns:
        The maximum after coercing the values with ``pd.to_numeric``
        (missing or unparseable values are ignored), or NaN when no usable
        value is left.
    """
    if not isinstance(arr, list) or not arr:
        return np.nan

    if campo and isinstance(arr[0], dict):
        # Elements that are not dicts (e.g. None placeholders) count as
        # missing instead of raising AttributeError on ``.get``.
        crudos = [x.get(campo) if isinstance(x, dict) else None for x in arr]
        serie = pd.Series(crudos, dtype=object)
    else:
        serie = pd.Series(arr)

    vals = pd.to_numeric(serie, errors="coerce")
    # Series.max skips NaN and returns NaN for an all-NaN series, without
    # the RuntimeWarning that np.nanmax emits in that case.
    return vals.max()


# =====================================================
# 3. TRANSFORM DOCUMENTS -> DATAFRAME
# =====================================================
# One row per document: province, date, temperature / humidity extremes and
# the maxima extracted from the "viento" and "probPrecipitacion" lists.
filas = []

for d in docs:
    # Keep a missing province as None: str(None) would store the literal
    # text "None" as if it were a real province name.
    provincia = d.get("provincia")
    if provincia is not None:
        provincia = str(provincia).strip()

    # Sub-documents that are absent, null or not a mapping are treated as
    # empty so the .get() lookups below cannot raise.
    temp = d.get("temperatura")
    if not isinstance(temp, dict):
        temp = {}
    hum = d.get("humedadRelativa")
    if not isinstance(hum, dict):
        hum = {}

    fila = {
        "provincia": provincia,
        "fecha": pd.to_datetime(d.get("fecha"), errors="coerce"),
        "meteo_temp_max": temp.get("maxima"),
        "meteo_temp_min": temp.get("minima"),
        "meteo_humidity_max": hum.get("maxima"),
        "meteo_humidity_min": hum.get("minima"),
        "meteo_wind_max": extraer_max(d.get("viento", []), campo="velocidad"),
        # NOTE(review): this is the maximum of the "probPrecipitacion"
        # values stored under a "_sum" name; confirm it matches the feature
        # the model was trained on.
        "meteo_precip_sum": extraer_max(d.get("probPrecipitacion", []), campo="value"),
    }
    filas.append(fila)

# Passing the column list keeps the frame well-formed when the collection
# returned no documents; without it dropna(subset=["fecha"]) raises KeyError
# on the column-less frame.
df_aemet = pd.DataFrame(
    filas,
    columns=[
        "provincia",
        "fecha",
        "meteo_temp_max",
        "meteo_temp_min",
        "meteo_humidity_max",
        "meteo_humidity_min",
        "meteo_wind_max",
        "meteo_precip_sum",
    ],
).dropna(subset=["fecha"])
print("\nüìÑ Preview DF AEMET (raw):")
print(df_aemet.head())


# =====================================================
# 4. CLEAN UP AND ENFORCE NUMERIC COLUMNS
# =====================================================
# Feature columns, in the order in which they are handed to the model.
feat_cols = [
    "meteo_temp_max",
    "meteo_temp_min",
    "meteo_precip_sum",
    "meteo_wind_max",
    "meteo_humidity_max",
    "meteo_humidity_min",
]

# Coerce every feature to a numeric dtype (unparseable values become NaN)
# and discard the rows that are left with any missing feature.
df_aemet = df_aemet.assign(
    **{col: pd.to_numeric(df_aemet[col], errors="coerce") for col in feat_cols}
).dropna(subset=feat_cols)
print(f"\nFilas v√°lidas para predicci√≥n: {len(df_aemet)}")


# =====================================================
# 5. LOAD MODEL AND PREDICT RISK PROBABILITY
# =====================================================
# SECURITY: joblib.load unpickles the file, which can execute arbitrary
# code. Only point MODEL_PATH at a model file you trust.
rf = joblib.load(MODEL_PATH)

if df_aemet.empty:
    # No valid rows survived the clean-up. Skip the model call
    # (scikit-learn estimators reject an input with zero samples; assuming
    # rf is one, TODO confirm) and add an empty probability column so the
    # report below still works.
    df_aemet["prob_riesgo_alto"] = np.nan
else:
    # Column 1 of predict_proba is taken as the "high risk" class.
    # NOTE(review): confirm the class order against rf.classes_.
    df_aemet["prob_riesgo_alto"] = rf.predict_proba(df_aemet[feat_cols])[:, 1]

df_aemet = df_aemet.sort_values(["provincia", "fecha"])

# Shows the first 20 rows in province/date order (not the 20 highest risks).
print("\nüîÆ Predicciones de riesgo (TOP 20):")
print(df_aemet[["provincia", "fecha", "prob_riesgo_alto"] + feat_cols].head(20))

# (Opcional) Exportar
# df_aemet.to_csv("predicciones_aemet_con_riesgo.csv", index=False)


‚úÖ Conexi√≥n OK a MongoDB Atlas
Documentos le√≠dos: 322

üìÑ Preview DF AEMET (raw):
  provincia      fecha  meteo_temp_max  meteo_temp_min  meteo_humidity_max  \
0     √Ålava 2025-12-10              12               8                 100   
1     √Ålava 2025-12-11              13               5                 100   
2     √Ålava 2025-12-12              12               4                  90   
3     √Ålava 2025-12-13              13               3                  85   
4     √Ålava 2025-12-14              12               4                  95   

   meteo_humidity_min  meteo_wind_max  meteo_precip_sum  
0                  85               0               100  
1                  70              10                 0  
2                  70               5                55  
3                  65              10                50  
4                  65              10                 0  

Filas v√°lidas para predicci√≥n: 322

üîÆ Predicciones de riesgo (TOP 20):
    provincia 