# Librer√≠as

In [1]:
from pathlib import Path
import json
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, mean_squared_error
from keras import metrics
from keras.callbacks import EarlyStopping, ModelCheckpoint
import os
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization, LeakyReLU
import plotly.graph_objects as go
import datetime as dt

# Lectura de datos

In [2]:

ruta_datos = Path("./datos/hist√≥ricos/")  
archivos = sorted(ruta_datos.glob("meteorolog√≠a_*.json"))

# === FUNCI√ìN PARA PROCESAR UN ARCHIVO ===
def procesar_archivo_estacion_dinamico(ruta_json, estacion_objetivo="FAC"):
    """
    Lee un archivo JSON y devuelve los datos de una estaci√≥n espec√≠fica.
    Solo crea columnas para los par√°metros que tienen al menos un valor.
    """
    with open(ruta_json, "r", encoding="utf-8") as f:
        data = json.load(f)
    registros = data["pollutionMeasurements"]["date"]

    filas = []
    for fecha, params in registros.items():
        for param, estaciones in params.items():
            # Solo procesamos la estaci√≥n objetivo
            if estacion_objetivo in estaciones:
                valor = estaciones[estacion_objetivo]
                if valor != "":
                    filas.append({
                        "fecha": fecha,
                        "estacion": estacion_objetivo,
                        "parametro": param,
                        "valor": float(valor)
                    })
    df = pd.DataFrame(filas)

    if df.empty:
        print(f"‚ö†Ô∏è No se encontraron datos para {estacion_objetivo} en {ruta_json.name}")
        return pd.DataFrame()

    # Pivotar solo los par√°metros que realmente existen
    df_wide = df.pivot(index=["fecha", "estacion"],
                       columns="parametro", values="valor").reset_index()

    return df_wide


# === PROCESAR TODOS LOS A√ëOS ===
dfs = []
for archivo in archivos:
    print(f"Procesando {archivo.name} ...")
    df_anual = procesar_archivo_estacion_dinamico(archivo, estacion_objetivo="FAC")
    if not df_anual.empty:
        dfs.append(df_anual)

if not dfs:
    raise ValueError("No se generaron DataFrames. Verifica los archivos JSON o la estaci√≥n seleccionada.")

df_total = pd.concat(dfs, ignore_index=True)

# === CORRECCI√ìN DE FECHAS (24:00 ‚Üí 00:00 del d√≠a siguiente) ===
df_fechas = df_total[["fecha"]].copy()
df_fechas["fecha_str"] = df_fechas["fecha"].astype(str)
df_fechas["fecha_dt"] = pd.to_datetime(df_fechas["fecha_str"], errors="coerce")

mask_nulos = df_fechas["fecha_dt"].isna()
if mask_nulos.any():
    nulos = df_fechas.loc[mask_nulos, "fecha_str"]
    corr = (
        pd.to_datetime(
            nulos.str.replace("24:00", "00:00", regex=False), errors="coerce"
        ) + pd.Timedelta(days=1)
    )
    df_fechas.loc[mask_nulos, "fecha_dt"] = corr

df_total["fecha"] = df_fechas["fecha_dt"]
df_total = df_total.sort_values("fecha").reset_index(drop=True)

print(f"‚úÖ Datos combinados para estaci√≥n FAC: {len(df_total)} registros")
print(f"üìä Par√°metros detectados: {list(df_total.columns[2:])}")

df_total.head()


Procesando meteorolog√≠a_2013.json ...
Procesando meteorolog√≠a_2014.json ...
Procesando meteorolog√≠a_2015.json ...
Procesando meteorolog√≠a_2016.json ...
Procesando meteorolog√≠a_2017.json ...
Procesando meteorolog√≠a_2018.json ...
Procesando meteorolog√≠a_2019.json ...
Procesando meteorolog√≠a_2020.json ...
Procesando meteorolog√≠a_2021.json ...
Procesando meteorolog√≠a_2022.json ...
Procesando meteorolog√≠a_2023.json ...
‚úÖ Datos combinados para estaci√≥n FAC: 87678 registros
üìä Par√°metros detectados: ['RH', 'TMP', 'WDR', 'WSP']


parametro,fecha,estacion,RH,TMP,WDR,WSP
0,2013-01-01 01:00:00,FAC,,,234.0,0.7
1,2013-01-01 02:00:00,FAC,,,201.0,0.4
2,2013-01-01 03:00:00,FAC,,,214.0,0.8
3,2013-01-01 04:00:00,FAC,,,226.0,1.0
4,2013-01-01 05:00:00,FAC,,,214.0,1.0


In [3]:
pd.set_option("display.max_rows", None)     
pd.set_option("display.max_columns", None)  
pd.set_option("display.width", None)     

In [4]:
datos=pd.read_excel('./meteorologia_historica_completa.xlsx')

In [5]:
datos_c=datos.copy()
datos_c["fecha_a√±o"]=datos_c["fecha"].dt.year
datos_c["fecha_mes"]=datos_c["fecha"].dt.month

In [6]:
datos_c[(datos_c["fecha_a√±o"]==2013)&(datos_c["TMP"].isna())].tail()

Unnamed: 0,fecha,estacion,RH,TMP,WDR,WSP,fecha_a√±o,fecha_mes
6910,2013-10-22 20:00:00,FAC,,,348.0,2.8,2013,10
6911,2013-10-22 21:00:00,FAC,,,343.0,2.7,2013,10
6912,2013-10-22 22:00:00,FAC,,,4.0,2.7,2013,10
6913,2013-10-22 23:00:00,FAC,,,326.0,1.9,2013,10
6914,2013-10-23 00:00:00,FAC,,,313.0,1.3,2013,10


In [7]:
datos_c[(datos_c["TMP"].isna())][["fecha_a√±o","fecha_mes"]].value_counts().sort_index()

fecha_a√±o  fecha_mes
2013       1            741
           2            669
           3            742
           4            187
           5             48
           10           315
2014       3            131
           6             24
           7              1
           11           166
2018       1              1
           5            713
           6            708
           7            743
           8            544
           9             23
           10           121
2021       3              1
           7             12
2022       3            696
           4            717
           5            743
           6            710
           7            706
           8            744
           9            715
           10           336
           11             1
Name: count, dtype: int64

In [8]:
datos_c[~(datos_c["TMP"].isna())][["fecha_a√±o","fecha_mes"]].value_counts().sort_index()

fecha_a√±o  fecha_mes
2013       4            398
           5            694
           6            720
           7            743
           8            744
           9            707
           10           422
           11           719
           12           733
2014       1            744
           2            669
           3            613
           4            664
           5            742
           6            696
           7            719
           8            729
           9            720
           10           744
           11           552
           12           744
2015       1            740
           2            672
           3            336
           4            720
           5            658
           6            720
           7            656
           8            744
           9            682
           10           744
           11           719
           12           731
2016       1            744
           2            69

In [9]:
datos["fecha_dia"] = datos["fecha"].dt.date

# Agrupaci√≥n de datos e imputaci√≥n por interpolaci√≥n

In [10]:

df_diario = (
    datos.groupby("fecha_dia")
    .agg({
        "RH": ["mean", "min", "max", "std"],
        "TMP": ["mean", "count"],
        "WDR": ["mean", "min", "max"],
        "WSP": ["mean", "min", "max"],
    })
)

df_diario.columns = ["_".join(col).strip() for col in df_diario.columns.values]
df_diario = df_diario.reset_index()

df_diario["TMP_bueno"] = df_diario["TMP_count"] >= 16
df_diario["TMP_regular"] = (df_diario["TMP_count"] >= 8) & (df_diario["TMP_count"] < 16)


# Interpolamos solo huecos peque√±os (2 d√≠as seguidos como m√°ximo)
df_diario["TMP_mean_interp"] = df_diario["TMP_mean"].interpolate(
    method="linear", limit=2, limit_direction="both"
)

df_diario = df_diario[df_diario["TMP_count"] >= 8].copy()

for col in df_diario.columns:
    if col.startswith(("RH_", "WDR_", "WSP_")):
        df_diario[col] = df_diario[col].interpolate(
            method="linear", limit=2, limit_direction="both"
        )

df_diario.head()


Unnamed: 0,fecha_dia,RH_mean,RH_min,RH_max,RH_std,TMP_mean,TMP_count,WDR_mean,WDR_min,WDR_max,WSP_mean,WSP_min,WSP_max,TMP_bueno,TMP_regular,TMP_mean_interp
98,2013-04-14,16.521739,4.0,30.0,10.02211,21.008696,23,164.73913,1.0,360.0,2.069565,0.6,3.3,True,False,21.008696
99,2013-04-15,21.041667,6.0,39.0,11.086534,20.158333,24,160.375,44.0,223.0,1.866667,0.8,3.3,True,False,20.158333
100,2013-04-16,23.666667,8.0,42.0,11.408108,21.3875,24,165.0,52.0,267.0,1.7125,0.4,3.0,True,False,21.3875
101,2013-04-17,25.75,9.0,49.0,14.317062,21.833333,24,166.791667,6.0,328.0,1.525,0.6,2.7,True,False,21.833333
102,2013-04-18,22.583333,9.0,42.0,11.992449,22.154167,24,155.583333,5.0,259.0,1.55,0.4,2.8,True,False,22.154167


In [11]:
col_drop=["TMP_mean","TMP_count", "TMP_bueno", "TMP_regular"]
df_diario_clean=df_diario.drop(columns=col_drop)

Se busca representar la componente temporal de los datos para que el modelo pueda reconocer patrones estacionales y peri√≥dicos.
Dado que los meses y los d√≠as del mes tienen una naturaleza c√≠clica, se codifican mediante funciones seno y coseno.
Esta codificaci√≥n convierte las fechas en variables continuas que preservan su relaci√≥n circular, facilitando que la red neuronal interprete la secuencia temporal correctamente.
Estas variables no se escalan despu√©s pues perder√≠an su interpretabilidad

In [12]:

df_diario_clean["fecha_dia"] = pd.to_datetime(df_diario_clean["fecha_dia"])


df_diario_clean["mes"] = df_diario_clean["fecha_dia"].dt.month


df_diario_clean["mes_sin"] = np.sin(2 * np.pi * df_diario_clean["mes"] / 12)
df_diario_clean["mes_cos"] = np.cos(2 * np.pi * df_diario_clean["mes"] / 12)

df_diario_clean["dia"] = df_diario_clean["fecha_dia"].dt.day
df_diario_clean["dias_en_mes"] = df_diario_clean["fecha_dia"].dt.days_in_month
df_diario_clean["dia_rel"] = df_diario_clean["dia"] / df_diario_clean["dias_en_mes"]

df_diario_clean["dia_sin"] = np.sin(2 * np.pi * df_diario_clean["dia_rel"])
df_diario_clean["dia_cos"] = np.cos(2 * np.pi * df_diario_clean["dia_rel"])

df_diario_clean.head()


Unnamed: 0,fecha_dia,RH_mean,RH_min,RH_max,RH_std,WDR_mean,WDR_min,WDR_max,WSP_mean,WSP_min,WSP_max,TMP_mean_interp,mes,mes_sin,mes_cos,dia,dias_en_mes,dia_rel,dia_sin,dia_cos
98,2013-04-14,16.521739,4.0,30.0,10.02211,164.73913,1.0,360.0,2.069565,0.6,3.3,21.008696,4,0.866025,-0.5,14,30,0.466667,0.2079117,-0.978148
99,2013-04-15,21.041667,6.0,39.0,11.086534,160.375,44.0,223.0,1.866667,0.8,3.3,20.158333,4,0.866025,-0.5,15,30,0.5,1.224647e-16,-1.0
100,2013-04-16,23.666667,8.0,42.0,11.408108,165.0,52.0,267.0,1.7125,0.4,3.0,21.3875,4,0.866025,-0.5,16,30,0.533333,-0.2079117,-0.978148
101,2013-04-17,25.75,9.0,49.0,14.317062,166.791667,6.0,328.0,1.525,0.6,2.7,21.833333,4,0.866025,-0.5,17,30,0.566667,-0.4067366,-0.913545
102,2013-04-18,22.583333,9.0,42.0,11.992449,155.583333,5.0,259.0,1.55,0.4,2.8,22.154167,4,0.866025,-0.5,18,30,0.6,-0.5877853,-0.809017


In [13]:
df_dias_presentes = (
    df_diario_clean[["dia", "dia_sin", "dia_cos"]]
    .drop_duplicates()
    .sort_values("dia")
)

fig = go.Figure()

# Puntos de los d√≠as
fig.add_trace(go.Scatter(
    x=df_dias_presentes["dia_cos"],
    y=df_dias_presentes["dia_sin"],
    mode="markers+text",
    text=df_dias_presentes["dia"].astype(str),
    textposition="top center",
    marker=dict(size=8, color="deepskyblue", line=dict(width=1, color="white")),
    hovertemplate="D√≠a: %{text}<br>cos: %{x:.2f}<br>sin: %{y:.2f}<extra></extra>",
    name="D√≠as"
))

# C√≠rculo de cierre
fig.add_trace(go.Scatter(
    x=df_dias_presentes["dia_cos"].tolist() + [df_dias_presentes["dia_cos"].iloc[0]],
    y=df_dias_presentes["dia_sin"].tolist() + [df_dias_presentes["dia_sin"].iloc[0]],
    mode="lines",
    line=dict(color="gray", dash="dash"),
    name="C√≠rculo"
))

# --- Configuraci√≥n visual ---
fig.update_layout(
    title="üåÄ Codificaci√≥n c√≠clica de d√≠as del mes (d√≠a_sin / d√≠a_cos)",
    xaxis_title="dia_cos",
    yaxis_title="dia_sin",
    template="plotly_dark",
    width=1200,
    height=1500,
    xaxis=dict(scaleanchor="y", scaleratio=1),
    yaxis=dict(scaleanchor="x", scaleratio=1),
    showlegend=False
)


fig.show()

In [14]:
df_meses_presentes = (
    df_diario_clean[["mes", "mes_sin", "mes_cos"]]
    .drop_duplicates()
    .sort_values("mes")
)

fig = go.Figure()

fig.add_trace(go.Scatter(
    x=df_meses_presentes["mes_cos"],
    y=df_meses_presentes["mes_sin"],
    mode="markers+text",
    text=df_meses_presentes["mes"].astype(int).astype(str),  
    textposition="top center",
    marker=dict(size=12, color="deepskyblue", line=dict(width=1, color="white")),
    hovertemplate="Mes: %{text}<br>cos: %{x:.2f}<br>sin: %{y:.2f}<extra></extra>",
    name="Meses"
))

fig.add_trace(go.Scatter(
    x=df_meses_presentes["mes_cos"].tolist() + [df_meses_presentes["mes_cos"].iloc[0]],
    y=df_meses_presentes["mes_sin"].tolist() + [df_meses_presentes["mes_sin"].iloc[0]],
    mode="lines",
    line=dict(color="gray", dash="dash"),
    name="C√≠rculo"
))

fig.update_layout(
    title="Codificaci√≥n c√≠clica de meses (sin/cos)",
    xaxis_title="mes_cos",
    yaxis_title="mes_sin",
    template="plotly_dark",
    width=600,
    height=600,
    xaxis=dict(scaleanchor="y", scaleratio=1),
    yaxis=dict(scaleanchor="x", scaleratio=1),
    showlegend=False
)

fig.show()
df_diario_clean = df_diario_clean.drop(columns=["mes"])

In [15]:
df_modelo = df_diario_clean.dropna().reset_index(drop=True)
df_modelo1=df_modelo.copy()
df_modelo.drop(columns=["fecha_dia"], inplace=True)

In [16]:
df_modelo.head()

Unnamed: 0,RH_mean,RH_min,RH_max,RH_std,WDR_mean,WDR_min,WDR_max,WSP_mean,WSP_min,WSP_max,TMP_mean_interp,mes_sin,mes_cos,dia,dias_en_mes,dia_rel,dia_sin,dia_cos
0,16.521739,4.0,30.0,10.02211,164.73913,1.0,360.0,2.069565,0.6,3.3,21.008696,0.866025,-0.5,14,30,0.466667,0.2079117,-0.978148
1,21.041667,6.0,39.0,11.086534,160.375,44.0,223.0,1.866667,0.8,3.3,20.158333,0.866025,-0.5,15,30,0.5,1.224647e-16,-1.0
2,23.666667,8.0,42.0,11.408108,165.0,52.0,267.0,1.7125,0.4,3.0,21.3875,0.866025,-0.5,16,30,0.533333,-0.2079117,-0.978148
3,25.75,9.0,49.0,14.317062,166.791667,6.0,328.0,1.525,0.6,2.7,21.833333,0.866025,-0.5,17,30,0.566667,-0.4067366,-0.913545
4,22.583333,9.0,42.0,11.992449,155.583333,5.0,259.0,1.55,0.4,2.8,22.154167,0.866025,-0.5,18,30,0.6,-0.5877853,-0.809017


# Creaci√≥n de variable objetivo

In [17]:
df_modelo["TMP_prev"] = df_modelo["TMP_mean_interp"].shift(-1)

In [18]:
df_modelo.head()

Unnamed: 0,RH_mean,RH_min,RH_max,RH_std,WDR_mean,WDR_min,WDR_max,WSP_mean,WSP_min,WSP_max,TMP_mean_interp,mes_sin,mes_cos,dia,dias_en_mes,dia_rel,dia_sin,dia_cos,TMP_prev
0,16.521739,4.0,30.0,10.02211,164.73913,1.0,360.0,2.069565,0.6,3.3,21.008696,0.866025,-0.5,14,30,0.466667,0.2079117,-0.978148,20.158333
1,21.041667,6.0,39.0,11.086534,160.375,44.0,223.0,1.866667,0.8,3.3,20.158333,0.866025,-0.5,15,30,0.5,1.224647e-16,-1.0,21.3875
2,23.666667,8.0,42.0,11.408108,165.0,52.0,267.0,1.7125,0.4,3.0,21.3875,0.866025,-0.5,16,30,0.533333,-0.2079117,-0.978148,21.833333
3,25.75,9.0,49.0,14.317062,166.791667,6.0,328.0,1.525,0.6,2.7,21.833333,0.866025,-0.5,17,30,0.566667,-0.4067366,-0.913545,22.154167
4,22.583333,9.0,42.0,11.992449,155.583333,5.0,259.0,1.55,0.4,2.8,22.154167,0.866025,-0.5,18,30,0.6,-0.5877853,-0.809017,20.283333


In [19]:
df_modelo.shape

(3125, 19)

# Modelado

In [20]:
train_features = [x for x in df_modelo.columns if x != "TMP_prev"]

In [21]:
X = df_modelo[train_features]
y=df_modelo["TMP_prev"]

In [22]:
cols_ciclicas = ["mes_sin", "mes_cos", "dia_sin", "dia_cos"]
cols_numericas = [c for c in X.columns if c not in cols_ciclicas]

scaler = MinMaxScaler()
X_scaled = X.copy()
X_scaled[cols_numericas] = scaler.fit_transform(X[cols_numericas])

In [23]:

sc_y = MinMaxScaler()
y_scaled = sc_y.fit_transform(y.values.reshape(-1, 1))


### Creaci√≥n de las ventanas de tiempo

In [24]:

def crear_secuencias(X, y, n_steps=7):
    Xs, ys = [], []
    for i in range(n_steps, len(X)):
        Xs.append(X[i-n_steps:i])  # los √∫ltimos 7 d√≠as
        ys.append(y[i])            # d√≠a actual
    return np.array(Xs), np.array(ys)

X_seq, y_seq = crear_secuencias(X_scaled, y_scaled, n_steps=7)


In [25]:
train_size = int(len(X_seq) * 0.8)
X_train, X_test = X_seq[:train_size], X_seq[train_size:]
y_train, y_test = y_seq[:train_size], y_seq[train_size:]


In [26]:
mask_valid = ~np.isnan(y_test).ravel()
X_test = X_test[mask_valid]
y_test = y_test[mask_valid]


In [27]:
print(X_train.shape, y_train.shape)


(2494, 7, 18) (2494, 1)


In [28]:
model = Sequential()

In [29]:
model.add(LSTM(64, input_shape=X_train.shape[1:], activation="tanh"))
model.add(Dropout(0.2))


Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



In [30]:
model.add(Dense(32))
model.add(LeakyReLU(negative_slope=0.1))

In [31]:
model.add(Dense(1))

In [32]:
model.summary()

In [33]:
kmetrics = [
    metrics.RootMeanSquaredError(name='rms'),
    metrics.MeanAbsoluteError(name='mae'),
    metrics.MeanAbsolutePercentageError(name="pema")
]

In [34]:
early_stopping = EarlyStopping(monitor='val_mae', patience=20)

In [35]:
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath='models2/model_{val_mae:.3f}.keras',  
    monitor='val_mae',            
    mode='min',                   
    save_best_only=True,         
    save_weights_only=False,      
    verbose=1
)

In [36]:
model.compile(loss='mean_squared_error', optimizer='adam', metrics=kmetrics)

In [37]:
history = model.fit(X_train, y_train, epochs=100, batch_size=16, validation_data=(X_test, y_test), callbacks=[checkpoint, early_stopping])

Epoch 1/100
[1m140/156[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m‚îÅ‚îÅ‚îÅ[0m [1m0s[0m 2ms/step - loss: 0.0858 - mae: 0.2036 - pema: 1022152.5625 - rms: 0.2727
Epoch 1: val_mae improved from None to 0.07862, saving model to models2/model_0.079.keras
[1m156/156[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m2s[0m 4ms/step - loss: 0.0331 - mae: 0.1296 - pema: 379493.9375 - rms: 0.1821 - val_loss: 0.0097 - val_mae: 0.0786 - val_pema: 14.3940 - val_rms: 0.0985
Epoch 2/100
[1m142/156[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m‚îÅ‚îÅ[0m [1m0s[0m 2ms/step - loss: 0.0141 - mae: 0.0940 - pema: 33375.3516 - rms: 0.1187
Epoch 2: val_mae improved from 0.07862 to 0.06586, saving model to models2/model_0.066.keras
[1m156/156[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0137 - mae: 0.0931 - pema: 190625.3750 - rms: 0.1170 - val_

In [38]:
results = pd.DataFrame(history.history)

# Resultados

In [39]:

fig = go.Figure()


for col in results.columns:
    fig.add_trace(go.Scatter(
        y=results[col],
        mode="lines",
        name=col,
        line=dict(width=2)
    ))


fig.update_layout(
    title="üìà Resultados del Entrenamiento",
    xaxis_title="√âpocas",
    yaxis_title="Valor",
    template="plotly_dark",  
    hovermode="x unified",
    legend=dict(
        title="M√©tricas",
        bgcolor="rgba(0,0,0,0)",
        bordercolor="gray",
        borderwidth=1
    ),
    plot_bgcolor="rgba(20,20,20,1)",
    paper_bgcolor="rgba(10,10,10,1)",
    font=dict(color="white")
)

fig.show()


In [56]:
val_maes = [float(x.split("_")[1].split(".keras")[0]) for x in os.listdir("./models2/")]
best_val_mae = min(val_maes)
print("Mejor val_mae:", best_val_mae)

Mejor val_mae: 0.062


In [58]:
model = tf.keras.models.load_model(f'./models2/model_{best_val_mae}.keras')

In [59]:
y_pred = model.predict(X_test)
y_pred_inv = sc_y.inverse_transform(y_pred)
y_test_inv = sc_y.inverse_transform(y_test)


[1m20/20[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 6ms/step


In [60]:
df_pred = pd.DataFrame({
    "Real": y_test_inv.ravel(),
    "Predicho": y_pred_inv.ravel()
})
df_pred.head(20)


Unnamed: 0,Real,Predicho
0,16.083333,14.729931
1,13.873913,14.449829
2,14.7,14.411364
3,15.9875,13.928713
4,15.116667,13.613077
5,14.429167,14.25104
6,14.0625,14.520289
7,15.4375,14.083279
8,15.841667,14.376767
9,16.870833,14.908394


In [61]:
fig = go.Figure()
fig.add_trace(go.Scatter(y=y_test_inv.ravel(), mode="lines", name="Real", line=dict(color="orange")))
fig.add_trace(go.Scatter(y=y_pred_inv.ravel(), mode="lines", name="Predicho", line=dict(color="dodgerblue")))

fig.update_layout(
    title="Predicci√≥n de temperatura diaria (LSTM)",
    xaxis_title="D√≠as del conjunto de prueba",
    yaxis_title="Temperatura (¬∞C)",
    hovermode="x unified",
    template="plotly_dark"
)
fig.show()


# Predicci√≥n de los siguientes 31 d√≠as

In [62]:
ultimos_dias = X_scaled[-7:]

In [63]:
ventana = ultimos_dias.values  

predicciones = []

fecha_actual = df_modelo1["fecha_dia"].max()

for i in range(31):
    pred_scaled = model.predict(ventana.reshape(1, ventana.shape[0], ventana.shape[1]), verbose=0)
    pred_real = sc_y.inverse_transform(pred_scaled)[0, 0]
    predicciones.append(pred_real)

    fecha_actual = fecha_actual + pd.Timedelta(days=1)

    mes = fecha_actual.month
    dia = fecha_actual.day
    dias_en_mes = fecha_actual.days_in_month
    dia_rel = dia / dias_en_mes

    mes_sin = np.sin(2 * np.pi * mes / 12)
    mes_cos = np.cos(2 * np.pi * mes / 12)
    dia_sin = np.sin(2 * np.pi * dia_rel)
    dia_cos = np.cos(2 * np.pi * dia_rel)

    nuevo_dia = ventana[-1].copy()


    nuevo_dia[-5] = mes_sin
    nuevo_dia[-4] = mes_cos
    nuevo_dia[-3] = dia_sin
    nuevo_dia[-2] = dia_cos
    nuevo_dia[-1] = pred_scaled[0, 0]  

    ventana = np.vstack([ventana[1:], nuevo_dia])


In [64]:
ultima_fecha = df_modelo1["fecha_dia"].max()
fechas_pred = [ultima_fecha + dt.timedelta(days=i+1) for i in range(31)]

df_pred_futuro = pd.DataFrame({
    "fecha": fechas_pred,
    "TMP_predicho": predicciones
})

df_pred_futuro.head()

Unnamed: 0,fecha,TMP_predicho
0,2023-05-01,19.199482
1,2023-05-02,19.876341
2,2023-05-03,19.869448
3,2023-05-04,19.715916
4,2023-05-05,19.521559


In [65]:
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=df_modelo1["fecha_dia"].tail(100),
    y=df_modelo1["TMP_mean_interp"].tail(100),
    mode="lines",
    name="Hist√≥rico",
    line=dict(color="lightgray")
))
fig.add_trace(go.Scatter(
    x=df_pred_futuro["fecha"],
    y=df_pred_futuro["TMP_predicho"],
    mode="lines+markers",
    name="Predicci√≥n 31 d√≠as",
    line=dict(color="deepskyblue", width=3)
))

fig.update_layout(
    template="plotly_dark",
    title="Predicci√≥n de temperatura diaria (31 d√≠as futuros)",
    xaxis_title="Fecha",
    yaxis_title="Temperatura (¬∞C)"
)
fig.show()

In [66]:
df_mayo = df_modelo1[df_modelo1["fecha_dia"].dt.month == 5]


fig = go.Figure()


for a√±o, df_a√±o in df_mayo.groupby(df_mayo["fecha_dia"].dt.year):
    fig.add_trace(go.Scatter(
        x=df_a√±o["fecha_dia"],
        y=df_a√±o["TMP_mean_interp"],
        mode="lines",
        name=f"Mayo {a√±o}",
        line=dict(width=2)
    ))


if "fecha" in df_pred_futuro.columns:
    df_pred_mayo = df_pred_futuro[df_pred_futuro["fecha"].dt.month == 5]
    if not df_pred_mayo.empty:
        fig.add_trace(go.Scatter(
            x=df_pred_mayo["fecha"],
            y=df_pred_mayo["TMP_predicho"],
            mode="lines+markers",
            name="Predicci√≥n Mayo",
            line=dict(color="deepskyblue", width=3)
        ))


fig.update_layout(
    template="plotly_dark",
    title="Temperatura promedio diaria en Mayo (todos los a√±os)",
    xaxis_title="Fecha",
    yaxis_title="Temperatura (¬∞C)",
    legend_title="A√±o",
    hovermode="x unified",
    width=900,
    height=500
)

fig.show()

In [67]:
df_pred_futuro = df_pred_futuro.rename(columns={'TMP_predicho': 'y_hat'})
df_pred_futuro

Unnamed: 0,fecha,y_hat
0,2023-05-01,19.199482
1,2023-05-02,19.876341
2,2023-05-03,19.869448
3,2023-05-04,19.715916
4,2023-05-05,19.521559
5,2023-05-06,19.333939
6,2023-05-07,19.232456
7,2023-05-08,19.211899
8,2023-05-09,19.334324
9,2023-05-10,19.470263


In [68]:
df_pred_futuro.to_csv('./Equipo_Alfa_Buena_Onda_Maravilla_Dinamita_Escuadron_Lobo_Los_Meros_Tuetanos_Del_Caldo.csv', index=False)