# In [None]:  (Jupyter cell marker left over from the notebook export)
# Forecasting Template (Serie de Tiempo)
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import TimeSeriesSplit
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.seasonal import STL
from statsmodels.tsa.stattools import adfuller, acf, pacf

# -----------------------------
# 1. Data loading
# -----------------------------
# Read the CSV with the 'fecha' column parsed as dates and used as the index,
# then sort the rows by that index, all in one chained expression.
df = pd.read_csv(
    "ruta_a_tu_dataset.csv",
    index_col='fecha',
    parse_dates=['fecha'],
).sort_index()

# -----------------------------
# 2. Cleaning and preprocessing
# -----------------------------
# Report the number of missing values per column. A bare expression is only
# rendered when it is the last statement of a notebook cell, so print it.
print(df.isnull().sum())

# Impute nulls by carrying the last valid observation forward.
# `fillna(method='ffill')` is deprecated in recent pandas releases; `ffill()`
# is the supported equivalent.
# NOTE: values missing at the very start of the series have nothing to carry
# forward and therefore remain NaN.
df = df.ffill()

# Outlier detection (z-score, rolling statistics, etc.)
# A more robust, domain-specific detection step can be added here.

# -----------------------------
# 3. EDA
# -----------------------------
# Plot the columns of the frame against the date index.
df.plot(figsize=(12, 4), title="Serie de tiempo")
plt.show()

# STL decomposition of 'valor'.
# NOTE(review): period=12 is hard-coded — presumably monthly data with a
# yearly cycle; adjust it to the seasonality of your own series.
stl = STL(df['valor'], period=12)
res = stl.fit()
res.plot()
plt.show()

# ACF and PACF plots (plot_acf / plot_pacf are imported with the other
# dependencies at the top of the file rather than mid-script).
plot_acf(df['valor'])
plot_pacf(df['valor'])
plt.show()

# Augmented Dickey-Fuller test on 'valor': print the test statistic and its
# p-value (the first two entries of the returned tuple).
result = adfuller(df['valor'])
print(f"ADF Statistic: {result[0]}")
print(f"p-value: {result[1]}")

# -----------------------------
# 4. Feature engineering
# -----------------------------
# Calendar features derived from the datetime index.
df['mes'] = df.index.month
df['dia_semana'] = df.index.dayofweek

# Autoregressive features, built only from strictly past observations.
# The rolling mean is computed over the already-shifted series ('lag1'), i.e.
# over the three values preceding each row. Rolling directly over 'valor'
# would include the current observation and leak the target into the feature.
df['lag1'] = df['valor'].shift(1)
df['rolling_mean_3'] = df['lag1'].rolling(3).mean()

# -----------------------------
# 5. Transformaciones (si aplica)
# -----------------------------
# df['log_valor'] = np.log(df['valor'])
# df['diferencia'] = df['valor'].diff()

# -----------------------------
# 6. Dataset split
# -----------------------------
# Positional hold-out without shuffling: the first 80% of the rows form the
# training set and the remaining rows form the test set.
split_idx = int(0.8 * len(df))
df_train, df_test = df.iloc[:split_idx], df.iloc[split_idx:]

# -----------------------------
# 7. Baseline model with Prophet
# -----------------------------
# Prophet expects the timestamp column to be named 'ds' and the target 'y'.
df_prophet = df_train.reset_index()[['fecha', 'valor']].rename(columns={'fecha': 'ds', 'valor': 'y'})
model = Prophet()
model.fit(df_prophet)

# Predict on the exact hold-out timestamps. make_future_dataframe() generates
# daily steps by default, so for any other spacing (weekly, monthly,
# irregular) its dates would not line up with df_test and the evaluation
# would silently find no matching hold-out rows.
future = pd.DataFrame({'ds': df_test.index})
forecast = model.predict(future)

# -----------------------------
# 8. Evaluation
# -----------------------------
# Score only the hold-out rows. Joining against the full frame would also
# match the training period whenever the forecast contains history, mixing
# in-sample fit with out-of-sample error.
forecast_df = forecast.set_index('ds')[['yhat']].join(df_test[['valor']], how='inner')
forecast_df = forecast_df.dropna()

# Fail with a clear message instead of an obscure metrics error when the
# forecast dates and the test dates do not overlap at all.
if forecast_df.empty:
    raise ValueError(
        "No forecast timestamps match the test set; check that the forecast "
        "frequency matches the frequency of the data."
    )

mae = mean_absolute_error(forecast_df['valor'], forecast_df['yhat'])
rmse = np.sqrt(mean_squared_error(forecast_df['valor'], forecast_df['yhat']))
print(f"MAE: {mae:.2f}, RMSE: {rmse:.2f}")

# Visual comparison of actual values and predictions over the scored rows.
plt.figure(figsize=(12, 5))
plt.plot(forecast_df['valor'], label='Real')
plt.plot(forecast_df['yhat'], label='Forecast')
plt.legend()
plt.title('Forecast vs Real')
plt.show()

# -----------------------------
# 9. Siguiente paso
# -----------------------------
# Puedes ahora probar otros modelos: ARIMA, XGBoost, LSTM, etc.
# Y realizar tuning con validación cruzada temporal.
