In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.impute import KNNImputer
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.api import VAR
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Load the indicator sample from CSV and index it by date.
df = pd.read_csv("Indicadores_Muestra.csv")
df['Fecha'] = pd.to_datetime(df['Fecha'])
# Later cells difference the series, split them by position and slice them by
# date; all of that assumes chronological order, so sort explicitly instead of
# trusting the row order of the file.
df = df.set_index('Fecha').sort_index()

# -----------------------------
# 3. Missing-value imputation
# -----------------------------

# KNN is used as an approximation to the Mariano and Murasawa approach.
imputer = KNNImputer(n_neighbors=5)
# fit_transform returns a bare array, so the column labels and the date index
# are re-attached to keep the imputed frame aligned with `df`.
df_imputed = pd.DataFrame(imputer.fit_transform(df), columns=df.columns, index=df.index)


In [None]:

# -----------------------------
# 4. Stationarity test and transformation
# -----------------------------
def _to_stationary(values):
    """Apply the ADF-based rule to one series and return ``(series, label)``.

    The series is returned unchanged when the ADF p-value is below 0.05.
    Otherwise it is differenced once: on the log scale when every value is
    strictly positive, on the raw scale when it is not.
    """
    p_value = adfuller(values.dropna())[1]
    if p_value < 0.05:
        return values, "Estacionaria"
    if (values > 0).all():
        return np.log(values).diff().dropna(), "No estacionaria - log-diff aplicada"
    return values.diff().dropna(), "No estacionaria - diff aplicada"


stationary_data = pd.DataFrame(index=df_imputed.index)
adf_results = {}

for name in df_imputed.columns:
    stationary_data[name], adf_results[name] = _to_stationary(df_imputed[name])

# Differenced columns have no value on their first date; dropping incomplete
# rows leaves every column on the same set of dates.
stationary_data = stationary_data.dropna()

# -----------------------------
# Plot and summary statistics of the stationary series
# -----------------------------

# Line chart: one trace per transformed series on a shared axis.
fig, ax = plt.subplots(figsize=(18, 8))
for name in stationary_data.columns:
    ax.plot(stationary_data.index, stationary_data[name], label=name)
ax.set_title("Series Estacionarias Transformadas")
ax.set_xlabel("Fecha")
ax.set_ylabel("Valor")
# The legend is anchored outside the axes, to the right of the plot area.
ax.legend(loc="upper left", bbox_to_anchor=(1, 1))
ax.grid(True)
fig.tight_layout()
plt.show()

# Basic statistics: pick a subset of describe() and relabel it for display.
summary_labels = {
    'mean': 'Media',
    '50%': 'Mediana',
    'std': 'Desv. típica',
    'min': 'Mínimo',
    'max': 'Máximo',
}
described = stationary_data.describe().T
stats_summary = described[list(summary_labels)].rename(columns=summary_labels)

print("\nEstadísticos básicos de las series estacionarias:\n")
print(stats_summary)





In [None]:

# -----------------------------
# 5. Modelling: ARIMA, VAR, SNARIMAX
# -----------------------------

# Positional split: the first 80% of the rows train the models and the
# remaining rows are held out for evaluation.
train_size = int(0.8 * len(stationary_data))
train = stationary_data.iloc[:train_size]
test = stationary_data.iloc[train_size:]

# ARIMA(1, 0, 1) on PIB, forecast over the whole test horizon.
arima_model = ARIMA(train['PIB'], order=(1, 0, 1))
arima_fit = arima_model.fit()
arima_forecast = arima_fit.forecast(steps=len(test))

# Score the forecast against the held-out PIB values.
arima_mse, arima_mae = (
    metric(test['PIB'], arima_forecast)
    for metric in (mean_squared_error, mean_absolute_error)
)

print("\nARIMA resumen:\n", arima_fit.summary())
print(f"ARIMA MSE: {arima_mse:.4f}, MAE: {arima_mae:.4f}")

# -----------------------------
# VAR on every variable except Consumer_Confidence
# -----------------------------
var_columns = [col for col in train.columns if col != 'Consumer_Confidence']
var_model = VAR(train[var_columns])
# Lag order is chosen by AIC among candidates up to 5 lags.
var_results = var_model.fit(maxlags=5, ic='aic')
if var_results.k_ar == 0:
    # The information criterion can prefer lag order 0. In that case
    # `values[-0:]` would pass the entire training sample as the seed and the
    # model has no autoregressive terms to forecast from, so fall back to the
    # smallest dynamic specification.
    var_results = var_model.fit(1)

# The forecast is seeded with the last k_ar training observations.
var_seed = train[var_columns].values[-var_results.k_ar:]
var_forecast = var_results.forecast(var_seed, steps=len(test))
var_forecast_df = pd.DataFrame(var_forecast, index=test.index, columns=var_columns)
var_mse = mean_squared_error(test['PIB'], var_forecast_df['PIB'])
var_mae = mean_absolute_error(test['PIB'], var_forecast_df['PIB'])

print("\nVAR resumen:\n", var_results.summary())
print(f"VAR MSE: {var_mse:.4f}, MAE: {var_mae:.4f}")

# -----------------------------
# SNARIMAX on PIB with exogenous variable: Consumer_Confidence
# -----------------------------
exog_var = 'Consumer_Confidence'
# The regressor is passed as a one-column frame for both fitting and forecasting.
train_exog = train[[exog_var]]
test_exog = test[[exog_var]]

# Seasonal period is 12 observations.
# NOTE(review): presumably monthly data — confirm the sampling frequency.
sarimax_spec = {'order': (1, 0, 1), 'seasonal_order': (1, 0, 1, 12)}
snarimax_model = SARIMAX(train['PIB'], exog=train_exog, **sarimax_spec)
snarimax_fit = snarimax_model.fit(disp=False)

# Forecasting uses the observed test-period values of the regressor.
snarimax_forecast = snarimax_fit.forecast(steps=len(test), exog=test_exog)
snarimax_mse, snarimax_mae = (
    metric(test['PIB'], snarimax_forecast)
    for metric in (mean_squared_error, mean_absolute_error)
)

print("\nSNARIMAX resumen:\n", snarimax_fit.summary())
print(f"SNARIMAX MSE: {snarimax_mse:.4f}, MAE: {snarimax_mae:.4f}")

In [None]:
# -----------------------------
# 5. Modelling: ARIMA, VAR, SNARIMAX and comparison
# -----------------------------
# NOTE(review): this cell repeats the split and the ARIMA fit of the previous
# modelling cell and rebinds var_model / var_results / var_forecast /
# var_forecast_df to a VAR fitted on all columns. No SNARIMAX fit or model
# comparison is present in this cell despite the heading.

# Positional split: first 80% of the rows for training, the rest for testing.
train_size = int(len(stationary_data) * 0.8)
train, test = stationary_data.iloc[:train_size], stationary_data.iloc[train_size:]

# ARIMA(1, 0, 1) on PIB, scored on the held-out rows.
arima_model = ARIMA(train['PIB'], order=(1,0,1))
arima_fit = arima_model.fit()
arima_forecast = arima_fit.forecast(steps=len(test))
arima_mse = mean_squared_error(test['PIB'], arima_forecast)
arima_mae = mean_absolute_error(test['PIB'], arima_forecast)
print("\nARIMA resumen:\n", arima_fit.summary())
print(f"ARIMA MSE: {arima_mse:.4f}, MAE: {arima_mae:.4f}")

# VAR on all variables, lag order chosen by AIC among candidates up to 5 lags.
var_model = VAR(train)
var_results = var_model.fit(maxlags=5, ic='aic')
if var_results.k_ar == 0:
    # The information criterion can prefer lag order 0. In that case
    # `values[-0:]` would pass the entire training sample as the seed and the
    # model has no autoregressive terms to forecast from, so fall back to the
    # smallest dynamic specification.
    var_results = var_model.fit(1)
# The forecast is seeded with the last k_ar training observations.
var_forecast = var_results.forecast(train.values[-var_results.k_ar:], steps=len(test))
var_forecast_df = pd.DataFrame(var_forecast, index=test.index, columns=train.columns)


In [None]:
# -----------------------------
# 4. VAR comparison before and after 2008
# -----------------------------
cutoff_date = '2008-01-01'
# Label slicing is inclusive at both ends, so `.loc[:cutoff_date]` and
# `.loc[cutoff_date:]` would both contain an observation dated exactly on the
# cutoff. Boolean masks give two disjoint sub-samples instead.
cutoff_ts = pd.Timestamp(cutoff_date)
pre_2008 = stationary_data.loc[stationary_data.index < cutoff_ts]
post_2008 = stationary_data.loc[stationary_data.index >= cutoff_ts]

# Fit one VAR per period, lag order chosen by AIC among candidates up to 5 lags.
var_pre = VAR(pre_2008).fit(maxlags=5, ic='aic')
var_post = VAR(post_2008).fit(maxlags=5, ic='aic')

print("\nResumen VAR antes de 2008:")
print(var_pre.summary())
print("\nResumen VAR después de 2008:")
print(var_post.summary())

# Compare PIB coefficients. DataFrame.filter works on column labels by
# default, so this keeps the parameter columns whose name contains 'PIB'.
coef_pre = var_pre.params.filter(like='PIB')
coef_post = var_post.params.filter(like='PIB')

print("\nCoeficientes relacionados con PIB - Antes de 2008:")
print(coef_pre)
print("\nCoeficientes relacionados con PIB - Después de 2008:")
print(coef_post)

# Plot the PIB column of each parameter table. It is selected by name because
# the first matching column is not guaranteed to be 'PIB' itself.
plt.figure(figsize=(12, 6))
plt.plot(coef_pre.index, coef_pre['PIB'], label='Pre-2008')
plt.plot(coef_post.index, coef_post['PIB'], label='Post-2008')
plt.title('Comparación de coeficientes del PIB (primeras columnas)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()


In [None]:
# -----------------------------
# 5. Model fit metrics
# -----------------------------
# Number of observations held out at the end of each period.
FORECAST_STEPS = 5


def _holdout_var_forecast(sample, steps, maxlags=5):
    """Forecast the last `steps` rows of `sample` with a VAR fitted on the rest.

    Forecasting from the end of the full sample and scoring against its last
    rows would compare predictions for periods after the sample with
    observations from inside it. Holding the rows out keeps forecasts and
    actuals on the same dates and makes the error out-of-sample.

    Parameters
    ----------
    sample : pandas.DataFrame
        Rows are observations in order; must have more than `steps` rows.
    steps : int
        Number of trailing rows to hold out and forecast.
    maxlags : int
        Largest lag order offered to the AIC selection.

    Returns
    -------
    tuple
        ``(forecast, holdout)``: the forecast array with one row per step and
        the held-out rows of `sample`.
    """
    fit_sample, holdout = sample.iloc[:-steps], sample.iloc[-steps:]
    model = VAR(fit_sample)
    fitted = model.fit(maxlags=maxlags, ic='aic')
    if fitted.k_ar == 0:
        # Lag order 0 leaves nothing to forecast from, and `values[-0:]`
        # would select the whole sample; fall back to one lag.
        fitted = model.fit(1)
    forecast = fitted.forecast(fit_sample.values[-fitted.k_ar:], steps=steps)
    return forecast, holdout


# Forecast and error for both periods
forecast_pre, holdout_pre = _holdout_var_forecast(pre_2008, FORECAST_STEPS)
forecast_post, holdout_post = _holdout_var_forecast(post_2008, FORECAST_STEPS)

true_pre = holdout_pre['PIB']
true_post = holdout_post['PIB']

# Select PIB by name so the prediction matches the series used for the
# actual values regardless of column order.
pred_pre = pd.DataFrame(forecast_pre, index=holdout_pre.index, columns=pre_2008.columns)['PIB']
pred_post = pd.DataFrame(forecast_post, index=holdout_post.index, columns=post_2008.columns)['PIB']

mse_pre = mean_squared_error(true_pre, pred_pre)
mse_post = mean_squared_error(true_post, pred_post)
mae_pre = mean_absolute_error(true_pre, pred_pre)
mae_post = mean_absolute_error(true_post, pred_post)

print("\nErrores en predicción VAR (últimos 5 puntos):")
print(f"Pre-2008 → MSE: {mse_pre:.4f}, MAE: {mae_pre:.4f}")
print(f"Post-2008 → MSE: {mse_post:.4f}, MAE: {mae_post:.4f}")

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.api import VAR
from sklearn.metrics import mean_squared_error, mean_absolute_error

cutoff = '2008-01-01'
# Label slicing is inclusive at both ends, so `.loc[:cutoff]` and
# `.loc[cutoff:]` would both contain an observation dated exactly on the
# cutoff. Boolean masks give two disjoint sub-samples instead.
cutoff_ts = pd.Timestamp(cutoff)
pre_data = stationary_data.loc[stationary_data.index < cutoff_ts]
post_data = stationary_data.loc[stationary_data.index >= cutoff_ts]

# VAR fitted on each full sub-sample; these fits feed the coefficient
# comparison further down.
model_pre = VAR(pre_data).fit(maxlags=5, ic='aic')
model_post = VAR(post_data).fit(maxlags=5, ic='aic')


def _forecast_last_rows(sample, steps=5, maxlags=5):
    """Forecast the last `steps` rows of `sample` with a VAR fitted on the rest.

    Forecasting from the end of the full sample and scoring against its last
    rows would compare predictions for periods after the sample with
    observations from inside it. Holding the rows out keeps forecasts and
    actuals on the same dates and makes the error out-of-sample.

    Returns ``(forecast, holdout)``: the forecast array with one row per step
    and the held-out rows of `sample`. `sample` must have more than `steps`
    rows.
    """
    fit_sample, holdout = sample.iloc[:-steps], sample.iloc[-steps:]
    model = VAR(fit_sample)
    fitted = model.fit(maxlags=maxlags, ic='aic')
    if fitted.k_ar == 0:
        # Lag order 0 leaves nothing to forecast from, and `values[-0:]`
        # would select the whole sample; fall back to one lag.
        fitted = model.fit(1)
    forecast = fitted.forecast(fit_sample.values[-fitted.k_ar:], steps=steps)
    return forecast, holdout


# Comparative forecast over the last 5 observations of each period
forecast_pre, holdout_pre = _forecast_last_rows(pre_data, steps=5)
forecast_post, holdout_post = _forecast_last_rows(post_data, steps=5)

true_pre = holdout_pre['PIB']
true_post = holdout_post['PIB']

# Select PIB by name so the prediction matches the series used for the
# actual values regardless of column order.
pred_pre = pd.DataFrame(forecast_pre, index=holdout_pre.index, columns=pre_data.columns)['PIB']
pred_post = pd.DataFrame(forecast_post, index=holdout_post.index, columns=post_data.columns)['PIB']

print("Errores VAR Pre-2008:", mean_squared_error(true_pre, pred_pre), mean_absolute_error(true_pre, pred_pre))
print("Errores VAR Post-2008:", mean_squared_error(true_post, pred_post), mean_absolute_error(true_post, pred_post))

# Visual comparison of coefficients. DataFrame.filter works on column labels
# by default, so this keeps the parameter columns whose name contains 'PIB'.
coef_pre = model_pre.params.filter(like='PIB')
coef_post = model_post.params.filter(like='PIB')

# The PIB column is selected by name because the first matching column is not
# guaranteed to be 'PIB' itself.
plt.figure(figsize=(14, 6))
plt.plot(coef_pre.index, coef_pre['PIB'], marker='o', label='Pre-2008')
plt.plot(coef_post.index, coef_post['PIB'], marker='x', label='Post-2008')
plt.xticks(rotation=90)
plt.title("Comparación de coeficientes del PIB (VAR)")
plt.legend()
plt.tight_layout()
plt.grid()
plt.savefig("comparacion_coeficientes_pib_VAR.png", dpi=300)
plt.show()
