## Librerías

In [1]:
library(data.table)
library(ggplot2)
library(forecast)

“unknown timezone 'zone/tz/2018c.1.0/zoneinfo/America/Mexico_City'”

## Lectura de datos

In [2]:
# Load the input table and parse the date column (day/month/year text).
dt <- fread("variables_final.csv")
dt[, Fecha := as.Date(Fecha, format = "%d/%m/%Y")]

# Wrap the INPC column as a time series with 24 observations per year,
# starting at the first period of 1997.
inpc_values <- dt[["INPC"]]
ts_inpc <- ts(inpc_values, start = c(1997, 1), frequency = 24)

## Conjuntos de entrenamiento y prueba

In [3]:
# Training window: first period of 2001 through period 24 of 2014.
train <- window(ts_inpc, start = c(2001, 1), end = c(2014, 24))
# Test window: everything from the first period of 2015 onwards.
test <- window(ts_inpc, start = c(2015, 1))

## Modelo

In [4]:
# Fit ARIMA(2,1,2) with a seasonal AR(1) term on the training window.
# No mean term is included; a drift term is estimated instead.
m <- Arima(
  train,
  order = c(2, 1, 2),
  seasonal = c(1, 0, 0),
  include.mean = FALSE,
  include.drift = TRUE
)

# Series: train 
# ARIMA(2,1,2)(1,0,0)[24] with drift 

# Coefficients:
#          ar1      ar2      ma1     ma2    sar1   drift
#       1.3901  -0.7509  -1.1911  0.6909  0.6928  0.1529
# s.e.  0.0964   0.0968   0.1048  0.1095  0.0393  0.0323

# sigma^2 estimated as 0.02318:  log likelihood=150.38
# AIC=-286.76   AICc=-286.41   BIC=-260.06

## Predicciones

In [5]:
# Rolling one-step-ahead evaluation over the test window.
# Each test observation is forecast by a model re-estimated on all data from
# 2001 up to the observation immediately before it.
n_test <- length(test)
stopifnot(n_test >= 1)

# Forecast origins for test observations 2..n_test: the (year, period) of each
# test point except the last. Derived from the time index of `test` instead of
# a hard-coded table, so the loop stays valid if the series length changes.
origin_idx <- seq_len(n_test - 1)
ends <- data.table(
  # ts.eps guards floor() against floating-point error in the time index.
  y = floor(as.numeric(time(test))[origin_idx] + getOption("ts.eps")),
  q = as.integer(cycle(test))[origin_idx]
)

# Preallocate as numeric so the column type never has to be coerced.
one_step <- rep(NA_real_, n_test)

# The first test point is forecast from `m`, already fitted on `train`.
one_step[1] <- as.numeric(forecast(m, h = 1)$mean)

for (i in origin_idx) {
  mod <- Arima(window(ts_inpc, start = 2001,
                      end = c(ends$y[i], ends$q[i])),
               order = c(2, 1, 2), seasonal = c(1, 0, 0),
               include.mean = FALSE, include.drift = TRUE)
  one_step[i + 1] <- as.numeric(forecast(mod, h = 1)$mean)
}

# Pair forecasts with the last n_test rows of `dt`.
# NOTE(review): assumes those rows line up with `test` -- confirm.
preds <- tail(dt, n_test)[, .(Fecha, INPC)]
preds[, pred := one_step]

# RMSE over the test window, leaving out the 2017-01-17 observation.
# NOTE(review): presumably excluded as an outlier -- confirm the reason.
preds_sp <- preds[Fecha != as.Date("2017-01-17")]
test_rmse <- sqrt(mean((preds_sp$INPC - preds_sp$pred)^2))
print(test_rmse)
# 0.2288894

[1] 0.2288894
