/
07-arima.R
96 lines (76 loc) · 2.04 KB
/
07-arima.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
library(fable)
library(feasts)
library(timetk)
library(modeltime)
library(tidymodels)
library(tsibble)
library(tidyverse)
# Load the ANAC dataset (an RDS file hosted in the curso-r/main-series GitHub
# repository) and derive the columns used by the models further down.
# Rows dated after 2018-12-31 are dropped at the end.
anac <- readr::read_rds(
  "https://github.com/curso-r/main-series/blob/main/dados/anac-sp.rds?raw=true"
) |>
  mutate(
    # Year-month index built from the ANO and MES columns.
    DATA_ym = tsibble::yearmonth(paste(ANO, MES, sep = "-")),
    # Time elapsed since 1999-12-01, expressed in 30-day units. The difftime is
    # converted to a plain number first: dividing a difftime by 30 would keep
    # the "days" unit label on a value that is no longer measured in days.
    TEMPO_DESDE_INICIO = as.numeric(
      difftime(DATA, lubridate::ymd("1999-12-01"), units = "days")
    ) / 30,
    # dplyr::lag is spelled out so the result never depends on stats::lag
    # being masked (stats::lag does not shift the values of a plain vector).
    # NOTE(review): the first row of LAG_1 is filled with 0 rather than NA;
    # confirm that a zero passenger count is the intended placeholder.
    LAG_1 = dplyr::lag(PASSAGEIROS_PAGOS, 1, default = 0),
    # One-step lag of CARGA_PAGA_KG; the first row is NA.
    CARGA_LAG = dplyr::lag(CARGA_PAGA_KG),
    # Change in CARGA_LAG relative to its value two rows earlier.
    dif_CARGA_LAG = CARGA_LAG - dplyr::lag(CARGA_LAG, 2)
  ) |>
  filter(DATA <= as.Date("2018-12-31"))
# tsibble version of `anac`, indexed by the year-month column DATA_ym; this is
# the object passed to the gg_tsdisplay() calls that follow.
anac_ts <- as_tsibble(anac, index = DATA_ym)
# Diagnostic displays of PASSAGEIROS_PAGOS, each with plot_type = "partial"
# and up to 60 lags.

# 1) The series as-is.
gg_tsdisplay(
  anac_ts,
  PASSAGEIROS_PAGOS,
  plot_type = "partial",
  lag_max = 60
)

# 2) After a lag-1 difference.
gg_tsdisplay(
  anac_ts,
  difference(PASSAGEIROS_PAGOS, 1),
  plot_type = "partial",
  lag_max = 60
)

# 3) After a lag-1 difference followed by a lag-12 difference.
gg_tsdisplay(
  anac_ts,
  difference(difference(PASSAGEIROS_PAGOS, 1), 12),
  plot_type = "partial",
  lag_max = 60
)
# Train/test partition of `anac` along the DATA column, requesting
# initial = "17 years" for training and assess = "2 year" for assessment.
# Note: the object is named `split`, the same name as base::split(); function
# calls to split() still resolve to the function.
split <- anac |>
  time_series_split(
    date_var = DATA,
    initial = "17 years",
    assess = "2 year"
  )

# Plot the resulting cross-validation plan for PASSAGEIROS_PAGOS over DATA.
split |>
  tk_time_series_cv_plan() |>
  plot_time_series_cv_plan(DATA, PASSAGEIROS_PAGOS)
# Model specifications (nothing is fitted here).

# Linear regression using the "lm" engine.
regressao_spec <- set_engine(parsnip::linear_reg(), "lm")

# ARIMA using the "auto_arima" engine.
arima_spec <- set_engine(modeltime::arima_reg(), "auto_arima")

# Seasonal model with no engine set explicitly.
# NOTE(review): despite the `ets_` prefix, this relies on whatever engine
# seasonal_reg() selects by default - confirm that is the intended model.
ets_spec <- modeltime::seasonal_reg()
# Fit every candidate model on the training portion of `split`.

# Linear regression on lagged cargo, elapsed time and month as a factor.
regressao <- fit(
  regressao_spec,
  PASSAGEIROS_PAGOS ~ CARGA_LAG + TEMPO_DESDE_INICIO + as.factor(MES),
  data = training(split)
)

# ARIMA with DATA and lagged cargo on the right-hand side.
arima <- fit(
  arima_spec,
  PASSAGEIROS_PAGOS ~ DATA + CARGA_LAG,
  data = training(split)
)

# ARIMA variant with dif_CARGA_LAG in place of CARGA_LAG.
arima_2 <- fit(
  arima_spec,
  PASSAGEIROS_PAGOS ~ DATA + dif_CARGA_LAG,
  data = training(split)
)

# Seasonal model with DATA as the only right-hand-side term.
suavizacao <- fit(
  ets_spec,
  PASSAGEIROS_PAGOS ~ DATA,
  data = training(split)
)
# Table of models to evaluate. Only `arima` is currently included; the other
# fitted candidates (regressao, arima_2, suavizacao) are left out and can be
# added as further arguments to compare them.
modelo_tbl <- modeltime_table(arima)
# Calibrate the model table against the held-out (testing) portion of `split`.
calibration_tbl <- modeltime_calibrate(modelo_tbl, new_data = testing(split))

# Forecast over the testing rows, passing the full `anac` table as the
# actual data to accompany the predictions.
forecasts <- modeltime_forecast(
  calibration_tbl,
  new_data = testing(split),
  actual_data = anac
)

# Plot the forecasts.
forecasts |>
  plot_modeltime_forecast()

# Accuracy metrics computed from the calibration table.
modeltime_accuracy(calibration_tbl)