In [None]:
# Import Required Libraries
library(dplyr)
library(tsibble)
library(fable)
library(ggplot2)
library(tidyr)
library(purrr)
library(feasts)

# Source Helper Functions
source("baseline_helpers.R")
source("../Data_Inspection/data_cleaning_helpers.R")


In [None]:
# Read the validation CSV, clean it with the project helper, and index the
# result as a tsibble keyed by product with `day` as the time index.
validation <- "../sales_test_validation_afcs2025.csv" |>
  read.csv() |>
  clean_validation_data() |>
  as_tsibble(index = day, key = product)


## Load and Clean Train Data

In [None]:
# Load the training sales CSV and clean it with the project helper.
train <- "../sales_train_validation_afcs2025.csv" |>
  read.csv() |>
  clean_train_data()

# Keep a copy of the cleaned sales as they were before any joins.
data <- train

# Calendar table: parse the m/d/Y date strings and rename the column to `day`
# so it can be joined to the sales data on `day`.
dates <- "../calendar_afcs2025.csv" |>
  read.csv() |>
  mutate(date = as.Date(date, format = "%m/%d/%Y")) |>
  rename(day = date)

# Extend the sales with calendar data, then index as a tsibble per product.
train <- data |>
  inner_join(dates, by = "day") |>
  as_tsibble(index = day, key = product)

# Price table: rename `item_id` to `product` and drop the store column.
prices <- "../sell_prices_afcs2025.csv" |>
  read.csv() |>
  rename(product = item_id) |>
  select(-store_id)

# Extend with prices (matched on product and `wm_yr_wk`) and add
# log(sales + 1) as `log_sales`.
train <- train |>
  inner_join(prices, by = c("product", "wm_yr_wk")) |>
  mutate(log_sales = log(sales + 1))

# take subset of train and validation

In [None]:
# Restrict both data sets to the products "FOODS_3_001" ... "FOODS_3_010".
# The ids are built once and shared, so the train and validation subsets
# cannot drift apart when the list is edited.
subset_products <- sprintf("FOODS_3_%03d", 1:10)

train <- train |> filter(product %in% subset_products)
validation <- validation |> filter(product %in% subset_products)

## Baseline Prediction & Evaluation

Naive Baseline

In [None]:
path <- "models/baseline/naive.rds"

# Load the cached fit if present; otherwise fit a NAIVE model per product and
# cache it.
if (file.exists(path)) {
  fit <- readRDS(path)
} else {
  fit <- train |>
    model(
      NAIVE(sales)
    )
  # saveRDS() does not create missing directories, so make sure it exists.
  dir.create(dirname(path), recursive = TRUE, showWarnings = FALSE)
  saveRDS(fit, path)
}

# The model is fitted on raw `sales`, so `.mean` is already on the sales
# scale. Applying exp(.) - 1 here (as if the response were log(sales + 1))
# would blow the forecasts up, so the point forecast is used as-is.
naive_forecasts <- fit |>
  forecast(h = 28) |>
  as_tibble() |>
  mutate(sales = .mean) |>
  select(-.model, -.mean)

# Align forecasts with the validation data and score them (project helpers).
aligned_data <- align_predictions(naive_forecasts, validation)
accuracy_naive <- calculate_metrics(aligned_data)

Seasonal Naive Baseline

In [None]:
path <- "models/baseline/snaive.rds"

# Load the cached fit if present; otherwise fit a seasonal naive model per
# product and cache it.
if (file.exists(path)) {
  fit <- readRDS(path)
} else {
  fit <- train |>
    model(
      SNAIVE(sales)
    )
  # saveRDS() does not create missing directories, so make sure it exists.
  dir.create(dirname(path), recursive = TRUE, showWarnings = FALSE)
  saveRDS(fit, path)
}

# The model is fitted on raw `sales`, so `.mean` is already on the sales
# scale and must not be back-transformed with exp(.) - 1.
snaive_forecasts <- fit |>
  forecast(h = 28) |>
  as_tibble() |>
  mutate(sales = .mean) |>
  select(-.model, -.mean)

# Align forecasts with the validation data and score them (project helpers).
aligned_data <- align_predictions(snaive_forecasts, validation)
accuracy_snaive <- calculate_metrics(aligned_data)

ARIMA Baseline

In [None]:
path <- "models/baseline/arima.rds"

# Load the cached fit if present; otherwise fit an automatically selected
# ARIMA model per product and cache it.
if (file.exists(path)) {
  fit <- readRDS(path)
} else {
  fit <- train |>
    model(
      ARIMA(sales)
    )
  # saveRDS() does not create missing directories, so make sure it exists.
  dir.create(dirname(path), recursive = TRUE, showWarnings = FALSE)
  saveRDS(fit, path)
}

# The model is fitted on raw `sales`, so `.mean` is already on the sales
# scale and must not be back-transformed with exp(.) - 1.
arima_forecasts <- fit |>
  forecast(h = 28) |>
  as_tibble() |>
  mutate(sales = .mean) |>
  select(-.model, -.mean)

# Align forecasts with the validation data and score them (project helpers).
aligned_data <- align_predictions(arima_forecasts, validation)
accuracy_arima <- calculate_metrics(aligned_data)

ETS Baseline

In [None]:
path <- "models/baseline/ets.rds"

# Load the cached fit if present; otherwise fit an ETS model per product and
# cache it.
if (file.exists(path)) {
  fit <- readRDS(path)
} else {
  fit <- train |>
    model(
      ETS(sales)
    )
  # saveRDS() does not create missing directories, so make sure it exists.
  dir.create(dirname(path), recursive = TRUE, showWarnings = FALSE)
  saveRDS(fit, path)
}

# The model is fitted on raw `sales`, so `.mean` is already on the sales
# scale and must not be back-transformed with exp(.) - 1.
ets_forecasts <- fit |>
  forecast(h = 28) |>
  as_tibble() |>
  mutate(sales = .mean) |>
  select(-.model, -.mean)

# Align forecasts with the validation data and score them (project helpers).
aligned_data <- align_predictions(ets_forecasts, validation)
accuracy_ets <- calculate_metrics(aligned_data)

Evaluate baselines

In [None]:
# Sum `sales` over all products for each day.
# `tbl` needs `day`, `product` and `sales` columns; the result is a tsibble
# indexed by `day` with a single `sum` column.
sum_sales_by_day <- function(tbl) {
  tbl |>
    as_tsibble(index = day, key = product) |>
    index_by(day) |>
    summarise(sum = sum(sales, na.rm = TRUE))
}

# Daily totals for each baseline forecast and for the validation data.
sums_naive <- sum_sales_by_day(naive_forecasts)
sums_snaive <- sum_sales_by_day(snaive_forecasts)
sums_arima <- sum_sales_by_day(arima_forecasts)
sums_ets <- sum_sales_by_day(ets_forecasts)
sums_val <- sum_sales_by_day(validation)

# Accuracy table: one group of rows per model, labelled in the `model` column.
baseline_accuracy <- bind_rows(
  naive = accuracy_naive,
  snaive = accuracy_snaive,
  arima = accuracy_arima,
  ets = accuracy_ets,
  .id = "model"
)

print(baseline_accuracy)

# Plot the daily totals against validation to see what is being captured:
# naive = red, snaive = purple, arima = blue, ets = orange.
(sums_val |> autoplot(.vars = sum)) +
  autolayer(sums_naive, sum, color = "red") +
  autolayer(sums_snaive, sum, color = "purple") +
  autolayer(sums_arima, sum, color = "blue") +
  autolayer(sums_ets, sum, color = "orange")

# Actual interesting model (baseline)

Dynamic regression on Sell Price as predictor

In [None]:
path <- "models/arima_model.rds"

# Dynamic regression: ARIMA errors with `sell_price` as an external regressor,
# fitted per product. The fit is cached on disk because it is reused by every
# price-forecast variant.
if (file.exists(path)) {
  arima_fit <- readRDS(path)
} else {
  arima_fit <- train |>
    model(
      ARIMA(sales ~ sell_price)
    )

  # saveRDS() does not create missing directories, so make sure it exists.
  dir.create(dirname(path), recursive = TRUE, showWarnings = FALSE)
  saveRDS(arima_fit, path)
}


Naive Price

In [None]:
path <- "models/prices/naive.rds"

# Load the cached price model if present; otherwise fit a NAIVE model on
# `sell_price` per product and cache it. The fit is NOT repeated after this
# block: an unconditional refit would make the cache pointless.
if (file.exists(path)) {
  price_fit_naive <- readRDS(path)
} else {
  price_fit_naive <- train |>
    model(NAIVE(sell_price))

  # saveRDS() does not create missing directories, so make sure it exists.
  dir.create(dirname(path), recursive = TRUE, showWarnings = FALSE)
  saveRDS(price_fit_naive, path)
}

# Forecast prices 28 days ahead and reshape them into the `new_data` layout
# (product, day, sell_price) expected by the dynamic regression.
sell_price_future_naive <- price_fit_naive |>
  forecast(h = "28 days") |>
  as_tsibble() |>
  select(product, day, .mean) |>
  rename(sell_price = .mean)

# The dynamic regression in this notebook is fitted on raw `sales`
# (ARIMA(sales ~ sell_price)), so `.mean` is already on the sales scale and
# must not be back-transformed with exp(.) - 1.
naive_forecasts <- forecast(arima_fit, new_data = sell_price_future_naive) |>
  as_tibble() |>
  mutate(sales = .mean) |>
  select(day, product, sales)

# Align forecasts with the validation data and score them (project helpers).
aligned_data <- align_predictions(naive_forecasts, validation)
accuracy_naive <- calculate_metrics(aligned_data)

Snaive Price

In [None]:
path <- "models/prices/snaive.rds"

# Load the cached price model if present; otherwise fit a seasonal naive model
# on `sell_price` per product and cache it.
if (file.exists(path)) {
  price_fit_snaive <- readRDS(path)
} else {
  price_fit_snaive <- train |>
    model(SNAIVE(sell_price))

  # saveRDS() does not create missing directories, so make sure it exists.
  dir.create(dirname(path), recursive = TRUE, showWarnings = FALSE)
  saveRDS(price_fit_snaive, path)
}

# Forecast prices 28 days ahead and reshape them into the `new_data` layout
# (product, day, sell_price) expected by the dynamic regression.
sell_price_future_snaive <- price_fit_snaive |>
  forecast(h = "28 days") |>
  as_tsibble() |>
  select(product, day, .mean) |>
  rename(sell_price = .mean)

# The dynamic regression in this notebook is fitted on raw `sales`
# (ARIMA(sales ~ sell_price)), so `.mean` is already on the sales scale and
# must not be back-transformed with exp(.) - 1.
snaive_forecasts <- forecast(arima_fit, new_data = sell_price_future_snaive) |>
  as_tibble() |>
  mutate(sales = .mean) |>
  select(day, product, sales)

# Align forecasts with the validation data and score them (project helpers).
aligned_data <- align_predictions(snaive_forecasts, validation)
accuracy_snaive <- calculate_metrics(aligned_data)

ARIMA price

In [None]:
path <- "models/prices/arima.rds"

# Load the cached price model if present; otherwise fit an automatically
# selected ARIMA on `sell_price` per product and cache it.
# TODO: decide whether the differencing order d should be fixed by hand.
if (file.exists(path)) {
  price_fit_arima <- readRDS(path)
} else {
  price_fit_arima <- train |>
    model(ARIMA(sell_price))

  # saveRDS() does not create missing directories, so make sure it exists.
  dir.create(dirname(path), recursive = TRUE, showWarnings = FALSE)
  saveRDS(price_fit_arima, path)
}

# Forecast prices 28 days ahead and reshape them into the `new_data` layout
# (product, day, sell_price) expected by the dynamic regression.
sell_price_future_arima <- price_fit_arima |>
  forecast(h = "28 days") |>
  as_tsibble() |>
  select(product, day, .mean) |>
  rename(sell_price = .mean)

# The dynamic regression in this notebook is fitted on raw `sales`
# (ARIMA(sales ~ sell_price)), so `.mean` is already on the sales scale and
# must not be back-transformed with exp(.) - 1.
arima_forecasts <- forecast(arima_fit, new_data = sell_price_future_arima) |>
  as_tibble() |>
  mutate(sales = .mean) |>
  select(day, product, sales)

# Align forecasts with the validation data and score them (project helpers).
aligned_data <- align_predictions(arima_forecasts, validation)
accuracy_arima <- calculate_metrics(aligned_data)

ETS Price

In [None]:
path <- "models/prices/ets.rds"

# Load the cached price model if present; otherwise fit an ETS model on
# `sell_price` per product and cache it.
if (file.exists(path)) {
  price_fit_ets <- readRDS(path)
} else {
  price_fit_ets <- train |>
    model(ETS(sell_price))

  # saveRDS() does not create missing directories, so make sure it exists.
  dir.create(dirname(path), recursive = TRUE, showWarnings = FALSE)
  saveRDS(price_fit_ets, path)
}

# Forecast prices 28 days ahead and reshape them into the `new_data` layout
# (product, day, sell_price) expected by the dynamic regression.
sell_price_future_ets <- price_fit_ets |>
  forecast(h = "28 days") |>
  as_tsibble() |>
  select(product, day, .mean) |>
  rename(sell_price = .mean)

# The dynamic regression in this notebook is fitted on raw `sales`
# (ARIMA(sales ~ sell_price)), so `.mean` is already on the sales scale and
# must not be back-transformed with exp(.) - 1.
ets_forecasts <- forecast(arima_fit, new_data = sell_price_future_ets) |>
  as_tibble() |>
  mutate(sales = .mean) |>
  select(day, product, sales)

# Align forecasts with the validation data and score them (project helpers).
aligned_data <- align_predictions(ets_forecasts, validation)
accuracy_ets <- calculate_metrics(aligned_data)

Evaluate them

In [None]:
# Sum `sales` over all products for each day.
# `tbl` needs `day`, `product` and `sales` columns; the result is a tsibble
# indexed by `day` with a single `sum` column.
sum_sales_by_day <- function(tbl) {
  tbl |>
    as_tsibble(index = day, key = product) |>
    index_by(day) |>
    summarise(sum = sum(sales, na.rm = TRUE))
}

# Daily totals for each price-driven forecast and for the validation data.
sums_naive <- sum_sales_by_day(naive_forecasts)
sums_snaive <- sum_sales_by_day(snaive_forecasts)
sums_arima <- sum_sales_by_day(arima_forecasts)
sums_ets <- sum_sales_by_day(ets_forecasts)
sums_val <- sum_sales_by_day(validation)

# Accuracy table: one group of rows per price model, labelled in the `model`
# column.
dynamic_price_accuracy <- bind_rows(
  naive = accuracy_naive,
  snaive = accuracy_snaive,
  arima = accuracy_arima,
  ets = accuracy_ets,
  .id = "model"
)

print(dynamic_price_accuracy)

# Plot the daily totals against validation to see what is being captured:
# naive = red, snaive = purple, arima = blue, ets = orange.
(sums_val |> autoplot(.vars = sum)) +
  autolayer(sums_naive, sum, color = "red") +
  autolayer(sums_snaive, sum, color = "purple") +
  autolayer(sums_arima, sum, color = "blue") +
  autolayer(sums_ets, sum, color = "orange")

In [None]:
# joined <- sell_price_future_naive

# arima_fit <- forecast(fit, new_data = joined) |>
#   as_tibble() |>
#   mutate(sales = exp(.mean) - 1) |>
#   select(day, product, sales)


# aligned_data <- align_predictions(arima_fit, validation)
# calculate_metrics(aligned_data)

### Here we check the sum of all sales to see if we in general are capturing this or not

In [None]:
# interesting variables

# price => we can probably try out
# naive (easy, assumption that prices stay the same)
# ets (makes sense, prices change and most recent observations should weigh more)
# Arima (just to see what pops out)
# snaive?

# events
# probably don't need to predict it,
# these we can probably look up from the years before

# here we might want to either focus on the category itself or on the event

# snap_TX might be interesting? can probably look it up

# weekday   wday month could be interesting, these we also don't need to predict,
# maybe include feature "week", so what week it is


In [None]:
# Compare all accuracies: the plain baselines first, then the price-driven
# dynamic regression variants.
walk(list(baseline_accuracy, dynamic_price_accuracy), print)

In [None]:
# Summarise max(day) of the ETS price forecasts and print up to 28 rows.
sell_price_future_ets |>
  summarise(last_day = max(day)) |>
  print(n = 28)

In [None]:
# Display the validation tsibble (already subset to the selected products).
validation

In [None]:
# Display `ets_forecasts`; at this point it holds the dynamic-regression
# forecasts driven by ETS-forecast prices (its most recent assignment).
ets_forecasts