In [None]:
# Import Required Libraries
library(dplyr)
library(tsibble)
library(fable)
library(ggplot2)
library(tidyr)
library(purrr)
library(feasts)

# Source Helper Functions
source("baseline_helpers.R")
source("../Data_Inspection/data_cleaning_helpers.R")


In [None]:
# Read the validation CSV, pass it through the project's cleaning helper, and
# index the result as a tsibble: `day` is the time index, one series per
# `product`.
validation <- as_tsibble(
  clean_validation_data(read.csv("../sales_test_validation_afcs2025.csv")),
  index = day,
  key = product
)


## Load and Clean Train Data

In [None]:
# Read the raw training sales and run them through the project's cleaning
# helper.
train <- clean_train_data(read.csv("../sales_train_validation_afcs2025.csv"))

# Alias of the cleaned sales, taken before calendar and price columns are added.
data <- train

# Calendar table: parse the month/day/year `date` text into a Date and expose
# it as `day`, the column the sales are joined on.
dates <- read.csv("../calendar_afcs2025.csv") |>
  mutate(date = as.Date(date, format = "%m/%d/%Y")) |>
  rename(day = date)

# Extend the sales with the calendar data (inner join on `day`), then index
# them as a tsibble with one series per product.
train <- data |>
  inner_join(dates, by = "day") |>
  as_tsibble(index = day, key = product)

# Price table: `item_id` is renamed to `product` to match the sales table and
# `store_id` is dropped.
prices <- read.csv("../sell_prices_afcs2025.csv") |>
  rename(product = item_id) |>
  select(-store_id)

# Extend the sales with prices (inner join on product and `wm_yr_wk`), add
# log(sales + 1) as `log_sales`, and keep only the three products used below.
train <- train |>
  inner_join(prices, by = c("product", "wm_yr_wk")) |>
  mutate(log_sales = log(sales + 1)) |>
  filter(product %in% c("FOODS_3_001", "FOODS_3_002", "FOODS_3_003"))

# Restrict the validation set to the same three products.
validation <- validation |>
  filter(product %in% c("FOODS_3_001", "FOODS_3_002", "FOODS_3_003"))


## Baseline Prediction & Evaluation

In [None]:
# Seasonal-naive baseline: fit one SNAIVE model per product on raw sales.
fit <- model(train, SNAIVE(sales))

# Forecast 28 steps ahead and flatten to a tibble whose `sales` column holds
# the point forecast (`.mean`); the model label and `.mean` are then dropped.
snaive_forecasts <- forecast(fit, h = 28) |>
  as_tibble() |>
  mutate(sales = .mean) |>
  select(-c(.model, .mean))

# Line the forecasts up with the validation data and report the metrics
# (both steps use helpers sourced from the project files).
aligned_data <- align_predictions(snaive_forecasts, validation)
calculate_metrics(aligned_data)


In [None]:
# ARIMA baseline: fit one automatically selected ARIMA model per product on
# raw sales.
fit <- model(train, ARIMA(sales))

# Forecast 28 steps ahead and flatten to a tibble whose `sales` column holds
# the point forecast (`.mean`); the model label and `.mean` are then dropped.
arima_forecasts <- forecast(fit, h = 28) |>
  as_tibble() |>
  mutate(sales = .mean) |>
  select(-c(.model, .mean))

# Line the forecasts up with the validation data and report the metrics
# (both steps use helpers sourced from the project files).
aligned_data <- align_predictions(arima_forecasts, validation)
calculate_metrics(aligned_data)


# Actual interesting model (baseline)

In [None]:
# Load the previously fitted model from disk into `fit`.
#
# A missing file is treated as an error instead of being ignored: otherwise
# `fit` would silently keep whatever an earlier cell assigned to it (the plain
# ARIMA(sales) baseline), and the later back-transformation exp(.mean) - 1
# would be applied to forecasts that are not on the log scale.
# NOTE(review): the saved model is presumably fitted on `log_sales` with
# `sell_price` as a regressor -- confirm against the code that wrote the file.
path <- "models/arima_model.rds"

if (!file.exists(path)) {
  stop(
    "Saved model not found at '", path, "'. ",
    "Fit and save the model before running the cells that depend on it.",
    call. = FALSE
  )
}

fit <- readRDS(path)


In [None]:
# Naive price model per product: the forecast repeats the last observed price.
price_fit <- model(train, NAIVE(sell_price))

# Print the fitted model summary for a single product.
price_fit |>
  filter(product == "FOODS_3_003") |>
  report()

# 28 days of future prices, reduced to product / day / point forecast, with
# the point forecast stored under the original `sell_price` name.
sell_price_future <- forecast(price_fit, h = "28 days") |>
  as_tsibble() |>
  select(product, day, .mean) |>
  rename(sell_price = .mean)

# Future data handed to the sales model; currently just the forecast prices.
joined <- sell_price_future

# Forecast with the loaded model on the future data and undo the
# log(sales + 1) transform applied to the training data.
model1_predictions <- fit |>
  forecast(new_data = joined) |>
  as_tibble() |>
  mutate(sales = exp(.mean) - 1) |>
  select(day, product, sales)

# Line the forecasts up with the validation data and report the metrics
# (both steps use helpers sourced from the project files).
aligned_data <- align_predictions(model1_predictions, validation)
calculate_metrics(aligned_data)


### Here we check the sum of all sales per day to see whether the forecasts capture the overall level

In [None]:
# Total sales per day, summed over all products.
#
# `sales_data` must have `day`, `product` and `sales` columns. Returns a
# tsibble with one row per day and the total in column `sum`.
sum_sales_by_day <- function(sales_data) {
  sales_data |>
    as_tsibble(index = day, key = product) |>
    index_by(day) |>
    summarise(sum = sum(sales))
}

sums_arima <- sum_sales_by_day(arima_forecasts)
sums_snaive <- sum_sales_by_day(snaive_forecasts)

# The validation set restricted to the modelled products is stored in
# `validation`; the previously referenced `small_validation` is never defined
# in this notebook.
sums_val <- sum_sales_by_day(validation)

# ARIMA totals in the default colour, actual totals in red, SNAIVE in green.
autoplot(sums_arima, .vars = sum) +
  autolayer(sums_val, sum, color = "red") +
  autolayer(sums_snaive, sum, color = "green")


In [None]:
# interesting variables

# price => we can probably try out
# naive (easy, assumption that prices stay the same)
# ets (makes sense, prices change and most recent observations should weigh more)
# Arima (just to see what pops out)
# snaive?

# events
# probably don't need to predict it,
# these we can probably look up from the years before

# here we might want to either focus on the category itself or on the event

# snap_TX might be interesting? can probably look it up

# weekday   wday month could be interesting, these we also don't need to predict,
# maybe include feature "week", so what week it is
