In [None]:
# Import Required Libraries
library(dplyr)
library(tsibble)
library(fable)
library(ggplot2)
library(tidyr)
library(purrr)

In [None]:
# Source Helper Functions
source("baseline_helpers.R")
source("../Data_Inspection/data_cleaning_helpers.R")

In [None]:
# Read the validation sales file, run it through the project's cleaning
# helper, and declare the result as a tsibble with one series per product
# indexed by day.
validation <- "../sales_test_validation_afcs2025.csv" |>
  read.csv() |>
  clean_validation_data() |>
  as_tsibble(key = product, index = day)
validation

## Load and Clean Train Data

In [None]:
# Read the raw training sales from disk
train <- read.csv("../sales_train_validation_afcs2025.csv")

# Run the project's cleaning helper over the raw table
cleaned_train <- train |> clean_train_data()

# Preview the first rows of the cleaned table
cleaned_train |> head()

## Baseline Prediction

In [None]:
# Fit a seasonal naive model for each time series.
# model() works on tsibbles, so the long-format data is declared as one
# explicitly, with `day` as the index and `product` as the key (the same
# declaration the notebook uses for the validation data). The key already
# makes model() fit one SNAIVE per product, so no group_by() is needed.
baseline_model <- cleaned_train %>%
  pivot_longer(cols = -day, names_to = "product", values_to = "sales") %>%
  as_tsibble(index = day, key = product) %>%
  model(SNAIVE(sales))

# Generate forecasts for the next 28 days.
# The point forecasts are already available as the numeric `.mean` column,
# so they are copied into `sales` directly (replacing the forecast
# distribution column of the same name) instead of being mapped one by one.
forecasts <- baseline_model %>%
  forecast(h = "28 days") %>%
  as_tibble() %>%
  mutate(sales = .mean)

# Format predictions for submission: spread to one column per product, then
# hand over to the project helper.
# NOTE(review): the exact output layout is defined by format_predictions()
# in baseline_helpers.R -- confirm there.
formatted_predictions <- forecasts %>%
  select(day, product, sales) %>%
  pivot_wider(names_from = product, values_from = sales) %>%
  format_predictions(start_date = "2016-04-25")

# Display formatted predictions
head(formatted_predictions)


## Evaluate Predictions

In [None]:
# Pair the formatted baseline predictions with the validation data via the
# project helper
aligned_data <- formatted_predictions |>
  align_predictions(validation)

# Score the aligned pairs with the project helper
metrics <- aligned_data |>
  calculate_metrics()

# Show the resulting evaluation metrics
print(metrics)

## New Experiments: Calendar and Price Features

In [None]:
# Long format: one row per (day, product) with its sales value.
# The result stays grouped by product.
data <- cleaned_train |>
  pivot_longer(cols = -day, names_to = "product", values_to = "sales") |>
  group_by(product)

# Calendar table: parse the month/day/year date strings and expose the
# column as `day` so it lines up with the sales data.
dates <- read.csv("../calendar_afcs2025.csv") |>
  mutate(date = as.Date(date, format = "%m/%d/%Y")) |>
  rename(day = date)

# Extend the sales data with the calendar columns (only days present in both
# tables are kept), then declare it as a tsibble keyed by product.
train <- data |>
  inner_join(dates, by = "day") |>
  as_tsibble(key = product, index = day)
train

In [None]:

# Sell prices: drop the store identifier and expose the item identifier as
# `product` so it matches the key of the training data.
prices <- read.csv("../sell_prices_afcs2025.csv") |>
  select(-store_id) |>
  rename(product = item_id)

# Extend the training data with prices, matching on product and on the
# `wm_yr_wk` column; rows without a matching price are dropped by the
# inner join.
train <- train |>
  inner_join(prices, by = c("product", "wm_yr_wk"))
train

In [None]:
# Seasonal naive model on the extended training data (one fit per key)
fit <- model(train, SNAIVE(sales))

# Forecast 28 steps ahead
forecasts <- forecast(fit, h = 28)

# Score the forecasts against the validation data and show the result
accuracy <- accuracy(forecasts, validation)
accuracy

In [None]:
# Automatically selected ARIMA model on the extended training data
# (one fit per key)
fit <- model(train, ARIMA(sales))

# Forecast 28 steps ahead
forecasts <- forecast(fit, h = 28)

# Score the forecasts against the validation data and show the result
accuracy <- accuracy(forecasts, validation)
accuracy

In [None]:
# interesting variables

# price => models we can probably try out:
# naive (easy; assumes that prices stay the same)
# ets (makes sense: prices change and the most recent observations should weigh more)
# Arima (just to see what pops out)
# snaive?

# events
# we probably don't need to predict these;
# we can probably look them up from the years before

# here we might want to either focus on the category itself or on the event

# snap_TX might be interesting? can probably look it up

# weekday, wday and month could be interesting; we don't need to predict these either
# maybe include a feature "week", i.e. which week it is
