In [1]:
# Import Required Libraries
library(dplyr)
library(tsibble)
library(fable)
library(ggplot2)
library(tidyr)
library(purrr)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


Registered S3 method overwritten by 'tsibble':
  method               from 
  as_tibble.grouped_df dplyr


Attaching package: ‘tsibble’


The following objects are masked from ‘package:base’:

    intersect, setdiff, union


Loading required package: fabletools



In [2]:
# Source Helper Functions
source("baseline_helpers.R")
source("../Data_Inspection/data_cleaning_helpers.R")


Attaching package: ‘lubridate’


The following object is masked from ‘package:tsibble’:

    interval


The following objects are masked from ‘package:base’:

    date, intersect, setdiff, union




In [3]:
# Load the raw validation CSV and run it through the cleaning helper
validation <- read.csv("../sales_test_validation_afcs2025.csv") %>%
  clean_validation_data()

## Load and Clean Train Data

In [4]:
# Read the raw training sales CSV; `train` keeps the unmodified copy
train <- read.csv("../sales_train_validation_afcs2025.csv")

# Run the raw table through the cleaning helper
cleaned_train <- train %>%
  clean_train_data()

# Preview the first rows of the cleaned table
cleaned_train %>%
  head()

day,FOODS_3_001,FOODS_3_002,FOODS_3_003,FOODS_3_004,FOODS_3_005,FOODS_3_006,FOODS_3_007,FOODS_3_008,FOODS_3_009,⋯,FOODS_3_818,FOODS_3_819,FOODS_3_820,FOODS_3_821,FOODS_3_822,FOODS_3_823,FOODS_3_824,FOODS_3_825,FOODS_3_826,FOODS_3_827
<date>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,⋯,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>
2011-01-29,0,0,0,0,0,0,27,0,0,⋯,0,9,3,0,7,0,0,0,0,0
2011-01-30,2,0,0,0,1,0,27,0,0,⋯,0,0,2,0,5,0,1,0,0,0
2011-01-31,1,0,0,0,1,0,8,0,0,⋯,0,3,1,0,5,0,0,0,0,0
2011-02-01,3,0,0,0,0,0,0,0,0,⋯,0,0,3,0,3,0,1,2,0,0
2011-02-02,0,0,0,0,0,0,0,0,0,⋯,0,1,0,0,0,0,0,1,0,0
2011-02-03,0,0,0,0,1,0,11,0,0,⋯,0,6,0,0,3,0,2,1,0,0


## Baseline Prediction

In [5]:
# Fit a seasonal naive (SNAIVE) model for each product's time series.
# The cleaned table holds a `day` column plus one column per product, so it is
# reshaped to long form (day, product, sales) before modelling.
baseline_model <- cleaned_train %>%
  pivot_longer(cols = -day, names_to = "product", values_to = "sales") %>%
  group_by(product) %>%
  model(SNAIVE(sales))

# Generate forecasts for the next 28 days.
# `.mean` is the point-forecast column of the forecast table and is already
# numeric, so it is copied directly into `sales` rather than being passed
# element by element through an identity map.
forecasts <- baseline_model %>%
  forecast(h = "28 days") %>%
  as_tibble() %>%
  mutate(sales = .mean)

# Format predictions for submission: spread back to one column per product,
# then hand the table to the project's formatting helper.
# NOTE(review): start_date is presumably the first forecast day (it matches
# the first day in the displayed output) -- confirm against the helper.
formatted_predictions <- forecasts %>%
  select(day, product, sales) %>%
  pivot_wider(names_from = product, values_from = sales) %>%
  format_predictions(start_date = "2016-04-25")

# Display formatted predictions
head(formatted_predictions)


day,id,sales
<date>,<chr>,<dbl>
2016-04-25,FOODS_3_001,0
2016-04-25,FOODS_3_002,0
2016-04-25,FOODS_3_003,0
2016-04-25,FOODS_3_004,0
2016-04-25,FOODS_3_005,1
2016-04-25,FOODS_3_006,1


## Evaluate Predictions

In [6]:
# Line the formatted predictions up against the validation data (helper)
aligned_data <- formatted_predictions %>%
  align_predictions(validation)

# Compute the evaluation metrics from the aligned table (helper)
metrics <- aligned_data %>%
  calculate_metrics()

# Show the resulting metrics table
print(metrics)

# A tibble: 1 × 2
   RMSE   MAE
  <dbl> <dbl>
1  3.68  1.70
