In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

# My modules
from utils import *

# Notebook-wide plotting, display and warning preferences
sns.set_style('whitegrid')                  # seaborn figures use the white grid theme
pd.set_option('display.max_rows', None)     # never truncate rows when displaying DataFrames

# Suppress every warning for the rest of the session.
warnings.simplefilter("ignore")

In [2]:
# Read the preprocessed transactions table, requesting pandas nullable dtypes.
data = pd.read_parquet(
    'data/n_forecast_preprocessed.parquet',
    engine='pyarrow',
    dtype_backend='numpy_nullable',
)

In [3]:
# Use the sale date as the (ascending, sorted) index and store the
# transaction counts as floats.
data = (
    data
    .set_index('sales_date')
    .sort_index()
    .assign(n_transactions=lambda frame: frame['n_transactions'].astype(float))
)

In [4]:
# Number of distinct stores in the dataset (missing ids are not counted)
data['store_hashed'].nunique(dropna=True)

595

# Forecasting for all stores

1. First, we need to make each store's data stationary so that an ARIMA model can be applied. We will use differencing to make the data stationary.
2. Then we will use the auto_arima function to find the best parameters for the ARIMA model.
3. Finally, we will use the ARIMA model to forecast the number of transactions for each store.



# Check stationarity for each store

In [5]:
# Split the stores according to the stationarity test on the raw series.
# NOTE(review): `plot` and `results` presumably toggle per-store plots and
# detailed test output -- confirm against the helper's definition.
stationary_stores, non_stationary_stores = test_stores_stationarity(
    data,
    plot=False,
    results=False,
)

Stationary stores: 407
Non stationary stores: 188


In [6]:
# Difference the stores that failed the stationarity test.
#   stationary_stores   -> ARIMA with d=0
#   stationary_stores_1 -> ARIMA with d=1
# Anything left in non_stationary_stores_1 would be a candidate for d=2.
stationary_stores_1, non_stationary_stores_1 = differencing(
    data,
    non_stationary_stores,
)

Stationary stores: 188
Non stationary stores: 0


In [7]:
# Re-run the stationarity test on every store after differencing.
# Rows containing any missing value are dropped first.
data = data.dropna(axis=0, how='any')

stationary_stores_2, non_stationary_stores_2 = test_stores_stationarity(
    data,
    plot=False,
    results=False,
)

Stationary stores: 595
Non stationary stores: 0


# Hyperparameter tuning for each store

In [8]:
# Reload the dataset from disk, replacing the frame that had rows dropped above,
# then index it by sale date (sorted) and store the transaction counts as floats.
data = (
    pd.read_parquet(
        'data/n_forecast_preprocessed.parquet',
        engine='pyarrow',
        dtype_backend='numpy_nullable',
    )
    .set_index('sales_date')
    .sort_index()
    .assign(n_transactions=lambda frame: frame['n_transactions'].astype(float))
)

In [9]:
# stationary_stores   -> stationary without differencing, tuned with d=0
# stationary_stores_1 -> stationary after differencing, tuned with d=1

In [10]:
# Empty results table: one row per store, holding its selected (p, d, q) order
store_params = pd.DataFrame(data=None, columns=['store', 'pdq'])

In [11]:
# Find the best ARIMA order for every store that is stationary without
# differencing (d=0).
#
# Rows are collected in a plain list and appended to `store_params` with a
# single concat, instead of re-copying the growing frame on every iteration.
# The `finally` clause still runs if this long loop is interrupted, so the
# orders computed so far are kept in `store_params`.
new_rows = []
try:
    for store in stationary_stores:
        print(f"Store: {store}")

        # Series of this store only
        data_store = data[data['store_hashed'] == store]

        best_params = arima_hyperparameters(data_store['n_transactions'], diff=0)
        print(best_params)
        new_rows.append({'store': store, 'pdq': best_params})
finally:
    if new_rows:
        store_params = pd.concat(
            [store_params, pd.DataFrame(new_rows)],
            ignore_index=True,
        )


Store: a58149d355f02887dfbe55ebb2b64ba3
Best ARIMA parameters: (8, 0, 7) with AIC: 19118.62876072526
(8, 0, 7)
Store: bb57db42f77807a9c5823bd8c2d9aaef
Best ARIMA parameters: (9, 0, 8) with AIC: 21033.091028127637
(9, 0, 8)
Store: 28e209b61a52482a0ae1cb9f5959c792
Best ARIMA parameters: (9, 0, 9) with AIC: 23357.84907125705
(9, 0, 9)
Store: 1cecc7a77928ca8133fa24680a88d2f9
Best ARIMA parameters: (9, 0, 9) with AIC: 21293.733368880024
(9, 0, 9)
Store: a19883fca95d0e5ec7ee6c94c6c32028
Best ARIMA parameters: (9, 0, 9) with AIC: 20301.91453524185
(9, 0, 9)
Store: 23ef5cf238a3b88085d95adf94c24a25
Best ARIMA parameters: (8, 0, 9) with AIC: 19099.110533237726
(8, 0, 9)
Store: 5e5dd00d770ef3e9154a4257edcb80b8
Best ARIMA parameters: (9, 0, 8) with AIC: 18110.01323200047
(9, 0, 8)
Store: ae95296e27d7f695f891cd26b4f37078
Best ARIMA parameters: (9, 0, 7) with AIC: 17911.412951516744
(9, 0, 7)
Store: e21bd8ab999859f3642d2227e682e66f
Best ARIMA parameters: (8, 0, 9) with AIC: 16833.398627520415
(8, 0,

KeyboardInterrupt: 

In [None]:
# Find the best ARIMA order for every store that needed differencing (d=1).
#
# Rows are collected in a plain list and appended to `store_params` with a
# single concat, instead of re-copying the growing frame on every iteration.
# The `finally` clause still runs if this long loop is interrupted, so the
# orders computed so far are kept in `store_params`.
new_rows = []
try:
    for store in stationary_stores_1:
        print(f"Store: {store}")

        # Series of this store only
        data_store = data[data['store_hashed'] == store]

        best_params = arima_hyperparameters(data_store['n_transactions'], diff=1)
        print(best_params)
        new_rows.append({'store': store, 'pdq': best_params})
finally:
    if new_rows:
        store_params = pd.concat(
            [store_params, pd.DataFrame(new_rows)],
            ignore_index=True,
        )

# Save the dataframe as a csv file (only reached when the loop completes)
store_params.to_csv('data/stores_arima_params.csv', index=False)
    