# Predicting the Offline Shop Customer Frequentation

In [None]:
import os
import sys
import numpy as np
import pandas as pd
from dynaconf import settings
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_percentage_error
from statsmodels.tsa.holtwinters import ExponentialSmoothing

In [None]:
# Build the path of the results folder from the configured APP_PATH and append
# it to sys.path.
# NOTE(review): sys.path only affects module imports; it does not change how
# relative file paths (e.g. in read_csv / savefig) are resolved — confirm intent.
path_results_folder = os.path.join(settings.APP_PATH, "data2day_2022/results")

sys.path.append(path_results_folder)

## Getting the data

The data is stored in the `datasets/` folder. You can edit `customer_frequentation.csv` while keeping the existing headers, or define new headers and adapt the code below accordingly.

In [None]:
# Load the customer frequentation data; the path is relative (one level above
# the current working directory).
df_freq = pd.read_csv("../datasets/customer_frequentation.csv", encoding="UTF-8")

In [None]:
# Preview the first rows of the loaded frame.
df_freq.head()

## Data Analyses

In [None]:
# Overview plot of the raw `quantity` column before any modelling.
fig = plt.figure(figsize=(20, 10))
plt.plot(df_freq["quantity"])
plt.grid(True)
plt.xlabel("days")
plt.ylabel("customer quantity")
plt.title("Offline shop customer frequentation per days")

In [None]:
# to save the graph
# plt.savefig("../results/forecast_customer_frequentation_v1_data_analyse.png", dpi=200)

## Testing 4 different forecasting models

1. Exponential Smoothing
2. Holt-Winters with additive seasonality
3. Holt-Winters with multiplicative seasonality
4. ARIMA

**Note:** all of these models are univariate, i.e. they do not take the weather data into consideration. They only use the previous quantity values.

In [None]:
# Series the models are fitted on: the `quantity` column of the loaded data.
quantity = df_freq.quantity

In [None]:
# Fit the four candidate models on the same univariate series.
# model1: exponential smoothing with no trend or seasonal component requested.
# NOTE(review): `seasonal_periods=7` is passed without `seasonal=`, so it
# presumably has no effect here — confirm against the statsmodels docs.
model1 = ExponentialSmoothing(quantity, seasonal_periods=7).fit()
# model2 / model3: Holt-Winters with a 7-period additive, resp. multiplicative,
# seasonal component.
model2 = ExponentialSmoothing(quantity, seasonal="add", seasonal_periods=7).fit()
model3 = ExponentialSmoothing(quantity, seasonal="mul", seasonal_periods=7).fit()
# NOTE(review): no `order=` is given, so ARIMA uses its default order
# (presumably (0, 0, 0), i.e. a constant-mean model) — confirm this is intended.
model4 = ARIMA(quantity).fit()

## Select the best performing model

The best model is the one with the lowest `MAPE` (mean absolute percentage error) value. In our case, this is the Holt-Winters multiplicative method.

In [None]:
# Score each fitted model on its in-sample predictions using MAPE (lower is
# better). The prediction window is derived from the series length instead of
# a hard-coded 364, so this cell keeps working when the CSV holds a different
# number of rows (a length mismatch between observations and predictions
# would make mean_absolute_percentage_error raise).
# NOTE(review): MAPE divides by the observed values, so days with zero or
# near-zero customers can dominate the score — confirm the data has none.
last_index = len(quantity) - 1
for model_number, fitted_model in enumerate((model1, model2, model3, model4), start=1):
    in_sample = fitted_model.predict(start=0, end=last_index)
    print(
        f"MAPE model {model_number}:",
        mean_absolute_percentage_error(quantity, in_sample),
    )

## Forecasting the next values

In [None]:
# Forecast with the selected model (model3). The window is anchored on the
# series length rather than the hard-coded 364/380, so it follows the data if
# the CSV changes; for a 365-row series it is identical to the original
# start=364, end=380.
# The window starts on the last observed index (presumably so the forecast
# line connects to the observations when plotted) and extends 16 steps beyond.
forecast_start = len(quantity) - 1
forecast_horizon = 16
predictions = model3.predict(
    start=forecast_start, end=forecast_start + forecast_horizon
)

In [None]:
# Plot the observed series together with the forecast.
fig = plt.figure(figsize=(20, 10))
plt.plot(predictions, color="red", linestyle="--", label="predictions")
plt.plot(quantity, color="black", label="observations")
plt.title("Offline shop customer frequentation per days - Observations vs. Forecast")
plt.legend()
plt.xlabel("days")
plt.ylabel("customer quantity")
# Enable the grid explicitly: a bare plt.grid() toggles the current state, so
# it would switch the grid off under a style that enables it by default. This
# also matches the plt.grid(True) call used for the data-analysis plot.
plt.grid(True)