# __Home task: Time series forecasting__

In [303]:
import pandas as pd
import plotly.express as px
from prophet import Prophet
from sklearn.metrics import mean_absolute_percentage_error, mean_absolute_error, mean_squared_error

Load dataset

In [304]:
stores_df = pd.read_csv("data/train.csv")
stores_df.head()

Unnamed: 0,id,date,store_nbr,family,sales,onpromotion
0,0,2013-01-01,1,AUTOMOTIVE,0.0,0
1,1,2013-01-01,1,BABY CARE,0.0,0
2,2,2013-01-01,1,BEAUTY,0.0,0
3,3,2013-01-01,1,BEVERAGES,0.0,0
4,4,2013-01-01,1,BOOKS,0.0,0


Grouping sales by date

In [306]:
def sum_sales_per_day(df: pd.DataFrame, store_number:int=1) -> pd.DataFrame:
    day_level_df = df[df["store_nbr"]==store_number]\
        [
            ["date", "sales"]
        ]\
            .groupby("date").agg(
                {
                    "sales": "sum"
                }).reset_index()

    return day_level_df

# Group sales for store_number=14
day_level_df = sum_sales_per_day(stores_df, 14)
print(f"Shape: {day_level_df.shape}")
day_level_df.head()

Shape: (1684, 2)


Unnamed: 0,date,sales
0,2013-01-01,0.0
1,2013-01-02,7698.253
2,2013-01-03,6158.575996
3,2013-01-04,5777.689
4,2013-01-05,7974.621


Prepare dataset

In [307]:
window = 365

# Prepare data for Prophet
X = day_level_df[['date', 'sales']]
X['date'] = pd.to_datetime(X['date'])
X.rename(columns={"date": "ds", "sales":"y"}, inplace=True)

# Filling missing values with mean
X["y"].replace(float(0), X["y"].mean(), inplace=True)

# Split train and test
train, test = X[:-window], X[-window:]


A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.





In [308]:
class ProphetForecastModel:
    def __init__(self):
        self.model = Prophet()

    def train(self, train_data):
        self.model.fit(train_data)
    
    def predict(self, periods):
        future_df = self.model.make_future_dataframe(periods=periods)
        forecast = self.model.predict(future_df)
        return forecast
    
    def evaluate(self, test_data, forecast):
        benchmark_df = test_data.merge(forecast[["ds", "yhat"]], on="ds", how="left")

        fig = px.line(benchmark_df, x='ds', y=["y", "yhat"], markers=True, title="Prophet forecast")
        fig.show()

        mape = mean_absolute_percentage_error(
            test_data["y"],
            forecast["yhat"]
        )

        mae = mean_absolute_error(
            test_data["y"],
            forecast["yhat"]
        )
        
        mse = mean_squared_error(
            test_data["y"],
            forecast["yhat"]
        )
       
        print(f"MAPE: {round(mape, 2)}\n MAE: {round(mae, 2)}\n MSE: {round(mse, 2)}\n")

In [309]:
model = ProphetForecastModel()
model.train(train)

10:34:25 - cmdstanpy - INFO - Chain [1] start processing
10:34:26 - cmdstanpy - INFO - Chain [1] done processing


In [310]:
forecast = model.predict(periods=365)[-365:]

## __Forecast for 30 days ahead__

In [311]:
model.evaluate(test[:30], forecast[:30])

MAPE: 0.13
 MAE: 849.22
 MSE: 1159877.7



## __Forecast for 180 days ahead__

In [313]:
model.evaluate(test[:180], forecast[:180])

MAPE: 0.17
 MAE: 1376.95
 MSE: 3431843.05



## __Forecast for 270 days ahead__

In [315]:
model.evaluate(test[:270], forecast[:270])

MAPE: 0.19
 MAE: 1549.66
 MSE: 4322777.77



## __Forecast for 365 days ahead__

In [316]:
model.evaluate(test[:365], forecast[:365])

MAPE: 0.2
 MAE: 1653.5
 MSE: 4926523.1

