# __Home task: Time series forecasting__

In [181]:
import pandas as pd
import plotly.express as px
from prophet import Prophet
from prophet.plot import plot_plotly, plot_components_plotly
from sklearn.metrics import mean_absolute_percentage_error, mean_absolute_error, mean_squared_error

Load dataset

In [182]:
# Read the raw training data. The path is relative, so the notebook has to be
# run from the directory that contains `data/`.
stores_df = pd.read_csv("data/train.csv")
# Preview the first rows to check the available columns.
stores_df.head()

Unnamed: 0,id,date,store_nbr,family,sales,onpromotion
0,0,2013-01-01,1,AUTOMOTIVE,0.0,0
1,1,2013-01-01,1,BABY CARE,0.0,0
2,2,2013-01-01,1,BEAUTY,0.0,0
3,3,2013-01-01,1,BEVERAGES,0.0,0
4,4,2013-01-01,1,BOOKS,0.0,0


Grouping sales by date

In [183]:
def sum_sales_per_day(df: pd.DataFrame, store_number:int=1) -> pd.DataFrame:
    """Total the sales of a single store for each date.

    Args:
        df: Frame with at least the columns ``store_nbr``, ``date`` and ``sales``.
        store_number: Value of ``store_nbr`` whose rows are kept.

    Returns:
        A frame with one row per ``date`` and the summed ``sales`` for that date.
    """
    belongs_to_store = df["store_nbr"] == store_number
    store_sales = df.loc[belongs_to_store, ["date", "sales"]]

    # Summing the single selected column gives a Series indexed by date;
    # reset_index turns it back into a two-column frame (date, sales).
    daily_totals = store_sales.groupby("date")["sales"].sum()

    return daily_totals.reset_index()

# Group sales for store_number=14: one row per date with that store's total sales.
day_level_df = sum_sales_per_day(stores_df, 14)
# Shape check: one row per distinct date for this store, two columns (date, sales).
print(f"Shape: {day_level_df.shape}")
day_level_df.head()

Shape: (1684, 2)


Unnamed: 0,date,sales
0,2013-01-01,0.0
1,2013-01-02,7698.253
2,2013-01-03,6158.575996
3,2013-01-04,5777.689
4,2013-01-05,7974.621


Prepare dataset

In [184]:
# Number of most recent rows held out as the test split.
window = 30

# Prepare data for Prophet, which expects a datetime column `ds` and a value column `y`.
# The frame is built in one chain that returns a new object instead of assigning
# into / mutating a column selection in place; that avoids pandas'
# SettingWithCopyWarning and makes the cell safe to re-run.
X = (
    day_level_df[["date", "sales"]]
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
    .rename(columns={"date": "ds", "sales": "y"})
    .fillna({"y": 0})  # Clean 'nan' values: only the target is filled, never the dates
)

# Split train and test: the last `window` rows are the test set, the rest is train.
train, test = X.iloc[:-window], X.iloc[-window:]

# Sanity check: the split must not drop or duplicate rows.
assert len(train) + len(test) == len(X)

In [185]:
class ProphetForecastModel:
    """Thin wrapper around a Prophet model: fit, forecast, plot and evaluate.

    Data frames passed in follow Prophet's convention: a ``ds`` column with the
    dates and a ``y`` column with the observed values.
    """

    def __init__(self, **prophet_kwargs):
        """Create the underlying Prophet model.

        Args:
            **prophet_kwargs: Optional keyword arguments forwarded unchanged to
                ``Prophet``. With none given this is exactly ``Prophet()``.
        """
        self.model = Prophet(**prophet_kwargs)

    def train(self, train_data):
        """Fit the model on ``train_data`` (columns ``ds`` and ``y``)."""
        self.model.fit(train_data)

    def predict(self, periods):
        """Forecast ``periods`` steps beyond the data the model was fitted on.

        Args:
            periods: Number of future periods passed to ``make_future_dataframe``.

        Returns:
            The frame produced by ``Prophet.predict`` for that future dataframe.
        """
        future_df = self.model.make_future_dataframe(periods=periods)
        return self.model.predict(future_df)

    def plot_components(self, forecast):
        """Show the component plots of ``forecast`` via ``plot_components_plotly``."""
        fig = plot_components_plotly(self.model, forecast, figsize=(1000, 300))
        fig.show()

    def plot(self, forecast):
        """Show the forecast plot of ``forecast`` via ``plot_plotly``, with a legend."""
        fig = plot_plotly(self.model, forecast, figsize=(1400, 700))
        fig.update_layout(showlegend=True)
        fig.show()

    def evaluate(self, test_data):
        """Compare predictions with held-out observations.

        Plots actual (``y``) against predicted (``yhat``) values and prints MAPE,
        MAE and MSE.

        Args:
            test_data: Frame with ``ds`` and ``y`` columns that the model has not
                been fitted on.
        """
        # Predict on the test dates themselves instead of on a horizon derived
        # from len(test_data). A row count only lines up with the test dates when
        # they are the consecutive periods right after the training data; with a
        # gap, the left merge below would leave NaN in `yhat` and the metric
        # functions would raise. This also avoids re-predicting the full history.
        forecast = self.model.predict(test_data[["ds"]])

        benchmark_df = test_data.merge(forecast[["ds", "yhat"]], on="ds", how="left")

        fig = px.line(benchmark_df, x='ds', y=["y", "yhat"], markers=True, title="Prophet forecast")
        fig.show()

        actual = benchmark_df["y"]
        predicted = benchmark_df["yhat"]

        mape = mean_absolute_percentage_error(actual, predicted)
        mae = mean_absolute_error(actual, predicted)
        mse = mean_squared_error(actual, predicted)

        print(f"MAPE: {round(mape, 2)}\n MAE: {round(mae, 2)}\n MSE: {round(mse, 2)}\n")

In [186]:
# Build the wrapper around a default Prophet model and fit it on the training
# split only, so the last `window` rows stay unseen for evaluation.
model = ProphetForecastModel()
model.train(train)

00:31:16 - cmdstanpy - INFO - Chain [1] start processing
00:31:16 - cmdstanpy - INFO - Chain [1] done processing


## __Forecast for 30 days ahead__

In [187]:
# A horizon of 30 periods equals `window`, i.e. the number of held-out test rows.
forecast = model.predict(30)

In [188]:
# Show the component plots of the 30-period forecast (drawn by plot_components_plotly).
model.plot_components(forecast)

In [189]:
# Show the interactive plot of the 30-period forecast (drawn by plot_plotly, legend enabled).
model.plot(forecast)

In [190]:
# Score against the held-out rows: plots y vs. yhat and prints MAPE, MAE and MSE.
model.evaluate(test)

MAPE: 0.18
 MAE: 1587.93
 MSE: 3961482.05



## __Forecast for 180 days ahead__

In [191]:
# Same fitted model, longer horizon. `forecast` is overwritten, so the plots in
# this section refer to the 180-period run. The test split has only `window` (30)
# rows, so this horizon is plotted but not evaluated.
forecast = model.predict(180)

In [192]:
# Component plots for the 180-period forecast.
model.plot_components(forecast)

In [193]:
# Forecast plot for the 180-period forecast.
model.plot(forecast)

## __Forecast for 270 days ahead__

In [194]:
# Same fitted model with a 270-period horizon; `forecast` is overwritten again,
# so the plots in this section refer to this run.
forecast = model.predict(270)

In [195]:
# Component plots for the 270-period forecast.
model.plot_components(forecast)

In [196]:
# Forecast plot for the 270-period forecast.
model.plot(forecast)

## __Forecast for 365 days ahead__

In [197]:
# Same fitted model with a 365-period horizon; `forecast` is overwritten again,
# so the plots in this section refer to this run.
forecast = model.predict(365)

In [198]:
# Component plots for the 365-period forecast.
model.plot_components(forecast)

In [199]:
# Forecast plot for the 365-period forecast.
model.plot(forecast)