In [1]:
import warnings
import pandas as pd
import numpy as np

from tqdm.autonotebook import tqdm
from prophet import Prophet
from utils import suppress_stdout_stderr

In [2]:
# Silence every FutureWarning for the rest of the session
warnings.simplefilter('ignore', FutureWarning)

# Directory the prepared pickles are read from
DATA_PATH = 'data/prepared'
# Directory the submission CSV is written to
SUBMISSION_PATH = 'data/submission'

In [3]:
# Import pickled data: the train, test and holidays objects saved under DATA_PATH
# (presumably DataFrames written by an earlier preparation step -- confirm).
# NOTE(review): unpickling can execute arbitrary code; only load pickle files
# that this project produced itself.
train_df = pd.read_pickle(f'{DATA_PATH}/train_df.pkl')
test_df = pd.read_pickle(f'{DATA_PATH}/test_df.pkl')
holidays_df = pd.read_pickle(f'{DATA_PATH}/holidays_df.pkl')

# Sanity check: print the (rows, columns) shape of the training data
print(train_df.shape)

(1017209, 32)


In [4]:
# Dates for prophet: parse 'Date' into datetimes, then derive the calendar
# parts (year, month, day of month) from the parsed column
train_df = (
    train_df
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .assign(Year=lambda frame: frame['Date'].dt.year,
            Month=lambda frame: frame['Date'].dt.month,
            Day=lambda frame: frame['Date'].dt.day)
)

In [5]:
# Resample each store's time series to a daily frequency and fill the gaps
# using forward fill: a day missing from the data receives a copy of the
# previous row of that store (every column is carried forward, not only Sales).
train_df = (
    train_df
    .set_index('Date')
    .groupby('Store')
    .resample('D')
    .ffill()
    # Older pandas keeps the grouping column 'Store' in the resampled frame
    # (duplicating the 'Store' index level), while newer pandas excludes it.
    # errors='ignore' makes the drop work in both cases instead of raising
    # KeyError when the column is already absent.
    .drop(columns=['Store'], errors='ignore')
    # Bring the (Store, Date) index levels back as regular columns
    .reset_index()
)

In [6]:
def prophet_predict(store_id, train, test,
                    holidays: pd.DataFrame = None,
                    xreg: list = None,
                    periods: int = 48):
    """
    Fit a prophet model and make predictions for a single store

    Parameters
    ----------
    store_id
        Value of the 'Store' column that selects the store to model.
    train : pd.DataFrame
        History of all stores. Must contain 'Store', 'Date' and 'Sales',
        plus every column named in ``xreg``.
    test : pd.DataFrame
        Forecast-period rows of all stores. Only read when ``xreg`` is
        given; it must then contain 'Store', 'Date' and every column named
        in ``xreg``.
    holidays : pd.DataFrame, optional
        Passed unchanged to ``Prophet(holidays=...)``.
    xreg : list, optional
        Names of the external regressor columns. ``None`` (or an empty
        list) fits a model without regressors.
    periods : int
        Number of daily steps to forecast after the last training date.

    Returns
    -------
    pd.DataFrame
        Columns 'Date', 'Sales' (the model's ``yhat``) and 'Store', one row
        per forecast day.
    """
    # Select a single store per model and use prophet's column names
    train = train[train['Store'] == store_id]
    train = train.rename(columns={'Date': 'ds',
                                  'Sales': 'y'})
    train = train.sort_values('ds')

    model = Prophet(holidays=holidays)

    # External regressors
    regressors = list(xreg) if xreg is not None else []
    for x in regressors:
        model.add_regressor(x)

    # Fit the model
    with suppress_stdout_stderr():
        model.fit(train)

    # Make a future dataframe covering the forecast horizon only
    future = model.make_future_dataframe(periods=periods,
                                         freq='D',
                                         include_history=False)

    if regressors:
        # The regressor values must come from THIS store and be matched on
        # the date. A plain `future[x] = test[x]` aligns on the index labels
        # of the whole multi-store test frame, which hands every store the
        # first `periods` rows of `test` regardless of store or date.
        store_test = (
            test.loc[test['Store'] == store_id, ['Date'] + regressors]
            .rename(columns={'Date': 'ds'})
            .assign(ds=lambda frame: pd.to_datetime(frame['ds']))
        )
        # A left merge keeps the horizon rows in their original order. Dates
        # without a test row for this store end up with NaN regressors
        # (prophet is expected to reject those at predict time -- confirm).
        future = future.merge(store_test, on='ds', how='left')

    # ...and predict on it
    forecast = model.predict(future)
    forecast = forecast[['ds', 'yhat']].rename(columns={'ds': 'Date',
                                                        'yhat': 'Sales'})
    forecast['Store'] = store_id

    return forecast

In [7]:
# Candidate external regressors for prophet
include_list = [
    'Open', 'Promo', 'CompetitionDistance',
    'CompetitionOpenSinceMonth', 'CompetitionOpenSinceYear',
    'Promo2', 'Promo2SinceWeek', 'Promo2SinceYear',
    'CompetitionOpen', 'PromoOpen', 'IsPromoMonth',
]
# Keep the candidates that exist in the training frame, in the frame's column order
xreg_list = [column for column in train_df.columns if column in include_list]
# ...then discard that selection: passing None makes prophet_predict fit without
# any regressors, i.e. a univariate model
xreg_list = None

In [8]:
# Train a univariate prophet model for each store and make predictions.
# Only stores that appear in the test set need a forecast.
stores_test = test_df['Store'].unique()

# One forecast frame per store, in the order the stores appear in stores_test
store_forecasts = [
    prophet_predict(store_id=store,
                    train=train_df,
                    test=test_df,
                    holidays=holidays_df,
                    xreg=xreg_list)
    for store in tqdm(stores_test)
]

# Stack the per-store forecasts into a single frame
submission_df = pd.concat(store_forecasts)

  0%|          | 0/856 [00:00<?, ?it/s]INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
  0%|          | 1/856 [00:03<51:57,  3.65s/it]INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
  0%|          | 2/856 [00:05<35:27,  2.49s/it]INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
  0%|          | 3/856 [00:06<29:59,  2.11s/it]INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
  0%|          | 4/856 [00:08<27:35,  1.94s/it]INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
  1%|          | 5/856 [00:10<27:54,  1.97s/it]INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
  1%|          | 6/856 [00:12<27:17,  1.93s/it]INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=Tr

In [9]:
# Prepare for submission: join the forecasts to the test rows on (Store, Date)
# to pick up each row's Id, then keep only the two submission columns
submission_df = submission_df.merge(test_df, on=['Store', 'Date'])[['Id', 'Sales']]

# Quick check: the joined frame must have as many rows as the test set
assert len(submission_df) == len(test_df)

submission_df.to_csv(f'{SUBMISSION_PATH}/submission_prophet.csv', index=False)