In [None]:
%matplotlib inline 

import os
# List the entries under ../input/ so the available data directories show up in the cell output.
print(os.listdir("../input/"))

import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
import seaborn as sns 
from fbprophet import Prophet

from dateutil.relativedelta import relativedelta # working with dates with style
from scipy.optimize import minimize              # for function minimization

import statsmodels.formula.api as smf            # statistics and econometrics
import statsmodels.tsa.api as smt
import statsmodels.api as sm
import scipy.stats as scs

from itertools import product                    # some useful functions
from tqdm import tqdm_notebook

# Set the root logger's level to CRITICAL.
# NOTE(review): presumably meant to silence the fitting logs of the modelling
# libraries -- confirm that nothing below relies on INFO/WARNING log output.
import logging
logger = logging.getLogger()
logger.setLevel(logging.CRITICAL)

# Suppress every Python warning for the rest of the session. This also hides
# pandas warnings raised by later cells.
import warnings
warnings.filterwarnings('ignore')

# Apply seaborn's default plot theme.
sns.set()

## Read training, test and sample-submission data

In [None]:
# Load the three competition files. For train and test the `date` column is
# parsed and then reduced to plain `datetime.date` objects (no time-of-day part).
train = pd.read_csv('../input/demand-forecasting-kernels-only/train.csv').assign(
    date=lambda frame: pd.to_datetime(frame['date']).dt.date
)
test = pd.read_csv('../input/demand-forecasting-kernels-only/test.csv').assign(
    date=lambda frame: pd.to_datetime(frame['date']).dt.date
)
sample_submission = pd.read_csv('../input/demand-forecasting-kernels-only/sample_submission.csv')

# Column names, dtypes and non-null counts of the training frame.
train.info()

In [None]:
# Display five randomly chosen training rows as a quick sanity check.
train.sample(5)

In [None]:
#test.sample(5)

In [None]:
#sample_submission.head()

In [None]:
# Report the first and last calendar day covered by each data set.
print('Training date range', train['date'].min(), 'to ', train['date'].max())
print('Testing date range', test['date'].min(), 'to ', test['date'].max())

In [None]:
# Distinct store ids found in the training data, in order of first appearance.
train.store.unique()

In [None]:
# Distinct item ids found in the training data, in order of first appearance.
train.item.unique()

In [None]:
# Build a holiday table with the columns holiday / ds / lower_window / upper_window.
# NOTE(review): this frame is not passed to any model in the visible part of
# the notebook -- confirm whether that is intentional.
holidays = pd.read_csv('../input/usa-public-holidays-list-20132018/usa_holidays_2013_2018.csv')
holidays['Date'] = pd.to_datetime(holidays['Date']).dt.date

# The file is expected to hold exactly two columns (date first); rename them positionally.
holidays.columns = ['ds', 'holiday']

# Every holiday gets the same window: from one day before (-1) up to the day itself (0).
cols = ['holiday', 'ds', 'lower_window', 'upper_window']
holidays = holidays.assign(lower_window=-1, upper_window=0)[cols]
holidays.head()

In [None]:
%%time

# Fit one Prophet model per (store, item) sales series and collect the
# forecasts for the matching test rows.
#
# Sales are modelled on the log1p scale; the combined forecasts are mapped back
# with expm1 after the loop. `results_all` ends up as the test rows plus a
# `sales` column holding the un-rounded prediction.
forecast_horizon = 90  # number of days forecast past the end of each training series

results_all = []

for store in train.store.unique():
    for item in train.item.unique():
        # Training history of this series, renamed to the ds / y columns Prophet expects.
        ts = (
            train.loc[(train.store == store) & (train.item == item), ['date', 'sales']]
            .sort_values('date', ascending=True)
        )
        ts.columns = ['ds', 'y']
        ts['y'] = np.log1p(ts.y)

        # Work on an explicit copy: assigning into a filtered slice of `test`
        # is chained assignment and only went unnoticed because warnings are
        # suppressed globally.
        test_sample = test[(test.store == store) & (test.item == item)].copy()
        test_sample['date'] = test_sample.date.apply(str)

        # NOTE(review): daily_seasonality=True is kept as in the original; if the
        # data holds one observation per day, confirm this setting is intended.
        m = Prophet(
            interval_width=0.8,
            daily_seasonality=True,
            yearly_seasonality=True,
            weekly_seasonality=True,
        )
        m.fit(ts)

        # Only future dates survive the left merge below, so do not spend time
        # re-predicting the whole training history for every series.
        future = m.make_future_dataframe(periods=forecast_horizon, include_history=False)
        forecast = m.predict(future)

        result = forecast[['ds', 'yhat']].copy()
        result.columns = ['date', 'sales']
        result['store'] = store
        result['item'] = item
        # Same 'YYYY-MM-DD' text form as str(datetime.date), used as the merge key.
        result['date'] = result['date'].dt.strftime('%Y-%m-%d')

        merged_test = pd.merge(test_sample, result, how='left', on=['date', 'store', 'item'])

        # Fail on the first series whose forecast does not cover every test row,
        # instead of silently writing NaN sales into the submission files.
        if merged_test['sales'].isna().any():
            raise ValueError(
                'No forecast for some test rows of store {}, item {}; '
                'check forecast_horizon against the test date range.'.format(store, item)
            )

        results_all.append(merged_test)

# ignore_index avoids repeating the per-series row labels in the combined frame.
results_all = pd.concat(results_all, axis=0, ignore_index=True)
results_all['sales'] = np.expm1(results_all.sales)

In [None]:
# Inspect the last rows of the combined forecasts.
results_all.tail()

In [None]:
# Write one submission file per rounding rule (floor, ceil, round) so the
# variants can be compared against each other.
#
# Predictions are clipped at zero before rounding: expm1 of a negative
# log-scale forecast lies in (-1, 0), which np.floor would otherwise turn into
# -1 sales.
clipped_sales = results_all['sales'].clip(lower=0)

for suffix, rounder in (('floor', np.floor), ('ceil', np.ceil), ('round', np.round)):
    my_submission = pd.DataFrame({'id': results_all['id'], 'sales': rounder(clipped_sales)})
    my_submission.to_csv('prophet_vanilla_{}.csv'.format(suffix), index=False)