In [None]:
import openpyxl
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random
import holidays
from prophet import Prophet
from statsmodels.tsa.seasonal import seasonal_decompose
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.model_selection import ParameterGrid
from prophet.plot import plot_plotly, plot_components_plotly
import plotly.graph_objects as go

In [None]:
# Read the sales workbook; the first column of the sheet becomes the index.
df = pd.read_excel("Groceries_Sales.xlsx", index_col=0)

# Draw the raw series on an explicitly sized canvas before any modelling.
fig, ax = plt.subplots(figsize=(20, 7))
a = sns.lineplot(data=df, x="Date", y="Sales", ax=ax)
a.set_title("Daily Sales Data", fontsize=15)
plt.show()

In [None]:
def visualize_data(df, period=7):
    """Plot the multiplicative and additive seasonal decompositions of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame or pandas.Series
        Observations to decompose; handed to ``seasonal_decompose`` unchanged.
    period : int, default 7
        Number of observations per seasonal cycle. The previous hard-coded
        value of 1 leaves no room for a seasonal pattern (every cycle is a
        single observation), so the seasonal and residual panels were flat.
        The default of 7 assumes one observation per day with a weekly
        cycle -- TODO confirm against the data's actual sampling.

    Returns
    -------
    None
        The two decomposition figures are shown as a side effect.
    """
    for model in ('multiplicative', 'additive'):
        decomposition = seasonal_decompose(df, model=model, period=period)
        decomposition.plot()
        plt.show()


visualize_data(df)

In [None]:
# Make sure the index holds timestamps before deriving calendar fields from it.
df.index = pd.to_datetime(df.index)


def setting_up_data(df):
    """Hand-rolled additive decomposition of the ``Sales`` column.

    The trend is a centred 7-observation rolling mean, the seasonality is the
    per-calendar-month mean of the detrended values, and the residual is what
    remains. The four main components are plotted as stacked subplots.

    Returns the frame with columns ``Sales``, ``trend``, ``detrended``,
    ``month``, ``seasonality`` and ``resid``.
    """
    decomposed = df["Sales"].rename("Sales").to_frame()

    rolling_mean = decomposed["Sales"].rolling(window=7, center=True).mean()
    decomposed["trend"] = rolling_mean
    decomposed["detrended"] = decomposed["Sales"] - decomposed["trend"]

    decomposed.index = pd.to_datetime(decomposed.index)
    decomposed["month"] = decomposed.index.month

    # Every row receives the average detrended value of its calendar month.
    by_month = decomposed.groupby("month")["detrended"]
    decomposed["seasonality"] = by_month.transform("mean")
    decomposed["resid"] = decomposed["detrended"] - decomposed["seasonality"]

    components = ["Sales", "trend", "seasonality", "resid"]
    decomposed[components].plot(subplots=True, title="Seasonal decomposition - additive")
    return decomposed


seasonal_data = setting_up_data(df)

In [7]:
# Move the "Date" index level into a regular column so it can be read as df['Date'].
# Guarded so that re-running this cell (when "Date" is already a column) does not raise.
if "Date" in df.index.names:
    df = df.reset_index("Date")
def extract_data(df):
    """Derive calendar features from ``df['Date']`` and pair them with ``Sales``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a datetime-typed ``Date`` column and a ``Sales`` column.
        The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Columns ``dayofweek``, ``quarter``, ``month``, ``year``, ``dayofyear``,
        ``dayofmonth``, ``weekofyear`` followed by ``Sales``. Day and month
        names and the year are strings produced by ``strftime``; the week
        number is the ISO calendar week.
    """
    stamps = df['Date'].dt
    calendar_columns = {
        'dayofweek': stamps.strftime('%A'),
        'quarter': stamps.quarter,
        'month': stamps.strftime('%B'),
        'year': stamps.strftime('%Y'),
        'dayofyear': stamps.dayofyear,
        'dayofmonth': stamps.day,
        'weekofyear': stamps.isocalendar().week,
    }
    # assign() works on a copy, so the caller's frame keeps its original columns.
    enriched = df.assign(**calendar_columns)
    features = enriched[list(calendar_columns)]
    return pd.concat([features, df['Sales']], axis=1)

# Calendar-feature table (plus "Sales") derived from the "Date" column of df.
df_new = extract_data(df)

In [None]:
def _plot_total_sales(ax, totals, column, title):
    """Draw one bar per ``column`` value of ``totals`` on ``ax`` and drop the hue legend."""
    # hue mirrors x only to colour the bars; dodge=False keeps them full-width
    # and the legend would merely repeat the x tick labels, so it is removed.
    sns.barplot(data=totals, x=column, y="Sales", hue=column, ax=ax, dodge=False)
    ax.set(xlabel=column, ylabel='Total Sales received')
    ax.xaxis.label.set_size(8)
    ax.set_title(title, fontsize=8)
    ax.ticklabel_format(style='plain', axis='y')
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()


def bar_plot_data(df_new):
    """Show total sales per weekday (ascending) and per year as two bar charts.

    Parameters
    ----------
    df_new : pandas.DataFrame
        Needs the columns ``dayofweek``, ``year`` and ``Sales``.

    Both panels are drawn by the same helper, so the year panel now gets the
    same bar layout and legend handling as the weekday panel.
    """
    fig, (ax1, ax2) = plt.subplots(nrows=2)
    fig.set_size_inches(7, 7)

    weekday_totals = (
        df_new.groupby("dayofweek")["Sales"].sum().reset_index().sort_values('Sales')
    )
    _plot_total_sales(ax1, weekday_totals, "dayofweek", "Total Sales received By Weekday")

    year_totals = df_new.groupby("year")["Sales"].sum().reset_index()
    _plot_total_sales(ax2, year_totals, "year", "Total Sales received By year")

    fig.tight_layout()


bar_plot_data(df_new)

In [9]:
# Rename to the "ds" (timestamp) / "y" (target) column names used by the model cells.
df = df.rename(columns={'Date': 'ds', 'Sales': 'y'})

# Rows up to and including the cutoff are for training; later rows are held out.
end_date = '2019-12-31'
df_train = df[df['ds'] <= end_date]
df_test = df[df['ds'] > end_date]

In [None]:
# Overlay the training and hold-out series on one axis to check the split visually.
pd.plotting.register_matplotlib_converters()
f, ax = plt.subplots(figsize=(14, 5))
df_train.plot(kind='line', x='ds', y='y', color='blue', label='Train', ax=ax)
df_test.plot(kind='line', x='ds', y='y', color='red', label='Test', ax=ax)
# Title spelling corrected ("Traning" -> "Training").
ax.set_title('Sales Amount Training and Test data')
plt.show()

In [None]:
def model_Prophet(df_train, df_test):
    """Fit a default Prophet model and score it on the hold-out frame.

    Parameters
    ----------
    df_train, df_test : pandas.DataFrame
        Frames with ``ds`` and ``y`` columns.

    Returns
    -------
    tuple
        ``(model, forecast, df_test_forecast)``: the fitted model, its
        prediction over the history plus 60 future periods, and its
        prediction for the rows of ``df_test``. The hold-out MAPE is printed.
    """
    baseline = Prophet()
    baseline.fit(df_train)

    # History extended by 60 periods past the last training timestamp.
    horizon = baseline.make_future_dataframe(periods=60)
    full_forecast = baseline.predict(horizon)

    holdout_forecast = baseline.predict(df_test)
    print(mean_absolute_percentage_error(df_test['y'], holdout_forecast['yhat']))
    return baseline, full_forecast, holdout_forecast


model, forecast, df_test_forecast = model_Prophet(df_train, df_test)

In [None]:
def plot_forecast(df_x, df_y, title):
    """Plot actual values against forecast values on a shared time axis.

    Parameters
    ----------
    df_x : pandas.DataFrame
        Actuals with ``ds`` and ``y`` columns (drawn in red, labelled 'Test').
    df_y : pandas.DataFrame
        Forecast with ``ds`` and ``yhat`` columns (drawn in blue, labelled 'Forecast').
    title : str
        Title shown above the chart.
    """
    # 15x5 inches is the size the figure previously ended up with after the
    # width/height overrides; it is now requested directly.
    f, ax = plt.subplots(figsize=(15, 5))
    df_x.plot(kind='line', x='ds', y='y', color='red', label='Test', ax=ax)
    df_y.plot(kind='line', x='ds', y='yhat', color='blue', label='Forecast', ax=ax)
    ax.set_title(title)
    plt.show()


# The hold-out frame holds every row after 2019-12-31, so the window starts in
# January; the title now matches the one used for the other forecast plots.
plot_forecast(df_test, df_test_forecast, 'Jan & Feb 2020 Forecast vs Actuals')

In [None]:
def model_holidays(df_train, df_test):
    """Fit Prophet with Indian public holidays and score it on the hold-out frame.

    Parameters
    ----------
    df_train, df_test : pandas.DataFrame
        Frames with ``ds`` and ``y`` columns.

    Returns
    -------
    tuple
        ``(holiday_india_df, df_test_forecast_holiday, mape_holiday)``: the
        holiday table (columns ``ds`` and ``holiday``), the prediction for the
        rows of ``df_test``, and the hold-out MAPE.

    Notes
    -----
    The holiday calendar previously covered 2018 only, so no holiday effect
    could apply to later training dates or to the test window. It is now
    built for every year that occurs in either frame. The forecast over the
    full history that used to be computed here was never returned or used and
    has been removed.
    """
    all_dates = pd.to_datetime(pd.concat([df_train['ds'], df_test['ds']]))
    years = sorted({int(year) for year in all_dates.dt.year.dropna()})
    india_holidays = holidays.India(years=years)

    # Build the table in one go instead of concatenating row by row.
    holiday_india_df = pd.DataFrame(
        sorted(india_holidays.items()), columns=['ds', 'holiday']
    )

    model_with_holidays = Prophet(holidays=holiday_india_df)
    model_with_holidays.fit(df_train)

    df_test_forecast_holiday = model_with_holidays.predict(df_test)
    mape_holiday = mean_absolute_percentage_error(
        df_test['y'], df_test_forecast_holiday['yhat']
    )
    return holiday_india_df, df_test_forecast_holiday, mape_holiday


holiday_india_df, df_test_forecast_holiday, mape_holiday = model_holidays(df_train, df_test)
plot_forecast(df_test, df_test_forecast_holiday, 'Jan & Feb 2020 Forecast vs Actuals')

In [14]:
# Search space for the Prophet hyper-parameter grid search.
params_grid = {'seasonality_mode':('multiplicative','additive'),
               'changepoint_prior_scale':[0.1,0.2,0.3],
              'holidays_prior_scale':[0.1,0.2,0.3],
              'n_changepoints' : [100,150]}
grid = ParameterGrid(params_grid)
# Number of combinations in the grid (2 * 3 * 3 * 2 = 36); ParameterGrid
# supports len(), so no counting loop is needed.
cnt = len(grid)

In [None]:
%%timeit
strt='2019-12-31'
end='2020-02-26'
model_parameters = pd.DataFrame(columns = ['MAPE','Parameters'])
for p in grid:
    test = pd.DataFrame()
    random.seed(0)
    train_model =Prophet(changepoint_prior_scale = p['changepoint_prior_scale'],
                         holidays_prior_scale = p['holidays_prior_scale'],
                         n_changepoints = p['n_changepoints'],
                         seasonality_mode = p['seasonality_mode'],
                         weekly_seasonality=True,
                         daily_seasonality = True,
                         yearly_seasonality = True,
                         holidays=holiday_india_df, 
                         interval_width=0.95)
    train_model.add_country_holidays(country_name='US')
    train_model.fit(df_train)
    train_forecast = train_model.make_future_dataframe(periods=57, freq='D',include_history = False)
    train_forecast = train_model.predict(train_forecast)
    test=train_forecast[['ds','yhat']]
    Actual = df[(df['ds']>strt) & (df['ds']<=end)]
    MAPE = mean_absolute_percentage_error(Actual['y'],abs(test['yhat']))
    print('Mean Absolute Percentage Error(MAPE)------------------------------------',MAPE)
    model_parameters = pd.concat([model_parameters, pd.DataFrame({'MAPE':MAPE,'Parameters':p})], 
                                    ignore_index=True)

parameters = model_parameters.sort_values(by=['MAPE'])
parameters = parameters.reset_index(drop=True)

In [None]:
def final_prophet(holiday_india_df, df_train, df_test):
    """Fit the tuned Prophet model, plot its diagnostics and score the hold-out.

    Parameters
    ----------
    holiday_india_df : pandas.DataFrame
        Holiday table with ``ds`` and ``holiday`` columns.
    df_train, df_test : pandas.DataFrame
        Frames with ``ds`` and ``y`` columns.

    Returns
    -------
    tuple
        ``(final_model, mape_final)``: the fitted model and the hold-out MAPE
        computed on the absolute value of the predictions.
    """
    final_model = Prophet(holidays=holiday_india_df,
                          changepoint_prior_scale=0.1,
                          holidays_prior_scale=0.2,
                          n_changepoints=100,
                          seasonality_mode='multiplicative',
                          weekly_seasonality=True,
                          daily_seasonality=True,
                          yearly_seasonality=True,
                          interval_width=0.95)
    # NOTE(review): the built-in 'IN' calendar is added on top of
    # holiday_india_df; the two may list the same holidays -- confirm this
    # overlap is intended.
    final_model.add_country_holidays(country_name='IN')
    final_model.fit(df_train)

    # History plus 122 daily steps, used for the component plots only.
    future = final_model.make_future_dataframe(periods=122, freq='D')
    forecast = final_model.predict(future)
    final_model.plot_components(forecast)
    # A plotly figure is only rendered automatically when it is the last
    # expression of a cell; inside a function it must be shown explicitly.
    plot_components_plotly(final_model, forecast).show()

    df_test_final = final_model.predict(df_test)
    mape_final = mean_absolute_percentage_error(df_test['y'], abs(df_test_final['yhat']))
    plot_forecast(df_test, df_test_final, 'Jan & Feb 2020 Forecast vs Actuals')

    # Train and test on one date axis. Previously both series were plotted
    # against the row index (Series.plot ignores x=), the test series carried
    # the 'TRAIN' label, and a separate extra figure repeated the training data.
    fig, ax = plt.subplots(figsize=(14, 5))
    df_train.plot(kind='line', x='ds', y='y', label='TRAIN', ax=ax)
    df_test.plot(kind='line', x='ds', y='y', label='TEST', ax=ax)
    plt.show()
    return final_model, mape_final


final_model, mape_final = final_prophet(holiday_india_df, df_train, df_test)

In [None]:
# Set the figure size before any axes exist; applied after plotting it would
# only affect figures created later.
sns.set_theme(rc={'figure.figsize': (8.27, 11.7)})

# Label every line at creation so the legend covers all three series.
# Previously only two names were supplied and the forecast line had no entry.
sns.lineplot(x=df_train['ds'], y=df_train['y'], label='train')
sns.lineplot(x=df_test['ds'], y=df_test['y'], label='test')
# NOTE(review): `forecast` is the global returned by model_Prophet (the default
# model), not the tuned final model's forecast -- confirm this is the one
# meant to be shown here.
sns.lineplot(x=forecast['ds'], y=forecast['yhat'], label='forecast')

plt.legend(loc='upper left')
