$\color{red}{\text{REMARK:}}$  
This notebook makes use of multiprocessing.  
  
Since Jupyter Notebooks cannot properly cope with multiprocessing, this notebook may crash while computing the forecasts when using too much data.  
  
Therefore, this notebook is intended for development and debugging only. When computing forecasts for all items, please use multithreading/05_forecast_fbprophet_item_level.py.

# Import Packages

In [1]:
import sys
sys.path.append('..')
import utils
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from fbprophet import Prophet
from benchmarks.multithreading.forecaster import run_prophet
from tqdm.notebook import tqdm
import time
from multiprocessing import Pool, cpu_count

# Forecast using Prophet on Item Level

In [2]:
# Load the sample submission file from the M5 data directory
sample_submission_path = (
    f'{utils.get_m5_root_dir()}/data/input/sample_submission.csv')
submission = pd.read_csv(sample_submission_path)

# Load the calendar, sales and price tables (without memory reduction)
df_calendar, df_sales, df_prices = utils.import_m5_data(reduce_memory=False)

In [3]:
# The method has been copied to a separate forecaster.py file, otherwise it gets stuck in an infinite loop.
# Jupyter notebooks cannot properly cope with multiprocessing.
# The triple-quoted string below is only a reference copy and is never executed;
# the run_prophet used in this notebook is the one imported from
# benchmarks.multithreading.forecaster in the import cell.

"""
def run_prophet(df_prophet):
    model = Prophet(daily_seasonality=True, yearly_seasonality=True)
    model.fit(df_prophet)
    future = model.make_future_dataframe(periods=28, include_history=False)
    forecast = model.predict(future)['yhat'].to_list()
    return forecast

"""

"\ndef run_prophet(df_prophet):\n    model = Prophet(daily_seasonality=True, yearly_seasonality=True)\n    model.fit(df_prophet)\n    future = model.make_future_dataframe(periods=28, include_history=False)\n    forecast = model.predict(future)['yhat'].to_list()\n    return forecast\n\n"

In [4]:
# brings a pd.Series to a DataFrame of prophet's required structure
def create_prophet_df(pd_series, num_days=1913, start_date='2011-01-29'):
    """Convert one item's sales record into a two-column Prophet dataframe.

    Parameters
    ----------
    pd_series : pd.Series
        Record that can be indexed by the labels 'd_1' ... 'd_<num_days>'.
        Any other entries of the series are ignored.
    num_days : int, optional
        Number of daily observations to take, starting at 'd_1'. The default
        of 1913 reproduces the previously hard-coded range.
    start_date : str or datetime-like, optional
        Calendar date assigned to 'd_1'. With the default values the dates
        run from 2011-01-29 to 2016-04-24.

    Returns
    -------
    pd.DataFrame
        Columns 'ds' (consecutive daily dates) and 'y' (the selected values).

    Raises
    ------
    KeyError
        If one of the required 'd_<i>' labels is missing from pd_series.
    """
    d_string = [f'd_{di}' for di in range(1, num_days + 1)]
    # Deriving the dates from the number of days (instead of a fixed end date)
    # guarantees that 'ds' and 'y' always have the same length.
    ds = pd.date_range(start=start_date, periods=num_days)
    df = pd.DataFrame({'ds': ds, 'y': pd_series[d_string].values})
    return df

In [5]:
# Debugging only: restrict the sales data to its first 25 rows
df_sales = df_sales.iloc[:25]
df_sales

Unnamed: 0,id,item_id,dept_id,cat_id,store_id,state_id,d_1,d_2,d_3,d_4,...,d_1904,d_1905,d_1906,d_1907,d_1908,d_1909,d_1910,d_1911,d_1912,d_1913
0,HOBBIES_1_001_CA_1_validation,HOBBIES_1_001,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,1,3,0,1,1,1,3,0,1,1
1,HOBBIES_1_002_CA_1_validation,HOBBIES_1_002,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
2,HOBBIES_1_003_CA_1_validation,HOBBIES_1_003,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,2,1,2,1,1,1,0,1,1,1
3,HOBBIES_1_004_CA_1_validation,HOBBIES_1_004,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,1,0,5,4,1,0,1,3,7,2
4,HOBBIES_1_005_CA_1_validation,HOBBIES_1_005,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,2,1,1,0,1,1,2,2,2,4
5,HOBBIES_1_006_CA_1_validation,HOBBIES_1_006,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,0,1,0,1,0,0,0,2,0,0
6,HOBBIES_1_007_CA_1_validation,HOBBIES_1_007,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,0,0,0,1,0,1,0,0,1,1
7,HOBBIES_1_008_CA_1_validation,HOBBIES_1_008,HOBBIES_1,HOBBIES,CA_1,CA,12,15,0,0,...,0,0,1,37,3,4,6,3,2,1
8,HOBBIES_1_009_CA_1_validation,HOBBIES_1_009,HOBBIES_1,HOBBIES,CA_1,CA,2,0,7,3,...,0,0,1,1,6,0,0,0,0,0
9,HOBBIES_1_010_CA_1_validation,HOBBIES_1_010,HOBBIES_1,HOBBIES,CA_1,CA,0,0,1,0,...,1,0,0,0,0,0,0,2,0,2


In [6]:
# Compute the forecasts on item level in parallel (one run_prophet call per item).

if __name__ == '__main__':

    start = time.time()

    # create a list of dataframes, each containing the sales figures of one item
    list_of_df_sales = [
        create_prophet_df(row) for _, row in df_sales.iterrows()
    ]

    # The context manager shuts the worker pool down once all results have
    # been collected, so re-running this cell does not leak worker processes.
    with Pool(cpu_count()) as pool:
        # parallel computation of forecasts
        # imap yields the results in input order as they become available, so
        # the progress bar tracks finished forecasts. Wrapping the input of
        # pool.map instead only tracks how fast the inputs are handed out.
        results = list(
            tqdm(pool.imap(run_prophet, list_of_df_sales),
                 total=len(list_of_df_sales)))

    # transform list of lists (one forecast list per item) to a dataframe
    results_df = pd.DataFrame(results,
                              columns=[f'F{di}' for di in range(1, 29)])

    # add column with the items' ids; the raw values are inserted by position
    # because inserting the Series itself aligns on the index and would yield
    # NaN ids whenever df_sales does not carry a default 0..n-1 index
    results_df.insert(0, 'id', df_sales['id'].values)

    # print computation stats (elapsed time is measured once so that both
    # figures refer to the same duration)
    elapsed = time.time() - start
    n_items = max(df_sales.shape[0], 1)  # avoid division by zero on empty input
    print(
        f'Computation took {np.round(elapsed/60,2)} minutes in total.'
    )
    print(
        f'Computation took {np.round(elapsed/n_items,2)} seconds per item.'
    )

HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))


Computation took 0.32 minutes in total.
Computation took 0.78 seconds per item.


In [7]:
results_df

Unnamed: 0,id,F1,F2,F3,F4,F5,F6,F7,F8,F9,...,F19,F20,F21,F22,F23,F24,F25,F26,F27,F28
0,HOBBIES_1_001_CA_1_validation,0.765524,0.825636,0.70993,0.700229,0.752582,0.899714,0.776378,0.724902,0.783688,...,0.655437,0.802138,0.679068,0.628654,0.689385,0.57515,0.567878,0.623709,0.775442,0.65789
1,HOBBIES_1_002_CA_1_validation,0.148058,0.15978,0.177024,0.228968,0.256769,0.347268,0.289613,0.236834,0.25694,...,0.430566,0.507257,0.43256,0.359923,0.357854,0.35769,0.389008,0.393453,0.458433,0.373722
2,HOBBIES_1_003_CA_1_validation,0.646831,0.65627,0.654694,0.667836,0.706775,0.782454,0.69402,0.641173,0.652129,...,0.733811,0.81448,0.730675,0.68194,0.696344,0.699444,0.716805,0.759348,0.837879,0.751429
3,HOBBIES_1_004_CA_1_validation,1.567754,1.245377,1.184262,1.593367,1.590055,2.680505,3.289942,1.549145,1.237448,...,1.690509,2.791603,3.408682,1.672184,1.361176,1.308076,1.721464,1.718398,2.804839,3.405891
4,HOBBIES_1_005_CA_1_validation,0.682394,0.6023,0.701623,0.633248,0.705551,1.14641,1.055758,0.533515,0.468903,...,0.849026,1.358161,1.335692,0.880029,0.878905,1.05758,1.067268,1.214675,1.72605,1.699779
5,HOBBIES_1_006_CA_1_validation,0.574035,0.366405,0.329355,0.458946,0.370252,0.7154,0.563279,0.694622,0.494622,...,0.571241,0.896317,0.720659,0.825495,0.596566,0.533868,0.633954,0.512471,0.822167,0.632601
6,HOBBIES_1_007_CA_1_validation,0.253261,0.274708,0.244945,0.266324,0.30577,0.352097,0.332257,0.298182,0.317271,...,0.329294,0.365726,0.336145,0.292723,0.303085,0.262279,0.272918,0.302181,0.339122,0.310909
7,HOBBIES_1_008_CA_1_validation,10.00968,9.435423,10.388168,9.470075,10.39898,11.204933,8.899481,9.485431,8.879349,...,9.599849,10.526365,8.365169,9.11755,8.697902,9.83141,9.118364,10.274044,11.325277,9.279766
8,HOBBIES_1_009_CA_1_validation,0.102087,0.149321,-0.133512,-0.060787,-0.06093,0.355779,0.086815,-0.031459,0.044013,...,0.01263,0.456724,0.20903,0.105635,0.189521,-0.062534,0.034648,0.052403,0.480423,0.216365
9,HOBBIES_1_010_CA_1_validation,0.454427,0.398565,0.409124,0.332288,0.439109,0.562931,0.874693,0.407632,0.35521,...,0.389806,0.518742,0.835442,0.373182,0.325472,0.344098,0.27525,0.389997,0.52171,0.841361


In [8]:
# Bring data to Kaggle's required form (evaluation & validation):
# the validation rows followed by a copy in which 'validation' is replaced by
# 'evaluation' in the ids.
# pd.concat is used because DataFrame.append was deprecated in pandas 1.4 and
# removed in pandas 2.0; like append, it keeps each frame's original index.
# The replacement is restricted to the 'id' column, the only text column.
df_submission = pd.concat([
    results_df,
    results_df.assign(id=results_df['id'].replace(
        to_replace='validation', value='evaluation', regex=True)),
])
filename = utils.get_m5_root_dir(
) + '/data/submissions/fbprophet_on_item_level.csv'
#df_submission.to_csv(filename, index=False)
df_submission

Unnamed: 0,id,F1,F2,F3,F4,F5,F6,F7,F8,F9,...,F19,F20,F21,F22,F23,F24,F25,F26,F27,F28
0,HOBBIES_1_001_CA_1_validation,0.765524,0.825636,0.70993,0.700229,0.752582,0.899714,0.776378,0.724902,0.783688,...,0.655437,0.802138,0.679068,0.628654,0.689385,0.57515,0.567878,0.623709,0.775442,0.65789
1,HOBBIES_1_002_CA_1_validation,0.148058,0.15978,0.177024,0.228968,0.256769,0.347268,0.289613,0.236834,0.25694,...,0.430566,0.507257,0.43256,0.359923,0.357854,0.35769,0.389008,0.393453,0.458433,0.373722
2,HOBBIES_1_003_CA_1_validation,0.646831,0.65627,0.654694,0.667836,0.706775,0.782454,0.69402,0.641173,0.652129,...,0.733811,0.81448,0.730675,0.68194,0.696344,0.699444,0.716805,0.759348,0.837879,0.751429
3,HOBBIES_1_004_CA_1_validation,1.567754,1.245377,1.184262,1.593367,1.590055,2.680505,3.289942,1.549145,1.237448,...,1.690509,2.791603,3.408682,1.672184,1.361176,1.308076,1.721464,1.718398,2.804839,3.405891
4,HOBBIES_1_005_CA_1_validation,0.682394,0.6023,0.701623,0.633248,0.705551,1.14641,1.055758,0.533515,0.468903,...,0.849026,1.358161,1.335692,0.880029,0.878905,1.05758,1.067268,1.214675,1.72605,1.699779
5,HOBBIES_1_006_CA_1_validation,0.574035,0.366405,0.329355,0.458946,0.370252,0.7154,0.563279,0.694622,0.494622,...,0.571241,0.896317,0.720659,0.825495,0.596566,0.533868,0.633954,0.512471,0.822167,0.632601
6,HOBBIES_1_007_CA_1_validation,0.253261,0.274708,0.244945,0.266324,0.30577,0.352097,0.332257,0.298182,0.317271,...,0.329294,0.365726,0.336145,0.292723,0.303085,0.262279,0.272918,0.302181,0.339122,0.310909
7,HOBBIES_1_008_CA_1_validation,10.00968,9.435423,10.388168,9.470075,10.39898,11.204933,8.899481,9.485431,8.879349,...,9.599849,10.526365,8.365169,9.11755,8.697902,9.83141,9.118364,10.274044,11.325277,9.279766
8,HOBBIES_1_009_CA_1_validation,0.102087,0.149321,-0.133512,-0.060787,-0.06093,0.355779,0.086815,-0.031459,0.044013,...,0.01263,0.456724,0.20903,0.105635,0.189521,-0.062534,0.034648,0.052403,0.480423,0.216365
9,HOBBIES_1_010_CA_1_validation,0.454427,0.398565,0.409124,0.332288,0.439109,0.562931,0.874693,0.407632,0.35521,...,0.389806,0.518742,0.835442,0.373182,0.325472,0.344098,0.27525,0.389997,0.52171,0.841361


In [9]:
# Submit via Kaggle API or with website https://www.kaggle.com/c/m5-forecasting-accuracy/submissions
# !kaggle competitions submit -c m5-forecasting-accuracy -f data/submissions/fbprophet_on_item_level.csv -m"fbprophet_on_item_level.csv"