## Multiple Time Series Forecasting with PyCaret
https://pycaret.gitbook.io/docs/learn-pycaret/official-blog/multiple-time-series-forecasting-with-pycaret

In [None]:
# Load the raw training data, build one key per store/item series and derive
# calendar features from the date column.
import pandas as pd

# NOTE: machine-specific absolute path -- point this at your local train.csv
data = pd.read_csv('/Users/Dhaval/Downloads/train.csv')
data['date'] = pd.to_datetime(data['date'])

# combine store and item into a single time_series key
# (e.g. 'store_1_item_1' for store 1 / item 1); vectorised string
# concatenation avoids a slow row-wise apply
data['time_series'] = (
    'store_' + data['store'].astype(str) + '_item_' + data['item'].astype(str)
)
data.drop(columns=['store', 'item'], inplace=True)

# extract features from date with the vectorised .dt accessor; cast to int64
# because pandas >= 2 returns int32 here and the dtype should not depend on
# the installed pandas version
data['month'] = data['date'].dt.month.astype('int64')
data['year'] = data['date'].dt.year.astype('int64')
data['day_of_week'] = data['date'].dt.dayofweek.astype('int64')
data['day_of_year'] = data['date'].dt.dayofyear.astype('int64')
data.head()

In [None]:
# how many distinct store/item series are there? (missing keys are not counted)
data['time_series'].nunique(dropna=True)

In [None]:
# plot every time series together with its 30-row rolling mean of sales

import plotly.express as px

# One groupby pass instead of re-filtering the whole frame for every series;
# sort=False keeps the series in order of first appearance, as .unique() would.
for series_name, subset in data.groupby('time_series', sort=False):
    # assign() returns a new frame, so the moving average is never written
    # into a slice of `data` (which triggers SettingWithCopyWarning)
    subset = subset.assign(moving_average=subset['sales'].rolling(30).mean())
    fig = px.line(subset, x="date", y=["sales", "moving_average"],
                  title=series_name, template='plotly_dark')
    fig.show()

In [None]:
import os

from tqdm import tqdm
from pycaret.regression import *

# Train one regression pipeline per time series: run the PyCaret model
# comparison on each series, record the winning row of the comparison grid,
# refit the winner and persist it under trained_models/<series name>.

all_ts = data['time_series'].unique()

all_results = []   # one single-row leaderboard frame per series
final_model = {}   # series name -> finalized pipeline

# make sure the output folder exists before the first save_model call
os.makedirs('trained_models', exist_ok=True)

for i in tqdm(all_ts):

    df_subset = data[data['time_series'] == i]

    # initialize setup from pycaret.regression; shuffling is disabled and the
    # 'timeseries' fold strategy is used so rows stay in their original order
    s = setup(df_subset, target = 'sales', train_size = 0.95,
              data_split_shuffle = False, fold_strategy = 'timeseries', fold = 3,
              ignore_features = ['date', 'time_series'],
              numeric_features = ['day_of_year', 'year'],
              categorical_features = ['month', 'day_of_week'],
              silent = True, verbose = False, session_id = 123)

    # compare all models and select best one based on MAE
    best_model = compare_models(sort = 'MAE', verbose=False)

    # keep only the top row of the comparison grid; copy it so the series
    # label is written to an independent frame rather than to a slice
    p = pull().iloc[0:1].copy()
    p['time_series'] = str(i)
    all_results.append(p)

    # finalize model i.e. fit on entire data including test set
    f = finalize_model(best_model)

    # attach final model to a dictionary
    final_model[i] = f

    # save transformation pipeline and model as pickle file
    save_model(f, model_name='trained_models/' + str(i), verbose=False)

We can now create a data frame from the `all_results` list. It shows the best model selected for each time series.


In [None]:
# stack the single-row leaderboard frames (one per series) on top of each other
concat_results = pd.concat(all_results)
concat_results.head()

### Generate predictions using trained models
Now that we have trained models, let’s use them to generate predictions, but first, we need to create the dataset for scoring (X variables).

In [None]:
# Build the scoring frame: one row per calendar day from 2013-01-01 through
# 2019-12-31, with the same calendar feature columns used for training.
all_dates = pd.date_range(start='2013-01-01', end='2019-12-31', freq='D')
score_df = pd.DataFrame({'date': all_dates})

# Derive the features with the vectorised .dt accessor instead of a
# Python-level loop over Timestamps. pandas >= 2 returns int32 from .dt, so
# cast to int64 to keep the column dtype independent of the pandas version.
score_df['month'] = score_df['date'].dt.month.astype('int64')
score_df['year'] = score_df['date'].dt.year.astype('int64')
score_df['day_of_week'] = score_df['date'].dt.dayofweek.astype('int64')
score_df['day_of_year'] = score_df['date'].dt.dayofyear.astype('int64')
score_df.head()

Now let’s create a loop to load the trained pipelines and use the `predict_model` function to generate prediction labels.

In [None]:
from pycaret.regression import load_model, predict_model
# Score the shared date frame with each series' saved pipeline, tag every
# result with the series it belongs to, then stack the results.
all_score_df = []
for series_name in tqdm(data['time_series'].unique()):
    model_path = 'trained_models/' + str(series_name)
    pipeline = load_model(model_path, verbose=False)
    scored = predict_model(pipeline, data=score_df).assign(time_series=series_name)
    all_score_df.append(scored)
concat_df = pd.concat(all_score_df, axis=0)
concat_df.head()

We will now left-join `concat_df` with `data` on `date` and `time_series`.

In [None]:
# Left join keeps every scored row; actual sales are attached wherever the
# training data has the same date and series.
# NOTE(review): columns present in both frames other than the keys (presumably
# the calendar features) will get pandas' default _x/_y suffixes -- confirm.
join_keys = ['date', 'time_series']
final_df = concat_df.merge(data, how='left', on=join_keys)
final_df.head()

We can now create a loop to view the plots; only the first five time series are shown here.

In [None]:
# imported once, ahead of the loop, instead of on every iteration
import plotly.express as px

# Preview actual sales against the 'Label' column for the first five series.
# NOTE(review): 'Label' is presumably the prediction column added by
# predict_model -- confirm against the installed PyCaret version.
for i in final_df['time_series'].unique()[:5]:
    sub_df = final_df[final_df['time_series'] == i]

    fig = px.line(sub_df, x="date", y=['sales', 'Label'], title=i, template = 'plotly_dark')
    fig.show()