## PyCaret - Time Series 101
https://pycaret.gitbook.io/docs/learn-pycaret/official-blog/time-series-101-for-beginners

In [11]:
import numpy as np
import pandas as pd
# Load the monthly AirPassengers table and append a parsed timestamp column.
# 'Month' arrives as text such as '1949-01'; 'Date' is its datetime version.
# NOTE(review): the CSV path is machine-specific - adjust it when running elsewhere.
data = (
    pd.read_csv('/Users/Dhaval/Downloads/AirPassengers.csv')
    .assign(Date=lambda frame: pd.to_datetime(frame['Month']))
)
data.head()

Unnamed: 0,Month,#Passengers,Date
0,1949-01,112,1949-01-01
1,1949-02,118,1949-02-01
2,1949-03,132,1949-03-01
3,1949-04,129,1949-04-01
4,1949-05,121,1949-05-01


In [13]:
# The raw 'Month' text column is redundant once 'Date' holds the parsed
# timestamps, so remove it from the frame in place.
data.drop(columns=['Month'], inplace=True)

In [14]:
# Smooth the series with a trailing 12-month mean to expose the trend.
# The first 11 rows are NaN because a full 12-month window is not yet available.
data['MA12'] = data['#Passengers'].rolling(window=12).mean()

# Overlay the raw passenger counts and the moving average on one line chart.
import plotly.express as px

fig = px.line(
    data,
    x="Date",
    y=["#Passengers", "MA12"],
    template='plotly_dark',
)
fig.show()

In [15]:
# Derive calendar features from the timestamps with the vectorised .dt
# accessors. .tolist() turns the results into plain Python ints, so the new
# columns are built exactly as an element-by-element loop over the dates
# would build them (same values, same resulting dtype).
data['Month'] = data['Date'].dt.month.tolist()
data['Year'] = data['Date'].dt.year.tolist()

# 1-based position of each observation in the series: 1, 2, ..., len(data).
data['Series'] = np.arange(len(data)) + 1

# Remove the helper columns, then keep the modelling columns in a fixed order.
data.drop(columns=['Date', 'MA12'], inplace=True)
data = data[['Series', 'Year', 'Month', '#Passengers']]

# Preview the first rows of the reshaped dataset.
data.head()

Unnamed: 0,Series,Year,Month,#Passengers
0,1,1949,1,112
1,2,1949,2,118
2,3,1949,3,132
3,4,1949,4,129
4,5,1949,5,121


In [16]:
# Chronological split: fit on every year before 1960 and hold out 1960 onward.
train = data[data['Year'].lt(1960)]
test = data[data['Year'].ge(1960)]

# Row/column counts of both partitions.
train.shape, test.shape


((132, 4), (12, 4))

In [19]:
# Pull PyCaret's regression API into the namespace. The wildcard form is kept
# on purpose: setup() and the other PyCaret helpers are called unqualified.
from pycaret.regression import *

# Configure the experiment.
s = setup(
    data=train,
    test_data=test,                       # explicit hold-out frame supplied by the split above
    target='#Passengers',
    fold_strategy='timeseries',           # time-series cross-validation strategy
    numeric_features=['Year', 'Series'],  # force numeric; 'Month' is left to type inference
    fold=3,
    transform_target=True,
    session_id=123,                       # fixed session id so runs are repeatable
)

Unnamed: 0,Description,Value
0,session_id,123
1,Target,#Passengers
2,Original Data,"(132, 4)"
3,Missing Values,False
4,Numeric Features,2
5,Categorical Features,1
6,Ordinal Features,False
7,High Cardinality Features,False
8,High Cardinality Method,
9,Transformed Train Set,"(132, 13)"


In [20]:
# Train and cross-validate the candidate regressors, ranked by MAE;
# `best` receives the model returned by compare_models for that ranking.
best = compare_models(sort = 'MAE')

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
lr,Linear Regression,22.3981,923.875,28.2856,0.5621,0.0878,0.0746,1.8333
lar,Least Angle Regression,22.3981,923.8736,28.2856,0.5621,0.0878,0.0746,0.0267
huber,Huber Regressor,22.4233,891.6693,27.9375,0.5986,0.088,0.0749,0.0467
br,Bayesian Ridge,22.4783,932.2165,28.5483,0.5611,0.0884,0.0746,0.03
ridge,Ridge Regression,23.1975,1003.9423,30.041,0.5258,0.0933,0.0764,0.0167
lasso,Lasso Regression,38.4188,2413.5096,46.8468,0.0882,0.1473,0.1241,0.62
en,Elastic Net,40.6486,2618.8753,49.4048,-0.0824,0.1563,0.1349,0.0233
omp,Orthogonal Matching Pursuit,44.3054,3048.2652,53.8613,-0.4499,0.1713,0.152,0.0167
gbr,Gradient Boosting Regressor,50.1217,4032.0567,61.2306,-0.6189,0.2034,0.1538,0.03
rf,Random Forest Regressor,52.7754,4705.6863,65.6728,-0.7962,0.2148,0.1592,0.21


In [21]:
# Score the selected model without passing `data`, i.e. on the experiment's
# hold-out set (presumably the `test` frame handed to setup() - confirm).
prediction_holdout = predict_model(best)

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Linear Regression,25.0712,972.2655,31.1812,0.8245,0.0692,0.0571


In [23]:
# Score the model on the full history (training rows plus hold-out rows).
predictions = predict_model(best, data=data)

# Attach one month-start timestamp per row, covering 1949-01 through 1960-12.
predictions['Date'] = pd.date_range(start='1949-01-01', end='1960-12-01', freq='MS')

# Plot the actual passenger counts against the model output ('Label').
fig = px.line(
    predictions,
    x='Date',
    y=["#Passengers", "Label"],
    template='plotly_dark',
)

# Shade the 1960 test window with a vertical rectangle.
fig.add_vrect(
    x0="1960-01-01",
    x1="1960-12-01",
    fillcolor="grey",
    opacity=0.25,
    line_width=0,
)
fig.show()

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Linear Regression,12.5381,278.7538,16.6959,0.9805,0.0538,0.0447


In [24]:
# Finalize the chosen model before forecasting beyond the observed data.
# NOTE(review): finalize_model is expected to refit on the full dataset
# (train + hold-out) - confirm against the installed PyCaret version.
final_best = finalize_model(best)

In [27]:
# Build the feature frame for the forecast horizon: one row per month start
# from 1961-01 through 1965-01 (inclusive), carrying the same three feature
# columns ('Month', 'Year', 'Series') that the historical frame has.
future_dates = pd.date_range(start='1961-01-01', end='1965-01-01', freq='MS')

# The historical 'Series' index runs 1..144 (132 training + 12 test rows),
# so the forecast continues the sequence at 145.
first_future_step = 145

future_df = pd.DataFrame({
    # Vectorised accessors replace per-timestamp Python loops. .tolist()
    # yields plain Python ints so the columns keep the int64 dtype that
    # list-built columns have (the accessors may return a narrower integer
    # type on newer pandas versions).
    'Month': future_dates.month.tolist(),
    'Year': future_dates.year.tolist(),
    'Series': np.arange(first_future_step, first_future_step + len(future_dates)),
})
future_df.head()

Unnamed: 0,Month,Year,Series
0,1,1961,145
1,2,1961,146
2,3,1961,147
3,4,1961,148
4,5,1961,149


In [28]:
# Run the finalized model over the future feature rows; the result shown
# below carries the input columns plus a 'Label' column with the forecasts.
predictions_future = predict_model(final_best, data=future_df)
# Preview the first forecast rows.
predictions_future.head()

Unnamed: 0,Month,Year,Series,Label
0,1,1961,145,486.278046
1,2,1961,146,482.208038
2,3,1961,147,550.486145
3,4,1961,148,535.187012
4,5,1961,149,538.923767


In [30]:
# Stack the historical rows on top of the forecast rows. Columns present on
# only one side ('#Passengers' for history, 'Label' for the forecast) are
# filled with NaN on the other.
concat_df = pd.concat([data, predictions_future])

# Replace the duplicated integer index with month-start timestamps spanning
# 1949-01 through 1965-01, one per stacked row.
concat_df_i = pd.date_range(start='1949-01-01', end='1965-01-01', freq='MS')
concat_df.set_index(concat_df_i, inplace=True)

# Plot the observed counts and the forecast on a shared time axis.
fig = px.line(
    concat_df,
    x=concat_df.index,
    y=["#Passengers", "Label"],
    template='plotly_dark',
)
fig.show()