# __Forecasting Exercises - Solutions__
This exercise walks through a SARIMA prediction and forecast similar to the one done on the Mauna Loa CO₂ dataset.<br>
This time we're using a seasonal time series of California Hospitality Industry Employees.

In [None]:
import pandas as pd
import numpy as np
%matplotlib inline

#load specific forecasting tools
from statsmodels.tsa.statespace.sarimax import SARIMAX

from statsmodels.graphics.tsaplots import plot_acf,plot_pacf # for determining (p,q) orders
from statsmodels.tsa.seasonal import seasonal_decompose      # for ETS Plots
from pmdarima import auto_arima                              # for determining ARIMA orders

#load specific evaluation tools
from sklearn.metrics import mean_squared_error
from statsmodels.tools.eval_measures import rmse

# Silence warnings for the rest of the session to keep cell output tidy.
# NOTE: no category or module filter is given, so this hides *every* warning,
# not only harmless ones.
import warnings
warnings.filterwarnings("ignore")

# Read the employee series, using the parsed 'Date' column as the index
df = pd.read_csv(
    '../Data/HospitalityEmployees.csv',
    index_col='Date',
    parse_dates=True,
)
# Declare the index frequency as month-start ('MS')
df.index.freq = 'MS'

# Report the number of records, then preview the first rows
print(len(df))
print(df.head())

So <tt>df</tt> has 348 records and one column. The data represents the number of employees in thousands of persons as monthly averages from January 1990 to December 2018.

### __1. Plot the source data__
Create a line chart of the dataset. Optional: add a title and y-axis label.

In [None]:
## CODE HERE






In [None]:
# Chart labels (an x-axis label is not required here)
title = 'California Hospitality Industry Employees'
ylabel = 'Thousands of Persons'

# Draw the whole series, remove the x-axis padding, and label the y-axis
ax = df['Employees'].plot(title=title, figsize=(12, 5))
ax.autoscale(tight=True, axis='x')
ax.set_ylabel(ylabel);

### __2. Run an ETS Decomposition__
Use an 'additive' model.

In [None]:
# Decompose the employee series with an additive model and plot the components
employees = df['Employees']
result = seasonal_decompose(employees, model='add')
result.plot();

### __3. Run <tt>pmdarima.auto_arima</tt> to obtain recommended orders__
This may take a while as there are a lot of combinations to evaluate.

In [None]:
# Let auto_arima search seasonal orders (season length m=12) and display the selected model
stepwise_fit = auto_arima(df['Employees'], seasonal=True, m=12)
stepwise_fit.summary()

You should see a recommended ARIMA Order of (0,1,0) combined with a seasonal order of (2,0,0,12).

### __4. Split the data into train/test sets__
Set one year (12 records) for testing. There is more than one way to do this!

In [None]:
# Hold out the final 12 records (one year) for testing; everything before them is training data
n_test = 12
split_at = len(df) - n_test

train = df.iloc[:split_at]
test = df.iloc[split_at:]

### __5. Fit a SARIMA(0,1,0)(2,0,0,12) model to the training set__

In [None]:
# SARIMA(0,1,0)(2,0,0,12) fitted on the training records only
model = SARIMAX(
    train['Employees'],
    order=(0, 1, 0),
    seasonal_order=(2, 0, 0, 12),
)
results = model.fit()

# Last expression of the cell: show the fitted model's summary table
results.summary()

### __6. Obtain predicted values__

In [None]:
# Positions covered by the test set: the first one right after the training data,
# the last one len(test) records later (end - start + 1 == len(test)).
start = len(train)
end = len(train) + len(test) - 1

# No `typ` argument is passed: it belongs to the older ARIMA results API and is not a
# parameter of the SARIMAX results' predict(), whose output is already on the scale
# of the original series.
predictions = results.predict(start=start, end=end, dynamic=False).rename('SARIMA(0,1,0)(2,0,0,12) Predictions')

### __7. Plot predictions against known values__
Optional: add a title and y-axis label.

In [None]:
# Chart labels; the x-axis label is deliberately blank
title = 'California Hospitality Industry Employees'
ylabel = 'Thousands of Persons'
xlabel = ''

# Known test values first, then the model's predictions on the same axes
ax = test['Employees'].plot(title=title, figsize=(12, 6), legend=True)
predictions.plot(ax=ax, legend=True)

# Remove the x-axis padding and apply the axis labels
ax.autoscale(tight=True, axis='x')
ax.set_xlabel(xlabel)
ax.set_ylabel(ylabel);

### __8. Evaluate the Model using MSE and RMSE__
You can run both from the same cell if you want.

In [None]:
# Compare the held-out observations with the model's predictions
actual = test['Employees']

error1 = mean_squared_error(actual, predictions)  # mean squared error
error2 = rmse(actual, predictions)                # root mean squared error

print(f'SARIMA(0,1,0)(2,0,0,12) MSE Error: {error1:11.10}')
print(f'SARIMA(0,1,0)(2,0,0,12) RMSE Error: {error2:11.10}')

### __9. Retrain the model on the full data and forecast one year into the future__

In [None]:
# Refit the same SARIMA(0,1,0)(2,0,0,12) specification on every available record.
# Note: this rebinds `model` and `results`, replacing the train-only fit.
model = SARIMAX(df['Employees'], order=(0, 1, 0), seasonal_order=(2, 0, 0, 12))
results = model.fit()

# Forecast the 12 positions after the last observation: len(df) through len(df)+11.
# No `typ` argument is passed: it belongs to the older ARIMA results API and is not a
# parameter of the SARIMAX results' predict().
fcast = results.predict(len(df), len(df) + 11).rename('SARIMA(0,1,0)(2,0,0,12) Forecast')

### __10. Plot the forecasted values alongside the original data__
Optional: add a title and y-axis label.

In [None]:
# Chart labels; the x-axis label is deliberately blank
title = 'California Hospitality Industry Employees'
ylabel = 'Thousands of Persons'
xlabel = ''

# Historical series first, then the forecast on the same axes
ax = df['Employees'].plot(title=title, figsize=(12, 6), legend=True)
fcast.plot(ax=ax, legend=True)

# Remove the x-axis padding and apply the axis labels
ax.autoscale(tight=True, axis='x')
ax.set_xlabel(xlabel)
ax.set_ylabel(ylabel);