In [None]:
%matplotlib inline

from pathlib import Path

import pandas as pd
import numpy as np
import seaborn as sns
import missingno as msno

from dmba import regressionSummary

import matplotlib.pylab as plt

In [None]:
# Load the candy production CSV into a DataFrame.
# NOTE(review): the path begins with '...' (three dots) rather than the usual
# '..' parent-directory form — presumably resolved by the hosting lab
# environment; confirm before running the notebook elsewhere.
data_df = pd.read_csv('.../resource/lib/public/candy_production.csv')

In [None]:
# Check the first 10 rows

???

![Pix_1.png](attachment:Pix_1.png)

In [None]:
# Check the last 10 rows

???

![Pix_2.png](attachment:Pix_2.png)

In [None]:
# Show the variable names, non-null counts, and datatypes

???

![Pix_3.png](attachment:Pix_3.png)

In [None]:
# Convert the date string into a datetime object in Python

data_df['date'] = pd.to_datetime(data_df['???'])

In [None]:
# Show the first five rows of the date column

???

![Pix_4.png](attachment:Pix_4.png)

In [None]:
# Check the datatypes again

???

![Pix_5.png](attachment:Pix_5.png)

In [None]:
# Extract underlying data from the date variable

data_df['year'] = data_df['date'].dt.???
data_df['month'] = data_df['date'].dt.???
data_df['day'] = data_df['date'].dt.???
data_df['weekday'] = data_df['date'].dt.???

In [None]:
# Build a date-indexed pandas Series of the target values to use for plotting

data_ts = pd.Series(
    data=data_df['target'].values,
    index=data_df['date'],
)

In [None]:
# Display a line chart of the time series data.
# The inline backend is already enabled by the first cell, so the magic is not
# repeated here; an explicit Figure/Axes pair keeps the labels tied to this chart.

fig, ax = plt.subplots(figsize=(10, 6))

data_ts.plot(ax=ax,
             ylim=(0, 180),  # fixed y-range starting at zero
             legend=False,
             color='darkorange')

ax.set_xlabel('Year')        # set x-axis label
ax.set_ylabel('Production')  # set y-axis label

fig.tight_layout()
plt.show()

![Pix_6.png](attachment:Pix_6.png)

#### Create two new variables that are required by Prophet

In [None]:
# Create two new variables, 'ds' and 'y' to match with Prophet's requirements
# Prophet will extract these two from the dataframe without any further feature selection

data_df[???] = data_df['date']
data_df[???] = data_df['target']

#### Use the 'year' variable to create training and test datasets

In [None]:
# Split on the 'year' column: rows before 2010 form the training set,
# rows from 2010 onward form the test set

train_data = data_df.loc[data_df['year'] < 2010]
test_data = data_df.loc[data_df['year'] >= 2010]

# Report (rows, columns) for each split, one per line
print(train_data.shape, test_data.shape, sep='\n')

![Pix_7.png](attachment:Pix_7.png)

In [None]:
# Load the Prophet algorithm and plotting elements.
# The package was renamed from 'fbprophet' to 'prophet' in its 1.0 release, so try
# the current name first and fall back to the legacy one used by older installs.

try:
    from prophet import Prophet
    from prophet.plot import plot_plotly
except ImportError:
    from fbprophet import Prophet
    from fbprophet.plot import plot_plotly

import plotly.offline as py
py.init_notebook_mode()  # set up plotly for rendering inside the notebook

### Our first time series model

In [None]:
# Create a fitted time series model

prophet_model_1 = Prophet()

prophet_model_1.add_country_holidays(country_name='US')

prophet_model_1.???(???)

In [None]:
# Let's check out the US holidays

prophet_model_1.train_holiday_names

#### Generate 5 years' (60 months') worth of predictions

In [None]:
# Make places in the dataframe to hold 60 monthly periods (five years) of future data
# NOTE(review): 'm' is a lowercase month alias that recent pandas releases deprecate —
# confirm the intended alias (month-start vs month-end) against the dates in the data.

future = prophet_model_1.make_future_dataframe(periods=60, freq='m')

future.tail(15)

![Pix_8.png](attachment:Pix_8.png)

In [None]:
# Create a set of predictions for every date in 'future' (including the 60 added monthly periods)

forecast_1 = prophet_model_1.???(future)

In [None]:
# Plot the actual and predicted series

fig1 = prophet_model_1.plot(???)

![Pix_9.png](attachment:Pix_9.png)

In [None]:
# Plot the components (year and monthly)

fig1 = prophet_model_1.plot_components(???)

![Pix_10.png](attachment:Pix_10.png)

In [None]:
# Display the changepoints identified by Prophet

try:
    from prophet.plot import add_changepoints_to_plot
except ImportError:  # legacy package name used before the 1.0 rename
    from fbprophet.plot import add_changepoints_to_plot

# Draw the forecast first, then overlay the changepoint markers on the same axes
fig = prophet_model_1.plot(forecast_1)

# Assigning the return value keeps it from being echoed as cell output
a = add_changepoints_to_plot(fig.gca(), prophet_model_1, forecast_1)

![Pix_11.png](attachment:Pix_11.png)

In [None]:
# List the dates of the changepoints

prophet_model_1.changepoints

![Pix_12.png](attachment:Pix_12.png)

### Create a second prophet model but with limited changepoints

In [None]:
# Create a new model with a limited number of changepoints, then fit it and predict in a single chained line of code

prophet_model_2 = Prophet(n_changepoints=???)

forecast_2 = prophet_model_2.???(train_data).???(future)

In [None]:
# Display the changepoints identified by Prophet

fig = prophet_model_2.plot(???);

a = add_changepoints_to_plot(fig.gca(), prophet_model_2, ???)

![Pix_13.png](attachment:Pix_13.png)

In [None]:
# Make a prediction on the test data

prediction_2 = prophet_model_2.???(???)

prophet_model_2.plot(???)

![Pix_14.png](attachment:Pix_14.png)

In [None]:
# What's inside prediction_2 (the table of predictions, not a model)? Let's check it out.

prediction_2.head()

![Pix_15.png](attachment:Pix_15.png)

### Turn predictions and actual test values into numpy vectors by extracting values only

In [None]:
# Pull plain numpy arrays out of the pandas objects by taking the values alone
yhat_2 = prediction_2.loc[:, 'yhat'].values  # 'yhat' column of the model 2 predictions

target = test_data.loc[:, 'y'].values  # 'y' column of the test data

### Compute the Mean Directional Accuracy for prophet model 2

In [None]:
import numpy as np

def mda(actual: np.ndarray, predicted: np.ndarray) -> float:
    """Mean Directional Accuracy.

    Fraction of consecutive steps where the predicted series moves in the same
    direction (up, down, or flat) as the actual series.

    Parameters
    ----------
    actual : array-like
        Observed values in time order.
    predicted : array-like
        Predicted values, aligned position-by-position with ``actual``.

    Returns
    -------
    float
        Share of steps (between 0 and 1) whose direction of change matches;
        ``nan`` when there are no consecutive steps to compare.

    Raises
    ------
    ValueError
        If ``actual`` and ``predicted`` do not have the same shape.
    """
    # Coerce to plain float arrays so lists are accepted and pandas objects are
    # compared by position instead of being re-aligned on their index labels.
    actual_arr = np.atleast_1d(np.asarray(actual, dtype=float))
    predicted_arr = np.atleast_1d(np.asarray(predicted, dtype=float))

    # Unequal lengths could otherwise broadcast silently and give a wrong score.
    if actual_arr.shape != predicted_arr.shape:
        raise ValueError(
            "actual and predicted must have the same shape, "
            f"got {actual_arr.shape} and {predicted_arr.shape}"
        )

    # Step-to-step direction (+1, 0, -1) along the time axis of each series.
    actual_direction = np.sign(np.diff(actual_arr, axis=0))
    predicted_direction = np.sign(np.diff(predicted_arr, axis=0))
    same_direction = actual_direction == predicted_direction

    if same_direction.size == 0:
        # Fewer than two observations: nothing to compare.
        return np.float64(np.nan)
    return np.mean(same_direction.astype(int))

mda(target, yhat_2)

![Pix_21.png](attachment:Pix_21.png)

### Compute the regression summary for prophet model_2

In [None]:
regressionSummary(???, ???)

![Pix_17.png](attachment:Pix_17.png)

In [None]:
# Element-wise forecast error for model 2: actual minus predicted
forecast_error_2 = np.subtract(target, yhat_2)

forecast_error_2

![Pix_18.png](attachment:Pix_18.png)

### Making a Tunable Time Series Model (model_3)

In [None]:
# Create and fit a new model with a tunable changepoint prior scale

prophet_model_3 = Prophet(changepoint_prior_scale=???)

forecast_3 = prophet_model_3.fit(???).predict(???)

In [None]:
fig3 = prophet_model_3.plot(forecast_3)

![Pix_19.png](attachment:Pix_19.png)

In [None]:
# Make a prediction on the test data with model 3
prediction_3 = prophet_model_3.predict(test_data)

# Trailing semicolon suppresses the returned Figure so the chart is shown only once
prophet_model_3.plot(prediction_3);

![Pix_20.png](attachment:Pix_20.png)

In [None]:
# Pull plain numpy arrays out of the pandas objects by taking the values alone
yhat_3 = prediction_3.loc[:, 'yhat'].values  # 'yhat' column of the model 3 predictions

target = test_data.loc[:, 'y'].values  # 'y' column of the test data

### Compute the Mean Directional Accuracy

In [None]:
import numpy as np

def mda(actual: np.ndarray, predicted: np.ndarray) -> float:
    """Mean Directional Accuracy.

    Fraction of consecutive steps where the predicted series moves in the same
    direction (up, down, or flat) as the actual series.

    Parameters
    ----------
    actual : array-like
        Observed values in time order.
    predicted : array-like
        Predicted values, aligned position-by-position with ``actual``.

    Returns
    -------
    float
        Share of steps (between 0 and 1) whose direction of change matches;
        ``nan`` when there are no consecutive steps to compare.

    Raises
    ------
    ValueError
        If ``actual`` and ``predicted`` do not have the same shape.
    """
    # Coerce to plain float arrays so lists are accepted and pandas objects are
    # compared by position instead of being re-aligned on their index labels.
    actual_arr = np.atleast_1d(np.asarray(actual, dtype=float))
    predicted_arr = np.atleast_1d(np.asarray(predicted, dtype=float))

    # Unequal lengths could otherwise broadcast silently and give a wrong score.
    if actual_arr.shape != predicted_arr.shape:
        raise ValueError(
            "actual and predicted must have the same shape, "
            f"got {actual_arr.shape} and {predicted_arr.shape}"
        )

    # Step-to-step direction (+1, 0, -1) along the time axis of each series.
    actual_direction = np.sign(np.diff(actual_arr, axis=0))
    predicted_direction = np.sign(np.diff(predicted_arr, axis=0))
    same_direction = actual_direction == predicted_direction

    if same_direction.size == 0:
        # Fewer than two observations: nothing to compare.
        return np.float64(np.nan)
    return np.mean(same_direction.astype(int))

mda(target, yhat_3)

![Pix_21.png](attachment:Pix_21.png)

### Display the regression performance summary

In [None]:
regressionSummary(target, yhat_3)

![Pix_22.png](attachment:Pix_22.png)