# Imports

In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from random import gauss
from random import seed
from pandas import Series
from pandas.plotting import autocorrelation_plot
from matplotlib import pyplot
from plotly.subplots import make_subplots
from statsmodels.graphics.tsaplots import plot_acf
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.arima.model import ARIMA
from prophet import Prophet
from prophet.plot import add_changepoints_to_plot


# Data

In [None]:
# Airline-passenger series: read the CSV, parse "Month" as dates and use it as the index.
airline_passengers_data_path = "https://storage.googleapis.com/edulabs-public-datasets/airline-passengers.csv"
airline_frame = pd.read_csv(airline_passengers_data_path, parse_dates=["Month"])
ts = airline_frame.set_index("Month").squeeze()

# Prophet-style frame: the date column is renamed to "ds" and the value column to "y".
adf = pd.DataFrame(ts).reset_index()
adf = adf.rename(columns={"Month": "ds", "Passengers": "y"})

In [None]:
# Daily female-births series: read the CSV, parse "Date" as dates and use it as the index.
dayly_total_female_births_data_path = "https://storage.googleapis.com/edulabs-public-datasets/daily-total-female-births.csv"
births_frame = pd.read_csv(dayly_total_female_births_data_path, parse_dates=["Date"])
# NOTE: this rebinds `ts`, which until now held the airline series.
ts = births_frame.set_index("Date").squeeze()

# Prophet-style frame: the date column is renamed to "ds" and the value column to "y".
bdf = pd.DataFrame(ts).reset_index()
bdf = bdf.rename(columns={"Date": "ds", "Births": "y"})

**Rename columns to fit Prophet’s expected format**

In [None]:
# Prophet-ready frame (columns "ds" and "y") for the airline-passenger series.
# `ts` was rebound to the births series by the preceding cell, so rebuilding the
# frame from `ts` with the Month/Passengers mapping would rename nothing and
# leave the columns as "Date"/"Births". Copy the airline frame prepared in the
# Data section instead.
df = adf.copy()

# Prophet - technical details

In [None]:
# Prophet model with every constructor argument left at its default.
model = Prophet()

In [None]:
# Fit the model on the ds/y frame `df`.
model.fit(df)

In [None]:
# Dates to predict on: 50 periods at month-start ('MS') frequency.
future = model.make_future_dataframe(50, freq='MS')

In [None]:
# Run the fitted model over the dates in `future`.
forecast = model.predict(future)

In [None]:
# Display the forecast table returned by predict().
forecast

In [None]:
# Plot the forecast; the returned figure is kept in `fig`.
fig = model.plot(forecast)

# Trend

## `growth`

In [None]:
# Log-transformed copy of the airline frame; `adf` itself is left unchanged.
log_adf = adf.assign(y=np.log(adf['y']))
# Quick look at the transformed series.
log_adf['y'].plot()

In [None]:
# Logistic-growth trend fitted to the log-transformed airline series.
model = Prophet(
    growth='logistic',  # 'linear', 'logistic' or 'flat'
)

# Value written to the "cap" column. It is on the log scale because
# log_adf['y'] holds log-passenger counts. Defined once so the training frame
# and the prediction frame always carry the same cap.
# NOTE(review): confirm 6.4 is not below the largest value in log_adf['y'].
log_cap = 6.4

log_adf['cap'] = log_cap
model.fit(log_adf)
future = model.make_future_dataframe(50, freq='MS')
future['cap'] = log_cap
forecast = model.predict(future)
# Keep the figure handle, as the other plotting cells do, so the cell does not
# also echo the returned figure as its result.
fig = model.plot(forecast)

# Changepoints

`n_changepoints`

`changepoint_prior_scale`

`changepoints`

## `n_changepoints`

In [None]:
# (rows, columns) of the airline frame.
adf.shape

In [None]:
# Baseline for the changepoint experiments: default Prophet fitted on the airline frame.
model = Prophet().fit(adf)
# Predict over 50 month-start periods.
future = model.make_future_dataframe(periods=50, freq='MS')
forecast = model.predict(future)

In [None]:

# Draw the forecast, then overlay the model's changepoints on the same axes.
fig = model.plot(forecast)
forecast_axes = fig.gca()
add_changepoints_to_plot(forecast_axes, model, forecast)

In [None]:
# Same pipeline as the baseline, but with n_changepoints set to 1.
model = Prophet(n_changepoints=1).fit(adf)
future = model.make_future_dataframe(periods=50, freq='MS')
forecast = model.predict(future)

# Forecast plot with the changepoints overlaid on its axes.
fig = model.plot(forecast)
forecast_axes = fig.gca()
add_changepoints_to_plot(forecast_axes, model, forecast)

## `changepoint_prior_scale`

In [None]:
# changepoint_prior_scale defaults to 0.05; increasing it makes the trend more
# flexible. Values worth comparing here: 0.05, 0.1 and 0.5 (used below).
model = Prophet(changepoint_prior_scale=0.5).fit(adf)
future = model.make_future_dataframe(periods=50, freq='MS')
forecast = model.predict(future)

# Forecast plot with the changepoints overlaid on its axes.
fig = model.plot(forecast)
forecast_axes = fig.gca()
add_changepoints_to_plot(forecast_axes, model, forecast)

# Seasonality

## seasonality_mode

In [None]:
# Yearly seasonality only. seasonality_mode is not passed, so the default
# ('additive') applies; the alternative is 'multiplicative'.
seasonality_flags = dict(
    yearly_seasonality=True,
    weekly_seasonality=False,
    daily_seasonality=False,
)
model = Prophet(**seasonality_flags).fit(adf)
future = model.make_future_dataframe(periods=50, freq='MS')
forecast = model.predict(future)
fig = model.plot(forecast)

In [None]:
# Same yearly-only setup, now with multiplicative seasonality
# ('additive' is the default mode).
seasonality_flags = dict(
    yearly_seasonality=True,
    weekly_seasonality=False,
    daily_seasonality=False,
    seasonality_mode='multiplicative',
)
model = Prophet(**seasonality_flags).fit(adf)
future = model.make_future_dataframe(periods=50, freq='MS')
forecast = model.predict(future)
fig = model.plot(forecast)

In [None]:
# Plot the forecast's individual components; the figure is kept in `fig`.
fig = model.plot_components(forecast)

In [None]:
# yearly_seasonality takes an integer here (5) in place of True, which
# corresponds to 10 by default.
seasonality_flags = dict(
    yearly_seasonality=5,
    weekly_seasonality=False,
    daily_seasonality=False,
    seasonality_mode='multiplicative',  # 'additive' (default) or 'multiplicative'
)
model = Prophet(**seasonality_flags).fit(adf)
future = model.make_future_dataframe(periods=50, freq='MS')
forecast = model.predict(future)

# Overall forecast first, then the per-component breakdown.
fig = model.plot(forecast)
fig = model.plot_components(forecast)

## `seasonality_prior_scale`

In [None]:
# seasonality_prior_scale defaults to 10. Larger values let the seasonality
# fit bigger fluctuations; smaller values shrink its magnitude.
# The usual tuning range is [0.01, 10].
model = Prophet(
    seasonality_mode='multiplicative',
    seasonality_prior_scale=10,
).fit(adf)
future = model.make_future_dataframe(periods=50, freq='MS')
forecast = model.predict(future)
fig = model.plot(forecast)

## custom seasonality

In [None]:
# Custom seasonality: weekly seasonality is switched off and a "monthly"
# component (period 30.5, Fourier order 5) is registered before fitting.
m = Prophet(weekly_seasonality=False)
m.add_seasonality(name='monthly', period=30.5, fourier_order=5)
m.fit(df)

# Build the prediction dates from this model rather than reusing whatever
# `future` an earlier cell left behind, so the cell does not depend on which
# model was run last.
future = m.make_future_dataframe(50, freq='MS')
forecast = m.predict(future)
fig = m.plot_components(forecast)

## Holidays

In [None]:
# Display the airline frame.
adf

### built-in country holidays

In [None]:
# Multiplicative, yearly-only model with US country holidays registered.
holiday_model_kwargs = dict(
    yearly_seasonality=True,
    weekly_seasonality=False,
    daily_seasonality=False,
    seasonality_mode='multiplicative',  # 'additive' (default) or 'multiplicative'
)
model = Prophet(**holiday_model_kwargs)
# Holidays are added before the model is fitted.
model.add_country_holidays(country_name='US')
model.fit(adf)




In [None]:
# Inspect the model's `train_holiday_names` attribute after fitting with US country holidays.
model.train_holiday_names

In [None]:
# Forecast 50 month-start periods with the holiday-aware model and plot the result.
future = model.make_future_dataframe(periods=50, freq='MS')
forecast = model.predict(future)
fig = model.plot(forecast)

In [None]:
# Component breakdown of the holiday-aware forecast.
fig = model.plot_components(forecast)

### custom holidays / special events

In [None]:
# (first day, last day) of each Summer Games listed for this data set.
summer_games_spans = (
    ('1952-07-19', '1952-08-03'),  # Helsinki 1952
    ('1956-11-22', '1956-12-08'),  # Melbourne 1956
    ('1960-08-25', '1960-09-11'),  # Rome 1960
)
# (first day, last day) of each Winter Games in the same years.
winter_games_spans = (
    ('1952-02-14', '1952-02-25'),  # Oslo 1952
    ('1956-01-26', '1956-02-05'),  # Cortina d'Ampezzo 1956
    ('1960-02-18', '1960-02-28'),  # Squaw Valley 1960
)

# Expand every span into one 'YYYY-MM-DD' string per calendar day, both ends included.
summer_olympics_dates = [
    day
    for opening, closing in summer_games_spans
    for day in pd.date_range(opening, closing, freq='D').strftime('%Y-%m-%d')
]
winter_olympics_dates = [
    day
    for opening, closing in winter_games_spans
    for day in pd.date_range(opening, closing, freq='D').strftime('%Y-%m-%d')
]

In [None]:
# Both event tables share the same window settings: lower_window 0, upper_window 1.
olympics_window = {'lower_window': 0, 'upper_window': 1}

# One row per Olympic day, labelled with the event name.
summer_olympics = pd.DataFrame({
    'holiday': 'summer-olympics',
    'ds': pd.to_datetime(summer_olympics_dates),
    **olympics_window,
})
winter_olympics = pd.DataFrame({
    'holiday': 'winter-olympics',
    'ds': pd.to_datetime(winter_olympics_dates),
    **olympics_window,
})

# Single holidays table: summer rows followed by winter rows.
holidays = pd.concat([summer_olympics, winter_olympics])

In [None]:
# Multiplicative model that receives the Olympic-dates table as its custom holidays.
model = Prophet(holidays=holidays, seasonality_mode='multiplicative')
model.fit(adf)

In [None]:
# Forecast 50 month-start periods with the custom-holiday model.
future = model.make_future_dataframe(periods=50, freq='MS')
forecast = model.predict(future)

# Component breakdown first, then the overall forecast.
fig = model.plot_components(forecast)
fig = model.plot(forecast)

# Prophet cross-validation

In [None]:
# Show the airline frame's (rows, columns) shape together with the frame itself.
adf.shape, adf

In [None]:
from prophet.diagnostics import cross_validation, performance_metrics

# Cross-validation windows for the currently fitted `model`.
cv_windows = dict(
    initial=f"{8*365} days",   # training period: 8 years
    period=f"{3*30} days",     # spacing between cutoffs: 90 days (3 months)
    horizon=f"{2*365} days",   # forecast horizon: 2 years
)
# For monthly cutoffs use period="30 days" instead.
df_cv = cross_validation(model, **cv_windows)




In [None]:
# Display the cross-validation results.
df_cv

### `performance_metrics`

The performance_metrics utility can be used to compute some useful statistics of the prediction performance, as a function of the distance from the cutoff (how far into the future the prediction was).

In [None]:
# Compute the performance metrics from the cross-validation results and display them.
df_p = performance_metrics(df_cv)
df_p

In [None]:
# Column-wise mean of the metrics table.
df_p.mean()

### `plot_cross_validation_metric`

In [None]:
from prophet.plot import plot_cross_validation_metric
# Plot the 'mape' metric from the cross-validation results; the figure is kept in `fig`.
fig = plot_cross_validation_metric(df_cv, metric='mape')


# Tuning hyperparameters


Parameters that can be tuned

**changepoint_prior_scale**: This is probably the **most impactful parameter**. It determines the flexibility of the trend, and in particular how much the trend changes at the trend changepoints. If it is too small, the trend will be underfit and variance that should have been modeled with trend changes will instead end up being handled with the noise term. If it is too large, the trend will overfit and in the most extreme case you can end up with the trend capturing yearly seasonality. The **default of 0.05** works for many time series, but this could be tuned; a range of **[0.001, 0.5]** would likely be about right. Parameters like this (regularization penalties; this is effectively a lasso penalty) are often tuned on a log scale.

**seasonality_prior_scale**: This parameter controls the **flexibility of the seasonality**. Similarly, a large value allows the seasonality to fit large fluctuations, a small value shrinks the magnitude of the seasonality. The **default is 10**, which applies basically no regularization. That is because we very rarely see overfitting here (there’s inherent regularization with the fact that it is being modeled with a truncated Fourier series, so it’s essentially low-pass filtered). A reasonable range for tuning it would probably be **[0.01, 10]**; when set to 0.01 you should find that the magnitude of seasonality is forced to be very small. This likely also makes sense on a log scale, since it is effectively an L2 penalty like in ridge regression.

**holidays_prior_scale**: This controls flexibility to fit holiday effects. Similar to seasonality_prior_scale, it **defaults to 10** which applies basically no regularization, since we usually have multiple observations of holidays and can do a good job of estimating their effects. This could also be tuned on a range of **[0.01, 10]** as with seasonality_prior_scale.

**seasonality_mode**: Options are **['additive', 'multiplicative']**. **Default is 'additive'**, but many business time series will have multiplicative seasonality. This is best identified just from looking at the time series and seeing if the magnitude of seasonal fluctuations grows with the magnitude of the time series (see the documentation here on multiplicative seasonality), but when that isn’t possible, it could be tuned.

In [None]:
import itertools


# Candidate values for each hyperparameter being tuned.
param_grid = {
    'changepoint_prior_scale': [0.01, 0.1, 0.5],
    'seasonality_prior_scale': [0.01, 1.0, 10.0],
    'seasonality_mode': ['additive', 'multiplicative']
}

# Cartesian product of the grid: one {name: value} dict per combination.
grid_names = list(param_grid)
all_params = [
    dict(zip(grid_names, combo))
    for combo in itertools.product(*param_grid.values())
]

# Cross-validation settings shared by every candidate model.
cv_settings = dict(
    initial=f"{8*365} days",   # training period: 8 years
    period=f"{3*30} days",     # spacing between cutoffs: 3 months
    horizon=f"{2*365} days",   # forecast horizon: 2 years
    parallel="processes",
)

# One MAPE per combination, in the same order as all_params.
mapes = []
for params in all_params:
    m = Prophet(**params)
    m.fit(adf)
    df_cv = cross_validation(m, **cv_settings)
    # rolling_window=1 computes the metric across all simulated forecast
    # points (not 10% as by default), so the table has a single row.
    df_p = performance_metrics(df_cv, rolling_window=1)
    mapes.append(df_p['mape'].values[0])

# Results table: one row per combination, with its MAPE alongside.
tuning_results = pd.DataFrame(all_params).assign(mape=mapes)

In [None]:
# Rank the parameter combinations by MAPE, lowest first.
tuning_results.sort_values('mape', ascending=True)