# Plot script for presentation


**Motivation**: `prophet_plot` plots the forecast so that "outliers", i.e. real data values that fall outside of the prediction range, can be identified rapidly. However, its layout is not suitable for a professional presentation.
In this script, I run a workflow similar to Prophet_Prediction, but I
* removed the cross-validation part so that it runs fast
* separated the individual steps that come before prophet_plot, to allow customization of the display features.

**Structure**:
Similar to Prophet_Prediction, but prophet is replaced by prophet_CV_free and prophet_plot is replaced by prophet_plot_show.
Note that prophet_plot and prophet_plot_show do not allow for graph modification and execution.

**Note**: The same results may also be obtainable with Prophet's interactive plot function. Check this out:
```python
from fbprophet.plot import plot_plotly
import plotly.offline as py
py.init_notebook_mode()

fig_int = plot_plotly(model, forecast)  # This returns a plotly Figure
py.iplot(fig_int)
```

**This is a script to do plots for the presentation**

In [15]:
import pandas as pd
import numpy as np
import time
import re

import seaborn as sns
sns.set()
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib.dates import DateFormatter

# Note that the interactive plot may not work in Jupyter lab, but only in Jupyter Notebook (conflict of javascripts)
%matplotlib widget 

from datetime import datetime, timedelta
from pytz import timezone

In [16]:
import fbprophet
from fbprophet import Prophet
from fbprophet.diagnostics import cross_validation, performance_metrics
from fbprophet.plot import plot_cross_validation_metric

In [17]:
from sklearn.model_selection import ParameterGrid
import itertools
from random import sample

In [20]:
from helper import load_ds, df_dev_formater, df_generator, prophet_fit, get_outliers, find_index

## Load df_dev

In [5]:
# Load the data of a single device and print a short sanity report.
device_nb = '33' # 2-digit number !
device, df_dev = df_dev_formater(device_nb)

# Exactly one device is expected in the returned dataframe.
assert device.shape[0]==1, 'No, or several devices in the df'

# Check report:
# `device` and the unique tenants are array-like; concatenating a string with
# them broadcasts element-wise and prints a bracketed array, so the values are
# joined into plain text instead.
print('Check report:\n##############################################')
print('Device contained in the dataset: ' + ', '.join(map(str, device)))
print('Tenant using the device: ' + ', '.join(map(str, df_dev['tenant'].unique())))
print('\nThere are ' + str(df_dev.shape[0]) + ' lines.')
print('\nData types:')
print(df_dev.dtypes)

Check report:
##############################################
['Device contained in the dataset: device33']
['Tenant using the device: tenant09']

There are 236187 lines.

Data types:
device                                object
tenant                                object
ds             datetime64[ns, Europe/Zurich]
light                                float64
temperature                          float64
humidity                             float64
co2                                  float64
dtype: object


In [6]:
def prophet_CV_free(df_dev=df_dev,
            device=device,
            parameter='co2',
            begin='2019-03-26',
            end='2019-04-03',
            sampling_period_min=5,
            graph=1,
            predict_day=1,
            interval_width=0.6,
            changepoint_prior_scale=0.01,
            daily_fo = 12):
    """
    Build the analysis dataframe, configure a Prophet model and fit it,
    without any cross-validation step.

    Chains the helpers df_generator and prophet_fit around a Prophet model
    that only uses a custom daily seasonality.

    Args
    ----
    df_dev : pandas DataFrame
        Device data forwarded to df_generator. The default is the module-level
        df_dev as it existed when this function was defined.
    device :
        Device identifier forwarded to df_generator. The default is the
        module-level device as it existed when this function was defined.
    parameter : str, optional (default='co2')
        Forwarded to df_generator (presumably the df_dev column to model).
    begin, end : str
        Forwarded to df_generator (presumably the bounds of the analysed period).
    sampling_period_min : int or float, optional (default=5)
        Sampling period in minutes.
    graph : optional (default=1)
        Forwarded to df_generator.
    predict_day : int, optional (default=1)
        Forwarded to df_generator (presumably the number of days to predict).
    interval_width : float, optional (default=0.6)
        Width of Prophet's uncertainty interval, used as the anomaly threshold.
    changepoint_prior_scale : float, optional (default=0.01)
        Prophet's trend flexibility setting.
    daily_fo : int, optional (default=12)
        Fourier order of the custom daily seasonality.

    Returns
    -------
    tuple
        (fig, forecast, model, df, model, today_index, lookback_n, predict_n).
        fig, forecast and model come from prophet_fit; df, predict_n,
        today_index and lookback_n come from df_generator. model is returned
        twice so that existing callers unpacking eight values keep working.

    Raises
    ------
    AssertionError
        If today_index is not strictly greater than lookback_n.
    """
    # The helpers take the sampling period both as a pandas-style frequency
    # string in minutes ('<n>T') and as a number of hours.
    sampling_period_st = f'{sampling_period_min}T'
    sampling_period_num = sampling_period_min / 60

    # Generate the dataframe to analyse; predict_day is forwarded so that the
    # caller's value is honoured.
    df, predict_n, today_index, lookback_n = df_generator(
        df_dev,
        device,
        parameter,
        begin,
        end,
        sampling_period_st,
        sampling_period_num,
        graph=graph,
        predict_day=predict_day)

    # Configure the model: the built-in seasonalities are disabled and replaced
    # by a single custom daily one whose Fourier order is tunable.
    model = Prophet(interval_width=interval_width,  # anomaly threshold
                    yearly_seasonality=False, weekly_seasonality=False, daily_seasonality=False,
                    # Larger values make the trend more flexible (risk of
                    # overfitting); smaller values make it stiffer.
                    changepoint_prior_scale=changepoint_prior_scale)
    model.add_seasonality(name='daily', period=1, fourier_order=daily_fo)

    # Fit the model and build the Prophet figure
    assert today_index>lookback_n, 'Not enough data for prediction (lookback_n<today_index)'
    # Close the current matplotlib figure before prophet_fit creates a new one.
    plt.close()
    fig, forecast, model = prophet_fit(df, model, today_index, sampling_period_st, sampling_period_num, lookback_days=lookback_n, predict_days=predict_n)

    return fig, forecast, model, df, model, today_index, lookback_n, predict_n

In [17]:
def prophet_plot_show(
        df,
        fig,
        today_index,
        lookback_days=None,
        predict_days=21,
        outliers=None):
    """
    Overlay the actual values and the anomalous values on a Prophet figure.

    The figure is modified in place; nothing is returned and no legend is
    drawn (the artists carry the labels 'Actual' and 'Anomaly').

    Args
    ----
    df : pandas DataFrame
        The time-series data set; contains a ds column of datetimes and a
        y column of numerical values. The whole dataframe is plotted.
    fig : matplotlib Figure
        A plot with actual data, predicted values and the interval which we previously obtained
        from Prophet's model.plot(forecast). Its first Axes is drawn on.
    today_index : int
        Not used by this function; kept so that existing calls keep working.
    lookback_days : int, optional (default=None)
        Not used by this function; kept so that existing calls keep working.
    predict_days : int, optional (default=21)
        Not used by this function; kept so that existing calls keep working.
    outliers : iterable of (datetime, value) tuples, optional (default=None)
        The outliers we want to highlight on the plot. None means no outlier.
    """
    # retrieve the subplot in the generated Prophet matplotlib figure
    ax = fig.get_axes()[0]
    # NOTE: this resets the global seaborn theme (white backgrounds), which
    # also applies to figures created afterwards.
    sns.set(rc={'axes.facecolor':'white', 'figure.facecolor':'white'})

    # highlight the actual values of the entire time frame
    x_pydatetime = df['ds'].dt.to_pydatetime()
    ax.plot(x_pydatetime, df.y, color='black', label='Actual')

    # plot all outliers as red dots with a single scatter call, so that only
    # one artist (and one 'Anomaly' legend entry) is created
    outlier_points = [] if outliers is None else list(outliers)
    if outlier_points:
        outlier_x = [point[0] for point in outlier_points]
        outlier_y = [point[1] for point in outlier_points]
        ax.scatter(outlier_x, outlier_y, s=16, color='red', label='Anomaly')

*Normal day*

In [31]:
# Single instance example
# Fit the co2 signal over 2019-03-17 .. 2019-03-29 with a 1-minute sampling period.
# NOTE: `model` appears twice in the unpacking target because prophet_CV_free
# returns it twice; both positions bind the same name.
fig, forecast, model, df, model, today_index, lookback_n, predict_n = prophet_CV_free(df_dev,
                                                                               device,
                                                                               parameter='co2',
                                                                               begin='2019-03-17',
                                                                               end='2019-03-29',
                                                                               sampling_period_min=1,
                                                                               graph=1,
                                                                               predict_day=1,
                                                                               interval_width=0.6,
                                                                               changepoint_prior_scale=0.01,
                                                                               daily_fo = 3)

Full dataset: 2019-03-07 to the 2019-05-01. Analysed data the 2019-03-17 to the 2019-03-29.
o Trained on the data from the 2019-03-17 to the 2019-03-28 (10 days).
o Predict from the 2019-03-28 to the 2019-03-29 (1 days).


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [32]:
# Retrieve the subplot in the generated Prophet matplotlib figure
# (the first Axes of `fig`); the following cells restyle it in place.
ax = fig.get_axes()[0]

In [33]:
# Set the background of the plotting area (the Axes) to white
ax.set_facecolor('white')

In [34]:
# Remove the grid: switch the grid lines off on both axes rather than only
# recolouring them, so that they are not drawn at all (and cannot show up
# on a non-white background or in an export).
ax.grid(False)

In [35]:
# Format the date: show the x-axis major tick labels as day/month (e.g. 26/03)
myFmt = DateFormatter("%d/%m")
ax.xaxis.set_major_formatter(myFmt)

In [36]:
# Compute the outliers to highlight from the data and the forecast.
# get_outliers is a project helper; presumably it flags the actual values that
# fall outside the predicted interval — confirm against helper.py.
outliers, df_pred = get_outliers(df, forecast, today_index, predict_days=predict_n)

In [37]:
# NOTE(review): today_index is reassigned here after get_outliers has already
# been called with the previous value, and prophet_plot_show does not read
# today_index, so this reassignment does not appear to affect the plot —
# confirm the intent (or move it before the get_outliers call).
today_index = find_index(df, '2019-03-26', starting_time='21:00')

4319


In [38]:
# Overlay the actual values (black line) and the outliers (red dots) on the
# Prophet figure; the figure is modified in place.
prophet_plot_show(df, fig, today_index, predict_days=predict_n, outliers=outliers)

*Abnormal day*

In [22]:
# Single instance example
# Fit the co2 signal over 2019-03-20 .. 2019-04-01 with a 1-minute sampling
# period and a wider uncertainty interval (0.7) than in the normal-day example.
# NOTE: `model` appears twice in the unpacking target because prophet_CV_free
# returns it twice; both positions bind the same name.
fig, forecast, model, df, model, today_index, lookback_n, predict_n = prophet_CV_free(df_dev,
                                                                               device,
                                                                               parameter='co2',
                                                                               begin='2019-03-20',
                                                                               end='2019-04-01',
                                                                               sampling_period_min=1,
                                                                               graph=1,
                                                                               predict_day=1,
                                                                               interval_width=0.7,
                                                                               changepoint_prior_scale=0.01,
                                                                               daily_fo = 3)

Full dataset: 2019-03-07 to the 2019-05-01. Analysed data the 2019-03-20 to the 2019-04-01.
o Trained on the data from the 2019-03-20 to the 2019-03-31 (10 days).
o Predict from the 2019-03-31 to the 2019-04-01 (1 days).


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [23]:
# Restyle the Prophet figure for the presentation and overlay the outliers.
# Retrieve the subplot in the generated Prophet matplotlib figure
ax = fig.get_axes()[0]
# White plotting area
ax.set_facecolor('white')
# Remove the grid: switch the grid lines off on both axes rather than only
# recolouring them, so that they are not drawn at all.
ax.grid(False)
# Show the x-axis major tick labels as day/month
myFmt = DateFormatter("%d/%m")
ax.xaxis.set_major_formatter(myFmt)
# Compute the outliers and draw them, with the actual values, on the figure
outliers, df_pred = get_outliers(df, forecast, today_index, predict_days=predict_n)
prophet_plot_show(df, fig, today_index, predict_days=predict_n, outliers=outliers)

In [None]:
# NOTE(review): mape_table is not defined in the visible part of this notebook
# and this cell has no recorded execution; it would presumably raise a
# NameError when run — confirm, or remove the cell.
mape_table