In [None]:
import json
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from fbprophet import Prophet
from sklearn.metrics import mean_squared_error, mean_absolute_error
plt.style.use('fivethirtyeight') # Apply matplotlib's 'fivethirtyeight' style sheet to every figure drawn below

In [None]:
# Load the raw history export. The context manager closes the file handle as
# soon as parsing finishes instead of leaving it open until garbage collection,
# and the explicit encoding avoids depending on the platform default.
with open('input/history.json', encoding='utf-8') as history_file:
    data = json.load(history_file)

# The "timeSeries" entry of the parsed document supplies the rows of the frame.
timeseries_df = pd.DataFrame(data["timeSeries"])

In [None]:
# Preview the first rows of the frame as loaded, before any renaming or type conversion.
timeseries_df.head()

In [None]:
# Rename the raw columns to `ds` / `y`, then parse `ds` as UTC timestamps and
# drop the timezone so the column ends up timezone-naive.
# NOTE(review): Prophet is documented to reject timezone-aware `ds` values,
# which is presumably why the timezone is stripped - confirm.
timeseries_df = (
    timeseries_df
    .rename(columns={"date": "ds", "value": "y"})
    .assign(
        ds=lambda frame: pd.to_datetime(
            frame["ds"], errors='raise', cache=True, utc=True
        ).dt.tz_convert(None)
    )
)

In [None]:
# Show column dtypes and non-null counts after the rename / datetime conversion.
timeseries_df.info()

In [None]:
# NOTE(review): this cell repeats the `timeseries_df.info()` call of the cell
# directly before it and adds no new information - consider deleting it.
timeseries_df.info()

In [None]:
# Colour palette for the plots; only the first entry is used in this cell.
color_pal = [
    "#F8766D", "#D39200", "#93AA00", "#00BA38",
    "#00C19F", "#00B9E3", "#619CFF", "#DB72FB",
]

# Line plot of the full history: timestamps on x, observed values on y.
timeseries_df.plot(
    x="ds",
    y="y",
    figsize=(15, 5),
    color=color_pal[0],
    title='History',
)
plt.show()

In [None]:
def create_features(df, label=None):
    """
    Build calendar features from the ``ds`` datetime column of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a datetime-like ``ds`` column (and the ``label`` column
        when one is requested). The input is not modified; all work happens
        on a copy.
    label : hashable, optional
        Name of the target column to return alongside the features. Any
        value other than ``None`` is treated as a column name, so falsy
        labels such as ``0`` are honoured.

    Returns
    -------
    pandas.DataFrame or tuple
        ``X`` with the columns ``hour, dayofweek, quarter, month, year,
        dayofyear, dayofmonth, weekofyear`` when ``label`` is ``None``;
        otherwise the tuple ``(X, y)`` where ``y`` is ``df[label]``.

    Raises
    ------
    KeyError
        If ``ds`` or the requested ``label`` column is missing.
    AttributeError
        If ``ds`` is not datetime-like (the ``.dt`` accessor is unavailable).
    """
    feature_columns = ['hour', 'dayofweek', 'quarter', 'month', 'year',
                       'dayofyear', 'dayofmonth', 'weekofyear']

    df = df.copy()
    stamps = df['ds'].dt
    df['hour'] = stamps.hour
    df['dayofweek'] = stamps.dayofweek
    df['quarter'] = stamps.quarter
    df['month'] = stamps.month
    df['year'] = stamps.year
    df['dayofyear'] = stamps.dayofyear
    df['dayofmonth'] = stamps.day
    # ISO-8601 week number, taken from the isocalendar() frame.
    df['weekofyear'] = stamps.isocalendar().week

    X = df[feature_columns]
    # Compare against None explicitly: a truthiness test would drop valid
    # but falsy column labels (e.g. an integer column named 0).
    if label is not None:
        y = df[label]
        return X, y
    return X

# Derive the calendar features and pull out the target column `y`.
X, y = create_features(timeseries_df, label='y')

# Put features and target side by side (column-wise) for the exploratory plots.
features_and_target = pd.concat((X, y), axis="columns")

In [None]:
# Preview the combined calendar-feature / target frame.
features_and_target.head()

In [None]:
# Scatter the target `y` against a subset of the calendar features, coloured
# by hour. Rows containing missing values are dropped before plotting.
pairplot_x_vars = ['hour', 'dayofweek', 'year', 'weekofyear']
sns.pairplot(
    features_and_target.dropna(),
    hue='hour',
    x_vars=pairplot_x_vars,
    y_vars='y',
    height=5,
    plot_kws={'alpha': 0.15, 'linewidth': 0},
)
plt.suptitle('Power Use MW by Hour, Day of Week, Year and Week of Year')
plt.show()

In [None]:
# Chronological split: rows up to and including the split date are used for
# training, strictly later rows for testing.
split_date = '01-Apr-2021'
in_train_period = timeseries_df["ds"] <= split_date
in_test_period = timeseries_df["ds"] > split_date

# Independent copies so later changes cannot write back into timeseries_df.
timeseries_df_train = timeseries_df.loc[in_train_period].copy()
timeseries_df_test = timeseries_df.loc[in_test_period].copy()


In [None]:
# Show both partitions on one time axis so the split point is visible.
# The training values are relabelled 'TRAINING SET'; the test values keep
# the column name `y`.
train_by_time = timeseries_df_train.set_index("ds").rename(columns={'y': 'TRAINING SET'})
test_by_time = timeseries_df_test.set_index("ds")
test_by_time.join(train_by_time, how='outer').plot(figsize=(15, 5), title='test/train');

In [None]:
# The training frame already uses the `ds` / `y` column names that the frame
# was renamed to earlier; preview its first rows before fitting.
timeseries_df_train.head()

In [None]:
# Baseline model: Prophet with its default settings, fitted on the training
# partition only.
model = Prophet()
model.fit(timeseries_df_train)

In [None]:
# Forecast with the baseline model for the rows of the held-out test frame.
# The result's `yhat` column is what calc_errors later compares against `y`.
timeseries_df_test_forecast = model.predict(df=timeseries_df_test)

In [None]:
# Preview the first rows of the baseline forecast frame.
timeseries_df_test_forecast.head()

In [None]:
# Draw the baseline forecast on a wide 15x5 inch axes.
f, ax = plt.subplots(1, figsize=(15, 5))
fig = model.plot(timeseries_df_test_forecast, ax=ax)
plt.show()

In [None]:
# Plot the components of the baseline forecast; the returned figure is kept in `fig`.
fig = model.plot_components(timeseries_df_test_forecast)

In [None]:
# Plot the forecast with the actuals
f, ax = plt.subplots(1)
f.set_figheight(5)
f.set_figwidth(15)
# Actual test observations in red: timestamps (`ds`) on the x axis and the
# observed values (`y`) on the y axis, so the points line up with the
# forecast drawn on the same axes.
ax.scatter(timeseries_df_test["ds"], timeseries_df_test["y"], color='r')
fig = model.plot(timeseries_df_test_forecast, ax=ax)

In [None]:
# Read the semicolon-separated holiday list, parsing the "Tag" column as dates.
# NOTE(review): if the file stores dates day-first (e.g. 01.02.2021), the
# default parser may swap day and month - confirm against the CSV.
df_german_holidays = pd.read_csv("./input/german_holidays.csv", sep=";", parse_dates=["Tag"])

# Keep only the date and holiday-name columns and rename them to `ds` /
# `holiday`, the column names passed to Prophet's `holidays` argument below.
df_holidays = df_german_holidays[["Tag", "Feiertage"]].rename(
    columns={"Tag": "ds", "Feiertage": "holiday"}
)

df_holidays.head()

In [None]:
# Check dtypes and non-null counts of the holiday frame (notably that `ds` was parsed as datetime).
df_holidays.info()

In [None]:
# Second model: identical to the baseline except that the holiday frame is
# passed in; fitted on the same training partition.
model_with_holidays = Prophet(holidays=df_holidays)
model_with_holidays.fit(timeseries_df_train)

In [None]:
# Predict on the held-out test set with the holiday-aware model
timeseries_df_test_forecast_holidays = model_with_holidays.predict(df=timeseries_df_test)

In [None]:
# Plot the components of the holiday-aware forecast; the returned figure is kept in `fig2`.
fig2 = model_with_holidays.plot_components(timeseries_df_test_forecast_holidays)

In [None]:
# Plot the holiday-aware forecast with the actuals
f, ax = plt.subplots(1)
f.set_figheight(5)
f.set_figwidth(15)
# Actual test observations in red: timestamps (`ds`) on the x axis and the
# observed values (`y`) on the y axis.
ax.scatter(timeseries_df_test["ds"], timeseries_df_test["y"], color='r')
# Draw with the model that produced this forecast, not the baseline model.
fig = model_with_holidays.plot(timeseries_df_test_forecast_holidays, ax=ax)

In [None]:
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (in percent).

    Both inputs are converted to NumPy arrays and compared element by
    element: ``mean(|(y_true - y_pred) / y_true|) * 100``. Zeros in
    ``y_true`` are not special-cased, so they produce ``inf``/``nan``.
    """
    actual = np.asarray(y_true)
    forecast = np.asarray(y_pred)
    relative_error = (actual - forecast) / actual
    return np.mean(np.abs(relative_error)) * 100

def calc_errors(df_forecast, df_test):
    """Print MSE, MAE and MAPE of a forecast against the observed values.

    Parameters
    ----------
    df_forecast : frame with a ``yhat`` column holding the predictions.
    df_test : frame with a ``y`` column holding the observed values.

    The metrics are printed on a single line; nothing is returned.
    """
    observed = df_test['y']
    predicted = df_forecast['yhat']

    mse = mean_squared_error(y_true=observed, y_pred=predicted)
    mae = mean_absolute_error(y_true=observed, y_pred=predicted)
    mape = mean_absolute_percentage_error(y_true=observed, y_pred=predicted)

    print(f'mse:{mse} mae: {mae} mape:{mape}')

In [None]:
# Error metrics of the baseline model on the test partition.
calc_errors(timeseries_df_test_forecast, timeseries_df_test)

In [None]:
# Error metrics of the holiday-aware model on the same test partition, for comparison with the baseline.
calc_errors(timeseries_df_test_forecast_holidays, timeseries_df_test)