# Peak prediction model
Create a model that, given hourly load and temperature data, returns a dispatch decision for the following day. This decision is informed by the 1-day-ahead prediction, the 2-day-ahead prediction, and historical peak minimums (for each month, the lowest monthly peak observed in prior years).

In [5]:
import pandas as pd
from scipy.stats import zscore
import pickle
import time
import numpy as np
from sklearn.linear_model import LinearRegression
import os
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import loadForecast as lf

%matplotlib inline

# Create daily data, find correct answers

In [None]:
# Build one row per calendar day holding that day's peak load and its
# year / month / day components.
# NOTE(review): this cell reads the hourly frame `df` (`dates` and `load`
# columns), so `df` must already be loaded when the cell runs.
df['date'] = df.dates.dt.date

# groupby returns its keys sorted, so the index is the sorted set of days.
d_df = df.groupby('date')['load'].max().to_frame('max_load')

# Take the datetime column from the index itself rather than from
# df['date'].unique(): unique() keeps order of first appearance, which only
# matches the sorted groupby index when the hourly data is already sorted.
d_df['date'] = pd.to_datetime(d_df.index)
d_df['year'] = d_df['date'].dt.year
d_df['month'] = d_df['date'].dt.month
d_df['day'] = d_df['date'].dt.day
d_df.head()

In [122]:
# Ground truth: within every (year, month) the day with the highest daily
# peak is the one day on which dispatching would have been correct.
l = []
for yr in d_df['year'].unique():
    yearly = d_df.loc[d_df['year'] == yr]
    # idxmax yields the index label (the date) of each month's peak day
    monthly_peak_days = yearly.groupby('month')['max_load'].idxmax()
    l.extend(monthly_peak_days)

# Flag each daily row according to whether its date is one of the peak days.
peak_days = set(l)
d_df['should_dispatch'] = [day in peak_days for day in d_df.index]
d_df[d_df['should_dispatch']].shape
l

[datetime.date(2002, 1, 3),
 datetime.date(2002, 2, 27),
 datetime.date(2002, 3, 4),
 datetime.date(2002, 4, 29),
 datetime.date(2002, 5, 1),
 datetime.date(2002, 6, 12),
 datetime.date(2002, 7, 25),
 datetime.date(2002, 8, 7),
 datetime.date(2002, 9, 5),
 datetime.date(2002, 10, 3),
 datetime.date(2002, 11, 27),
 datetime.date(2002, 12, 6),
 datetime.date(2003, 1, 24),
 datetime.date(2003, 2, 25),
 datetime.date(2003, 3, 6),
 datetime.date(2003, 4, 30),
 datetime.date(2003, 5, 30),
 datetime.date(2003, 6, 25),
 datetime.date(2003, 7, 22),
 datetime.date(2003, 8, 6),
 datetime.date(2003, 9, 9),
 datetime.date(2003, 10, 22),
 datetime.date(2003, 11, 24),
 datetime.date(2003, 12, 17),
 datetime.date(2004, 1, 6),
 datetime.date(2004, 2, 13),
 datetime.date(2004, 3, 18),
 datetime.date(2004, 4, 22),
 datetime.date(2004, 5, 28),
 datetime.date(2004, 6, 18),
 datetime.date(2004, 7, 16),
 datetime.date(2004, 8, 3),
 datetime.date(2004, 9, 17),
 datetime.date(2004, 10, 20),
 datetime.date(2004

# 1-day and 2-day forecasts

In [71]:
# Load the hourly series and split the timestamp into calendar components.
df = pd.read_csv('hourly/NCENT.csv', parse_dates=['dates'])
timestamps = df['dates'].dt
for part in ('year', 'month', 'day', 'hour'):
    df[part] = getattr(timestamps, part)

# Feature frames for the two forecast horizons (24 h and 48 h of lead time).
# NOTE(review): `noise` and `hours_prior` are interpreted by
# loadForecast.makeUsefulDf -- confirm their exact meaning there.
all_X_1 = lf.makeUsefulDf(df, noise=2.5, hours_prior=24)
all_X_2 = lf.makeUsefulDf(df, noise=4, hours_prior=48)
all_y = df['load']

# Each call returns a pair; the second element is displayed in a later cell
# as a dict with 'test' and 'train' entries.
p1, a1 = lf.neural_net_predictions(all_X_1, all_y)
p2, a2 = lf.neural_net_predictions(all_X_2, all_y)

In [84]:
def _daily_peaks(hourly, hours_per_day=24):
    """Return the maximum of each consecutive `hours_per_day`-long slice of `hourly`."""
    day_starts = range(0, len(hourly), hours_per_day)
    return [max(hourly[start:start + hours_per_day]) for start in day_starts]


# Collapse the hourly predictions to one predicted peak per day.
p1_max = _daily_peaks(p1)
p2_max = _daily_peaks(p2)
a1, a2

({'test': 2.171315217977105, 'train': 1.9426664351769887},
 {'test': 2.907583554651494, 'train': 2.735516462464462})

# Create threshold

In [66]:
# Monthly threshold from prior years: for each calendar month, take the peak
# load of that month in every year except the last one, then keep the
# smallest of those peaks.
prior_years = d_df['year'].unique()[:-1]
max_vals = {
    yr: list(d_df[d_df['year'] == yr].groupby('month')['max_load'].max())
    for yr in prior_years
}

# Rows are years, columns 0..11 are the months in groupby order.
df_thresh = pd.DataFrame(max_vals).T
thresholds = [df_thresh[month_idx].min() for month_idx in range(12)]

# Make dispatch decisions

In [127]:
# Assemble the per-day decision table for the evaluation year.
TEST_YEAR = 2018

df_dispatch = pd.DataFrame()
# The first assignment fixes the index to the evaluation year's dates; the
# later Series assignments from d_df are aligned to that index.
df_dispatch['should_dispatch'] = d_df[d_df['year'] == TEST_YEAR]['should_dispatch']
df_dispatch['load'] = d_df['max_load']
# The prediction lists are assigned positionally, so they must contain
# exactly one value per row of df_dispatch.
df_dispatch['1-day'] = p1_max
df_dispatch['2-day'] = p2_max
df_dispatch['month'] = d_df['month']
df_dispatch['threshold'] = df_dispatch['month'].apply(lambda x: thresholds[x-1])
# Rule 1: the 1-day forecast reaches the historical monthly threshold.
df_dispatch['above_threshold'] = df_dispatch['1-day'] >= df_dispatch['threshold']
# Rule 2: the 2-day forecast does not exceed the 1-day forecast.
df_dispatch['2-day_lower'] = df_dispatch['2-day'] <= df_dispatch['1-day']

# Rule 3: the 1-day forecast is at least as high as every earlier forecast
# seen so far in the same month.
highest = [-1*float('inf')]*12
# Sized from the table instead of a literal 365 so a 366-day year also works.
dispatch_highest = [False]*len(df_dispatch)
# The loop variable is named `peak` (not `l`) so it does not overwrite the
# notebook-level list `l` of true peak days.
for i, (peak, m) in enumerate(zip(df_dispatch['1-day'], df_dispatch['month'])):
    if peak >= highest[m-1]:
        dispatch_highest[i] = True
        highest[m-1] = peak

df_dispatch['highest_so_far'] = dispatch_highest
# Dispatch only when all three rules agree.
df_dispatch['dispatch'] = df_dispatch['highest_so_far'] & df_dispatch['2-day_lower'] & df_dispatch['above_threshold']
df_dispatch.head()

Unnamed: 0_level_0,should_dispatch,1-day,2-day,month,threshold,above_threshold,2-day_lower,actual_load,highest_so_far,dispatch
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2018-01-01,False,21727.0,20115.652344,1,13638.864085,True,True,20338.611561,True,True
2018-01-02,False,23110.335938,21471.070312,1,13638.864085,True,True,22168.940606,True,True
2018-01-03,False,22742.78125,20949.039062,1,13638.864085,True,True,22017.339622,False,False
2018-01-04,False,19999.328125,18993.402344,1,13638.864085,True,True,19569.618815,False,False
2018-01-05,False,18073.220703,17213.603516,1,13638.864085,True,True,17602.195958,False,False


# Compare to correct answers

In [124]:
# Predicted decisions (`pre`) and ground-truth peak days (`ans`) as arrays.
pre, ans = (np.array(df_dispatch[col]) for col in ('dispatch', 'should_dispatch'))

def recall(ans, pre):
    """Fraction of true peak days that were dispatched on.

    `ans` holds the ground-truth flags and `pre` the predicted flags; both
    are combined element-wise with `&` / `~`. A 1e-7 term in the denominator
    keeps the ratio defined when there are no positives.
    """
    hits = sum(pre & ans)
    misses = sum(~pre & ans)
    return hits / (hits + misses + 1e-7)
def precision(ans, pre):
    """Fraction of dispatches that fell on a true peak day.

    `ans` holds the ground-truth flags and `pre` the predicted flags; both
    are combined element-wise with `&` / `~`. A 1e-7 term in the denominator
    keeps the ratio defined when nothing was dispatched.
    """
    hits = sum(pre & ans)
    false_alarms = sum(pre & ~ans)
    return hits / (hits + false_alarms + 1e-7)
def peaks_missed(ans, pre):
    """Count the true peak days (`ans`) on which no dispatch was predicted (`pre`)."""
    not_dispatched = ~pre
    return sum(not_dispatched & ans)
def unnecessary_dispatches(ans, pre):
    """Count the predicted dispatches (`pre`) that did not fall on a true peak day (`ans`)."""
    not_a_peak = ~ans
    return sum(pre & not_a_peak)

# Report, in order: recall, precision, missed peaks, unnecessary dispatches.
print(*[metric(ans, pre) for metric in (recall, precision, peaks_missed, unnecessary_dispatches)])

0.9166666590277779 0.1896551720868014 1 47
