In [1]:
import utils
import os
os.environ["KERAS_BACKEND"] = "plaidml.keras.backend"  # or choose another backend if you wish
import pandas as pd
from bokeh.plotting import figure, output_file, show
from bokeh.models import ColumnDataSource, DatetimeTickFormatter
from bokeh.models.tools import HoverTool
from bokeh.io import output_notebook
import sklearn
import seaborn as sns; sns.set()
import matplotlib.pyplot as plt
import datetime
import keras
from keras.models import Sequential, Model
from keras.layers import Embedding, Flatten, Dense, Dropout, Input

Using plaidml.keras.backend backend.


In [2]:
# Configure Bokeh so that subsequent show() calls render inside the notebook.
output_notebook()

In [3]:
# Load both inputs through the project helpers, then report each frame's
# (rows, columns) shape: weather first, load second.
weather_data = utils.load_weather_data()
actual_load = utils.load_pal_data()
for frame in (weather_data, actual_load):
    print(frame.shape)

(5537, 6)
(5513, 3)


In [4]:
# Inner-join the weather columns onto the load data by index, so only index
# values present in both frames are kept.
merged = actual_load.join(weather_data, how='inner')
print(merged.shape)

# Warn (without stopping) if any cell of the joined frame is missing.
has_missing_values = merged.isnull().values.any()
if has_missing_values:
    print('Null values detected in dataset!')

(5513, 9)


In [5]:
# Only the mean load is used from here on; discard the min/max load columns.
unused_load_columns = ['pal_min', 'pal_max']
merged = merged.drop(columns=unused_load_columns)

In [6]:
# Shuffle every row (frac=1 returns all rows in random order).
# A fixed random_state makes the shuffle identical on each "Restart & Run All",
# so the rows displayed below and everything derived from them are reproducible.
merged = merged.sample(frac=1, random_state=42)  # to be changed later
merged.head()

Unnamed: 0,pal_mean,weekday,week,month,PRCP,TMAX,TMIN
2016-09-11,7348.465116,6,36,9,0.0,83.0,67.0
2016-04-02,4924.972759,5,13,4,0.16,61.0,49.0
2009-11-03,5702.375347,1,45,11,0.0,60.0,43.0
2005-09-07,6958.647766,2,36,9,0.0,82.0,62.0
2006-04-30,4740.615972,6,17,4,0.0,70.0,44.0


Next, we standardize every column (including the target, `pal_mean`) to zero mean and unit standard deviation.

In [7]:
# Standardize every column: subtract the column mean, then divide by the
# column standard deviation. `mean` and `std` are kept as globals because the
# prediction cell uses them to convert values back to the original scale.
# TODO (original author's note): these statistics should only be computed
# using training data; here they are computed over the whole frame.
mean = merged.mean(axis=0)
centered = merged - mean
std = centered.std(axis=0)
merged = centered / std
merged.head()

Unnamed: 0,pal_mean,weekday,week,month,PRCP,TMAX,TMIN
2016-09-11,1.287142,1.500386,0.634723,0.725166,-0.355005,1.097858,1.094193
2016-04-02,-1.216176,1.000318,-0.889043,-0.720289,0.039724,-0.112321,0.0111
2009-11-03,-0.413167,-0.999955,1.23098,1.303348,-0.355005,-0.167329,-0.349931
2005-09-07,0.884485,-0.499887,0.634723,0.725166,-0.355005,1.04285,0.793334
2006-04-30,-1.406605,1.500386,-0.62404,-0.720289,-0.355005,0.382752,-0.289759


In [8]:
# Draw a random 80% of the rows as the training set.
# random_state pins the selection so re-running the notebook trains on the
# same rows every time.
# NOTE(review): the remaining 20% of rows is not stored anywhere in this cell.
sampled = merged.sample(frac=0.8, random_state=42)

# Target values as a plain Python list; features are everything else.
labels = sampled['pal_mean'].tolist()
# Non-mutating drop: `sampled` stays intact, so this cell can be re-run safely.
train_df = sampled.drop(columns=['pal_mean'])
train_df.head()

Unnamed: 0,weekday,week,month,PRCP,TMAX,TMIN
2017-07-01,1.000318,-0.027784,0.146984,0.212418,1.207874,1.33488
2013-12-14,1.000318,1.562233,1.592439,1.445947,-1.597541,-1.613539
2016-04-26,-0.999955,-0.62404,-0.720289,0.237089,-0.167329,-0.109244
2012-03-04,1.500386,-1.154046,-1.00938,-0.355005,-0.992451,-0.771134
2006-02-18,1.000318,-1.286547,-1.298471,-0.355005,-1.542533,-2.034742


In [9]:
# Fully connected regression network built with the Keras functional API:
# input -> Dense(64, relu) -> Dropout(0.25) -> Dense(64, relu) -> Dense(1).
n_features = train_df.shape[1]  # one input unit per feature column
main_input = Input(shape=(n_features,), name='main_input')

hidden = Dense(64, activation='relu')(main_input)
hidden = Dropout(.25)(hidden)
hidden = Dense(64, activation='relu')(hidden)

# Single linear unit (no activation) producing the predicted value.
output = Dense(1, name='output')(hidden)

model = Model(inputs=[main_input], outputs=output)
model.summary()

INFO:plaidml:Opening device "metal_amd_radeon_rx_580.0"


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
main_input (InputLayer)      (None, 6)                 0         
_________________________________________________________________
dense_1 (Dense)              (None, 64)                448       
_________________________________________________________________
dropout_1 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 64)                4160      
_________________________________________________________________
output (Dense)               (None, 1)                 65        
Total params: 4,673
Trainable params: 4,673
Non-trainable params: 0
_________________________________________________________________


In [10]:
# Train with RMSprop on mean squared error, tracking mean absolute error.
model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])

# validation_split=0.2 holds back part of the training rows for validation
# (see the "Train on ... validate on ..." line in the output).
fit_options = dict(
    epochs=50,
    batch_size=32,
    validation_split=0.2,
)
history = model.fit([train_df], labels, **fit_options)

Train on 3528 samples, validate on 882 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [11]:
# Per-epoch training history as a DataFrame, plus a 1-based `epoch` column
# to use as the x axis. `source` is reused by the loss plot in the next cell.
source = pd.DataFrame(history.history).assign(epoch=lambda frame: frame.index + 1)

# Training MAE as circles, validation MAE as a line.
mae_figure_options = dict(plot_width=800, plot_height=400,
                          x_axis_label='Epoch', y_axis_label='MAE')
acc_plot = figure(**mae_figure_options)
acc_plot.circle(
    x='epoch', y='mean_absolute_error', source=source,
    size=10, fill_alpha=.5, legend_label='MAE',
)
acc_plot.line(
    x='epoch', y='val_mean_absolute_error', source=source,
    line_width=2, legend_label='Val MAE', alpha=0.5,
)
show(acc_plot)

In [12]:
# Training loss as circles, validation loss as a line, per epoch.
# Reads the `source` history frame built in the previous cell.
loss_figure_options = dict(plot_width=800, plot_height=400,
                           x_axis_label='Epoch', y_axis_label='Loss')
loss_plot = figure(**loss_figure_options)
loss_plot.circle(
    x='epoch', y='loss', source=source,
    size=10, fill_alpha=.5, legend_label='Loss',
)
loss_plot.line(
    x='epoch', y='val_loss', source=source,
    line_width=2, legend_label='Val Loss', alpha=0.5,
)
show(loss_plot)

In [13]:
# Predict for every row of `merged`, which includes the rows used for training.
features = merged.drop(columns=['pal_mean'])
predictions = model.predict(features)

results = merged.copy()
results['prediction'] = predictions

# Convert the target and the prediction back to the original scale by undoing
# the standardization with the target column's std and mean. The feature
# columns are left in standardized form.
for column in ('pal_mean', 'prediction'):
    results[column] *= std.pal_mean
    results[column] += mean.pal_mean

# Expose the index as a regular column so it can be used as a plot axis.
results['date'] = results.index
results

Unnamed: 0,pal_mean,weekday,week,month,PRCP,TMAX,TMIN,prediction,date
2016-09-11,7348.465116,1.500386,0.634723,0.725166,-0.355005,1.097858,1.094193,6171.164551,2016-09-11
2016-04-02,4924.972759,1.000318,-0.889043,-0.720289,0.039724,-0.112321,0.011100,4980.407715,2016-04-02
2009-11-03,5702.375347,-0.999955,1.230980,1.303348,-0.355005,-0.167329,-0.349931,5576.588379,2009-11-03
2005-09-07,6958.647766,-0.499887,0.634723,0.725166,-0.355005,1.042850,0.793334,6619.808594,2005-09-07
2006-04-30,4740.615972,1.500386,-0.624040,-0.720289,-0.355005,0.382752,-0.289759,4762.888672,2006-04-30
...,...,...,...,...,...,...,...,...,...
2007-07-25,7445.016263,-0.499887,0.237219,0.146984,-0.355005,1.097858,1.094193,7230.652344,2007-07-25
2017-08-06,5702.728621,1.500386,0.303470,0.436075,-0.355005,0.712801,0.913677,5713.123535,2017-08-06
2018-11-11,4976.853819,1.500386,1.230980,1.303348,-0.355005,-0.937443,-0.951649,5092.887207,2018-11-11
2018-03-19,5683.512847,-1.500023,-0.955294,-1.009380,-0.355005,-0.882435,-0.951649,5628.737793,2018-03-19


In [14]:
# Sort the rows by their index (undoing the earlier shuffle order).
results = results.sort_index()
# Optional: uncomment to keep only the last 100 rows.
# results = results[-100:]
results

Unnamed: 0,pal_mean,weekday,week,month,PRCP,TMAX,TMIN,prediction,date
2005-02-01,5580.662630,-0.999955,-1.419048,-1.298471,-0.355005,-1.267492,-1.433024,5889.156250,2005-02-01
2005-02-02,5535.835517,-0.499887,-1.419048,-1.298471,-0.355005,-1.267492,-1.252508,5869.904297,2005-02-02
2005-02-03,5514.954639,0.000181,-1.419048,-1.298471,-0.305664,-1.212484,-1.192336,5862.766602,2005-02-03
2005-02-04,5717.773469,0.500249,-1.419048,-1.298471,0.311101,-0.937443,-0.891477,5718.160645,2005-02-04
2005-02-05,5130.559122,1.000318,-1.419048,-1.298471,-0.355005,-0.552386,-0.650790,5196.727051,2005-02-05
...,...,...,...,...,...,...,...,...,...
2020-03-26,4709.563448,0.000181,-0.889043,-1.009380,-0.355005,-0.167329,-0.650790,5571.223633,2020-03-26
2020-03-27,4544.689236,0.500249,-0.889043,-1.009380,-0.355005,0.327744,0.071272,5370.293457,2020-03-27
2020-03-28,4425.012329,1.000318,-0.889043,-1.009380,0.755171,-0.497378,-0.289759,5111.006836,2020-03-28
2020-03-29,4447.838255,1.500386,-0.889043,-1.009380,-0.231652,-0.882435,-0.289759,4925.034180,2020-03-29


In [15]:
# Actual vs. predicted usage over time. The `results` DataFrame is passed
# directly as the glyph data source.
# x_axis_type='datetime' makes Bokeh create a datetime axis, so both the tick
# positions and the tick labels follow calendar units. The default linear axis
# with only the formatter swapped picks tick positions as plain numbers.
# NOTE(review): assumes results['date'] holds datetime-like values - confirm
# against utils.load_pal_data().
pred_plot = figure(plot_width=1200, plot_height=600, x_axis_type='datetime',
                   x_axis_label='Date', y_axis_label='Usage')
pred_plot.circle(x='date', y='pal_mean', source=results, size=10, fill_alpha=.5, legend_label='Actual')
# Triangles and the connecting line share one legend entry for the prediction.
pred_plot.triangle(x='date', y='prediction', source=results, size=10, fill_alpha=.5, legend_label='Prediction', color='green')
pred_plot.line(x='date', y='prediction', source=results, alpha=.5, legend_label='Prediction', color='green')
show(pred_plot)