In [1]:
import utils
import os
os.environ["KERAS_BACKEND"] = "plaidml.keras.backend"  # or choose another backend if you wish
import pandas as pd
from bokeh.plotting import figure, output_file, show
from bokeh.models import ColumnDataSource, DatetimeTickFormatter
from bokeh.models.tools import HoverTool
from bokeh.io import output_notebook
import sklearn
import seaborn as sns; sns.set()
import matplotlib.pyplot as plt
import datetime
import keras
from keras.models import Sequential, Model
from keras.layers import Embedding, Flatten, Dense, Dropout, Input

Using plaidml.keras.backend backend.


In [2]:
# Configure Bokeh so that subsequent show() calls render inline in the notebook.
output_notebook()

In [3]:
# Load the train/test frames through the project helper.
# NOTE(review): how the split is made lives in utils.load_data and is not
# visible from this notebook — confirm before relying on it.
train, test = utils.load_data()

Now, we will normalize the data.

In [4]:
# Per-column normalization statistics, computed from the training frame only
# (the test frame must not contribute to the scaling).
# NOTE(review): the statements that applied these statistics
# (`merged -= mean`, `merged /= std`) were left commented out in this cell, and
# no visible cell defines `merged` — confirm where normalization really happens.
mean, std = train.mean(axis='index'), train.std(axis='index')


In [5]:
# Display the per-column means computed from the training frame.
mean

pal_mean       6101.896693
day_of_year     181.988110
weekday           2.993148
week             26.374244
month             6.486900
PRCP              0.144359
TMAX             62.929665
TMIN             48.726925
dtype: float64

In [None]:
# one-hot encoding by default
# NOTE(review): the results of these calls are not assigned, so they only have
# an effect if utils.preprocess mutates its argument in place — confirm in utils.
# NOTE(review): this cell carries no execution count (In [None]), i.e. it was
# not run in the saved session.
utils.preprocess(train)
utils.preprocess(test)

In [8]:
# Draw the rows used for fitting and split them into features and target.
#
# NOTE(review): `merged` is not defined in any visible cell; it presumably is the
# normalized frame hinted at by the commented-out code in the statistics cell.
# Define it explicitly before this cell so Restart & Run All works.
#
# A fixed seed makes the 80% sample (and therefore every downstream number and
# plot) reproducible between runs.
SAMPLE_SEED = 42
train_sample = merged.sample(frac=0.8, random_state=SAMPLE_SEED)

# Target column, as a plain list of values.
labels = train_sample['pal_mean'].tolist()

# Feature matrix: everything except the target. A non-mutating drop keeps the
# cell idempotent when it is re-run.
train_df = train_sample.drop(columns=['pal_mean'])
train_df.head()

Unnamed: 0,weekday,week,month,PRCP,TMAX,TMIN
2011-12-19,-1.500023,1.628484,1.592439,-0.355005,-0.882435,-1.252508
2009-03-30,-1.500023,-0.822792,-1.00938,-0.355005,-0.552386,-0.470274
2011-02-28,-1.500023,-1.154046,-1.298471,0.7305,-0.167329,-0.530446
2018-02-03,1.000318,-1.419048,-1.298471,-0.355005,-1.762565,-1.97457
2017-10-02,-1.500023,0.899726,1.014257,-0.355005,0.547777,0.191616


In [9]:
# Small fully connected regressor:
#   input -> Dense(64, relu) -> Dropout(0.25) -> Dense(64, relu) -> Dense(1)
# The input width follows the number of feature columns in train_df.
n_features = train_df.shape[1]
main_input = Input(shape=(n_features,), name='main_input')

hidden = Dense(64, activation='relu')(main_input)
hidden = Dropout(.25)(hidden)
hidden = Dense(64, activation='relu')(hidden)

# Single linear unit (no activation) for the scalar regression target.
output = Dense(1, name='output')(hidden)

model = Model(inputs=[main_input], outputs=output)
model.summary()

INFO:plaidml:Opening device "metal_amd_radeon_rx_580.0"


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
main_input (InputLayer)      (None, 6)                 0         
_________________________________________________________________
dense_1 (Dense)              (None, 64)                448       
_________________________________________________________________
dropout_1 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 64)                4160      
_________________________________________________________________
output (Dense)               (None, 1)                 65        
Total params: 4,673
Trainable params: 4,673
Non-trainable params: 0
_________________________________________________________________


In [10]:
# Regression setup: optimise mean squared error, report mean absolute error.
model.compile(loss='mse', optimizer='rmsprop', metrics=['mae'])

# validation_split=0.2 holds out a fifth of the rows passed to fit() for
# per-epoch validation; the returned History object feeds the plots below.
fit_options = dict(epochs=50, batch_size=32, validation_split=0.2)
history = model.fit([train_df], labels, **fit_options)

Train on 3528 samples, validate on 882 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [11]:
# One row per epoch with every metric Keras recorded during fit().
source = pd.DataFrame(history.history)
source['epoch'] = source.index + 1  # epochs are 1-based on the x axis

# Keras < 2.3 records metrics=['mae'] under 'mean_absolute_error'; newer
# releases keep the name exactly as given ('mae'). Use whichever is present so
# the plot does not silently break on a different Keras version.
mae_col = 'mean_absolute_error' if 'mean_absolute_error' in source.columns else 'mae'
val_mae_col = 'val_' + mae_col

# Training MAE as markers, validation MAE as a line.
acc_plot = figure(plot_width=800, plot_height=400, x_axis_label='Epoch', y_axis_label='MAE')
acc_plot.circle(x='epoch', y=mae_col, source=source, size=10, fill_alpha=.5, legend_label='MAE')
acc_plot.line(x='epoch', y=val_mae_col, source=source, line_width=2, legend_label='Val MAE', alpha=0.5)
show(acc_plot)

In [12]:
# Training vs. validation loss per epoch, read from the `source` frame that was
# built from history.history.
loss_plot = figure(
    plot_width=800,
    plot_height=400,
    x_axis_label='Epoch',
    y_axis_label='Loss',
)
train_loss_style = dict(size=10, fill_alpha=.5, legend_label='Loss')
val_loss_style = dict(line_width=2, legend_label='Val Loss', alpha=0.5)

loss_plot.circle(x='epoch', y='loss', source=source, **train_loss_style)
loss_plot.line(x='epoch', y='val_loss', source=source, **val_loss_style)
show(loss_plot)

In [13]:
# Predict for every row of `merged` and bring both target and prediction back
# to original units using the training-set statistics.
# NOTE(review): `merged` includes the rows sampled for training, so this is not
# a held-out evaluation — confirm that is intended.
features = merged.drop(columns=['pal_mean'])  # drop() already returns a new frame
predictions = model.predict(features)

results = merged.copy()
results['prediction'] = predictions

# Undo the standardisation: value * std + mean, applied to both columns.
for column in ('pal_mean', 'prediction'):
    results[column] *= std.pal_mean
    results[column] += mean.pal_mean

# Expose the index as a regular column so it can be used as a plot axis.
results['date'] = results.index
results

Unnamed: 0,pal_mean,weekday,week,month,PRCP,TMAX,TMIN,prediction,date
2009-02-27,5893.711765,0.500249,-1.154046,-1.298471,-0.256322,-0.112321,-0.349931,5509.038086,2009-02-27
2014-06-03,6732.903082,-0.999955,-0.226536,-0.142107,-0.058958,1.317891,0.973849,7070.421387,2014-06-03
2013-08-21,7813.025424,-0.499887,0.502222,0.436075,-0.355005,1.482915,1.395052,8022.878906,2013-08-21
2013-07-10,8876.531724,-0.499887,0.104718,0.146984,-0.355005,1.207874,1.575567,8013.886230,2013-07-10
2017-01-27,5773.755903,0.500249,-1.485299,-1.587562,-0.355005,-1.102468,-0.771134,5719.138184,2017-01-27
...,...,...,...,...,...,...,...,...,...
2006-09-05,6174.697674,-0.999955,0.634723,0.725166,0.779842,0.327744,0.733162,6287.090820,2006-09-05
2013-04-15,5483.545486,-1.500023,-0.690291,-0.720289,-0.355005,-0.222337,-0.289759,5473.591797,2013-04-15
2014-09-01,7406.609247,-1.500023,0.634723,0.725166,-0.355005,1.372899,1.575567,7962.198242,2014-09-01
2019-05-14,5299.572069,-0.999955,-0.425288,-0.431198,0.385112,-0.607394,-0.289759,5537.526367,2019-05-14


In [14]:
# Order the rows by index so that the plot below reads left to right.
# NOTE(review): the index appears to hold dates (see the displayed output) —
# confirm it is a DatetimeIndex rather than strings.
results = results.sort_index()
# Optional: uncomment to keep only the last 100 rows.
# results = results[-100:]
results

Unnamed: 0,pal_mean,weekday,week,month,PRCP,TMAX,TMIN,prediction,date
2005-02-01,5580.662630,-0.999955,-1.419048,-1.298471,-0.355005,-1.267492,-1.433024,5971.286133,2005-02-01
2005-02-02,5535.835517,-0.499887,-1.419048,-1.298471,-0.355005,-1.267492,-1.252508,5943.454102,2005-02-02
2005-02-03,5514.954639,0.000181,-1.419048,-1.298471,-0.305664,-1.212484,-1.192336,5940.933105,2005-02-03
2005-02-04,5717.773469,0.500249,-1.419048,-1.298471,0.311101,-0.937443,-0.891477,5712.355957,2005-02-04
2005-02-05,5130.559122,1.000318,-1.419048,-1.298471,-0.355005,-0.552386,-0.650790,5158.868164,2005-02-05
...,...,...,...,...,...,...,...,...,...
2020-03-26,4709.563448,0.000181,-0.889043,-1.009380,-0.355005,-0.167329,-0.650790,5568.157715,2020-03-26
2020-03-27,4544.689236,0.500249,-0.889043,-1.009380,-0.355005,0.327744,0.071272,5460.233887,2020-03-27
2020-03-28,4425.012329,1.000318,-0.889043,-1.009380,0.755171,-0.497378,-0.289759,4976.417480,2020-03-28
2020-03-29,4447.838255,1.500386,-0.889043,-1.009380,-0.231652,-0.882435,-0.289759,4859.572266,2020-03-29


In [15]:
# Actual vs. predicted usage over time. The `results` DataFrame is handed
# directly to the glyph methods as their `source`.
pred_plot = figure(plot_width=1200, plot_height=600, x_axis_label='Date', y_axis_label='Usage')

marker_style = dict(size=10, fill_alpha=.5)
prediction_style = dict(legend_label='Prediction', color='green')

# Observed values as circles; predictions as green triangles joined by a line.
pred_plot.circle(x='date', y='pal_mean', source=results, legend_label='Actual', **marker_style)
pred_plot.triangle(x='date', y='prediction', source=results, **marker_style, **prediction_style)
pred_plot.line(x='date', y='prediction', source=results, alpha=.5, **prediction_style)

# Format x-axis ticks as dates.
pred_plot.xaxis.formatter = DatetimeTickFormatter()
show(pred_plot)