In [None]:
import os
import pickle
import time

import holidays
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

from pyfiles import graph_creation
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
# Load the GEFCom2014-E workbook; the columns used below are Date, Hour, load and T.
df = pd.read_excel('../GEFCom2014 Data/GEFCom2014-E.xlsx')

# Drop as many leading rows as there are missing load values.
# NOTE(review): this assumes every missing load sits at the start of the file -- confirm.
# .copy() gives an independent frame so the column assignments below do not
# write into a slice of the raw data (avoids SettingWithCopyWarning).
df = df[df.load.isnull().sum():].copy()

# Calendar features derived from the timestamp column.
dates = pd.to_datetime(df['Date'])
df['dow'] = dates.dt.dayofweek
df['month'] = dates.dt.month
ush = holidays.US()
df['is_holiday'] = dates.apply(lambda day: day in ush).astype('int64')

# Standardise the target with the very same offset/scale that are kept around
# to map predictions back to the original units later in the notebook.
offset = df.load.mean()
scale = df.load.std()
df['load'] = (df['load'] - offset) / scale

# Standardise temperature (its statistics are not needed afterwards).
df['T'] = (df['T'] - df['T'].mean()) / df['T'].std()

# Keep only the model inputs, with the target (load) as the LAST column;
# the array-slicing cells below rely on that ordering.
df = df[['month', 'dow', 'Hour', 'is_holiday', 'T', 'load']].copy()

# Cyclical encoding of the calendar columns.
# NOTE(review): cosine alone is symmetric (e.g. Hour h and 24-h map to the same
# value); a matching sine column would disambiguate them if that matters.
df['month'] = np.cos(2*np.pi/12*df['month'])
df['Hour'] = np.cos(2*np.pi/24*df['Hour'])
df['dow'] = np.cos(2*np.pi/7*df['dow'])

In [None]:
# Preview the first rows of the prepared frame (cosine-encoded calendar columns, standardised T and load).
df.head()

In [None]:
# Positional split in row order: the first 7/8 of the rows are used for
# training and the remaining 1/8 is held out. No separate validation slice
# is carved out of the frame.
n_train = (7 * len(df)) // 8
train = df.iloc[:n_train]
test = df.iloc[n_train:]

In [None]:
# Plain ndarray copies of both splits.
nt, ntt = np.array(train), np.array(test)

# The last column is the target (load); everything before it is a feature.
x_train, y_train = nt[:, :-1], nt[:, -1]

# The "val" arrays are cut from the held-out test split, not from a separate
# validation split.
x_val, y_val = ntt[:, :-1], ntt[:, -1]

In [None]:
def row2seq_rnn(data, ys, past=24, future=24):
    """Cut a feature matrix and target vector into sliding seq2seq windows.

    For every position ``i`` that has ``past`` rows before it and ``future``
    rows from it onwards, one sample is produced.

    Parameters
    ----------
    data : array-like, shape (n_rows, n_features)
        Exogenous features, one row per time step.
    ys : array-like, shape (n_rows,)
        Target value for each time step.
    past : int
        Number of history steps given to the encoder.
    future : int
        Number of steps to forecast.

    Returns
    -------
    x_past : ndarray, shape (n_windows, past, n_features + 1)
        Past features with the past target appended as the last column.
    x_future : ndarray, shape (n_windows, future, n_features)
        Features of the steps to be forecast.
    y_future : ndarray, shape (n_windows, future)
        Targets of the steps to be forecast.

    When the input is too short for a single window, correctly shaped empty
    arrays are returned so that ``.shape[2]`` still works for callers.
    """
    data = np.asarray(data)
    ys = np.asarray(ys)

    xl, xfl, yl = [], [], []
    # "+ 1": the window whose future part ends on the very last row is valid
    # too (data[i:i+future] is complete for i == len(data) - future).
    for i in range(past, len(data) - future + 1):
        history_x = data[i-past:i]
        history_y = ys[i-past:i, None]
        xl.append(np.concatenate((history_x, history_y), axis=1))
        xfl.append(data[i:i+future])
        yl.append(ys[i:i+future])

    if not xl:
        n_feat = data.shape[1] if data.ndim > 1 else 0
        return (np.empty((0, past, n_feat + 1)),
                np.empty((0, future, n_feat)),
                np.empty((0, future)))
    return np.array(xl), np.array(xfl), np.array(yl)
def batch(*vars, size=512):
    for i in range(0, min(len(v) for v in vars), size):
        yield (v[i:i+size] for v in vars)

In [None]:
# Training windows: (past inputs, future covariates, future targets).
xt, xtf, yt = row2seq_rnn(x_train, y_train)

# Held-out windows. x_val / y_val are the same slices of `ntt` that the test
# windows were previously rebuilt from, so the windows are computed once and
# the test names simply refer to the same arrays.
xv, xvf, yv = row2seq_rnn(x_val, y_val)
xtt, xttf, ytt = xv, xvf, yv

In [None]:
# Length of the output weight vector and the size argument handed to graph_creation.s2s_lstm_fixed in the graph cell.
hidden_dim = 32

In [None]:
# Build the TF1 graph: input placeholders, the seq2seq LSTM from graph_creation,
# a linear read-out on its outputs, the MSE loss and the Adam training op.
g = tf.Graph()
with g.as_default():
    # Past window; the size of the last axis is taken from the training windows `xt`.
    x = tf.placeholder(tf.float32, (None, None, xt.shape[2]), name='x_past')
    # Targets; rank 2 so they can be subtracted from `preds` (the einsum below yields rank 2).
    y = tf.placeholder(tf.float32, (None, None), name='y')
    # Future covariates; the size of the last axis is taken from `xtf`.
    xf = tf.placeholder(tf.float32, (None, None, xtf.shape[2]), name='x_future')
    # Scalar knobs with defaults, so evaluation can omit them from feed_dict:
    # keep_prob defaults to 1.0, is_training to False, regularization to 0.005.
    keep_prob = tf.placeholder_with_default(1.0, (), name='keep_prob')
    is_training = tf.placeholder_with_default(False, (), name='is_training')
    regularization = tf.placeholder_with_default(0.005, (), name='regularization')
    
    # Linear read-out parameters: a weight vector of length hidden_dim drawn from
    # a normal distribution and divided by hidden_dim, plus a single zero bias.
    out_weight = tf.Variable(tf.random_normal((hidden_dim,))/hidden_dim, dtype=tf.float32, name='out_weight')
    out_bias = tf.Variable(tf.zeros(1), dtype=tf.float32, name='out_bias')
    
    # Basic seq2seq LSTM
    # The einsum below needs `outputs` to be rank 3 with a last axis of size hidden_dim.
    # NOTE(review): the positional `2` is presumably the number of layers -- confirm in graph_creation.
    # NOTE(review): use_bn=True -- if the helper registers batch-norm moving-average
    # updates in tf.GraphKeys.UPDATE_OPS, `step` below does not depend on them; verify.
    outputs, _  = graph_creation.s2s_lstm_fixed(x, xf, hidden_dim, 2, use_bn=True, is_training=is_training, keep_prob=keep_prob, project=True)
    
    # Contract the last axis of `outputs` with out_weight and add the bias -> rank-2 predictions.
    preds = tf.add(tf.einsum('ijk,k->ij', outputs, out_weight), out_bias, name='predictions')
    # Mean squared error over all elements.
    loss = tf.reduce_mean((y-preds)**2)
    # L2 penalty on the read-out weights only, scaled by `regularization` and
    # divided by the size of the first axis of x.
    reg_loss = tf.nn.l2_loss(out_weight) * regularization / tf.cast(tf.shape(x)[0], tf.float32)
    # Adam with default hyper-parameters minimises loss + penalty; `loss` itself stays un-penalised for reporting.
    step = tf.train.AdamOptimizer().minimize(loss+reg_loss)
    # Created after minimize(), so the optimizer's variables already exist when the Saver is built.
    saver = tf.train.Saver()

In [None]:
# Close every session that a previous run of this cell left registered as the
# default, so re-running the cell does not pile up sessions.
while True:
    stale_sess = tf.get_default_session()
    if stale_sess is None:
        break
    try:
        stale_sess.close()
    except Exception as exc:
        # Cleanup is best-effort: report and carry on with a fresh session.
        print(f'Could not close stale session: {exc!r}')
        break
    if tf.get_default_session() is stale_sess:
        # close() did not unregister it; stop rather than spin forever.
        break

# Fresh interactive session on graph `g`, with a Saver and initialised variables.
sess = tf.InteractiveSession(graph=g)
saver = tf.train.Saver()
sess.run(tf.global_variables_initializer())

In [None]:
def train_epoch(bs, keep_p=1.0):
    """Run one shuffled pass over the training windows.

    Parameters
    ----------
    bs : int
        Mini-batch size.
    keep_p : float
        Value fed to the `keep_prob` placeholder.

    Returns
    -------
    list
        The loss reported for each mini-batch, in the order processed.
    """
    order = np.random.permutation(len(xt))
    shuffled = (xt[order], xtf[order], yt[order])

    batch_losses = []
    for past_x, future_x, target in batch(*shuffled, size=bs):
        feed = {x: past_x, xf: future_x, y: target,
                keep_prob: keep_p, is_training: True}
        # Fetch the train op together with the loss; only the loss is kept.
        batch_losses.append(sess.run((step, loss), feed_dict=feed)[1])
    return batch_losses
def evaluate(bs):
    """Return the loss over the test windows, weighting each batch by its size.

    Parameters
    ----------
    bs : int
        Number of windows evaluated per session call.
    """
    weighted_losses = []
    batch_sizes = []
    for past_x, future_x, target in batch(xtt, xttf, ytt, size=bs):
        batch_loss = sess.run(loss, feed_dict={x: past_x, xf: future_x, y: target})
        # Weight by the number of windows so a short final batch counts proportionally.
        weighted_losses.append(batch_loss*len(target))
        batch_sizes.append(len(target))
    return sum(weighted_losses)/sum(batch_sizes)

In [None]:
# Mini-batch size passed to train_epoch (2048 windows); evaluate and predict are called with twice this.
bs = 1024*2

In [None]:
# Training configuration.
N_EPOCHS = 300
CKPT_PREFIX = './eecs484results/2L48h_bn_0.3d_24p_24f_project/model'

# tf.train.Saver.save fails when the checkpoint's parent directory is missing,
# which would otherwise abort training at the first checkpoint.
os.makedirs(os.path.dirname(CKPT_PREFIX), exist_ok=True)

train_errs = []   # loss of every mini-batch, across all epochs
test_errs = []    # one test-set loss per epoch
for i in range(len(test_errs), N_EPOCHS):
    start = time.time()
    train_err = train_epoch(bs, 0.7)
    train_errs.extend(train_err)
    test_errs.append(evaluate(bs*2))
    end = time.time()
    # Checkpoint whenever the newest test loss is the best so far; the first
    # two epochs (i = 0, 1) are never checkpointed.
    # NOTE(review): checkpoints are selected on the test split, so the reported
    # test loss of the chosen checkpoint is optimistically biased.
    if i>1 and test_errs[-1]==min(test_errs):
        saver.save(sess, CKPT_PREFIX, global_step=len(test_errs))
    # train_loss shown here is the loss of the LAST mini-batch of the epoch.
    print(f'Epoch {i} ({end-start:.2f}s): train_loss={train_errs[-1]:.4f}, test_loss={test_errs[-1]:.4f}')
In [None]:
def predict(bs):
    """Run the model over the test windows and return all predictions stacked.

    Parameters
    ----------
    bs : int
        Number of windows per session call.

    Returns
    -------
    ndarray
        The per-batch prediction arrays concatenated along axis 0.
    """
    chunks = [
        sess.run(preds, feed_dict={x: past_x, xf: future_x, y: target})
        for past_x, future_x, target in batch(xtt, xttf, ytt, size=bs)
    ]
    return np.concatenate(chunks, 0)

In [None]:
# Map predictions and targets back to the original load units using the
# offset/scale saved during standardisation.
pos = offset + scale*predict(bs*2)
yov = offset + scale*ytt

# Mean squared error per forecast step (averaged over axis 0, the windows).
squared_err = np.square(yov - pos)
loss_by_horizon = np.mean(squared_err, axis=0).astype(np.float32)
loss_by_horizon