# Recurrent Neural Network

Let's try throwing a neural network at this problem. (This was my ultimate goal all along.)  We'll give the network the day of the week, time of day, day of the year, and temperature as inputs.  This first version uses a single recurrent cell, with a linear combination at the end.  This could be enhanced by making deeper networks at both the beginning and end, or by using a fancier cell (LSTM, GRU).

This desperately needs some regularization (e.g. dropout).

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from get_weather_data import convert_isd_to_df, convert_state_isd
from EBA_util import remove_na, avg_extremes

%matplotlib inline

import tensorflow as tf
from tensorflow.contrib.layers import fully_connected
from tensorflow.contrib.rnn import BasicRNNCell,LSTMCell

In [2]:
#Build the joint Portland demand/temperature frame, using the cached CSV when available.
#Extend to multiple temperature series
try:
    df_joint=pd.read_csv('data/pdx_joint.txt',
        index_col=0, parse_dates=True)
    print('Read in PDX Frame from file')
except (OSError, ValueError):
    #Only a missing/unreadable cache (OSError) or an unparsable one (pandas
    #parser errors derive from ValueError) triggers a rebuild.  Anything else,
    #including KeyboardInterrupt, now propagates instead of being swallowed.
    print('Creating PDX DataFrame from scratch')
    air_df = pd.read_csv('data/air_code_df.gz')
    #Just get the weather station data for cities in Oregon.
    df_weather=convert_state_isd(air_df,'OR')
    #Keep every Oregon station (not just Portland).
    msk2=np.array(df_weather['state']=='OR')
    df_pdx_weath=df_weather.loc[msk2]
    #unique station city/state combinations
    unique_station=df_pdx_weath['city, state'].unique()
    Nstation = len(unique_station)

    #reshape the single temperature column into one 'Temp-<station>' column per station.
    temp_df=pd.DataFrame()
    for station in unique_station:
        colname='Temp-'+station
        temp_df[colname]=df_pdx_weath.loc[df_pdx_weath['city, state']==station,'Temp']

    #get electricity data for Portland General Electric
    df_eba=pd.read_csv('data/EBA_time.gz',index_col=0,parse_dates=True)
    msk=df_eba.columns.str.contains('Portland')
    df_pdx=df_eba.loc[:,msk]
    #select out demand data
    msk1 = df_pdx.columns.str.contains('[Dd]emand')
    dem=df_pdx.loc[:,msk1]
    #Make a combined Portland Dataframe for demand vs weather.
    df_joint=pd.DataFrame(dem)
    df_joint=df_joint.join(temp_df)
    #NOTE(review): assumes the first two demand columns are actual demand and
    #the demand forecast, in that order - confirm against the EBA column names.
    df_joint = df_joint.rename(columns={df_joint.columns[0]:'Demand',
             df_joint.columns[1]:'Forecast'})
    #Cache the result so later runs take the fast path above.
    df_joint.to_csv('data/pdx_joint.txt')

#Working copies: demand series, all temperature columns, and the forecast series.
dem=df_joint['Demand'].copy()
temp=df_joint.loc[:,df_joint.columns.str.contains('Temp')].copy()
fore=df_joint['Forecast'].copy()


Read in PDX Frame from file


In [3]:
#clean up data: remove NA values, and average extreme values down

#Temperature frame: clean each station column and write it back in place.
for i in range(temp.shape[1]):
    temp.iloc[:,i] = avg_extremes(remove_na(temp.iloc[:,i]))

#Demand series: assign the cleaned result back to `dem` explicitly.  The
#previous generic loop only rebound its loop variable, so the cleaned demand
#returned by avg_extremes was discarded.
dem = avg_extremes(remove_na(dem))

Number of extreme values 1. Number of zero values 3


Number of NA values 156


Number of extreme values 0. Number of zero values 143


Number of NA values 126


Number of extreme values 0. Number of zero values 138


Number of NA values 181


Number of extreme values 0. Number of zero values 148


Automatic pdb calling has been turned OFF
Number of NA values 56


In [4]:
def make_temptime_data(temp_mat):
    """make_temptime_data
    Takes input temperature data matrix (for multiple locations),
    and extends it with scaled columns for time of day, day of week and day of year.

    Input: temp_mat - pandas DataFrame of temperatures (one column per location)
                      indexed by a DatetimeIndex.
    Output: in_mat - array of shape (Nt, Nlocations+3): min/max-scaled temperatures
                     followed by hour, day-of-week and day-of-year, each scaled to [0,1].
            temp_max - per-column maximum temperature (needed to invert the scaling)
            temp_min - per-column minimum temperature
    """
    Tind = temp_mat.index
    #hour is 0..23 and dayofweek is 0..6, so dividing by the largest value gives [0,1]
    hr = Tind.hour.values/(24-1)
    dweek = Tind.dayofweek.values/(7-1)
    #dayofyear is 1-based (1..365, or 1..366 in leap years): shift to zero-based
    #before dividing so the feature spans [0,1] like the other time features.
    days_in_year = 365+Tind.is_leap_year.astype(int)
    dyear = (Tind.dayofyear.values-1)/(days_in_year-1)
    #scale temperature data so that each column's min/max correspond to [0,1]
    temp_max = temp_mat.max(axis=0)
    temp_min = temp_mat.min(axis=0)
    temp_mat = (temp_mat-temp_min)/(temp_max-temp_min)
    #column order: temperatures first, then hour, day of week, day of year
    in_mat=np.stack([hr,dweek,dyear]).T
    in_mat= np.hstack([temp_mat.values,in_mat])
    return in_mat, temp_max,temp_min

def scale_demand(dem):
    """scale_demand
    Min/max-scale a demand series onto the [0,1] interval.

    Input: dem - pandas series of demand at a single location
    Output: scaled array of values (numpy array),
            followed by the maximum and the minimum of the raw values.
    """
    raw = dem.values
    lo = np.min(raw)
    hi = np.max(raw)
    return (raw-lo)/(hi-lo), hi, lo

#Scale the network inputs and the demand target, keeping the extrema for later inversion.
temp_mat, tmax, tmin = make_temptime_data(temp)
dem_mat, dmax, dmin = scale_demand(dem)

#Chronological 50/50 split.  Despite its name, Ntest is the row index at which
#the test half begins (equivalently, the length of the training half).
Nt = len(dem)
Ntest = Nt//2

temp_train, temp_test = temp_mat[:Ntest,:], temp_mat[Ntest:,:]
dem_train, dem_test = dem_mat[:Ntest], dem_mat[Ntest:]

In [12]:
#Sanity check: the scaled demand is a 1-D array with one entry per time stamp.
dem_mat.shape

(20216,)

In [82]:
#Count the NaN values left in the demand series after cleaning.
np.isnan(dem).sum()

0

In [42]:
def get_random_batch(X,y,n_batch,seq_len):
    """get_random_batch(X,y,n_batch,seq_len)
    Draws n_batch contiguous windows of length seq_len, each starting at a
    random row, and stacks them into batch arrays.  The same start row is
    used for the inputs and the outputs, so the windows stay aligned.

    X - matrix of inputs, (Nt, Ninputs)
    y - vector of desired outputs (Nt)
    n_batch - number of windows to draw
    seq_len - length of sequence to extract for each window

    Outputs:
    X_batch - random subset of inputs, shape (n_batch,seq_len,Ninputs)
    y_batch - corresponding subset of outputs, shape (n_batch,seq_len,1) for a 1-D y
    """
    Nt, _ = X.shape
    #one uniform draw per window, taken in order from numpy's global RNG
    starts = [int(np.random.random()*(Nt-seq_len-1)) for _ in range(n_batch)]
    x_batch = np.stack([X[s:s+seq_len] for s in starts], axis=0)
    y_batch = np.stack([y[s:s+seq_len] for s in starts], axis=0)
    #add the trailing output dimension
    return x_batch, y_batch.reshape([n_batch,seq_len,-1])

#Smoke test: draw 1000 random windows of 24 steps from the full scaled data set.
Xb,yb=get_random_batch(temp_mat,dem_mat,1000,24)


In [47]:
#Targets come back as (n_batch, seq_len, 1).
yb.shape


(1000, 24, 1)

In [92]:
#Hyper-parameters shared by the network cells below.
n_steps = 24                    #time steps per training sequence
n_inputs = temp.shape[1] + 3    #one input per temperature column plus hour, day of week, day of year
n_neurons = 120                 #units per recurrent cell
n_layers = 3                    #number of stacked recurrent cells
n_outputs = 1                   #number of stations to predict at that time.
lr = 1e-2                       #learning rate for the optimizer
#seed numpy's global RNG (used by get_random_batch) for repeatable batches
np.random.seed(3453)

In [87]:
def make_RNN_cell(n_neurons,fn=tf.nn.relu):
    """Return a new BasicRNNCell with n_neurons units and activation function fn."""
    return BasicRNNCell(num_units=n_neurons,activation=fn)

In [None]:
#Initial test with code liberally borrowed from ch14 of Geron's 
#"Practical Machine Learning with scikit-learn and Tensorflow"

#Builds a stack of n_layers BasicRNNCells followed by a fully connected output layer.
#NOTE(review): the output layer applies tanh (activation_fn=tf.nn.tanh), although the
#notebook introduction describes a linear output - confirm which one is intended.

print('setting up graphs:Multi-layer RNN')
tf.reset_default_graph()
#inputs:  Nobs, with n_steps, and n_inputs per step
X = tf.placeholder(tf.float32,[None,n_steps,n_inputs],name='X')
#Targets: n_outputs values for each of the n_steps time steps.
y = tf.placeholder(tf.float32,[None,n_steps,n_outputs],name='y')

#define neural network shape
#Earlier two-cell variant that worked, kept for reference:
# cell=BasicRNNCell(num_units=n_neurons,activation=tf.nn.relu)
# cell2=BasicRNNCell(num_units=n_neurons,activation=tf.nn.relu)
# multi_cell = tf.contrib.rnn.MultiRNNCell([cell,cell2],state_is_tuple=True)

#Make a list of separately constructed cells to pass along, one per layer.
cell_list=[]
for i in range(n_layers):
    cell_list.append(make_RNN_cell(n_neurons,tf.nn.relu))

multi_cell=tf.contrib.rnn.MultiRNNCell(cell_list,state_is_tuple=True)
#multi_cell = tf.contrib.rnn.MultiRNNCell([cell]*n_layers,state_is_tuple=True)
#Note that using [cell]*n_layers (the same cell object repeated) did not work,
#hence the list of distinct cells above.
rnn_outputs,states=tf.nn.dynamic_rnn(multi_cell,X,dtype=tf.float32)
#Flatten batch and time so the dense layer maps the n_neurons hidden units to n_outputs.
stacked_rnn_outputs = tf.reshape(rnn_outputs,[-1,n_neurons])
stacked_outputs = fully_connected(stacked_rnn_outputs,n_outputs,activation_fn=tf.nn.tanh)
#Restore the (batch, n_steps, n_outputs) layout so it lines up with y.
outputs=tf.reshape(stacked_outputs,[-1,n_steps,n_outputs])

#define loss (mean-square-error)
loss = tf.reduce_mean(tf.square(outputs-y))
#define optimization function.
optimizer=tf.train.AdamOptimizer(learning_rate=lr)
training_op=optimizer.minimize(loss)
init=tf.global_variables_initializer()

#Saver used for checkpointing here and for restoring in model_predict_whole.
saver = tf.train.Saver()
#Training settings: number of optimizer steps and windows per batch.
print('Loading data')
n_iter=1000
n_batch=100
run_network=True

if (run_network==True):
    print('Running this thang')
    with tf.Session() as sess:
        init.run()
        for iteration in range(n_iter):
            #draw a fresh batch of random windows from the training half.
            X_batch,y_batch=get_random_batch(
                            temp_train, dem_train, n_batch, n_steps)

            sess.run(training_op, feed_dict={X: X_batch, y:y_batch})
            if iteration%10 ==0:
                #MSE on the batch that was just trained on (not a held-out set).
                mse =loss.eval(feed_dict={X:X_batch,y:y_batch})
                print(iteration,"\tMSE:",mse)
                #save model (checkpoint is overwritten every 10 iterations)
                saver.save(sess, "./models/pdx_RNN_model")

130 	MSE: 0.0138508


120 	MSE: 0.0161236


110 	MSE: 0.0143526


100 	MSE: 0.015318


90 	MSE: 0.0219297


80 	MSE: 0.022877


70 	MSE: 0.0254141


60 	MSE: 0.0299337


50 	MSE: 0.0298571


40 	MSE: 0.237012


30 	MSE: 0.448338


20 	MSE: 0.456865


10 	MSE: 0.449538


0 	MSE: 0.451826


Loading data
Running this thang


setting up graphs:Multi-layer RNN


['a', 'a', 'a']

In [189]:
#Single BasicRNNCell followed by a fully connected (tanh) output layer.
#TODO: the extra hidden layers on the input/output side (n_hidden) are not wired in yet.
#NOTE: this cell rebinds X, y, outputs and saver, so after running it
#model_predict_whole must be called with path_str="pdx_RNN2_model".
n_steps=24
n_inputs=len(temp.iloc[0])+3
n_hidden=100
#One demand value per time step: this has to match the trailing dimension of the
#(n_batch, n_steps, 1) targets produced by get_random_batch, otherwise feeding y fails.
n_outputs=1

print('setting up graphs: Hidden-RNN-Hidden')
tf.reset_default_graph()

#inputs:  Nobs, with n_steps, and n_inputs per step
X = tf.placeholder(tf.float32,[None,n_steps,n_inputs],name='X')
#Targets: n_outputs values for each of the n_steps time steps.
y = tf.placeholder(tf.float32,[None,n_steps,n_outputs],name='y')

#define neural network shape
cell=BasicRNNCell(num_units=n_neurons,activation=tf.nn.relu)
rnn_outputs,states=tf.nn.dynamic_rnn(cell,X,dtype=tf.float32)
#Flatten batch and time so the dense layer maps the n_neurons hidden units to n_outputs.
stacked_rnn_outputs = tf.reshape(rnn_outputs,[-1,n_neurons])
stacked_outputs = fully_connected(stacked_rnn_outputs,n_outputs,activation_fn=tf.nn.tanh)
outputs=tf.reshape(stacked_outputs,[-1,n_steps,n_outputs])

#define loss (mean-square-error) on this graph's own output tensor
#(previously referenced an undefined name, `output_2`).
loss = tf.reduce_mean(tf.square(outputs-y))
#define optimization function.
optimizer=tf.train.AdamOptimizer(learning_rate=lr)
training_op=optimizer.minimize(loss)
init=tf.global_variables_initializer()

n_iter=20
n_batch=100

#make model saver
saver = tf.train.Saver()
save_path="./models/pdx_RNN2_model"

with tf.Session() as sess:
    init.run()
    for iteration in range(n_iter):
        #draw a fresh batch of random windows from the training half.
        X_batch,y_batch=get_random_batch(
            temp_train, dem_train, n_batch, n_steps)
        sess.run(training_op, feed_dict={X: X_batch, y:y_batch})
        if iteration%10 ==0:
            #MSE on the batch that was just trained on (not a held-out set).
            mse =loss.eval(feed_dict={X:X_batch,y:y_batch})
            print(iteration,"\tMSE:",mse)
            #save model
            saver.save(sess, save_path)

exported_meta=tf.train.export_meta_graph( filename=save_path+".meta")

10 	MSE: 0.0305145


0 	MSE: 0.0704299


setting up graphs: Hidden-RNN-Hidden


In [38]:
def model_predict_whole(Xin,path_str="pdx_RNN_model"):
    """model_predict_whole(Xin,path_str)
    Retrieve the outputs of the network for all rows of the inputs.

    The input is cut into consecutive, non-overlapping windows of n_steps rows,
    and each window is run through the network on its own.  Rows left over
    after the last full window are filled from one extra window aligned to the
    end of the data.  If there are fewer than n_steps rows nothing can be
    predicted and zeros are returned.

    Relies on the module-level graph objects `saver`, `outputs`, `X` and on
    `n_steps`; they must belong to the graph whose checkpoint is restored.

    Xin - matrix of inputs, (Nt, Ninputs)
    path_str - checkpoint name inside ./models/

    Returns: ytot - array of predictions, shape (Nt,1)
    """
    Nt,Nin=Xin.shape
    n_windows = Nt//n_steps
    ytot = np.zeros((Nt,1))
    model_path = "./models/"+path_str
    with tf.Session() as sess:
        #restore variables
        saver.restore(sess,model_path)
        #every full window, including the last one (previously skipped)
        for i in range(n_windows):
            n0=n_steps*i
            x_sub = Xin[n0:n0+n_steps,:].reshape(-1,n_steps,Nin)
            y_pred=sess.run(outputs,feed_dict={X:x_sub})
            #drop the leading batch dimension of size 1
            ytot[n0:n0+n_steps]=np.reshape(y_pred,(n_steps,-1))
        #leftover rows: predict on the final n_steps rows and keep only the
        #part that was not covered by a full window
        n_done = n_windows*n_steps
        if n_windows>0 and n_done<Nt:
            x_sub = Xin[Nt-n_steps:,:].reshape(-1,n_steps,Nin)
            y_pred=np.reshape(sess.run(outputs,feed_dict={X:x_sub}),(n_steps,-1))
            ytot[n_done:]=y_pred[n_done-Nt:]
    return ytot

In [29]:
def plot_whole_sample_fit(X,y,ntest,n_steps,path_str="pdx_RNN_model"):
    """plot_whole_sample_fit

    Plot ALL of the predictions of the trained model against the real
    outputs, together with the first input column split into its
    training and test portions.

    X - matrix of inputs, (Nt, Ninputs)
    y - real (scaled) outputs, length Nt; drawn as the 'Real' curve
    ntest - row index where the test portion starts
    n_steps - unused here (model_predict_whole reads the module-level n_steps);
              kept so existing calls keep working
    path_str - checkpoint name passed on to model_predict_whole

    Returns: ytot - the predictions from model_predict_whole
    """
    #pull in the inputs, and predictions
    Nt, Nin = X.shape
    ytot=model_predict_whole(X,path_str)
    plt.figure()
    #first input column, coloured by training/test portion
    plt.plot(np.arange(0,ntest),X[:ntest,0],'b',label='Training')
    plt.plot(np.arange(ntest,Nt), X[ntest:,0],'g',label='Test')
    plt.plot(np.arange(Nt),ytot,'r',label='Predicted')
    #use the y argument (previously the global dem_mat was plotted and y was ignored)
    plt.plot(np.arange(Nt),y,label='Real')
    plt.legend(loc='right')
    plt.show()
    return ytot

In [77]:
#Predict over the whole data set with the saved multi-layer model and plot the fit.
ytot=plot_whole_sample_fit(temp_mat,dem_mat,Ntest,n_steps,'pdx_RNN_model')

<matplotlib.figure.Figure at 0x7efb9f02f940>

INFO:tensorflow:Restoring parameters from ./models/pdx_RNN_model


In [136]:
%pdb off

Automatic pdb calling has been turned OFF


In [79]:
#Wrap the predictions in a one-column DataFrame indexed like the demand series.
pred=pd.DataFrame(ytot,index=dem.index)

(20216, 1)

In [82]:
def rmse(x,y):
    """Root-mean-square difference between x and y, normalised by len(x)."""
    diff = x-y
    return np.sqrt(np.sum(diff*diff)/len(x))

#Scatter the 'Forecast' column against the network predictions.
#NOTE(review): ytot is on the scaled [0,1] demand scale while fore is taken
#unscaled from df_joint - confirm this comparison is intended.
plt.plot(fore,ytot,'.')
plt.show()

<matplotlib.figure.Figure at 0x7efbac0ebbe0>

In [52]:
#RMSE of the ratio Forecast/Demand from 1 over the first half of 2016 (a relative error).
print(rmse(fore['2016-01':'2016-06']/dem['2016-01':'2016-06'],1))
#First half of the series, i.e. the portion used for training above.
nt = len(ytot)//2
#RMSE of the network output in scaled [0,1] units.  Both sides are compared as
#flat 1-D arrays: subtracting a length-nt 1-D array directly from the (nt,1)
#DataFrame `pred` would be matched against its single column and fail.
print(rmse(pred.iloc[:nt,0].values,dem_mat[:nt]))

0.0303363570153
0.149862419879


In [None]:
#Number of rows in the first half of the prediction array.
len(ytot)//2