# GRU model for price prediction

## 0. Assumptions of the model

Model used : encoder decoder made of GRU cells

## 1. Packages 

In [22]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf  
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler, MinMaxScaler
print('Imported tensorflow', tf.VERSION)

Imported tensorflow 1.11.0


In [18]:
# import folder scripts
from feature_selection_feed import top_correlated, reformat_as_sequence
from evaluation import score_mape

## 2. Data Source

In [3]:
df = pd.read_csv('metals_daily_train.csv')
df = df.dropna(axis=0)

In [4]:
df.head()

Unnamed: 0,date,p0,p1,p2,f000_open,f000_high,f000_low,f000_settle,f001_open,f001_high,...,f136_open,f136_high,f136_low,f136_settle,f137_open,f137_high,f137_low,f137_settle,week,week_date
109,20081201,444.511058,457.032497,457.032497,53.08,56.33,52.62,56.29,49.11,52.15,...,9420.0,9680.0,9315.0,9540.0,9520.0,9800.0,9495.0,9650.0,2030,20081201
110,20081202,446.908899,465.530103,459.323035,55.99,56.29,54.68,55.3,51.8,52.24,...,9480.0,9600.0,9430.0,9510.0,9640.0,9730.0,9560.0,9630.0,2030,20081201
111,20081203,453.48482,482.060575,459.69694,56.5,56.72,54.65,55.21,53.01,53.02,...,9495.0,9580.0,9400.0,9500.0,9530.0,9690.0,9505.0,9590.0,2030,20081201
112,20081204,447.532919,472.395859,459.964389,55.5,57.81,54.88,57.62,51.75,54.42,...,9485.0,9485.0,9120.0,9145.0,9400.0,9445.0,9205.0,9225.0,2030,20081201
113,20081205,447.084228,471.922241,459.503235,58.5,60.0,56.63,56.75,54.7,56.18,...,8710.0,9000.0,8595.0,8665.0,8885.0,8940.0,8670.0,8715.0,2030,20081201


In [5]:
def normalize(df):
    scaler = StandardScaler()
    values = df.values.reshape(-1, 1)
    values = scaler.fit_transform(values)
    return pd.DataFrame(values), scaler

# 2Bis. Boosted data source

In [210]:
df = pd.read_csv('metals_daily_train.csv')
df = df.dropna(axis=0)

selected_features = top_correlated(df, 50, 0.75)
selected_features = selected_features.insert(0, 'date')

df_ref = reformat_as_sequence(df, 100, 50)
df_ref.head()

Unnamed: 0,date_t,p1_(t-99),p1_(t-98),p1_(t-97),p1_(t-96),p1_(t-95),p1_(t-94),p1_(t-93),p1_(t-92),p1_(t-91),...,p1_(t+41),p1_(t+42),p1_(t+43),p1_(t+44),p1_(t+45),p1_(t+46),p1_(t+47),p1_(t+48),p1_(t+49),p1_(t+50)
100,20090515.0,457.032497,465.530103,482.060575,472.395859,471.922241,472.073147,472.134908,472.519568,473.139085,...,512.965645,516.764439,516.749309,516.772004,519.259329,519.221323,519.236525,525.538546,525.46931,527.917451
101,20090519.0,465.530103,482.060575,472.395859,471.922241,472.073147,472.134908,472.519568,473.139085,467.806318,...,516.764439,516.749309,516.772004,519.259329,519.221323,519.236525,525.538546,525.46931,527.917451,556.722797
102,20090520.0,482.060575,472.395859,471.922241,472.073147,472.134908,472.519568,473.139085,467.806318,468.585995,...,516.749309,516.772004,519.259329,519.221323,519.236525,525.538546,525.46931,527.917451,556.722797,556.771695
103,20090521.0,472.395859,471.922241,472.073147,472.134908,472.519568,473.139085,467.806318,468.585995,474.348364,...,516.772004,519.259329,519.221323,519.236525,525.538546,525.46931,527.917451,556.722797,556.771695,556.804299
104,20090522.0,471.922241,472.073147,472.134908,472.519568,473.139085,467.806318,468.585995,474.348364,493.223915,...,519.259329,519.221323,519.236525,525.538546,525.46931,527.917451,556.722797,556.771695,556.804299,560.517007


In [211]:
trend_memory = len(selected_features) - 1

In [212]:
def trend_bins(df, lag, block=None):
    """Return categories of trend (-1, 0, +1) over the last lag days at each date"""
    # compute slopes 
    slopes = []
    for t in range(lag, len(df)):
        linreg = LinearRegression()
        linreg.fit(np.arange(lag).reshape(-1, 1), df.iloc[(t-lag):t].values)
        slopes.append(list(linreg.coef_.flatten()))
    df_slopes = pd.DataFrame(slopes, columns=df.columns)
    
    # categorize slopes
    df_copy_slopes = df_slopes.copy()
    for col in df.columns:
        if col not in block:
            thre_low = np.quantile(df_copy_slopes[col].values, 0.33)
            thre_high = np.quantile(df_copy_slopes[col].values, 0.66)
            df_slopes[col] = 0
            df_slopes[col][df_copy_slopes[col] >= thre_high] = 1
            df_slopes[col][df_copy_slopes[col] <= thre_low] = -1
            
    df_slopes = df_slopes.drop(block, axis=1)
    for col in block:
        df_slopes[col] = df[col].values[lag:len(df)]
        
    return df_slopes

In [213]:
# prepare for join
df_trends = trend_bins(df[selected_features], 50, block=['date'])
df_trends = df_trends.set_index('date')
df_ref = df_ref.set_index('date_t')
df_trends.index.astype('float64')

# join
df_boosted = df_trends.join(df_ref, how='inner')
df_boosted = df_boosted.reset_index()
df_boosted = df_boosted.drop('index', axis=1)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [218]:
def fetch_sample_boosted(df, batch_size, input_seq_len, output_seq_len, random_state=None):
    """Get a batch from the dataframe. 
    
    Each batch contains batch_size sequences. 
    Each sequences is made of input_seq_len values and the follwing output_seq_len 
    values of the time series.
    """
    X_batch = []
    y_batch = []
    n = df.shape[0]
    np.random.seed(random_state)
    rs = np.random.randint(0, n-df.shape[1], batch_size)
    for _, r in zip(range(batch_size), rs):
        X_batch.append(df_boosted.iloc[r, :-output_seq_len].values.reshape(-1, 1))
        y_batch.append(df_boosted.iloc[r, -output_seq_len:].values.reshape(-1, 1))
    X_batch = np.array(X_batch)
    X_batch = np.array(X_batch).transpose((1, 0, 2))
    y_batch = np.array(y_batch).transpose((1, 0, 2))
    return X_batch, y_batch

In [223]:
# just an example
fetch_sample_boosted(df_boosted, 2, 10, 5, random_state=5)

(array([[[ -1.       ],
         [  1.       ]],
 
        [[ -1.       ],
         [  0.       ]],
 
        [[ -1.       ],
         [  1.       ]],
 
        [[ -1.       ],
         [  0.       ]],
 
        [[ -1.       ],
         [  0.       ]],
 
        [[ -1.       ],
         [  1.       ]],
 
        [[ -1.       ],
         [  0.       ]],
 
        [[ -1.       ],
         [  1.       ]],
 
        [[ -1.       ],
         [  1.       ]],
 
        [[ -1.       ],
         [  1.       ]],
 
        [[ -1.       ],
         [  1.       ]],
 
        [[ -1.       ],
         [  1.       ]],
 
        [[  0.       ],
         [  1.       ]],
 
        [[  0.       ],
         [  1.       ]],
 
        [[ -1.       ],
         [  1.       ]],
 
        [[ -1.       ],
         [  1.       ]],
 
        [[  1.       ],
         [  1.       ]],
 
        [[559.7102754],
         [485.689497 ]],
 
        [[562.6184014],
         [485.7535252]],
 
        [[562.6904985],
       

In [253]:
def normalize_boosted(df, trend_memory):
    scaler = StandardScaler()
    values = df[trend_memory:].values
    #.reshape(-1, 1)
    values = scaler.fit_transform(values)
    concat = pd.concat([df.iloc[:, :trend_memory], pd.DataFrame(values)], axis=1)
    return pd.DataFrame(values), scaler

In [256]:
df_boosted.iloc[:, :trend_memory]

Unnamed: 0,p1,f091_settle,f029_settle,f090_settle,f131_settle,f132_settle,f130_settle,f133_settle,f028_settle,f135_settle,f134_settle,f011_settle,f010_settle,f002_settle,f004_settle,f003_settle,f097_settle
0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
2,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
3,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
4,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
5,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
6,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
7,0,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1
8,0,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1
9,0,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1


In [254]:
# normalized/scaled prices
df_boosted_sc, scaler = normalize_boosted(df_boosted, 17)
df_boosted_sc.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,157,158,159,160,161,162,163,164,165,166
0,1.165059,1.197133,1.236515,1.192552,0.010798,1.238686,0.011438,1.238686,1.236515,1.224209,...,0.374182,0.37507,0.250637,0.187268,0.065155,0.06469,0.065981,0.06594,0.065608,0.064982
1,1.165059,1.197133,1.236515,1.192552,0.010798,1.238686,0.011438,1.238686,1.236515,1.224209,...,0.375271,0.25083,0.187483,0.065389,0.065009,0.066375,0.066348,0.066013,0.065388,0.065202
2,1.165059,1.197133,1.236515,1.192552,1.241824,1.238686,0.011438,1.238686,1.236515,1.224209,...,0.25102,0.187671,0.065594,0.065243,0.066694,0.066741,0.066421,0.065793,0.065608,0.064763
3,1.165059,1.197133,1.236515,1.192552,1.241824,1.238686,0.011438,1.238686,1.236515,1.224209,...,0.187855,0.065771,0.065448,0.066928,0.06706,0.066814,0.066201,0.066013,0.065169,0.028722
4,1.165059,1.197133,1.236515,1.192552,1.241824,1.238686,0.011438,1.238686,1.236515,1.224209,...,0.065944,0.065624,0.067133,0.067295,0.067134,0.066595,0.066421,0.065574,0.029121,0.028867


## 3. Seq2Seq with GRU cells model

In [196]:
# Dataframe we work on 
df = df_boosted_sc

# Proportion of samples in the training set 
train_prop = 0.8

# train test split
cut = int(train_prop * len(df))
df_train = df[:cut]
df_test = df[cut:]

# sanity check
print('train', len(df_train), len(df_train)/len(df))
print('test', len(df_test), len(df_test)/len(df))

train 1564 0.8
test 391 0.2


In [227]:
# Load paths to TF seq2seq model and recurrent cells to be used in this project
tf.nn.seq2seq = tf.contrib.legacy_seq2seq
tf.nn.rnn_cell = tf.contrib.rnn 
tf.nn.rnn_cell.GRUCell = tf.contrib.rnn.GRUCell # Useful for learning long-range dependencies in sequences

# Data shape parameters
batch_size = 15 # How many time series to train on before updating model's weight parameters
output_seq_len = 50 # How many days to predict into the future
input_seq_len = 100 + trend_memory # How many days to train on in the past

# Internal neural network parameters
input_dim = output_dim = 1 # Univariate time series (predicting future values based on stream of historical values)
hidden_dim = 50  # Number of neurons in each recurrent unit 
num_layers = 2  # Number of stacked recurrent cells (number of recurrent layers)

# Optimizer parameters
learning_rate = 0.005  # Small lr helps not to diverge during training. 
epochs =  1000 #1000  # How many times we perform a training step (how many times we show a batch)
lr_decay = 0.9  # default: 0.9 . Simulated annealing.
momentum = 0.2  # default: 0.0 . Momentum technique in weights update
lambda_l2_reg = 0.01  # L2 regularization of weights - reduces overfitting

random_state = 42

In [228]:
# Reset any existing graph, close any previous session, discard old variables, and start fresh
tf.reset_default_graph()
if 'sess' in globals():
    sess.close()
sess = tf.InteractiveSession()
tf.set_random_seed(random_state)

with tf.variable_scope('Seq2Seq'):
    # Input values to encoder RNN
    encoder_inputs = [tf.placeholder(tf.float32, shape=(None, input_dim), 
                     name="encoder_input_{}".format(t)) for t in range(input_seq_len)]
    
    # Target values for decoder RNN
    decoder_targets = [tf.placeholder(tf.float32, shape=(None, output_dim), 
                       name="decoder_target_{}".format(t)) for t in range(output_seq_len)]
    
    # Feed final n encoder inputs into the decoder RNN, where n = output_seq_len
    # "GO", represented by 0, starts the decoder
    decoder_inputs = [tf.zeros_like(encoder_inputs[0], dtype=np.float32, name="GO")] +\
                      encoder_inputs[-(output_seq_len - 1):]
    
    # Stack hidden recurrent layers
    cells = list()
    for i in range(num_layers):
        with tf.variable_scope('RNN_' + str(i)):
            cells.append(tf.nn.rnn_cell.GRUCell(hidden_dim))
    cell = tf.nn.rnn_cell.MultiRNNCell(cells)
    
    # Pass encoder and decoder inputs through model, retrieving output from the decoder at each prediction step
    decoder_outputs, decoder_state = tf.nn.seq2seq.basic_rnn_seq2seq(encoder_inputs, decoder_inputs, cell)
    
    # Squeeze decoder output into a single value, representing the forecast at that point in the sequence
    W_out = tf.Variable(tf.truncated_normal([hidden_dim, output_dim], seed=random_state)) # Output weight matrix
    b_out = tf.Variable(tf.truncated_normal([output_dim], seed=random_state)) # Output bias
    
    # Apply a trainable, constant linear transformation to final outputs
    output_scale_factor = tf.Variable(1.0, name="Output_Scale_Factor")
    reshaped_outputs = [output_scale_factor * (tf.matmul(i, W_out) + b_out) for i in decoder_outputs]

In [229]:
with tf.variable_scope('Loss'):
    # Compute Mean Absolute Percentage loss for output at each time step: 
    # https://www.tensorflow.org/api_docs/python/tf/nn/l2_loss
    output_loss = 0
    for _y, _Y in zip(reshaped_outputs, decoder_targets):
        #output_loss += tf.reduce_mean(tf.metrics.mean_absolute_error(_Y, _y))
        #output_loss += tf.reduce_mean(tf.abs((_Y-_y)/_Y))
        output_loss += tf.reduce_mean(tf.nn.l2_loss(_y - _Y))
    # Penalize model complexity with L2 regularization
    output_loss = output_loss / len(reshaped_outputs)
    reg_loss = 0
    for tf_var in tf.trainable_variables():
        if not ("Bias" in tf_var.name or "Output_" in tf_var.name):
            reg_loss += tf.reduce_mean(tf.nn.l2_loss(tf_var))
    # Add regularization term to loss function        
    loss = output_loss + lambda_l2_reg * reg_loss
    
with tf.variable_scope('Optimizer'):
    # Search for minimum of loss function with RMSProp:
    # https://www.tensorflow.org/api_docs/python/tf/train/RMSPropOptimizer
    optimizer = tf.train.RMSPropOptimizer(learning_rate, decay=lr_decay, momentum=momentum, centered=False)
    train_op = optimizer.minimize(loss)

In [230]:
def train_batch(df, batch_size, input_seq_len, output_seq_len):
    """
    Trains session model, attempting to optimize internal weight parameters
    to accurately predict the number of steps into future given by output_seq_len
    
    @df: DataFrame to sample random time series from
    @batch_size: How many time series to sample at a time
    @input_seq_len: How many months before for prediction (training)
    @output_seq_len: How many months to reserve for prediction (training target)
    """
    X_train, y_train = fetch_sample_boosted(df=df, 
                                    batch_size=batch_size, 
                                    input_seq_len=input_seq_len, 
                                    output_seq_len=output_seq_len,
                                    )
    feed_dict = {encoder_inputs[t]: X_train[t] for t in range(len(encoder_inputs))}
    feed_dict.update({decoder_targets[t]: y_train[t] for t in range(len(decoder_targets))})
    train_loss = sess.run([train_op, loss], feed_dict)
    return train_loss

In [231]:
def test_batch(df, input_seq_len, output_seq_len, random_state=None):
    """
    Tests session model on a batch of random time series drawn from one of the metrics DataFrames.
    All passed parameters should be same as those used during training.
    
    @df: DataFrame to sample random time series from
    @batch_size: How many time series to sample at a time
    @input_seq_len: How many months before for prediction (training)
    @output_seq_len: How many months to set aside for prediction (training target)
    @random_state: Controls reproducible output
    """
    X_test, y_test = fetch_sample_boosted(df=df, 
                                  batch_size=1, 
                                  input_seq_len=input_seq_len, 
                                  output_seq_len=output_seq_len,
                                  random_state=random_state)
    feed_dict = {encoder_inputs[t]: X_test[t] for t in range(len(encoder_inputs))}
    feed_dict.update({decoder_targets[t]: y_test[t] for t in range(len(decoder_targets))})
    test_loss = sess.run([train_op, loss], feed_dict)
    return test_loss[1]

In [232]:
# Reset variables and run passengers training ops
sess.run(tf.global_variables_initializer())
for t in range(epochs + 1):
    train_loss = train_batch(df=df_train, batch_size=batch_size, input_seq_len=input_seq_len, output_seq_len=output_seq_len)
    # Taking the dev_loss on the same random samples serves as a validation run every 100 training runs
    if t % 100 == 0:
        dev_loss = test_batch(df=df_test, input_seq_len=input_seq_len, output_seq_len=output_seq_len)
        print("Step {0}/{1} \ttrain loss: {2} \tdev loss: {3}".format(t, epochs, train_loss[1], dev_loss))

Step 0/1000 	train loss: 1904401.625 	dev loss: 118429.59375
Step 100/1000 	train loss: 1480706.125 	dev loss: 68238.0
Step 200/1000 	train loss: 454109.125 	dev loss: 43235.20703125
Step 300/1000 	train loss: 165527.546875 	dev loss: 4967.71826171875
Step 400/1000 	train loss: 115932.5390625 	dev loss: 15169.205078125
Step 500/1000 	train loss: 45658.390625 	dev loss: 1220.2694091796875
Step 600/1000 	train loss: 52544.1796875 	dev loss: 576.150390625
Step 700/1000 	train loss: 30144.568359375 	dev loss: 370.3835144042969
Step 800/1000 	train loss: 90794.8984375 	dev loss: 593.9873046875
Step 900/1000 	train loss: 64850.71484375 	dev loss: 608.21044921875
Step 1000/1000 	train loss: 69595.390625 	dev loss: 393.9820251464844


## Tensorboard

In [None]:
from datetime import datetime
import os
import pathlib

t = datetime.utcnow().strftime("%Y%m%d%H%M%S") 
log_dir = "tf_logs"
logd = "/tmp/{}/r{}/".format(log_dir, t)

# Make directory if it doesn't exist

from pathlib import Path
home = str(Path.home())

logdir = os.path.join(os.sep,home,logd)

if not os.path.exists(logdir):
    os.makedirs(logdir)

In [None]:
# Then every time you have specified a graph run:
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

In [None]:
!tensorboard --logdir=$logdir

## 4. Example

In [233]:
X_test, y_test = fetch_sample_boosted(df_test, 1, input_seq_len, output_seq_len, random_state=random_state)
feed_dict = {encoder_inputs[t]: X_test[t] for t in range(len(encoder_inputs))}
feed_dict.update({decoder_targets[t]: y_test[t] for t in range(len(decoder_targets))})
res = sess.run([reshaped_outputs], feed_dict=feed_dict)
print(X_test)
print(res)

[[[ -1.       ]]

 [[  0.       ]]

 [[  0.       ]]

 [[  1.       ]]

 [[  0.       ]]

 [[  0.       ]]

 [[  0.       ]]

 [[  0.       ]]

 [[  0.       ]]

 [[  1.       ]]

 [[  0.       ]]

 [[  1.       ]]

 [[  1.       ]]

 [[  1.       ]]

 [[  0.       ]]

 [[  0.       ]]

 [[  1.       ]]

 [[450.8642504]]

 [[448.3266022]]

 [[444.5957322]]

 [[444.6413391]]

 [[447.8148138]]

 [[454.3751066]]

 [[454.4816018]]

 [[454.2088077]]

 [[454.2021582]]

 [[454.0359859]]

 [[462.8118195]]

 [[462.5207556]]

 [[462.6493201]]

 [[466.5506919]]

 [[468.805319 ]]

 [[468.9081979]]

 [[475.0908017]]

 [[475.2506947]]

 [[481.2997544]]

 [[481.4546784]]

 [[481.3419965]]

 [[481.3983309]]

 [[481.4546784]]

 [[485.3838173]]

 [[485.2133726]]

 [[485.2914782]]

 [[481.5392245]]

 [[481.4969477]]

 [[481.6097022]]

 [[481.4758122]]

 [[481.595605 ]]

 [[487.8429536]]

 [[494.1696736]]

 [[494.1407418]]

 [[494.1552073]]

 [[494.0973504]]

 [[500.432311 ]]

 [[506.7396372]]

 [[512.965

In [243]:
len(scaler.mean_)

167

In [238]:
def viz_prediction(X_test, y_test, batch_sample_nb):
    assert (batch_sample_nb < batch_size) & (batch_sample_nb >= 0)
    X_test, y_test = fetch_sample_boosted(df_train, batch_size, input_seq_len, output_seq_len)
    feed_dict = {encoder_inputs[t]: X_test[t] for t in range(len(encoder_inputs))}
    feed_dict.update({decoder_targets[t]: y_test[t] for t in range(len(decoder_targets))})
    res = sess.run([reshaped_outputs], feed_dict=feed_dict)[0]
    res = np.array(res)
    # shape: (output_seq_len, batch_size, input_dim)
    res.transpose((1,0,2))
    print(X_test[:,2, :].flatten())
    X_plot = list(scaler.inverse_transform(X_test[:, batch_sample_nb, :].flatten()))
    y_plot = list(scaler.inverse_transform(y_test[:, batch_sample_nb, :].flatten()))
    y_pred = list(scaler.inverse_transform(res[:, batch_sample_nb].flatten()))
    plt.figure(figsize=(8,6))
    plt.plot(X_plot + y_pred, label='prediction')
    plt.plot(X_plot + y_plot, label='actual')
    plt.legend()
    plt.show()
    return (y_pred, y_plot)
    
y_pred, y_true = viz_prediction(X_test, y_test, 0)
print("MAPE on sample", score_mape(y_pred, y_true, as_days=True))

[  0.          0.          0.          0.          1.          1.
   1.          1.          0.          0.         -1.          1.
   1.          0.         -1.         -1.         -1.        537.3800436
 536.0033597 536.8494442 537.6804942 537.7956292 537.7336273 537.7513407
 537.7779129 537.7513407 537.7867709 537.7247711 537.7779129 534.6628993
 535.1915183 536.4910633 536.7836657 536.7038334 536.6062928 536.7304415
 536.7304415 536.3936    535.2383866 536.0177783 536.204105  535.7784054
 534.9201974 535.1853287 535.3799255 535.3091467 535.2737643 535.5303927
 535.5303927 535.3533813 534.4612581 532.8312879 527.5495844 526.0261671
 525.8699835 524.3563526 524.3217644 510.1520253 502.110673  503.4871717
 492.3744549 493.0886109 491.9581519 486.9696199 484.496124  484.2302353
 481.7342433 480.5546974 482.8194892 482.3601579 481.7620046 481.4983569
 481.544862  481.1575937 483.3330648 483.4343174 483.0373388 487.0403917
 493.7756681 490.724142  489.7762644 489.8708874 489.2564909 489.

ValueError: operands could not be broadcast together with shapes (117,) (167,) (117,) 

## Evaluation

In [207]:
random_state = 21
# Eval on a full batch:
def eval_batch(random_state, batch_size):
    X_test, y_test = fetch_sample_boosted(df_test, batch_size, input_seq_len, output_seq_len, random_state=random_state)
    feed_dict = {encoder_inputs[t]: X_test[t] for t in range(len(encoder_inputs))}
    feed_dict.update({decoder_targets[t]: y_test[t] for t in range(len(decoder_targets))})
    res = sess.run([reshaped_outputs], feed_dict=feed_dict)[0]
    res = np.array(res)
    # shape: (output_seq_len, batch_size, input_dim)
    res.transpose((1,0,2))
    MAPE = 0
    for b in range(batch_size):
        X_plot = list(scaler.inverse_transform(X_test[:, b, :].flatten()))
        y_true = list(scaler.inverse_transform(y_test[:, b, :].flatten()))
        y_pred = list(scaler.inverse_transform(res[:, b].flatten()))
        MAPE += score_mape(y_pred, y_true, as_days=True)
    return MAPE / batch_size

In [208]:
print("MAPE on 15 batch", eval_batch(random_state, 50))

ValueError: operands could not be broadcast together with shapes (117,) (167,) (117,) 