In [126]:
%matplotlib inline

import warnings

import pandas as pd
import numpy as np

import matplotlib.pylab as plt
from matplotlib.pylab import rcParams

from core.database import *


# set pre-defined params
rcParams['figure.figsize'] = 15, 6
window = 15

# define db
db = Database(name='bdf')

# pull gold and silver
gold = db.get_commodity('gold')
silver = db.get_commodity('silver')

gold.set_index('date', inplace=True)
silver.set_index('date', inplace=True)


In [128]:
"""
    I’ve removed some of the previous columns (open price, daily highs and lows) and reformulated some new ones. 
    close_off_high represents the gap between the closing price and price high for that day, 
    where values of -1 and 1 mean the closing price was equal to the daily low or daily high, respectively. 
    The volatility columns are simply the difference between high and low price divided by the opening price. 
    You may also notice that model_data is arranged in order of earliest to latest. 
    We don’t actually need the date column anymore, as that information won’t be fed into the model.
"""

"""
    Data preparation:
        - remove open, high, low
"""


columns = ['price', 'change', 'volatility', 'vol']

gold['volatility'] = (gold['high'] - gold['low'])/gold['open']
df_g = gold.loc[:, columns]


df_g.head()


Unnamed: 0_level_0,price,change,volatility,vol
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018-02-22,1325.7,-0.0024,0.003236,0.0
2018-02-22,1325.7,-0.0024,0.003236,0.0
2018-02-21,1328.9,-0.0017,0.010359,0.0
2018-02-20,1331.2,-0.0123,0.017331,359380.0
2018-02-19,1347.8,-0.002,0.004813,0.0


In [129]:
"""
    Data preparation:
        - remove open, high, low
"""


columns = ['price', 'change', 'volatility', 'vol']

silver['volatility'] = (silver['high'] - silver['low'])/silver['open']
df_s = silver.loc[:, columns]


df_s.head()


Unnamed: 0_level_0,price,change,volatility,vol
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018-02-22,16.425,-0.0042,0.005759,0.0
2018-02-21,16.495,0.0043,0.024041,0.0
2018-02-20,16.425,-0.0111,0.015051,0.0
2018-02-19,16.61,0.0003,0.007229,0.0
2018-02-18,16.605,0.0003,0.003012,0.0


In [130]:
"""
    Data preparation:
        - remove open, high, low
"""

df = df_g.join(df_s, how='inner', lsuffix='_g', rsuffix='_s')
df.drop_duplicates(inplace=True)
df.head()


Unnamed: 0_level_0,price_g,change_g,volatility_g,vol_g,price_s,change_s,volatility_s,vol_s
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2018-01-22,1336.9,0.0029,0.005525,104320.0,17.015,-0.0038,0.008785,0.0
2018-01-23,1341.7,0.0036,0.008069,70400.0,17.05,0.0021,0.02057,0.0
2018-01-24,1361.4,0.0147,0.016862,159340.0,17.515,0.0273,0.037537,0.0
2018-01-25,1368.0,0.0048,0.017832,142580.0,17.305,-0.012,0.03282,0.0
2018-01-26,1357.2,-0.0079,0.009167,142270.0,17.385,0.0046,0.01358,0.0


In [131]:
"""
    Feature transformation:
        - scaler
"""

from sklearn.preprocessing import MinMaxScaler


target = 'price_g'

to_scale = [c for c in df.columns if c not in [target]]

scaler = MinMaxScaler()

df[to_scale] = scaler.fit_transform(df[to_scale])

df.head()


Unnamed: 0_level_0,price_g,change_g,volatility_g,vol_g,price_s,change_s,volatility_s,vol_s
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2018-01-22,1336.9,0.460606,0.127436,0.220387,0.585062,0.511774,0.151818,0.0
2018-01-23,1341.7,0.481818,0.219439,0.148727,0.614108,0.604396,0.461761,0.0
2018-01-24,1361.4,0.818182,0.537373,0.336622,1.0,1.0,0.907968,0.0
2018-01-25,1368.0,0.518182,0.572445,0.301215,0.825726,0.383046,0.783914,0.0
2018-01-26,1357.2,0.133333,0.259123,0.30056,0.892116,0.643642,0.277925,0.0


In [132]:

days_to_forecast = 5

train_index = df.index[:-days_to_forecast]
test_index = list(set(df.index) - set(train_index))

X_train = df.loc[train_index, :]
X_test = df.loc[test_index, :]


In [133]:
import tensorflow as tf


batch_size = 3
total_steps = 1000
target = 'price_g'

def input_evaluation_set(df, label):
    features = dict()
    for col in [c for c in df.columns if c not in [label]]:
        features[col] = np.array(df[col])
    labels = np.array(df[label])
    
    return features, labels

train_x, train_y = input_evaluation_set(X_train, target)


In [134]:
def train_input_fn(features, labels, batch_size):
    """An input function for training"""
    # Convert the inputs to a Dataset.
    dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))

    # Shuffle, repeat, and batch the examples.
    dataset = dataset.shuffle(1000).repeat().batch(batch_size)

    # Build the Iterator, and return the read end of the pipeline.
    return dataset.make_one_shot_iterator().get_next()


In [135]:
def eval_input_fn(features, labels, batch_size):
    """An input function for evaluation or prediction"""
    features=dict(features)
    if labels is None:
        # No labels, use only features.
        inputs = features
    else:
        inputs = (features, labels)

    # Convert the inputs to a Dataset.
    dataset = tf.data.Dataset.from_tensor_slices(inputs)

    # Batch the examples
    assert batch_size is not None, "batch_size must not be None"
    dataset = dataset.batch(batch_size)

    # Return the dataset.
    return dataset


In [136]:
# Feature columns describe how to use the input.
my_feature_columns = []
for key in train_x.keys():
    my_feature_columns.append(tf.feature_column.numeric_column(key=key))


In [137]:
my_feature_columns


[_NumericColumn(key='change_g', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _NumericColumn(key='volatility_g', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _NumericColumn(key='vol_g', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _NumericColumn(key='price_s', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _NumericColumn(key='change_s', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _NumericColumn(key='volatility_s', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _NumericColumn(key='vol_s', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None)]

In [158]:
# Build 2 hidden layer DNN with 64, 32 units respectively.

regressor = tf.estimator.DNNRegressor(
    feature_columns=my_feature_columns,
    
    # two hidden layers with 64 and 32 nodes.
    hidden_units=[64, 64])


INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmpgdf6lft7', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fcb9a8e1198>, '_task_type': 'worker', '_task_id': 0, '_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [159]:
# Train the Model.

regressor.train(
    input_fn=lambda: train_input_fn(train_x, train_y, batch_size),
    steps=total_steps
)


INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/tmpgdf6lft7/model.ckpt.
INFO:tensorflow:loss = 5357594.5, step = 1
INFO:tensorflow:global_step/sec: 879.755
INFO:tensorflow:loss = 94425.16, step = 101 (0.114 sec)
INFO:tensorflow:global_step/sec: 1437.66
INFO:tensorflow:loss = 371760.2, step = 201 (0.069 sec)
INFO:tensorflow:global_step/sec: 1353.34
INFO:tensorflow:loss = 49818.836, step = 301 (0.074 sec)
INFO:tensorflow:global_step/sec: 1203.61
INFO:tensorflow:loss = 55665.336, step = 401 (0.083 sec)
INFO:tensorflow:global_step/sec: 1379.61
INFO:tensorflow:loss = 20484.9, step = 501 (0.072 sec)
INFO:tensorflow:global_step/sec: 1311.6
INFO:tensorflow:loss = 20825.85, step = 601 (0.076 sec)
INFO:tensorflow:global_step/sec: 1415.93
INFO:tensorflow:loss = 159040.22, step = 701 (0.070 sec)
INFO:tensorflow:global_step/sec: 1423.47
INFO:tensorflow:loss = 19375.78, step = 801 (0.071 sec)
INFO:tensorflow:global_step/sec: 1386.62
INFO:tensorflow:loss

<tensorflow.python.estimator.canned.dnn.DNNRegressor at 0x7fcb9a8e1358>

In [160]:
# Evaluate the model.
eval_result = regressor.evaluate(
    input_fn=lambda: eval_input_fn(train_x, train_y, batch_size))

print('Loss: {loss:0.3f}\n'.format(**eval_result))


INFO:tensorflow:Starting evaluation at 2018-02-22-17:41:24
INFO:tensorflow:Restoring parameters from /tmp/tmpgdf6lft7/model.ckpt-1000
INFO:tensorflow:Finished evaluation at 2018-02-22-17:41:24
INFO:tensorflow:Saving dict for global step 1000: average_loss = 23385.691, global_step = 1000, loss = 66816.266
Loss: 66816.266



In [161]:
test_x, expected = input_evaluation_set(X_test, target)

predictions = regressor.predict(input_fn=lambda: eval_input_fn(test_x, expected, batch_size=batch_size))


In [162]:
for pred_dict, expec in zip(predictions, expected):
    print('predicted: {0}\texpected: {1}\n'.format(pred_dict['predictions'][0], expec))
    

INFO:tensorflow:Restoring parameters from /tmp/tmpgdf6lft7/model.ckpt-1000
predicted: 940.4830932617188	expected: 1347.8

predicted: 897.0011596679688	expected: 1350.5

predicted: 1167.3245849609375	expected: 1331.2

predicted: 868.23486328125	expected: 1325.7

predicted: 1062.9779052734375	expected: 1328.9

