In [12]:
import os
import numpy as np
import pandas as pd
import pickle
import quandl
from datetime import datetime
import tensorflow as tf;
from six.moves import cPickle as pickle;
from sklearn.preprocessing import MinMaxScaler
import plotly.offline as py
import plotly.graph_objs as go
import plotly.figure_factory as ff
import matplotlib.pyplot as plt
from pandas import read_csv
import math
from keras.models import Sequential
from keras.layers import Dense, LSTM
py.init_notebook_mode(connected=True)

In [2]:
def get_quandl_data(quandl_id):
    '''Download and cache Quandl dataseries'''
    cache_path = 'data/{}.pkl'.format(quandl_id).replace('/','-')
    try:
        f = open(cache_path, 'rb')
        df = pickle.load(f)   
        print('Loaded {} from cache'.format(quandl_id))
    except (OSError, IOError) as e:
        print('Downloading {} from Quandl'.format(quandl_id))
        df = quandl.get(quandl_id, authtoken="rGwyAH1yyw29yX8E1LQJ", returns="pandas")
        df.to_pickle(cache_path)
        print('Cached {} at {}'.format(quandl_id, cache_path))
    return df
    
def merge_dfs_on_column(dataframes, labels, col):
    '''Merge a single column of each dataframe into a new combined dataframe'''
    series_dict = {}
    for index in range(len(dataframes)):
        series_dict[labels[index]] = dataframes[index][col]
        
    return pd.DataFrame(series_dict)

In [3]:
exchanges = ['KRAKEN', 'COINBASE', 'BITFINEX']

exchange_data = {}

for exchange in exchanges:
    exchange_code = 'BCHARTS/{}USD'.format(exchange)
    btc_exchange_df = get_quandl_data(exchange_code)
    btc_exchange_df = btc_exchange_df.replace(0, np.NaN) # Wack Kraken values ˜
    exchange_data[exchange] = btc_exchange_df

print(exchange_data)

btc_usd_datasets = merge_dfs_on_column(list(exchange_data.values()), list(exchange_data.keys()), 'Weighted Price')
print(list(exchange_data['COINBASE'].columns))

Loaded BCHARTS/KRAKENUSD from cache
Loaded BCHARTS/COINBASEUSD from cache
Loaded BCHARTS/BITFINEXUSD from cache
{'KRAKEN':                   Open        High         Low       Close  Volume (BTC)  \
Date                                                                       
2014-01-07   874.67040   892.06753   810.00000   810.00000     15.622378   
2014-01-08   810.00000   899.84281   788.00000   824.98287     19.182756   
2014-01-09   825.56345   870.00000   807.42084   841.86934      8.158335   
2014-01-10   839.99000   857.34056   817.00000   857.33056      8.024510   
2014-01-11   858.20000   918.05471   857.16554   899.84105     18.748285   
2014-01-12   899.96114   900.93989   833.00001   860.00000     25.429433   
2014-01-13   847.32152   859.99999   815.00000   835.00000     25.869127   
2014-01-14   835.00000   877.29300   805.00000   831.00000     31.662881   
2014-01-15   831.00000   864.00000   828.00000   850.00364      6.707565   
2014-01-16   853.00000   865.00000   824.

In [48]:
# Merging data together to create one consistent set 
# Basically does a left join on all three data sets and gets the mean of all values
# Prevents weird discrepencies in the data 

new = exchange_data['KRAKEN'].join(exchange_data['COINBASE'], how='outer')
print(new)
# new = new.join(exchange_data['BITFINEX'], how='outer')
# new['new_open'] = new[['Open', 'Open_x', 'Open_y']].mean(axis=1)
# new['new_high'] = new[['High_x', 'High_y', 'High']].mean(axis=1)
# new['new_low'] = new[['Low_x', 'Low_y', 'Low']].mean(axis=1)
# new['new_close'] = new[['Close_x', 'Close_y', 'Close_y']].mean(axis=1)
# new['new_btc_volume'] = new[['Volume (BTC)_x', 'Volume (BTC)_y', 'Volume (BTC)']].mean(axis=1)
# new['new_currency_volume'] = new[['Volume (Currency)_x', 'Volume (Currency)_y', 'Volume (Currency)']].mean(axis=1)
# new['new_weighted_price'] = new[['Weighted Price_x', 'Weighted Price_y', 'Weighted Price']].mean(axis=1)

# df = new[['new_open', 'new_high', 'new_low', 'new_close', 'new_btc_volume', 'new_currency_volume', 'new_weighted_price']]

# print(df.head())

ValueError: columns overlap but no suffix specified: Index(['Open', 'High', 'Low', 'Close', 'Volume (BTC)', 'Volume (Currency)',
       'Weighted Price'],
      dtype='object')

In [53]:
# Merging data together to create one consistent set 
# Basically does a left join on all three data sets and gets the mean of all values
# Prevents weird discrepencies in the data 

new = pd.merge(exchange_data['KRAKEN'], exchange_data['COINBASE'], how='outer', left_index=True, right_index=True)
new = pd.merge(new, exchange_data['BITFINEX'], how='outer', left_index=True, right_index=True)
new['new_open'] = new[['Open', 'Open_x', 'Open_y']].mean(axis=1)
new['new_high'] = new[['High_x', 'High_y', 'High']].mean(axis=1)
new['new_low'] = new[['Low_x', 'Low_y', 'Low']].mean(axis=1)
new['new_close'] = new[['Close_x', 'Close_y', 'Close']].mean(axis=1)
new['new_btc_volume'] = new[['Volume (BTC)_x', 'Volume (BTC)_y', 'Volume (BTC)']].mean(axis=1)
new['new_currency_volume'] = new[['Volume (Currency)_x', 'Volume (Currency)_y', 'Volume (Currency)']].mean(axis=1)
new['new_weighted_price'] = new[['Weighted Price_x', 'Weighted Price_y', 'Weighted Price']].mean(axis=1)

df = new[['new_open', 'new_high', 'new_low', 'new_close', 'new_btc_volume', 'new_currency_volume', 'new_weighted_price']]

print(df.head())

              new_open    new_high    new_low   new_close  new_btc_volume  \
Date                                                                        
2013-03-31   93.250000  100.000000   93.03000   93.100000      390.827224   
2013-04-01   93.170000  105.900000   92.49999  102.370000     4919.654127   
2013-04-02  102.800000  118.388067   99.00000  117.989990     9084.832816   
2013-04-03  116.579097  146.880000  101.51088  134.952969    12909.402178   
2013-04-04  131.779686  143.000000  119.00000  132.681000     6910.100414   

            new_currency_volume  new_weighted_price  
Date                                                 
2013-03-31         3.708933e+04           94.899563  
2013-04-01         4.921010e+05          100.027564  
2013-04-02         9.874119e+05          108.687953  
2013-04-03         1.652092e+06          127.975861  
2013-04-04         9.094385e+05          131.610029  


In [54]:
btc_trace = go.Scatter(x=df.index, y=df['new_weighted_price'])
py.iplot([btc_trace])

In [72]:
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """
    Frame a time series as a supervised learning dataset.
    Arguments:
        data: Sequence of observations as a list or NumPy array.
        n_in: Number of lag observations as input (X).
        n_out: Number of observations as output (y).
        dropnan: Boolean whether or not to drop rows with NaN values.
    Returns:
        Pandas DataFrame of series framed for supervised learning.
    """
    n_vars = 1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    cols, names = list(), list()
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('time_%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('time_%d(t)' % (j+1)) for j in range(n_vars)]
        else:
            names += [('time_%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg

def create_rnn_set(data, look_back=1):
    df = pd.DataFrame(data)
    columns = [df.shift(i) for i in range(1, look_back + 1)]
    columns.append(df)
    df = pd.concat(columns, axis=1)
    df.fillna(0, inplace=True)
    return df

def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset) - look_back):
        a = dataset[i:(i + look_back), 0]
        dataX.append(a)
        dataY.append(dataset[i + look_back, 0])
    print(len(dataY))
    return np.array(dataX), np.array(dataY)

In [None]:
values = df['new_weighted_price'].values

In [101]:
'''
Create a model based on 'weighed_average' variable.
''' 

# Normalize values 
values = df['new_weighted_price'].values.reshape(-1,1)
values = values.astype('float32')
# scaler = MinMaxScaler(feature_range=(0, 1))
# scaled = scaler.fit_transform(values)

look_back=1
train_size = int(len(values) * 0.7)
test_size = len(values) - train_size
train, test = values[0:train_size,:], values[train_size:len(values),:]

val = series_to_supervised(train, look_back).values
trainX, trainY = val[:,0:look_back], val[:, look_back]
trainX = trainX.reshape(trainX.shape[0], look_back, trainX.shape[1])
val = series_to_supervised(test, look_back).values
testX, testY = val[:,0:look_back], val[:, look_back]
testX = testX.reshape(testX.shape[0], look_back, testX.shape[1])

neurons = 1
model = Sequential()
model.add(LSTM(neurons, batch_input_shape=(1, trainX.shape[1], trainX.shape[2]), stateful=True))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
history = model.fit(trainX, trainY, epochs=300, batch_size=1, validation_data=(testX, testY), verbose=2, shuffle=False)



Train on 1175 samples, validate on 504 samples
Epoch 1/300
8s - loss: 174050.9558 - val_loss: 5661962.0578
Epoch 2/300
6s - loss: 172737.3642 - val_loss: 5655426.2411
Epoch 3/300
6s - loss: 171425.7213 - val_loss: 5648894.8855
Epoch 4/300
6s - loss: 170120.2911 - val_loss: 5642376.3367
Epoch 5/300
6s - loss: 168822.5514 - val_loss: 5635871.5371
Epoch 6/300
6s - loss: 167532.7889 - val_loss: 5629381.1363
Epoch 7/300
6s - loss: 166251.0641 - val_loss: 5622905.4910
Epoch 8/300
7s - loss: 164977.3871 - val_loss: 5616443.9495
Epoch 9/300
8s - loss: 163711.7596 - val_loss: 5609998.1729
Epoch 10/300
8s - loss: 162454.1617 - val_loss: 5603566.4338
Epoch 11/300
7s - loss: 161204.5652 - val_loss: 5597149.8248
Epoch 12/300
7s - loss: 159962.9995 - val_loss: 5590747.9727
Epoch 13/300
7s - loss: 158729.4358 - val_loss: 5584360.7918
Epoch 14/300
7s - loss: 157503.8663 - val_loss: 5577989.0679
Epoch 15/300
7s - loss: 156286.2917 - val_loss: 5571632.4197
Epoch 16/300
7s - loss: 155076.6929 - val_loss:

7s - loss: 63704.7417 - val_loss: 4941530.7420
Epoch 136/300
7s - loss: 63321.3482 - val_loss: 4937586.9653
Epoch 137/300
7s - loss: 62942.9809 - val_loss: 4933667.7864
Epoch 138/300
6s - loss: 62569.6453 - val_loss: 4929772.1895
Epoch 139/300
7s - loss: 62201.2852 - val_loss: 4925901.4347
Epoch 140/300
7s - loss: 61837.9293 - val_loss: 4922054.9829
Epoch 141/300
7s - loss: 61479.4748 - val_loss: 4918233.8830
Epoch 142/300
7s - loss: 61125.9027 - val_loss: 4914435.9747
Epoch 143/300
7s - loss: 60777.1057 - val_loss: 4910663.5061
Epoch 144/300
8s - loss: 60433.1607 - val_loss: 4906915.0461
Epoch 145/300
7s - loss: 60093.8788 - val_loss: 4903190.5455
Epoch 146/300
7s - loss: 59759.2718 - val_loss: 4899490.2981
Epoch 147/300
7s - loss: 59429.2283 - val_loss: 4895814.7955
Epoch 148/300
6s - loss: 59103.9874 - val_loss: 4892162.9692
Epoch 149/300
6s - loss: 58783.1355 - val_loss: 4888535.5858
Epoch 150/300
6s - loss: 58466.8585 - val_loss: 4884933.2001
Epoch 151/300
6s - loss: 58155.1080 - 

7s - loss: 41699.4163 - val_loss: 4609912.4782
Epoch 271/300
6s - loss: 41658.7471 - val_loss: 4608668.2858
Epoch 272/300
7s - loss: 41618.8067 - val_loss: 4607437.5131
Epoch 273/300
6s - loss: 41579.5990 - val_loss: 4606218.0875
Epoch 274/300
7s - loss: 41541.0568 - val_loss: 4605009.9793
Epoch 275/300
7s - loss: 41503.2209 - val_loss: 4603814.5701
Epoch 276/300
7s - loss: 41466.0873 - val_loss: 4602631.1307
Epoch 277/300
7s - loss: 41429.6569 - val_loss: 4601460.3260
Epoch 278/300
7s - loss: 41393.8783 - val_loss: 4600300.6097
Epoch 279/300
7s - loss: 41358.7106 - val_loss: 4599152.2315
Epoch 280/300
7s - loss: 41324.1812 - val_loss: 4598015.9978
Epoch 281/300
6s - loss: 41290.2890 - val_loss: 4596891.1166
Epoch 282/300
6s - loss: 41257.0294 - val_loss: 4595776.5970
Epoch 283/300
6s - loss: 41224.3319 - val_loss: 4594673.1744
Epoch 284/300
7s - loss: 41192.2608 - val_loss: 4593582.1065
Epoch 285/300
8s - loss: 41160.8051 - val_loss: 4592503.1777
Epoch 286/300
7s - loss: 41129.9276 - 

In [None]:
def forecast(model, batch_size, row):
    X = row[0:-1]
    X = X.reshape(1, 1, len(X))
    yhat = model.predict(X, batch_size=batch_size)
    return yhat[0,0]

model.predict()

In [84]:
# convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-look_back-1):
        a = dataset[i:(i+look_back), 0]
        dataX.append(a)
        dataY.append(dataset[i + look_back, 0])
#         print(i, a, dataset[i + look_back, 0])

    return np.array(dataX), np.array(dataY)


np.random.seed(7)
# load the dataset
dataset = df.values
dataset = dataset.astype('float32')

# split into train and test sets
train_size = int(len(dataset) * 0.67)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]

# # reshape dataset
look_back = 3
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)
print(testX[0:10, :], testY[0:10])

# create and fit Multilayer Perceptron model
# model = Sequential()
# model.add(Dense(12, input_dim=look_back, activation='relu'))
# model.add(LSTM(8, activation='relu'))
# model.add(Dense(1))
# model.compile(loss='mean_squared_error', optimizer='adam')
# model.fit(trainX, trainY, epochs=200, batch_size=2, verbose=0)

# trainScore = model.evaluate(trainX, trainY, verbose=0)
# print('Train Score: %.2f MSE (%.2f RMSE)' % (trainScore, math.sqrt(trainScore)))
# testScore = model.evaluate(testX, testY, verbose=0)
# print('Test Score: %.2f MSE (%.2f RMSE)' % (testScore, math.sqrt(testScore)))

# trainPredict = model.predict(trainX)
# testPredict = model.predict(testX)

# trainPredictPlot = np.empty_like(dataset)
# trainPredictPlot[:, :] = np.nan
# trainPredictPlot[look_back:len(trainPredict)+look_back, :] = trainPredict

# testPredictPlot = np.empty_like(dataset)
# testPredictPlot[:, :] = np.nan
# testPredictPlot[len(trainPredict)+(look_back*2)+1:len(dataset)-1, :] = testPredict


[[ 457.41665649  449.90466309  453.48001099]
 [ 449.90466309  453.48001099  445.02398682]
 [ 453.48001099  445.02398682  451.31332397]
 [ 445.02398682  451.31332397  447.85934448]
 [ 451.31332397  447.85934448  449.00665283]
 [ 447.85934448  449.00665283  461.07998657]
 [ 449.00665283  461.07998657  460.41665649]
 [ 461.07998657  460.41665649  460.18667603]
 [ 460.41665649  460.18667603  462.36700439]
 [ 460.18667603  462.36700439  452.01300049]] [ 445.02398682  451.31332397  447.85934448  449.00665283  461.07998657
  460.41665649  460.18667603  462.36700439  452.01300049  453.37667847]


In [71]:
# plt.figure(1)
# plt.plot(dataset)
# plt.figure(2)
# plt.plot(trainPredictPlot)
# plt.plot(trainX)
# plt.figure(3)
# plt.plot(testPredictPlot)
# plt.plot(testX)
# plt.figure(4)
# plt.plot(testPredict)
# plt.show()

In [None]:
print(testX[-1][-1])
print(testX[-2][-1])
print(testX[-3][-1])
print(testX[-4][-1])
print(testX[-5][-1])

print(testPredict[450:455][::-1])