In [221]:
# Adi Pradhan
# ZTDL Bootcamp Sep 2018
# Predicting the Price of BTC using an LSTM 
%reset -f
#libraries

import os
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
import warnings
from keras.utils import to_categorical
warnings.filterwarnings('ignore')

In [222]:
# read the csvs
df1 = pd.read_csv('data/data.part1.csv',index_col=0)
df2 = pd.read_csv('data/data.part2.csv',index_col=0)
df = pd.concat([df1,df2])

In [223]:
# create a Pandas Series with index as the date and data as the mean price in that day
df['timestamp_hour'] = pd.to_datetime(df['Timestamp'],unit='s')
df['timestamp_hour'] = pd.to_datetime(df['timestamp_hour'])
df = df.set_index(['timestamp_hour'])
df_hourly = df.resample('H').mean()
df_hourly = df_hourly[['Weighted_Price']]
df_hourly = df_hourly.dropna()

In [224]:
df_hourly['Next_Weighted_Price'] = df_hourly['Weighted_Price'].shift(-1)
df_hourly = df_hourly[1:]
df_hourly = df_hourly[:-1]


In [225]:
df_hourly.head()

Unnamed: 0_level_0,Weighted_Price,Next_Weighted_Price
timestamp_hour,Unnamed: 1_level_1,Unnamed: 2_level_1
2014-12-01 06:00:00,342.0,370.0
2014-12-01 07:00:00,370.0,370.0
2014-12-01 08:00:00,370.0,370.0
2014-12-01 09:00:00,370.0,370.0
2014-12-01 10:00:00,370.0,370.0


In [226]:
df_hourly.tail()

Unnamed: 0_level_0,Weighted_Price,Next_Weighted_Price
timestamp_hour,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-06-26 19:00:00,6172.690783,6183.871078
2018-06-26 20:00:00,6183.871078,6187.186309
2018-06-26 21:00:00,6187.186309,6155.090729
2018-06-26 22:00:00,6155.090729,6113.676832
2018-06-26 23:00:00,6113.676832,6073.96996


In [227]:
df_hourly['y'] = df_hourly.apply(lambda row: 1 if row['Next_Weighted_Price'] > row['Weighted_Price'] else 0,axis=1)

In [228]:
# most interesting man (coin flip prediction)
df_hourly['coin_flip'] = np.random.randint(0,2,size=df_hourly.shape[0])

# yoda (always HODL - optimist)
df_hourly['optimistic_prediction'] = 1

df_hourly.head()

Unnamed: 0_level_0,Weighted_Price,Next_Weighted_Price,y,coin_flip,optimistic_prediction
timestamp_hour,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2014-12-01 06:00:00,342.0,370.0,1,1,1
2014-12-01 07:00:00,370.0,370.0,0,0,1
2014-12-01 08:00:00,370.0,370.0,0,1,1
2014-12-01 09:00:00,370.0,370.0,0,0,1
2014-12-01 10:00:00,370.0,370.0,0,0,1


In [229]:
df_hourly['coin_flip'].value_counts()/len(df_hourly)

0    0.500808
1    0.499192
Name: coin_flip, dtype: float64

Therefore the coin flip has 50.4% accuracy

In [230]:
df_hourly['y'].value_counts()/len(df_hourly)

1    0.529827
0    0.470173
Name: y, dtype: float64

Therefore the optimistic view has 51% accuracy

In [231]:
# Get the right series for prediction 
X = df_hourly[['Weighted_Price']]
y = df_hourly['y']

In [254]:
# for train test split we will take Mar, Apr and May 2018 as the test data
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,shuffle=False,test_size=0.2)
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(24260, 1)
(6065, 1)
(24260,)
(6065,)


In [255]:
from sklearn.preprocessing import MinMaxScaler
mx = MinMaxScaler()
X_train_rs = X_train.values.reshape(-1,1)
X_test_rs = X_test.values.reshape(-1,1)
X_train_sc = mx.fit_transform(X_train_rs)
X_test_sc = mx.transform(X_test_rs)

In [256]:
X_train_sc_df = pd.DataFrame(X_train_sc,columns=['Price'])
X_test_sc_df = pd.DataFrame(X_test_sc,columns=['Price'])
X_train_sc_df.describe()
print(X_train_sc_df.shape[1:])

(1,)


In [257]:
window_size = 30 # days
for s in np.arange(1,window_size+1):
    X_train_sc_df['shift_{}'.format(s)] = X_train_sc_df['Price'].shift(s)
    X_test_sc_df['shift_{}'.format(s)] = X_test_sc_df['Price'].shift(s)

In [258]:
X_train_sc_df.head(n=30)

Unnamed: 0,Price,shift_1,shift_2,shift_3,shift_4,shift_5,shift_6,shift_7,shift_8,shift_9,...,shift_21,shift_22,shift_23,shift_24,shift_25,shift_26,shift_27,shift_28,shift_29,shift_30
0,0.0346,,,,,,,,,,...,,,,,,,,,,
1,0.039518,0.0346,,,,,,,,,...,,,,,,,,,,
2,0.039518,0.039518,0.0346,,,,,,,,...,,,,,,,,,,
3,0.039518,0.039518,0.039518,0.0346,,,,,,,...,,,,,,,,,,
4,0.039518,0.039518,0.039518,0.039518,0.0346,,,,,,...,,,,,,,,,,
5,0.039518,0.039518,0.039518,0.039518,0.039518,0.0346,,,,,...,,,,,,,,,,
6,0.039518,0.039518,0.039518,0.039518,0.039518,0.039518,0.0346,,,,...,,,,,,,,,,
7,0.039518,0.039518,0.039518,0.039518,0.039518,0.039518,0.039518,0.0346,,,...,,,,,,,,,,
8,0.039518,0.039518,0.039518,0.039518,0.039518,0.039518,0.039518,0.039518,0.0346,,...,,,,,,,,,,
9,0.039518,0.039518,0.039518,0.039518,0.039518,0.039518,0.039518,0.039518,0.039518,0.0346,...,,,,,,,,,,


In [260]:
X_train_final = X_train_sc_df.values.reshape(-1, 31, 1)
#X_train_final = X_train_final[30:]
#y_train = y_train[30:]
y_train = to_categorical(y_train.values)
y_test = to_categorical(y_test.values)
X_test_final = X_test_sc_df.values.reshape(-1,31,1)

In [261]:
X_train.shape

(24260, 1)

In [286]:
from keras.layers import Dense, CuDNNLSTM, Dropout
from keras import Sequential
from keras.callbacks import EarlyStopping

model = Sequential()
model.add(CuDNNLSTM(12,input_shape=(31,1)))
model.add(Dropout(0.2))
model.add(Dense(2,activation='softmax'))
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
cu_dnnlstm_14 (CuDNNLSTM)    (None, 12)                720       
_________________________________________________________________
dropout_19 (Dropout)         (None, 12)                0         
_________________________________________________________________
dense_14 (Dense)             (None, 2)                 26        
Total params: 746
Trainable params: 746
Non-trainable params: 0
_________________________________________________________________


In [287]:
model.compile(optimizer='adam',loss='categorical_crossentropy')
early_stop = EarlyStopping(monitor='loss',patience=3,verbose=1)

In [288]:
h = model.fit(X_train_final,y_train,epochs=200,batch_size=100,callbacks=[early_stop],verbose=1)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 00005: early stopping


In [289]:
model.evaluate(X_train_final,y_train)



1.1920930376163597e-07

In [290]:

model.evaluate(X_test_final,y_test)

ValueError: Error when checking target: expected dense_14 to have 2 dimensions, but got array with shape (6065, 2, 2)

In [271]:
y_pred = model.predict(X_test_final)
y_pred.shape

(6065, 2)

In [280]:
y_pred_classes = np.argmax(y_pred,axis=1)

In [282]:
y_pred

array([[0.4689403, 0.5310597],
       [0.4689403, 0.5310597],
       [0.4689403, 0.5310597],
       ...,
       [0.4689403, 0.5310597],
       [0.4689403, 0.5310597],
       [0.4689403, 0.5310597]], dtype=float32)