In [244]:
import pandas as pd
import keras
import numpy as np
from itertools import islice
from sklearn.model_selection import train_test_split
from stldecompose import decompose
from matplotlib import pyplot
from keras.models import model_from_json
#https://stackoverflow.com/questions/48356464/how-to-model-convolutional-recurrent-network-crnn-in-keras
#https://machinelearningmastery.com/timedistributed-layer-for-long-short-term-memory-networks-in-python/
#https://stackoverflow.com/questions/43034960/many-to-one-and-many-to-many-lstm-examples-in-keras
#https://github.com/keras-team/keras/issues/6063
#https://stackoverflow.com/questions/34357617/append-2d-array-to-3d-array-extending-third-dimension

In [245]:
def rolling_window(a, window, step_size):
    shape = a.shape[:-1] + (a.shape[-1] - window +2  - step_size, window)
    strides = a.strides + (a.strides[-1] * step_size,)
    return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)

In [246]:
def smape(A, F):
    return 100/len(A) * np.sum(2 * np.abs(F - A) / (np.abs(A) + np.abs(F)))

In [247]:
in_win = 15
out_win = 3

### CMC Data Processing

In [248]:
# data_bt = pd.read_csv("cmc_btc.csv")
# data_bt['Date'] = pd.to_datetime(data_bt['Date'])
# data_bt = data_bt.sort_values(by=['Date']).reset_index(drop=True)
# data_bt.head()

In [249]:
# date_col = pd.to_datetime(data_bt["Date"])
# data_open = data_bt["Open*"]
# data_high = data_bt["High"]
# data_low = data_bt["Low"]
# data_close = data_bt["Close**"]
# data_vol = data_bt["Volume"]
# data_mark = data_bt["Market Cap"]

### BITFINEX DATA PROCESSING

In [250]:
data_bt = pd.read_csv("BITFINEX_SPOT_BTC_USD/1DAY_2013-03-31&2018-09-20.csv")
data_bt['time_close'] = pd.to_datetime(data_bt['time_close'])
data_bt = data_bt.sort_values(by=['time_close']).reset_index(drop=True)
data_bt.head()

Unnamed: 0,price_open,price_high,price_low,price_close,volume_traded,trades_count,time_close,time_open,time_period_start,time_period_end,market_cap
0,93.25,100.0,93.03,93.10001,390.827224,55,2013-03-31 23:36:44,2013-03-31T22:07:48.0000000Z,2013-03-31T22:00:00.000000Z,2013-04-01T00:00:00.0000000Z,
1,93.17,105.9,92.49999,102.37,4919.654127,627,2013-04-01 23:41:42,2013-04-01T00:05:39.0000000Z,2013-04-01T00:00:00.000000Z,2013-04-02T00:00:00.0000000Z,
2,102.8,118.388067,99.0,117.98999,9084.832816,1205,2013-04-02 23:54:35,2013-04-02T00:00:11.0000000Z,2013-04-02T00:00:00.000000Z,2013-04-03T00:00:00.0000000Z,
3,116.579097,146.88,101.51088,134.952969,12909.402178,2502,2013-04-03 23:59:37,2013-04-03T00:07:29.0000000Z,2013-04-03T00:00:00.000000Z,2013-04-04T00:00:00.0000000Z,
4,131.779686,143.0,119.0,132.681,6910.100414,1456,2013-04-04 23:50:09,2013-04-04T00:02:15.0000000Z,2013-04-04T00:00:00.000000Z,2013-04-05T00:00:00.0000000Z,


In [251]:
data_bt.dropna(inplace=True)
data_bt.reset_index(drop=True, inplace=True)
data_bt.isna().any()

price_open           False
price_high           False
price_low            False
price_close          False
volume_traded        False
trades_count         False
time_close           False
time_open            False
time_period_start    False
time_period_end      False
market_cap           False
dtype: bool

In [252]:
date_col = pd.to_datetime(data_bt["time_close"])
data_open = data_bt["price_open"]
data_high = data_bt["price_high"]
data_low = data_bt["price_low"]
data_close = data_bt["price_close"]
data_vol = data_bt["volume_traded"]
data_mark = data_bt["market_cap"]

### Log Transformation

In [253]:
log_open = np.log(data_open)
log_high = np.log(data_high)
log_low = np.log(data_low)
log_close = np.log(data_close)
log_vol = np.log(data_vol)
log_mark = np.log(data_mark)

### STL Decompostion

In [254]:
log_open = pd.DataFrame(log_open)
log_open = log_open.set_index(date_col)

log_high = pd.DataFrame(log_high)
log_high = log_high.set_index(date_col)

log_low = pd.DataFrame(log_low)
log_low = log_low.set_index(date_col)

log_close = pd.DataFrame(log_close)
log_close = log_close.set_index(date_col)

log_vol = pd.DataFrame(log_vol)
log_vol = log_vol.set_index(date_col)

log_mark = pd.DataFrame(log_mark)
log_mark = log_mark.set_index(date_col)

In [255]:
stl_open = decompose(log_open)
stl_high = decompose(log_high)
stl_low = decompose(log_low)
stl_close = decompose(log_close)
stl_vol = decompose(log_vol)
stl_mark = decompose(log_mark)

### Deasonal TS component

In [256]:
deseason_open  = (stl_open.resid + stl_open.trend).iloc[:,0]
deseason_high  = (stl_high.resid + stl_high.trend).iloc[:,0]
deseason_low   = (stl_low.resid + stl_low.trend).iloc[:,0]
deseason_close = (stl_close.resid + stl_close.trend).iloc[:,0]
deseason_vol   = (stl_vol.resid + stl_vol.trend).iloc[:,0]
deseason_mark  = (stl_mark.resid + stl_mark.trend).iloc[:,0]

### Input Normalized Window

In [257]:
date_col_win_in = rolling_window(date_col[0:(len(data_bt)-out_win)], in_win, 1)
date_col_win_in_exp = np.expand_dims(date_col_win_in, axis=2)

data_open_win_in = rolling_window(deseason_open[0:(len(deseason_open)-out_win)], in_win, 1)
data_open_win_in = pd.DataFrame(data_open_win_in) 
norm_open_win_in = data_open_win_in.subtract(data_open_win_in.iloc[:,in_win-1], axis='index')
norm_open_win_in_exp = np.expand_dims(norm_open_win_in.values, axis=2)

data_high_win_in = rolling_window(deseason_high[0:(len(deseason_high)-out_win)], in_win, 1)
data_high_win_in = pd.DataFrame(data_high_win_in) 
norm_high_win_in = data_high_win_in.subtract(data_high_win_in.iloc[:,in_win-1], axis='index')
norm_high_win_in_exp = np.expand_dims(norm_high_win_in.values, axis=2)

data_low_win_in = rolling_window(deseason_low[0:(len(deseason_low)-out_win)], in_win, 1)
data_low_win_in = pd.DataFrame(data_low_win_in) 
norm_low_win_in = data_low_win_in.subtract(data_low_win_in.iloc[:,in_win-1], axis='index')
norm_low_win_in_exp = np.expand_dims(norm_low_win_in.values, axis=2)

data_close_win_in = rolling_window(deseason_close[0:(len(deseason_close)-out_win)], in_win, 1)
data_close_win_in = pd.DataFrame(data_close_win_in) 
norm_close_win_in = data_close_win_in.subtract(data_close_win_in.iloc[:,in_win-1], axis='index')
norm_close_win_in_exp = np.expand_dims(norm_close_win_in.values, axis=2)

data_vol_win_in = rolling_window(deseason_vol[0:(len(deseason_vol)-out_win)], in_win, 1)
data_vol_win_in = pd.DataFrame(data_vol_win_in) 
norm_vol_win_in = data_vol_win_in.subtract(data_vol_win_in.iloc[:,in_win-1], axis='index')
norm_vol_win_in_exp = np.expand_dims(norm_vol_win_in.values, axis=2)

data_mark_win_in = rolling_window(deseason_mark[0:(len(deseason_mark)-out_win)], in_win, 1)
data_mark_win_in = pd.DataFrame(data_mark_win_in) 
norm_mark_win_in = data_mark_win_in.subtract(data_mark_win_in.iloc[:,in_win-1], axis='index')
norm_mark_win_in_exp = np.expand_dims(norm_mark_win_in.values, axis=2)

  app.launch_new_instance()


### Orignal Signal

In [258]:
date_col_win_all = rolling_window(date_col[0:(len(data_bt)-out_win)], in_win, 1)
date_col_win_all = pd.DataFrame(date_col_win_all)

data_open_win_all = rolling_window(data_open[0:(len(data_open)-out_win)], in_win, 1)
data_open_win_all = pd.DataFrame(data_open_win_all) 

data_high_win_all = rolling_window(data_high[0:(len(data_high)-out_win)], in_win, 1)
data_high_win_all = pd.DataFrame(data_high_win_all) 


data_low_win_all = rolling_window(data_low[0:(len(data_low)-out_win)], in_win, 1)
data_low_win_all = pd.DataFrame(data_low_win_all) 


data_close_win_all = rolling_window(data_close[0:(len(data_close)-out_win)], in_win, 1)
data_close_win_all = pd.DataFrame(data_close_win_all) 


data_vol_win_all = rolling_window(data_vol[0:(len(data_vol)-out_win)], in_win, 1)
data_vol_win_all = pd.DataFrame(data_vol_win_all) 


data_mark_win_all = rolling_window(data_mark[0:(len(data_mark)-out_win)], in_win, 1)
data_mark_win_all = pd.DataFrame(data_mark_win_all) 

  app.launch_new_instance()


### Labels :-   "Up : 1"   &   "Down : 0"

In [259]:
true_labels = []

for i in range(1,len(data_close_win_all)):
    if (data_close_win_all.iloc[i,][in_win-1] > data_close_win_all.iloc[i-1,][in_win-1]):
        true_labels.append(1)
    else:
        true_labels.append(0)
        


### Stacking Data for Training in High Dimension

In [260]:
x_all = np.dstack((norm_open_win_in_exp, norm_high_win_in_exp, norm_low_win_in_exp
                   ,norm_close_win_in_exp, norm_vol_win_in_exp, norm_mark_win_in_exp))
x_all = x_all[:len(x_all)-1,:,:]
x_all.shape

(1942, 15, 6)

In [261]:
y_all = pd.DataFrame(true_labels)

In [262]:
y_all.head()

Unnamed: 0,0
0,1
1,0
2,1
3,1
4,1


### Train Validation Test Split

In [263]:
#X_train, X_test, y_train,  = train_test_split(x_all, y_all, test_size=0.10,shuffle=False)

msk = np.random.rand(len(x_all)) < 0.81

X_train = x_all[msk]
X_test = x_all[~msk]

Y_train = y_all[msk]
Y_test = y_all[~msk]

print(X_test.shape, X_train.shape,len(Y_train),len(Y_test))

(352, 15, 6) (1590, 15, 6) 1590 352


### Building Deep Learning Architecture using Keras

In [264]:
from keras import Sequential
from keras.layers import LSTM, RepeatVector, TimeDistributed, Dense, Activation,Permute,Conv1D,GaussianNoise,Dropout,regularizers,Conv2D,Reshape
from keras.optimizers import SGD, nadam,adam
from keras.callbacks import ReduceLROnPlateau,EarlyStopping

In [265]:
batch_size = 32
model = Sequential() 
model.add(LSTM(256, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences = True))
model.add(LSTM(128))
model.add(Dropout(0.2))
# Add fully connected layer with a ReLU activation function

# Add fully connected layer with a ReLU activation function
model.add(Dense(units=64, activation='relu'))

# Add fully connected layer with a sigmoid activation function

model.add(Dense(units=1, activation='sigmoid'))
nadam = nadam(lr = 0.02)
model.compile(loss='binary_crossentropy', optimizer="nadam", metrics=['accuracy'])  

In [266]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_34 (LSTM)               (None, 15, 256)           269312    
_________________________________________________________________
lstm_35 (LSTM)               (None, 128)               197120    
_________________________________________________________________
dropout_15 (Dropout)         (None, 128)               0         
_________________________________________________________________
dense_64 (Dense)             (None, 64)                8256      
_________________________________________________________________
dense_65 (Dense)             (None, 1)                 65        
Total params: 474,753
Trainable params: 474,753
Non-trainable params: 0
_________________________________________________________________


### Training Model

In [267]:
reduce_lr = ReduceLROnPlateau(monitor='val_loss',patience=3, min_lr=0.00001,factor=0.1)
# for i in range(40):
#     model.fit(X_train, Y_train,
#               epochs=1,
#               batch_size=batch_size,
#               verbose=2,
#               validation_data = (X_test, Y_test))
#     model.reset_states()

#early_stopping = EarlyStopping(monitor='val_loss', patience=5)
model.fit(X_train, Y_train,
          epochs=50,
          batch_size=batch_size,
          verbose=2,
          callbacks=[reduce_lr],
          validation_data = (X_test, Y_test))

# Save the weights
model.save_weights('model_weights.h5')

# Save the model architecture
with open('model_architecture.json', 'w') as f:
    f.write(model.to_json())


Train on 1590 samples, validate on 352 samples
Epoch 1/50
 - 11s - loss: 0.6960 - acc: 0.5157 - val_loss: 0.6924 - val_acc: 0.5227
Epoch 2/50
 - 6s - loss: 0.6923 - acc: 0.5403 - val_loss: 0.6929 - val_acc: 0.5227
Epoch 3/50
 - 7s - loss: 0.6932 - acc: 0.5264 - val_loss: 0.6946 - val_acc: 0.5227
Epoch 4/50
 - 7s - loss: 0.6906 - acc: 0.5371 - val_loss: 0.6927 - val_acc: 0.5227
Epoch 5/50
 - 7s - loss: 0.6894 - acc: 0.5377 - val_loss: 0.6928 - val_acc: 0.5227
Epoch 6/50
 - 7s - loss: 0.6887 - acc: 0.5384 - val_loss: 0.6926 - val_acc: 0.5227
Epoch 7/50
 - 7s - loss: 0.6886 - acc: 0.5428 - val_loss: 0.6921 - val_acc: 0.5256
Epoch 8/50
 - 7s - loss: 0.6877 - acc: 0.5403 - val_loss: 0.6923 - val_acc: 0.5312
Epoch 9/50
 - 7s - loss: 0.6876 - acc: 0.5447 - val_loss: 0.6918 - val_acc: 0.5256
Epoch 10/50
 - 7s - loss: 0.6872 - acc: 0.5503 - val_loss: 0.6924 - val_acc: 0.5256
Epoch 11/50
 - 7s - loss: 0.6869 - acc: 0.5528 - val_loss: 0.6916 - val_acc: 0.5284
Epoch 12/50
 - 7s - loss: 0.6855 - ac

In [268]:
# Model reconstruction from JSON file
with open('model_architecture.json', 'r') as f:
    model = model_from_json(f.read())

# Load weights into the new model
model.load_weights('model_weights.h5')

### Carrying out Prediction on test data set

In [269]:
y_pred = []
for i in range(0,len(X_test)):
    pred = model.predict(X_test[i:(i+1),:,:])
    y_pred.append(pred[0])

In [270]:
y_pred = pd.DataFrame(y_pred).values

### Renormalizing and seasonalizing the predictions and true labels

In [271]:
season_high  = pd.DataFrame(rolling_window(stl_high.seasonal.iloc[:,0][in_win:len(stl_high.seasonal)], out_win, 1))
season_low   = pd.DataFrame(rolling_window(stl_low.seasonal.iloc[:,0][in_win:len(stl_low.seasonal)], out_win, 1))
season_close = pd.DataFrame(rolling_window(stl_close.seasonal.iloc[:,0][in_win:len(stl_close.seasonal)], out_win, 1))

  app.launch_new_instance()


In [272]:
season_high_pr  = pd.DataFrame(rolling_window(stl_high.seasonal.iloc[:,0][0:(len(stl_high.seasonal)-out_win)], in_win, 1))
season_high_pr.drop(season_high_pr.columns[0:(in_win-out_win)], axis=1, inplace=True)
season_high_pr.columns = np.arange(len(season_high_pr.columns))
season_low_pr   = pd.DataFrame(rolling_window(stl_low.seasonal.iloc[:,0][0:(len(stl_low.seasonal)-out_win)], in_win, 1))
season_low_pr.drop(season_low_pr.columns[0:(in_win-out_win)], axis=1, inplace=True)
season_low_pr.columns = np.arange(len(season_low_pr.columns))
season_close_pr = pd.DataFrame(rolling_window(stl_close.seasonal.iloc[:,0][0:(len(stl_close.seasonal)-out_win)], in_win, 1))
season_close_pr.drop(season_close_pr.columns[0:(in_win-out_win)], axis=1, inplace=True)
season_close_pr.columns = np.arange(len(season_close_pr.columns))

  app.launch_new_instance()


In [273]:
X_train.shape[0]

1590

#### Evaluating Predictions

In [279]:
from sklearn.metrics import precision_recall_fscore_support, accuracy_score,confusion_matrix

In [275]:
y_pred[y_pred < 0.5] = 0
y_pred[y_pred >= 0.5] = 1

In [276]:
precision_recall_fscore_support(Y_test,y_pred)

(array([0.51315789, 0.5326087 ]),
 array([0.23214286, 0.79891304]),
 array([0.31967213, 0.63913043]),
 array([168, 184], dtype=int64))

In [277]:
accuracy_score(Y_test,y_pred)

0.5284090909090909

In [282]:
tn, fp, fn, tp = confusion_matrix(Y_test,y_pred).ravel()

In [283]:
(tn, fp, fn, tp)

(39, 129, 37, 147)