**Dependencies**

In [2]:
import pandas as pd 
import numpy as np
import sys

from tensorflow import keras
from sklearn.model_selection import GridSearchCV
from sklearn import svm
from sklearn.metrics import confusion_matrix

sys.path.insert(0, '../functions/')

from train_test_models import *
from preprocess_data import preprocess_data

**Data Preprocessing**

In [3]:
# Dataset
timeframe = "1h"
data = pd.read_csv(f"../data/BTC_EUR-{timeframe}.csv")
data['Timestamp'] = pd.to_datetime(data['Timestamp'], unit='ms')
data.set_index(keys='Timestamp', inplace=True)

# Parameters
shift_days = 10
window = 24
value_to_predict = 'Close'

# Preprocess Data
train_x, val_x, test_x, train_y, val_y, test_y = preprocess_data(data, k=shift_days, column=value_to_predict, window=window)

print('Train shape: {}\nTest shape: {}\nBalanced Train: {}'
    .format(train_x.shape, test_x.shape, train_y.sum() == train_x.shape[0]/2))

131it [00:12, 10.76it/s]


Train shape: (12578, 24, 270)
Test shape: (3393, 24, 270)
Balanced Train: True


**Baseline**

In [12]:
# Baseline classifier
base_x = train_x.reshape(train_x.shape[0], -1)
classifier = svm.SVC()
classifier.fit(base_x, train_y[:,0])

# Predictions
y_hat = classifier.predict(test_x.reshape(test_x.shape[0], -1))

buys = test_y.sum()
holds = len(test_y)-test_y.sum()

cm = confusion_matrix(test_y, y_hat)
FP = cm.sum(axis=0) - np.diag(cm)  
FN = cm.sum(axis=1) - np.diag(cm)
TP = np.diag(cm)
TN = cm.sum() - (FP + FN + TP)

# Print stats
print('Buys: {:d}, Holds: {:d}'.format(buys, holds))
print("True Buys: {:d}, False Buys: {:d}, True Holds: {:d}, False Holds: {:d}".format(TP[0],FP[0],TN[0],FN[0]))
print("Accuracy: {:.4f}".format(np.count_nonzero(((y_hat == test_y[:,0])))/len(test_y)))

Buys: 1703, Holds: 1690
True Buys: 752, False Buys: 617, True Holds: 1086, False Holds: 938
Accuracy: 0.5417


**Network Training**

In [4]:
# Train Networks
num_nets = 3

model = list()
for i in range(num_nets):
    model.append(train_model(net=i, 
                            train_data=(train_x, train_y), 
                            batch_size=128, 
                            epochs=20, 
                            loss='binary_crossentropy',
                            verbose=0))

**Network Testing**

In [5]:
for i in range(num_nets):
    test_model(model[i], test_x, test_y, only_accuracy=False)

Buys: 1703, Holds: 1690
True Buys: 851, False Buys: 786, True Holds: 917, False Holds: 839
Accuracy: 0.5211
Buys: 1703, Holds: 1690
True Buys: 833, False Buys: 734, True Holds: 969, False Holds: 857
Accuracy: 0.5311
Buys: 1703, Holds: 1690
True Buys: 813, False Buys: 773, True Holds: 930, False Holds: 877
Accuracy: 0.5137


**Feature Extractors**

In [6]:
features = list()
features_x = list()
for i in range(num_nets):
    features.append(keras.Model(
        inputs=model[i].inputs, 
        outputs=model[i].layers[-4].output
    ))
    features_x.append(features[i](train_x).numpy())

features_conc = np.concatenate((features_x), axis=1)
features_conc.shape

(12578, 768)

**SVM Training**

In [7]:
param_grid = {'C': [0.01, 0.1, 1, 10, 100], 'kernel': ['rbf', 'linear','sigmoid']}
classifier = GridSearchCV(svm.SVC(class_weight='balanced'), param_grid)
classifier = classifier.fit(features_conc, train_y[:,0])
print("Best estimator found by grid search:")
print(classifier.best_estimator_)

Best estimator found by grid search:
SVC(C=0.1, kernel='linear')


In [11]:
y_hat = classifier.predict(features_conc)

buys = train_y.sum()
holds = len(train_y)-train_y.sum()

cm = confusion_matrix(train_y, y_hat)
FP = cm.sum(axis=0) - np.diag(cm)  
FN = cm.sum(axis=1) - np.diag(cm)
TP = np.diag(cm)
TN = cm.sum() - (FP + FN + TP)


print('Buys: {:d}, Holds: {:d}'.format(buys, holds))
print("True Buys: {:d}, False Buys: {:d}, True Holds: {:d}, False Holds: {:d}".format(TP[0],FP[0],TN[0],FN[0]))
print("Accuracy: {:.4f}".format(np.count_nonzero(((y_hat == train_y[:,0])))/len(train_y)))

Buys: 6289, Holds: 6289
True Buys: 6070, False Buys: 223, True Holds: 6066, False Holds: 219
Accuracy: 0.9649


**SVM Testing**

In [8]:
test_features = list()

for i in range(num_nets):
    test_features.append(features[i](test_x).numpy())

test_con = np.concatenate((test_features), axis =  1)

y_hat = classifier.predict(test_con)

buys = test_y.sum()
holds = len(test_y)-test_y.sum()

cm = confusion_matrix(test_y, y_hat)
FP = cm.sum(axis=0) - np.diag(cm)  
FN = cm.sum(axis=1) - np.diag(cm)
TP = np.diag(cm)
TN = cm.sum() - (FP + FN + TP)


print('Buys: {:d}, Holds: {:d}'.format(buys, holds))
print("True Buys: {:d}, False Buys: {:d}, True Holds: {:d}, False Holds: {:d}".format(TP[0],FP[0],TN[0],FN[0]))
print("Accuracy: {:.4f}".format(np.count_nonzero(((y_hat == test_y[:,0])))/len(test_y)))

Buys: 1703, Holds: 1690
True Buys: 795, False Buys: 682, True Holds: 1021, False Holds: 895
Accuracy: 0.5352
