In [4]:
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier

import matplotlib.pyplot as plt

import numpy as np

from sklearn.preprocessing import StandardScaler

np.random.seed(7)

In [51]:
def validate_model(cls_name, model, X_train, y_train, X_test, y_test, period, pair, lag):
    info_format = '{3:.3f}: [period: {0}](lag: {1}): {2} | {4}'
    
    model.fit(X_train, y_train)
    score = model.score(X_test, y_test)
    print(info_format.format(period, lag, cls_name, score, pair))
    return score

def test_cls(period, pair, lag):
    df = pd.read_csv('../datasets/{}/{}.csv'.format(period, pair))
    close_lag_fields = ['close-{}'.format(n+1) for n in range(lag)]
    volume_lag_fields = ['volume-{}'.format(n+1) for n in range(lag)]
    df['datetime'] = pd.to_datetime(df['datetime'], unit='s')
    df['hour'] = df['datetime'].dt.hour
    df['dow'] = df['datetime'].dt.weekday
    df = pd.get_dummies(df, columns=['dow', 'hour'])
    dow_fields = [col for col in df if col.startswith('dow')]
    hour_fields = [col for col in df if col.startswith('hour')]
    for n in range(1, lag+1):
        df['close-' + str(n)] = df['close'] - df.shift(n)['close']
        df['volume-' + str(n)] = df['volume'] - df.shift(n)['volume']
    df['up'] = df['close'] < df.shift(-1)['close']
    df.dropna(inplace=True)
    X = df[close_lag_fields + volume_lag_fields + dow_fields + hour_fields].values
    y = df['up'].astype(int).values

    scaler = StandardScaler()

    X = scaler.fit_transform(X)

    X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0)
    
    best_score = 0.0
    
#    model = RandomForestClassifier(n_jobs=4, random_state=0, n_estimators=20)
#    score = validate_model('Random forest', model, X_train, y_train, X_test, y_test, period, pair, lag)
#    if score > best_score:
#        best_score = score
    
#    model = SGDClassifier(loss="hinge", penalty="l2")
#    score = validate_model('SGD', model, X_train, y_train, X_test, y_test, period, pair, lag)
#    if score > best_score:
#        best_score = score
    
#    model = LogisticRegression()
#    score = validate_model('Logistic regression', model, X_train, y_train, X_test, y_test, period, pair, lag)
#    if score > best_score:
#        best_score = score
    
    model = GradientBoostingClassifier(max_depth=4)
    score = validate_model('Gradient boosting', model, X_train, y_train, X_test, y_test, period, pair, lag)
    if score > best_score:
        best_score = score
    
    model = MLPClassifier()
    score = validate_model('MLP', model, X_train, y_train, X_test, y_test, period, pair, lag)
    if score > best_score:
        best_score = score
    
#    model = DecisionTreeClassifier()
#    score = validate_model('Decision tree', model, X_train, y_train, X_test, y_test, period, pair, lag)
#    if score > best_score:
#        best_score = score

#    model = KNeighborsClassifier()
#    score = validate_model('KNeighbours', model, X_train, y_train, X_test, y_test, period, pair, lag)
#    if score > best_score:
#        best_score = score

    print()
    return best_score


In [52]:
best_score = 0.0

for period in ['5min', '30min', 'day']:
    for lag in range(2, 31):
        for pair in ['BTC_ETC', 'BTC_LTC']:
            score = test_cls(period, pair, lag)
            if score > best_score:
                best_score = score
            print('Best score: {:.3f}'.format(best_score))
            print()

0.570: [period: 5min](lag: 2): Gradient boosting | BTC_ETC
0.560: [period: 5min](lag: 2): MLP | BTC_ETC

Best score: 0.570

0.573: [period: 5min](lag: 2): Gradient boosting | BTC_LTC
0.560: [period: 5min](lag: 2): MLP | BTC_LTC

Best score: 0.573

0.572: [period: 5min](lag: 3): Gradient boosting | BTC_ETC
0.556: [period: 5min](lag: 3): MLP | BTC_ETC

Best score: 0.573

0.579: [period: 5min](lag: 3): Gradient boosting | BTC_LTC
0.564: [period: 5min](lag: 3): MLP | BTC_LTC

Best score: 0.579

0.571: [period: 5min](lag: 4): Gradient boosting | BTC_ETC
0.564: [period: 5min](lag: 4): MLP | BTC_ETC

Best score: 0.579

0.581: [period: 5min](lag: 4): Gradient boosting | BTC_LTC
0.567: [period: 5min](lag: 4): MLP | BTC_LTC

Best score: 0.581

0.571: [period: 5min](lag: 5): Gradient boosting | BTC_ETC
0.558: [period: 5min](lag: 5): MLP | BTC_ETC

Best score: 0.581

0.591: [period: 5min](lag: 5): Gradient boosting | BTC_LTC
0.574: [period: 5min](lag: 5): MLP | BTC_LTC

Best score: 0.591

0.572: [

0.510: [period: 30min](lag: 5): MLP | BTC_LTC

Best score: 0.593

0.534: [period: 30min](lag: 6): Gradient boosting | BTC_ETC
0.524: [period: 30min](lag: 6): MLP | BTC_ETC

Best score: 0.593

0.553: [period: 30min](lag: 6): Gradient boosting | BTC_LTC
0.510: [period: 30min](lag: 6): MLP | BTC_LTC

Best score: 0.593

0.531: [period: 30min](lag: 7): Gradient boosting | BTC_ETC
0.522: [period: 30min](lag: 7): MLP | BTC_ETC

Best score: 0.593

0.565: [period: 30min](lag: 7): Gradient boosting | BTC_LTC
0.510: [period: 30min](lag: 7): MLP | BTC_LTC

Best score: 0.593

0.545: [period: 30min](lag: 8): Gradient boosting | BTC_ETC
0.528: [period: 30min](lag: 8): MLP | BTC_ETC

Best score: 0.593

0.566: [period: 30min](lag: 8): Gradient boosting | BTC_LTC
0.528: [period: 30min](lag: 8): MLP | BTC_LTC

Best score: 0.593

0.528: [period: 30min](lag: 9): Gradient boosting | BTC_ETC
0.523: [period: 30min](lag: 9): MLP | BTC_ETC

Best score: 0.593

0.558: [period: 30min](lag: 9): Gradient boosting | 

0.611: [period: day](lag: 9): MLP | BTC_ETC

Best score: 0.625

0.528: [period: day](lag: 9): Gradient boosting | BTC_LTC
0.500: [period: day](lag: 9): MLP | BTC_LTC

Best score: 0.625

0.563: [period: day](lag: 10): Gradient boosting | BTC_ETC
0.592: [period: day](lag: 10): MLP | BTC_ETC

Best score: 0.625

0.535: [period: day](lag: 10): Gradient boosting | BTC_LTC
0.507: [period: day](lag: 10): MLP | BTC_LTC

Best score: 0.625

0.408: [period: day](lag: 11): Gradient boosting | BTC_ETC
0.535: [period: day](lag: 11): MLP | BTC_ETC

Best score: 0.625

0.577: [period: day](lag: 11): Gradient boosting | BTC_LTC
0.493: [period: day](lag: 11): MLP | BTC_LTC

Best score: 0.625

0.493: [period: day](lag: 12): Gradient boosting | BTC_ETC
0.493: [period: day](lag: 12): MLP | BTC_ETC

Best score: 0.625

0.535: [period: day](lag: 12): Gradient boosting | BTC_LTC
0.620: [period: day](lag: 12): MLP | BTC_LTC

Best score: 0.625

0.535: [period: day](lag: 13): Gradient boosting | BTC_ETC
0.662: [per