In [1]:
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import seaborn as sns

from sklearn.model_selection import train_test_split
from catboost import CatBoostClassifier

import matplotlib.pyplot as plt

import numpy as np

from sklearn.preprocessing import StandardScaler

np.random.seed(7)

In [2]:
def validate_model(cls_name, model, X_train, y_train, X_test, y_test, period, pair, lag):
    info_format = '{3:.3f}: [period: {0}](lag: {1}): {2} | {4}'
    
    model.fit(X_train, y_train)
    score = model.score(X_test, y_test)
    print(info_format.format(period, lag, cls_name, score, pair))
    return score

def test_cls(period, pair, lag):
    df = pd.read_csv('../datasets/{}/{}.csv'.format(period, pair))
    close_lag_fields = ['close-{}'.format(n+1) for n in range(lag)]
    volume_lag_fields = ['volume-{}'.format(n+1) for n in range(lag)]
    df['datetime'] = pd.to_datetime(df['datetime'], unit='s')
    df['hour'] = df['datetime'].dt.hour
    df['dow'] = df['datetime'].dt.weekday
    df = pd.get_dummies(df, columns=['dow', 'hour'])
    dow_fields = [col for col in df if col.startswith('dow')]
    hour_fields = [col for col in df if col.startswith('hour')]
    for n in range(1, lag+1):
        df['close-' + str(n)] = df['close'] - df.shift(n)['close']
        df['volume-' + str(n)] = df['volume'] - df.shift(n)['volume']
    df['up'] = df['close'] < df.shift(-1)['close']
    df.dropna(inplace=True)
    X = df[close_lag_fields + volume_lag_fields + dow_fields + hour_fields].values
    y = df['up'].astype(int).values

    scaler = StandardScaler()

    X = scaler.fit_transform(X)

    X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0)
    
    best_score = 0.0

    model = CatBoostClassifier()
    score = validate_model('Catboost', model, X_train, y_train, X_test, y_test, period, pair, lag)
    if score > best_score:
        best_score = score
    
    print()
    return best_score


In [3]:
best_score = 0.0

for period in ['5min', '30min', 'day']:
    for lag in range(2, 31):
        for pair in ['BTC_ETC', 'BTC_LTC']:
            score = test_cls(period, pair, lag)
            if score > best_score:
                best_score = score
            print('Best score: {:.3f}'.format(best_score))
            print()

0.573: [period: 5min](lag: 2): Catboost | BTC_ETC

Best score: 0.573

0.578: [period: 5min](lag: 2): Catboost | BTC_LTC

Best score: 0.578

0.571: [period: 5min](lag: 3): Catboost | BTC_ETC

Best score: 0.578

0.576: [period: 5min](lag: 3): Catboost | BTC_LTC

Best score: 0.578

0.573: [period: 5min](lag: 4): Catboost | BTC_ETC

Best score: 0.578

0.579: [period: 5min](lag: 4): Catboost | BTC_LTC

Best score: 0.579

0.576: [period: 5min](lag: 5): Catboost | BTC_ETC

Best score: 0.579

0.591: [period: 5min](lag: 5): Catboost | BTC_LTC

Best score: 0.591

0.574: [period: 5min](lag: 6): Catboost | BTC_ETC

Best score: 0.591

0.589: [period: 5min](lag: 6): Catboost | BTC_LTC

Best score: 0.591

0.574: [period: 5min](lag: 7): Catboost | BTC_ETC

Best score: 0.591

0.589: [period: 5min](lag: 7): Catboost | BTC_LTC

Best score: 0.591

0.576: [period: 5min](lag: 8): Catboost | BTC_ETC

Best score: 0.591

0.586: [period: 5min](lag: 8): Catboost | BTC_LTC

Best score: 0.591

0.575: [period: 5min

0.548: [period: day](lag: 2): Catboost | BTC_ETC

Best score: 0.598

0.589: [period: day](lag: 2): Catboost | BTC_LTC

Best score: 0.598

0.562: [period: day](lag: 3): Catboost | BTC_ETC

Best score: 0.598

0.534: [period: day](lag: 3): Catboost | BTC_LTC

Best score: 0.598

0.575: [period: day](lag: 4): Catboost | BTC_ETC

Best score: 0.598

0.616: [period: day](lag: 4): Catboost | BTC_LTC

Best score: 0.616

0.597: [period: day](lag: 5): Catboost | BTC_ETC

Best score: 0.616

0.639: [period: day](lag: 5): Catboost | BTC_LTC

Best score: 0.639

0.486: [period: day](lag: 6): Catboost | BTC_ETC

Best score: 0.639

0.514: [period: day](lag: 6): Catboost | BTC_LTC

Best score: 0.639

0.542: [period: day](lag: 7): Catboost | BTC_ETC

Best score: 0.639

0.542: [period: day](lag: 7): Catboost | BTC_LTC

Best score: 0.639

0.472: [period: day](lag: 8): Catboost | BTC_ETC

Best score: 0.639

0.569: [period: day](lag: 8): Catboost | BTC_LTC

Best score: 0.639

0.528: [period: day](lag: 9): Catb