In [33]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error
import warnings
warnings.filterwarnings('ignore')

In [34]:
def simulate_trading(close_price, predicted_change, trade_threshold):
    money = 100
    coins = 0
    for i in range(len(predicted_change) - 1):
        if predicted_change[i + 1] > trade_threshold:
            coins += money / close_price[i]
            money = 0
        elif predicted_change[i + 1] < trade_threshold:
            money += coins * close_price[i]
            coins = 0
    return money + coins * close_price[-1]

def simulate_holding(test):
    money = 100
    return (money / test[0]) * test[-1]

In [50]:
def read_train_evalute(path='../data/1day.csv', rolling=5, test_size=0.2, trading_threshold=0, plot=False, model=None):
    df = pd.read_csv(path)
    df.columns = ['open_time', 'open_price', 'high_price', 'low_price', 'close_price', 'volume', 'close_time']
    df['open_time'] = pd.to_datetime(df['open_time'] * 1000, unit='ms')
    df['close_time'] = pd.to_datetime(df['close_time'], unit='ms')
    # df = df[df['open_time'] > '2021-01-01']
    close_price_unchanged = df['close_price'].values[:int(test_size * len(df['close_price'].values))]
    df['ma'] = df['close_price'].rolling(rolling).mean()
    df['change'] = df['close_price'].pct_change() * 100
    df['future_change'] = df['change'].shift(-1)

    df = df.dropna()
    features = ['ma', 'change']
    target = 'future_change'
    X = df[features]
    y = df[target]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42, shuffle=False)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    # mse = mean_squared_error(y_test, y_pred)
    # print(f'Mean Squared Error: {mse}')
    if plot:
        # start date
        start_date = df['open_time'].values[-len(y_pred):][0]
        # end date
        end_date = df['open_time'].values[-1]
        print(f'Start Date: {start_date}')
        print(f'End Date: {end_date}')
        plt.figure(figsize=(20, 10))
        plt.plot(y_test.values, label='Actual')
        plt.plot(y_pred, label='Predicted')
        plt.legend()
        plt.show()
    close_price = df['close_price'].values[-len(y_pred):]
    predicted_change = y_pred
    trading_result = simulate_trading(close_price, predicted_change, trading_threshold)
    holding_result = simulate_holding(close_price_unchanged)

    return trading_result, holding_result

In [39]:
model = GradientBoostingRegressor(n_estimators=500, max_depth=10, learning_rate=0.01)
trading, holding = read_train_evalute(path='../data/1week.csv', model=model, rolling=20, trading_threshold=0, plot=False, test_size=0.1)
print(trading)
print(holding)

83.60002028729792
207.06345330767243


In [81]:
def find_best_params(path):
    lrs = [0.1]
    n_estimators = range(140, 240, 20)
    max_depth = range(2, 10, 2)
    rolling = range(5, 40, 2)

    best_score = 0
    best_params = None

    for lr in lrs:
        for n_estimator in n_estimators:
            for depth in max_depth:
                for roll in rolling:
                    model = GradientBoostingRegressor(n_estimators=n_estimator, max_depth=depth, learning_rate=lr)
                    trading, _ = read_train_evalute(path=path, model=model, rolling=roll, trading_threshold=0, plot=False, test_size=0.2)
                    if trading > best_score:
                        best_score = trading
                        best_params = (lr, n_estimator, depth, roll)

    print(best_score, best_params)

179.13965086604225 (0.1, 210, 2, 40)


In [60]:
model = GradientBoostingRegressor(n_estimators=210, max_depth=2, learning_rate=0.1)
trading, holding = read_train_evalute(path='../data/1week.csv', model=model, rolling=40, trading_threshold=0, plot=False, test_size=0.2)
print('trading: ', trading)
print('holding: ', holding)

trading:  179.13965086604225
holding:  66.91178582713354


In [84]:
best_paths = [
    '../data/1hour.csv',
    '../data/2hour.csv',
    '../data/4hour.csv',
    '../data/1day.csv', #63.22024016325607 (0.1, 200, 2, 5)
    '../data/1week.csv', #118.96780905018527 (0.1, 160, 8, 27)
    '../data/1month.csv', #158.05054942837288 (0.1, 200, 2, 23)
]

best_score = 0
best_path = None

for path in best_paths:
    find_best_params(path)

KeyboardInterrupt: 