In [1]:
# train.py

import os
import torch
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings(action='ignore')

from data_loader import load_data_1m
from feature_calculations import (
    resample_data, calculate_MA_data, calculate_ema_bollinger_bands, calculate_rsi,
    calculate_macd, calculate_stochastic_oscillator, calculate_adx, calculate_atr,
    calculate_obv, calculate_williams_r, base_feature_fn, cyclic_encode_fn, log_transform
)
from strategies import BB_fitness_fn, BB_MACD_fitness_fn
from dataset import make_dataset, replace_nan_with_zero
from train_functions_inference import inference, fitness_fn, generation_valid, generation_test

from Prescriptor import Prescriptor
from Evolution.crossover import UniformCrossover, WeightedSumCrossover, DifferentialEvolutionOperator
from Evolution.mutation import MultiplyNormalMutation, MultiplyUniformMutation, AddNormalMutation, AddUniformMutation, ChainMutation, FlipSignMutation
from Evolution.mutation import RandomValueMutation
from Evolution.selection import RouletteSelection, TournamentSelection, ParetoLexsortSelection
from Evolution import Evolution



from binance.client import Client
import time
from datetime import datetime, timedelta

In [2]:
WINDOW_SIZE_M = 240
WINDOW_SIZE_D = 60

In [3]:
# Initialize the Binance client
API_KEY = '5fdHtvB3ecaRqEojTnaaA697Owg1AemEYvw9aQF90mZ9Hwx3I4YChQS9sOECN0lP'
API_SECRET = 'AHYpkH1c2u1crNI8xDjO8VjLn4QBuidcsiRsv20E38Qorzo1J3Fsg6rUocYyifMp'
client = Client(API_KEY, API_SECRET)

# Define the trading pair and time frame
symbol = 'BTCUSDT'
interval = Client.KLINE_INTERVAL_1MINUTE

# Calculate the start date (60 days ago)
start_date = (datetime.utcnow() - timedelta(days=70)).strftime('%d %b, %Y %H:%M:%S')

# Fetch historical data
klines = client.get_historical_klines(symbol, interval, start_date)


# 데이터프레임으로 변환
cols = ['Open time', 'Open', 'High', 'Low', 'Close', 'Volume', 
        'Close time', 'Quote asset volume', 'Number of trades', 
        'Taker buy base asset volume', 'Taker buy quote asset volume', 'Ignore']
df = pd.DataFrame(klines, columns=cols)

# 시간 형식 변환
df['Open time'] = pd.to_datetime(df['Open time'], unit='ms')
df['Close time'] = pd.to_datetime(df['Close time'], unit='ms')

# df['Open'] = df['Open'].astype(float)
# df['High'] = df['High'].astype(float)
# df['Low'] = df['Low'].astype(float)
# df['Close'] = df['Close'].astype(float)

# Function to fetch the latest kline and update the DataFrame
def update_data_frame(df, symbol, interval):
    # Fetch the latest kline
    kline = client.get_klines(symbol=symbol, interval=interval, limit=1)
    # 데이터프레임으로 변환
    cols = ['Open time', 'Open', 'High', 'Low', 'Close', 'Volume', 
            'Close time', 'Quote asset volume', 'Number of trades', 
            'Taker buy base asset volume', 'Taker buy quote asset volume', 'Ignore']
    new_df = pd.DataFrame(klines, columns=cols)

    # 시간 형식 변환
    new_df['Open time'] = pd.to_datetime(new_df['Open time'], unit='ms')
    new_df['Close time'] = pd.to_datetime(new_df['Close time'], unit='ms')
    
    # Concatenate the new data with the existing DataFrame
    df = pd.concat([df, new_df], ignore_index=True)
    # Drop the oldest data to maintain the DataFrame size
    if len(df) > 60 * 24 * 70:
        df = df.iloc[1:].reset_index(drop=True)
    return df


In [4]:
def preprocessing_data(data_1m, data_1d, window_size_m, window_size_d):
    data_1d, ma_cols_1d, ma_cols_rel_1d = calculate_MA_data(data_1d, window_size_d, 'EMA', '_1d')
    data_1d, bb_cols_1d, bb_cols_rel_1d = calculate_ema_bollinger_bands(data_1d, window_size_d, extra_str='_1d')
    data_1d, rsi_cols_1d = calculate_rsi(data_1d, window=20, extra_str='_1d')
    data_1d, macd_cols_1d = calculate_macd(data_1d, 20, 120, 60, extra_str='_1d')
    data_1d, stoch_cols_1d = calculate_stochastic_oscillator(data_1d, window_size_d, 20, extra_str='_1d')
    data_1d, adx_cols_1d = calculate_adx(data_1d, window_size_d, extra_str='_1d')
    data_1d, atr_cols_1d = calculate_atr(data_1d, window_size_d, extra_str='_1d')
    data_1d, obv_cols_1d = calculate_obv(data_1d, extra_str='_1d')
    data_1d, will_cols_1d = calculate_williams_r(data_1d, window_size_d, extra_str='_1d')
    data_1d, base_feature_1d = base_feature_fn(data_1d, extra_str='_1d')
    data_1d, cyclice_encoding_1d = cyclic_encode_fn(data_1d, 'Close time', 'day_of_year')

    # For 1M Data
    data_1m, ma_cols, ma_cols_rel = calculate_MA_data(data_1m, window_size_m, 'EMA')
    data_1m, bb_cols, bb_cols_rel = calculate_ema_bollinger_bands(data_1m, window_size_m)
    data_1m, rsi_cols = calculate_rsi(data_1m, window=60)
    data_1m, macd_cols = calculate_macd(data_1m, 60, 600, window_size_m)
    data_1m, stoch_cols = calculate_stochastic_oscillator(data_1m, window_size_m, 60)
    data_1m, adx_cols = calculate_adx(data_1m, window_size_m)
    data_1m, atr_cols = calculate_atr(data_1m, window_size_m)
    data_1m, obv_cols = calculate_obv(data_1m)
    data_1m, will_cols = calculate_williams_r(data_1m, window_size_m)
    data_1m, base_feature = base_feature_fn(data_1m)
    data_1m, cyclice_encoding = cyclic_encode_fn(data_1m, 'Open time')

    data_1m, short_ma_cols, short_ma_cols_rel = calculate_MA_data(data_1m, 60, 'EMA')
    data_1m, long_ma_cols, long_ma_cols_rel = calculate_MA_data(data_1m, 180, 'EMA')

    # Prepare Feature Columns
    drop_column = [
        'Open time', 'Close time', 'Quote asset volume', 'Ignore',
        'Number of trades', 'Taker buy base asset volume', 'Taker buy quote asset volume'
    ]
    feature_column = (
        ma_cols_rel + bb_cols_rel + rsi_cols + macd_cols + stoch_cols +
        adx_cols + will_cols + base_feature + cyclice_encoding  # Excluding obv and atr
    )
    feature_column_1d = (
        ma_cols_rel_1d + bb_cols_rel_1d + rsi_cols_1d + macd_cols_1d + stoch_cols_1d +
        adx_cols_1d + will_cols_1d + base_feature_1d + cyclice_encoding_1d
    )


    # Apply Log Transform
    for feature in feature_column:
        data_1m[feature] = log_transform(data_1m[feature])

    for feature in feature_column_1d:
        data_1d[feature] = log_transform(data_1d[feature])
        
        
    return data_1m, data_1d, feature_column, feature_column_1d

In [5]:
# Mock investment parameters
initial_investment_krw = 5000000
try:
    while True:
        start_time = time.time()
        df = update_data_frame(df, symbol, interval)
        df_1d = resample_data(df, '1D')
        df_1d['Close time'] = df_1d.index
        df_1d = df_1d.reset_index(drop=True)
        df_1m = df.iloc[-WINDOW_SIZE_M*2:].reset_index(drop=True)

        df_1d['Open'] = df_1d['Open'].astype(float)
        df_1d['High'] = df_1m['High'].astype(float)
        df_1d['Low'] = df_1d['Low'].astype(float)
        df_1d['Close'] = df_1d['Close'].astype(float)
        
        df_1m['Open'] = df_1m['Open'].astype(float)
        df_1m['High'] = df_1m['High'].astype(float)
        df_1m['Low'] = df_1m['Low'].astype(float)
        df_1m['Close'] = df_1m['Close'].astype(float)
        
        df_1m, df_1d, feature_column, feature_column_1d = preprocessing_data(df_1m, df_1d, WINDOW_SIZE_M, WINDOW_SIZE_D)
        
        end_time = time.time()
        execution_time = end_time - start_time
        current_time = datetime.utcnow()
        seconds_to_next_minute = 60 - current_time.second - current_time.microsecond / 1_000_000
        sleep_time = max(0, seconds_to_next_minute - execution_time)
        print(f"Execution time: {execution_time:.2f} seconds, sleeping for {sleep_time:.2f} seconds.")
        time.sleep(sleep_time)
        break
except KeyboardInterrupt:
    print("Stopped by user")

Execution time: 1.04 seconds, sleeping for 40.40 seconds.


In [8]:
df_1m[feature_column].iloc[-1]

EMA_240_rel          0.133756
Upper_BB_240_rel     0.275637
Lower_BB_240_rel     0.472250
RSI_60               1.452559
MACD_60_600          0.341151
Signal_Line_240      0.351986
%K_240               3.196181
%D_60                3.414258
ADX_240              1.397711
Williams_%R_240     -3.196181
open_close_diff     -0.014123
open_high_diff      -0.019051
open_low_diff        0.000000
close_high_diff     -0.004998
close_low_diff       0.014121
high_low_diff        0.019048
close_diff           0.014111
minute_of_day_sin    0.689142
minute_of_day_cos    0.118848
Name: 479, dtype: float64

In [9]:
df_1d[feature_column_1d].iloc[-1]

EMA_60_rel__1d         2.144596
Upper_BB_60_rel__1d    3.100811
Lower_BB_60_rel__1d    3.618809
RSI_20__1d             0.462891
MACD_20_120__1d        2.950426
Signal_Line_60__1d     2.681675
%K_60__1d              3.952541
%D_20__1d                   NaN
ADX_60__1d                  NaN
Williams_%R_60__1d    -3.952541
open_close_diff__1d    0.188784
open_high_diff__1d     0.498750
open_low_diff__1d      0.489317
close_high_diff__1d    0.364501
close_low_diff__1d     0.353683
high_low_diff__1d     -0.015440
close_diff__1d        -0.189142
day_of_year_sin       -0.143502
day_of_year_cos        0.687141
Name: 70, dtype: float64