In [1]:
# train.py

import os
import torch
import numpy as np
import warnings
import pickle
warnings.filterwarnings(action='ignore')

from data_loader import load_data_1m
from feature_calculations_2 import (
    resample_data, calculate_MA_data, calculate_ema_bollinger_bands, calculate_rsi,
    calculate_macd, calculate_stochastic_oscillator, calculate_adx, calculate_atr,
    calculate_obv, calculate_williams_r, base_feature_fn, cyclic_encode_fn, calculate_support_resistance_numba
)
from strategies import BB_fitness_fn, BB_MACD_fitness_fn, simple_fitness_fn
from dataset import make_dataset, replace_nan_with_zero
from train_functions_bi_cul import inference, fitness_fn, generation_valid, generation_test

from Prescriptor import Prescriptor, CryptoModelTCN
from Evolution.crossover import UniformCrossover, WeightedSumCrossover, DifferentialEvolutionOperator, CenDE_DOBLOperator, SkipCrossover
from Evolution.mutation import MultiplyNormalMutation, MultiplyUniformMutation, AddNormalMutation, AddUniformMutation, ChainMutation, FlipSignMutation
from Evolution.mutation import RandomValueMutation
from Evolution.selection import RouletteSelection, TournamentSelection, ParetoLexsortSelection
from Evolution import Evolution

In [2]:
# Raw 1-minute data.
# The path is handed to load_data_1m unchanged; what the loader returns is
# defined in data_loader (not visible here).
DATA_1M_PATH = '/root/daily/bit/data/1min_ethusdt.pkl'
data_1m = load_data_1m(DATA_1M_PATH)

# Indicator window configuration for the 1-minute data.
# Add or edit entries as needed; each list is consumed by the matching
# feature loop further down.
ma_windows = [20, 60, 180, 240]  # moving-average (MA) window sizes
bb_windows = [20, 60, 240]  # Bollinger Bands window sizes
macd_params = [(60, 600, 240), (30, 300, 120)]  # example (short, long, signal) settings
rsi_windows = [14, 60]  # RSI window sizes (default: 14)
stoch_params = [(20, 60), (240, 60)]  # (stochastic window, period) pairs
adx_windows = [14, 60, 240]  # ADX window sizes
atr_windows = [14, 60, 240]  # ATR window sizes
williams_windows = [14, 60, 240]  # Williams %R window sizes
sr_windows = [20, 60, 240]  # support/resistance search window sizes

# Technical-indicator features on the 1-minute data.
# Every helper is called with the current `data_1m`; its first return value is
# rebound to `data_1m` and the remaining value(s) are kept under *_cols names
# (presumably the names of the columns the helper added -- verify in
# feature_calculations_2).
# NOTE(review): inside each loop the *_cols variables are overwritten on every
# pass, so after the loop they hold only what the helper returned for the LAST
# window / parameter set. Confirm that this is what `feature_column` should use.

# Moving average: 'MA' is passed as the third argument instead of 'EMA'.
for ma_window in ma_windows:
    data_1m, ma_cols, ma_cols_rel = calculate_MA_data(data_1m, ma_window, 'MA')

# Bollinger Bands (existing helper; the values depend on the window).
for bb_window in bb_windows:
    data_1m, bb_cols, bb_cols_rel = calculate_ema_bollinger_bands(data_1m, bb_window)

# MACD: one call per (short, long, signal) combination.
for macd_cfg in macd_params:
    data_1m, macd_cols = calculate_macd(data_1m, *macd_cfg)

# RSI
for rsi_window in rsi_windows:
    data_1m, rsi_cols = calculate_rsi(data_1m, window=rsi_window)

# Stochastic oscillator: one call per (window, period) pair.
for stoch_cfg in stoch_params:
    data_1m, stoch_cols = calculate_stochastic_oscillator(data_1m, *stoch_cfg)

# ADX
for adx_window in adx_windows:
    data_1m, adx_cols = calculate_adx(data_1m, adx_window)

# ATR
for atr_window in atr_windows:
    data_1m, atr_cols = calculate_atr(data_1m, atr_window)

# OBV needs no window, so it is computed only once.
data_1m, obv_cols = calculate_obv(data_1m)

# Williams %R
for williams_window in williams_windows:
    data_1m, will_cols = calculate_williams_r(data_1m, williams_window)

# Support / resistance (Numba-based helper).
for sr_window in sr_windows:
    data_1m, sr_col = calculate_support_resistance_numba(data_1m, window=sr_window)

# Base features and the cyclic encoding of 'Open time' (computed once).
data_1m, base_feature = base_feature_fn(data_1m, alpha=100)
data_1m, cyclic_encoding = cyclic_encode_fn(data_1m, 'Open time')

# Feature-column bookkeeping.

# NOTE(review): `drop_column` is not referenced again in the visible notebook.
drop_column = [
    'Open time',
    'Close time',
    'Quote asset volume',
    'Ignore',
    'Number of trades',
    'Taker buy base asset volume',
    'Taker buy quote asset volume',
]

test_column = [
    'Quote asset volume',
    'Number of trades',
    'Taker buy base asset volume',
    'Taker buy quote asset volume',
]

# Column groups are concatenated in exactly this order; obv_cols and atr_cols
# are deliberately left out.
feature_groups = [
    test_column, cyclic_encoding, ma_cols_rel, bb_cols_rel, rsi_cols, macd_cols,
    stoch_cols, adx_cols, will_cols, sr_col, base_feature,
]
feature_column = []
for group in feature_groups:
    # `+` (rather than extend) keeps the strictness of plain list concatenation.
    feature_column = feature_column + group

# The daily-resolution counterpart exists only as commented-out code, although
# the make_dataset call below still references `feature_column_1d`.
# feature_column_1d = (
#     test_column + cyclice_encoding_1d + ma_cols_rel_1d + bb_cols_rel_1d + rsi_cols_1d + macd_cols_1d + stoch_cols_1d +
#     adx_cols_1d + will_cols_1d + sr_col_1d + base_feature_1d
# )


# Entry signals: the three return values of BB_MACD_fitness_fn are kept as
# entry positions, patience values and entry indices (meaning inferred from the
# variable names -- verify in strategies).
signal_out = BB_MACD_fitness_fn(data_1m, 240, 60, 180)
bb_macd_entry_pos_list, patience_list, bb_macd_entry_index_list = signal_out
# Alternatives that were tried and are currently disabled:
# bb_entry_pos_list, patience_list, bb_entry_index_list = BB_fitness_fn(data_1m)
# bb_macd_entry_pos_list, patience_list, bb_macd_entry_index_list = simple_fitness_fn(data_1m, 240, 60, 180)
# bb_macd_entry_pos_list, patience_list, bb_macd_entry_index_list = BB_MACD_fitness_fn(data_1m, 60, 20, 60)

# Prepare the dataset.
# NOTE(review): neither `data_1d` nor `feature_column_1d` is assigned anywhere
# in the visible notebook (the latter exists only as commented-out code), so
# this call raises NameError on a fresh kernel. The cell that built the daily
# data needs to be restored before Restart & Run All can work.
dataset_kwargs = dict(
    using_column=feature_column,
    using_column_1d=feature_column_1d,
    window_size=240,
    window_size_1d=60,
    entry_pos_list=bb_macd_entry_pos_list,
    patience_list=patience_list,
    use_1d_data=True,
)
data_tensor = make_dataset(data_1m, data_1d, **dataset_kwargs)

# Keep every entry position that is not the string 'hold'.
entry_pos_array = np.array(bb_macd_entry_pos_list)
entry_pos_list = entry_pos_array[entry_pos_array != 'hold']

# Assemble fixed-length samples.
# Each item of data_tensor is read as item[0] / item[1]; an item is kept only
# when those two parts have lengths 240 and 60 respectively, otherwise it is
# counted in skip_data_cnt.
windows_1m = []
windows_1d = []
skip_data_cnt = 0
for sample in data_tensor:
    if len(sample[0]) != 240 or len(sample[1]) != 60:
        skip_data_cnt += 1
        continue
    windows_1m.append(torch.from_numpy(sample[0]))
    windows_1d.append(torch.from_numpy(sample[1]))

# Stack the kept samples along a new leading dimension.
dataset_1m = torch.stack(windows_1m, dim=0)
dataset_1d = torch.stack(windows_1d, dim=0)
del windows_1m, windows_1d

# Divide the first four entries of the last dimension by their mean over
# dim 1, per sample. A small epsilon is added to the denominator to avoid
# division by zero.
epsilon = 1e-10
for windows in (dataset_1m, dataset_1d):
    leading = windows[:, :, :4]
    windows[:, :, :4] = leading / (leading.mean(dim=1, keepdim=True) + epsilon)

dataset_1m = replace_nan_with_zero(dataset_1m)
dataset_1d = replace_nan_with_zero(dataset_1d)

# Persist the prepared tensors and entry metadata so that later cells can
# reload them instead of recomputing the features.
# (`os` and `pickle` are already imported in the notebook's import cell, so the
# redundant mid-notebook `import pickle` has been dropped.)
# NOTE(review): `data_1m` is not part of this cache even though the evolution
# cell passes it to generation_valid.
FEATURE_CACHE_PATH = '/root/daily/bit_4/backup_feature_data/data.pkl'

data_to_save = {
    'dataset_1m': dataset_1m,
    'dataset_1d': dataset_1d,
    'skip_data_cnt': skip_data_cnt,
    'entry_pos_list': entry_pos_list,
    'bb_macd_entry_pos_list': bb_macd_entry_pos_list,
    'bb_macd_entry_index_list': bb_macd_entry_index_list
}

# open(..., 'wb') does not create missing parent directories; without this the
# cell would fail with FileNotFoundError only after all of the expensive
# feature computation above has already run.
os.makedirs(os.path.dirname(FEATURE_CACHE_PATH), exist_ok=True)
with open(FEATURE_CACHE_PATH, 'wb') as f:
    pickle.dump(data_to_save, f)

In [3]:
# Restore the cached tensors and entry metadata into separate variables.
# NOTE: pickle.load can execute arbitrary code -- only load a cache file that
# this notebook wrote itself.
# NOTE(review): `data_1m` is not stored in this cache, yet the evolution cell
# passes it to generation_valid, so the feature cell still has to be run first.
with open('/root/daily/bit_4/backup_feature_data/data.pkl', 'rb') as cache_file:
    loaded_data = pickle.load(cache_file)

(
    dataset_1m,
    dataset_1d,
    skip_data_cnt,
    entry_pos_list,
    bb_macd_entry_pos_list,
    bb_macd_entry_index_list,
) = (
    loaded_data[key]
    for key in (
        'dataset_1m',
        'dataset_1d',
        'skip_data_cnt',
        'entry_pos_list',
        'bb_macd_entry_pos_list',
        'bb_macd_entry_index_list',
    )
)

In [4]:
# Offsets at 60 % and 80 % of the sample count, each shifted by skip_data_cnt.
# Both values are later passed to generation_valid (presumably as the
# validation / test boundaries -- verify in train_functions_bi_cul).
n_samples = len(dataset_1m)
valid_skip_data_cnt = skip_data_cnt + int(n_samples * 0.6)
test_skip_data_cnt = skip_data_cnt + int(n_samples * 0.8)


In [None]:
# Evolution setup
# Disable gradient computation globally.
torch.set_grad_enabled(False)
torch.backends.cudnn.benchmark = True

# Seed the global torch and NumPy generators once, before the restart loop, so
# that a Restart & Run All is repeatable. Seeding once (not per epoch) keeps the
# restarts from starting out as identical copies of each other.
# NOTE(review): cudnn.benchmark=True may still select non-deterministic
# kernels, and the Evolution operators need their own seeding if they draw
# from Python's `random` module -- verify.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

chromosomes_size = 30000
window_size = 240
EPOCH = 5
gen_loop = 50
best_size = 30000
elite_size = 6000
parents_size = 6000  # was a literal 6000 in the RouletteSelection call
profit_init = 1
device = 'cuda:1'
group = 30000
start_gen = 0
best_profit = None
best_chromosomes = None

# Optional resume from a saved generation (disabled):
# state_dict_path = '/root/daily/bit_3/generation/generation_36.pt'
# if os.path.exists(state_dict_path):
#     state_dict = torch.load(state_dict_path)
#     start_gen = state_dict['generation'] + 1
#     best_profit = state_dict['best_profit']
#     best_chromosomes = state_dict['best_chromosomes']
#     # prescriptor.load_state_dict(state_dict['prescriptor_state_dict'], strict=True)

# Each epoch builds a fresh Prescriptor and Evolution, then runs
# generation_valid; the best chromosomes / profit it returns are fed into the
# next epoch's call.
for epoch in range(EPOCH):
    prescriptor = Prescriptor(
        basic_block=CryptoModelTCN,
        small_input_dim=25,
        large_input_dim=25,
        fc_hidden_size=16,
        small_lstm_hidden_dim=16,
        large_lstm_hidden_dim=16,
        output_dim=8,
        after_input_dim=11,
        after_hidden_dim=16,
        after_output_dim=5,
        num_blocks=group,
    ).to(device).eval()

    # if epoch == 1:
    #     start_gen = 0

    # if epoch == 0:
    #     prescriptor.load_state_dict(state_dict['prescriptor_state_dict'], strict=True)

    total_param = sum(p.numel() for p in prescriptor.parameters())
    print(f"Total parameters: {total_param}")

    # Use the named sizes so the selection operator cannot drift away from the
    # elite_size that is also passed to generation_valid below.
    selection = RouletteSelection(elite_num=elite_size, parents_num=parents_size, minimize=False)
    # selection = ParetoLexsortSelection(elite_num=2000, parents_num=4000,
    #                                     priority=[], prior_ratio= [],
    #                                     prob_method= 'softmax',minimize=False)
    # crossover = DifferentialEvolutionOperator()
    # crossover = UniformCrossover(num_parents=4)
    # crossover = CenDE_DOBLOperator()
    mutation = ChainMutation([RandomValueMutation(mut_prob=0.05), AddUniformMutation(mut_prob=0.1)])
    crossover = UniformCrossover(num_parents=1)
    # mutation = AddNormalMutation(mut_prob=0.1)
    evolution = Evolution(
        prescriptor=prescriptor,
        selection=selection,
        crossover=crossover,
        mutation=mutation
    )

    # NOTE(review): `data_1m` comes from the feature cell; it is not restored
    # by the cache-loading cell.
    best_chromosomes, best_profit = generation_valid(
        data_1m=data_1m,
        dataset_1m=dataset_1m,
        dataset_1d=dataset_1d,
        prescriptor=prescriptor,
        evolution=evolution,
        skip_data_cnt=skip_data_cnt,
        valid_skip_data_cnt=valid_skip_data_cnt,
        test_skip_data_cnt=test_skip_data_cnt,
        chromosomes_size=chromosomes_size,
        window_size=window_size,
        gen_loop=gen_loop,
        best_size=best_size,
        elite_size=elite_size,
        profit_init=profit_init,
        entry_index_list=bb_macd_entry_index_list,
        entry_pos_list=entry_pos_list,
        best_profit=best_profit,
        best_chromosomes=best_chromosomes,
        start_gen=start_gen,
        device=device
    )

Total parameters: 521850000
generation  0: 


Inference Progress: 100%|██████████| 20/20 [14:36<00:00, 43.85s/it]
 61%|██████    | 25154/41319 [03:51<02:28, 108.50it/s]


generation  1: 


Inference Progress: 100%|██████████| 20/20 [15:09<00:00, 45.50s/it]
 61%|██████    | 25154/41319 [05:47<03:43, 72.29it/s]


generation  2: 


Inference Progress: 100%|██████████| 20/20 [15:13<00:00, 45.67s/it]
 61%|██████    | 25154/41319 [05:45<03:41, 72.82it/s]


generation  3: 


Inference Progress: 100%|██████████| 20/20 [15:16<00:00, 45.80s/it]
 61%|██████    | 25154/41319 [05:49<03:44, 71.93it/s]


generation  4: 


Inference Progress: 100%|██████████| 20/20 [15:14<00:00, 45.70s/it]
 61%|██████    | 25154/41319 [05:38<03:37, 74.40it/s]


generation  5: 


Inference Progress: 100%|██████████| 20/20 [15:11<00:00, 45.58s/it]
 61%|██████    | 25154/41319 [05:48<03:44, 72.12it/s]
 80%|████████  | 33236/41319 [01:25<00:20, 389.69it/s]   


generation  6: 


Inference Progress: 100%|██████████| 20/20 [15:17<00:00, 45.90s/it]
 61%|██████    | 25154/41319 [05:34<03:35, 75.15it/s]
 80%|████████  | 33236/41319 [01:25<00:20, 389.32it/s]   


generation  7: 


Inference Progress: 100%|██████████| 20/20 [15:15<00:00, 45.78s/it]
 61%|██████    | 25154/41319 [05:48<03:43, 72.19it/s]
 80%|████████  | 33236/41319 [01:25<00:20, 388.41it/s]   


generation  8: 


Inference Progress: 100%|██████████| 20/20 [15:25<00:00, 46.29s/it]
 61%|██████    | 25154/41319 [05:47<03:43, 72.28it/s]
 80%|████████  | 33236/41319 [01:26<00:21, 384.65it/s]   


generation  9: 


Inference Progress: 100%|██████████| 20/20 [15:14<00:00, 45.71s/it]
 61%|██████    | 25154/41319 [06:00<03:51, 69.69it/s] 
 80%|████████  | 33236/41319 [01:25<00:20, 387.33it/s]   


generation  10: 


Inference Progress: 100%|██████████| 20/20 [15:19<00:00, 45.97s/it]
 61%|██████    | 25154/41319 [05:49<03:44, 72.01it/s]
 80%|████████  | 33236/41319 [01:25<00:20, 387.11it/s]   


generation  11: 


Inference Progress: 100%|██████████| 20/20 [15:13<00:00, 45.67s/it]
 61%|██████    | 25154/41319 [05:46<03:42, 72.49it/s]
 80%|████████  | 33236/41319 [01:25<00:20, 388.28it/s]   


generation  12: 


Inference Progress: 100%|██████████| 20/20 [15:21<00:00, 46.07s/it]
 61%|██████    | 25154/41319 [05:49<03:44, 72.01it/s]
 80%|████████  | 33236/41319 [01:26<00:20, 386.10it/s]   


generation  13: 


Inference Progress: 100%|██████████| 20/20 [15:13<00:00, 45.68s/it]
 61%|██████    | 25154/41319 [05:50<03:45, 71.79it/s]
 80%|████████  | 33236/41319 [01:26<00:20, 385.01it/s]   


generation  14: 


Inference Progress: 100%|██████████| 20/20 [15:17<00:00, 45.90s/it]
 61%|██████    | 25154/41319 [05:44<03:41, 73.00it/s]
 80%|████████  | 33236/41319 [01:25<00:20, 386.64it/s]   


generation  15: 


Inference Progress: 100%|██████████| 20/20 [15:11<00:00, 45.59s/it]
 61%|██████    | 25154/41319 [05:49<03:44, 72.00it/s]
 80%|████████  | 33236/41319 [01:26<00:20, 385.11it/s]   


generation  16: 


Inference Progress: 100%|██████████| 20/20 [15:17<00:00, 45.86s/it]
 61%|██████    | 25154/41319 [05:42<03:39, 73.54it/s]
 80%|████████  | 33236/41319 [01:26<00:20, 386.06it/s]   


generation  17: 


Inference Progress: 100%|██████████| 20/20 [15:14<00:00, 45.74s/it]
 61%|██████    | 25154/41319 [05:49<03:44, 72.04it/s]
 80%|████████  | 33236/41319 [01:26<00:20, 386.43it/s]   


generation  18: 


Inference Progress: 100%|██████████| 20/20 [15:26<00:00, 46.32s/it]
 61%|██████    | 25154/41319 [05:48<03:43, 72.22it/s]
 80%|████████  | 33236/41319 [01:26<00:20, 386.32it/s]   


generation  19: 


Inference Progress: 100%|██████████| 20/20 [15:17<00:00, 45.89s/it]
 61%|██████    | 25154/41319 [05:49<03:44, 71.97it/s]
 80%|████████  | 33236/41319 [01:26<00:20, 385.96it/s]   


generation  20: 


Inference Progress: 100%|██████████| 20/20 [15:19<00:00, 45.98s/it]
 61%|██████    | 25154/41319 [05:59<03:51, 69.94it/s]  
 80%|████████  | 33236/41319 [01:25<00:20, 386.72it/s]   


generation  21: 


Inference Progress: 100%|██████████| 20/20 [15:18<00:00, 45.91s/it]
 61%|██████    | 25154/41319 [05:42<03:40, 73.47it/s]
 80%|████████  | 33236/41319 [01:26<00:20, 386.00it/s]   


generation  22: 


Inference Progress: 100%|██████████| 20/20 [15:20<00:00, 46.00s/it]
 61%|██████    | 25154/41319 [05:50<03:45, 71.81it/s]
 80%|████████  | 33236/41319 [01:26<00:20, 385.25it/s]   


generation  23: 


Inference Progress: 100%|██████████| 20/20 [15:12<00:00, 45.62s/it]
 61%|██████    | 25154/41319 [05:26<03:29, 77.13it/s]
 80%|████████  | 33236/41319 [01:26<00:20, 385.30it/s]   


generation  24: 


Inference Progress: 100%|██████████| 20/20 [15:21<00:00, 46.06s/it]
 61%|██████    | 25154/41319 [05:42<03:40, 73.48it/s]
 80%|████████  | 33236/41319 [01:25<00:20, 386.51it/s]   


generation  25: 


Inference Progress: 100%|██████████| 20/20 [15:12<00:00, 45.64s/it]
 61%|██████    | 25154/41319 [05:40<03:38, 73.97it/s]
 80%|████████  | 33236/41319 [01:26<00:20, 386.41it/s]   


generation  26: 


Inference Progress: 100%|██████████| 20/20 [15:23<00:00, 46.18s/it]
 61%|██████    | 25154/41319 [05:42<03:40, 73.36it/s]
 80%|████████  | 33236/41319 [01:25<00:20, 387.43it/s]   


generation  27: 


Inference Progress: 100%|██████████| 20/20 [15:17<00:00, 45.85s/it]
 61%|██████    | 25154/41319 [05:46<03:42, 72.59it/s]
 80%|████████  | 33236/41319 [01:26<00:21, 382.56it/s]   


generation  28: 


Inference Progress: 100%|██████████| 20/20 [15:21<00:00, 46.06s/it]
 61%|██████    | 25154/41319 [05:37<03:37, 74.44it/s]
 80%|████████  | 33236/41319 [01:26<00:20, 385.69it/s]   


generation  29: 


Inference Progress: 100%|██████████| 20/20 [15:16<00:00, 45.84s/it]
 61%|██████    | 25154/41319 [06:02<03:52, 69.48it/s]  
 80%|████████  | 33236/41319 [01:26<00:21, 384.27it/s]   


generation  30: 


Inference Progress: 100%|██████████| 20/20 [15:19<00:00, 45.98s/it]
 61%|██████    | 25154/41319 [05:42<03:40, 73.48it/s]
 80%|████████  | 33236/41319 [01:25<00:20, 386.69it/s]   


generation  31: 


Inference Progress: 100%|██████████| 20/20 [15:19<00:00, 45.97s/it]
 61%|██████    | 25154/41319 [05:49<03:44, 71.97it/s]
 80%|████████  | 33236/41319 [01:25<00:20, 387.00it/s]   


generation  32: 


Inference Progress: 100%|██████████| 20/20 [16:33<00:00, 49.68s/it]
 61%|██████    | 25154/41319 [06:03<03:53, 69.24it/s]
 80%|████████  | 33236/41319 [01:28<00:21, 374.84it/s]   


generation  33: 


Inference Progress: 100%|██████████| 20/20 [15:12<00:00, 45.63s/it]
 61%|██████    | 25154/41319 [05:43<03:41, 73.13it/s]
 80%|████████  | 33236/41319 [01:25<00:20, 387.98it/s]   


generation  34: 


Inference Progress: 100%|██████████| 20/20 [15:21<00:00, 46.06s/it]
 61%|██████    | 25154/41319 [05:27<03:30, 76.90it/s]
 80%|████████  | 33236/41319 [01:25<00:20, 387.22it/s]   


generation  35: 


Inference Progress: 100%|██████████| 20/20 [15:16<00:00, 45.81s/it]
 61%|██████    | 25154/41319 [05:56<03:49, 70.53it/s]  
 80%|████████  | 33236/41319 [01:27<00:21, 381.98it/s]   


generation  36: 


Inference Progress: 100%|██████████| 20/20 [15:21<00:00, 46.10s/it]
 61%|██████    | 25154/41319 [05:50<03:45, 71.80it/s]
 80%|████████  | 33236/41319 [01:25<00:20, 389.96it/s]   


generation  37: 


Inference Progress: 100%|██████████| 20/20 [15:12<00:00, 45.65s/it]
 61%|██████    | 25154/41319 [05:50<03:45, 71.76it/s]
 80%|████████  | 33236/41319 [01:25<00:20, 389.14it/s]   


generation  38: 


Inference Progress:  90%|█████████ | 18/20 [13:47<01:31, 45.81s/it]