In [1]:
# train.py

import os
import torch
import numpy as np
import warnings
import pickle
warnings.filterwarnings(action='ignore')

from data_loader import load_data_1m
from feature_calculations_2 import (
    resample_data, calculate_MA_data, calculate_ema_bollinger_bands, calculate_rsi,
    calculate_macd, calculate_stochastic_oscillator, calculate_adx, calculate_atr, calculate_volume,
    calculate_obv, calculate_williams_r, base_feature_fn, calculate_volatility_features, cyclic_encode_fn, calculate_support_resistance_numba, log_transform
)
from strategies import BB_fitness_fn, BB_MACD_fitness_fn, simple_fitness_fn, BB_MACD_EMA_RSI_fitness_fn
from dataset import make_dataset, replace_nan_with_zero
from train_functions_bi_cul import inference, fitness_fn, generation_valid, generation_test

from Prescriptor import Prescriptor, CryptoModelTCN
from Evolution.crossover import UniformCrossover, WeightedSumCrossover, DifferentialEvolutionOperator, CenDE_DOBLOperator, SkipCrossover
from Evolution.mutation import MultiplyNormalMutation, MultiplyUniformMutation, AddNormalMutation, AddUniformMutation, ChainMutation, FlipSignMutation
from Evolution.mutation import RandomValueMutation
from Evolution.selection import RouletteSelection, TournamentSelection, ParetoLexsortSelection
from Evolution import Evolution

In [2]:
# Load Data
data_1m = load_data_1m('/root/daily/bit/data/1min_bitusdt.pkl')
# # data_1m = data_1m.iloc[:200000]

# # 각 지표별 window 설정 및 계산
# ma_windows = [5, 20, 60]
# bb_windows = [5, 20, 60]
# macd_windows = [(60, 600, 240), (30, 300, 120), (6, 13, 4)]
# rsi_windows = [7, 20, 60]
# stoch_windows = [(240, 60), (120, 30), (9, 3)]
# adx_windows = [60, 20, 7]
# atr_windows = [60, 20, 7]
# williams_windows = [60, 20, 7]
# sr_windows =  [120, 60, 20]

# all_ma_cols, all_ma_cols_rel = [], []
# all_bb_cols, all_bb_cols_rel = [], []
# all_macd_cols = []
# all_rsi_cols = []
# all_stoch_cols = []
# all_adx_cols = []
# all_atr_cols = []
# all_will_cols = []
# all_sr_cols = []
 
# for ws in ma_windows:
#     data_1m, ma_cols, ma_cols_rel = calculate_MA_data(data_1m, ws, 'MA')
#     all_ma_cols.extend(ma_cols)
#     all_ma_cols_rel.extend(ma_cols_rel)
 
# data_1m, _, __ = calculate_MA_data(data_1m, 180, 'MA')
# for ws in bb_windows:
#     data_1m, bb_cols, bb_cols_rel = calculate_ema_bollinger_bands(data_1m, ws)

# for short_period, long_period, signal_period in macd_windows:
#     data_1m, macd_cols = calculate_macd(data_1m, short_period, long_period, signal_period)
#     all_macd_cols.extend(macd_cols)
 
# for ws in rsi_windows:
#     data_1m, rsi_cols = calculate_rsi(data_1m, window=ws)
#     all_rsi_cols.extend(rsi_cols)
 
# for k_period, d_period in stoch_windows:
#     data_1m, stoch_cols = calculate_stochastic_oscillator(data_1m, k_period, d_period)
#     all_stoch_cols.extend(stoch_cols)
 
# for ws in adx_windows:
#     data_1m, adx_cols = calculate_adx(data_1m, ws)
#     all_adx_cols.extend(adx_cols)
 
# for ws in atr_windows:
#     data_1m, atr_cols = calculate_atr(data_1m, ws)
#     all_atr_cols.extend(atr_cols)
 
# for ws in williams_windows:
#     data_1m, will_cols = calculate_williams_r(data_1m, ws)
#     all_will_cols.extend(will_cols)
 
# for ws in sr_windows:
#     data_1m, sr_col = calculate_support_resistance_numba(data_1m, window=ws)
#     if isinstance(sr_col, list):
#         all_sr_cols.extend(sr_col)
#     else:
#         all_sr_cols.append(sr_col)

# test_column = ['Quote asset volume', 'Number of trades', 'Taker buy base asset volume',
#                'Taker buy quote asset volume']

# # 기본 피처와 사이클릭 인코딩 계산 (window size와 무관한 경우)
# data_1m, base_feature = base_feature_fn(data_1m, alpha=100)
# data_1m, volume_feature = calculate_volume(data_1m, window_size=240, volume_column_list=test_column)
# data_1m, volatility_cols = calculate_volatility_features(data_1m, window=240, alpha=100)
# data_1m, cyclic_encoding = cyclic_encode_fn(data_1m, 'Open time')


# # 예시로 일부 test용 컬럼 정의
# drop_column = [
#     'Open time', 'Close time', 'Quote asset volume', 'Ignore',
#     'Number of trades', 'Taker buy base asset volume', 'Taker buy quote asset volume'
# ]


# # for cloumn in test_column:
# #     data_1m[cloumn] = log_transform(data_1m[cloumn])

# # 최종 feature 컬럼을 정리합니다.
# feature_column = (
#     test_column +
#     cyclic_encoding +
#     all_ma_cols_rel +
#     all_bb_cols_rel +
#     all_rsi_cols +
#     all_macd_cols +
#     all_stoch_cols +
#     all_adx_cols +
#     all_will_cols +
#     all_sr_cols +
#     volatility_cols + 
#     volume_feature + 
#     base_feature
# )
 
 
# # bb_entry_pos_list, patience_list, bb_entry_index_list = BB_fitness_fn(data_1m)
# # bb_macd_entry_pos_list, patience_list, bb_macd_entry_index_list = BB_MACD_fitness_fn(data_1m, 60, 20, 60)
# # bb_macd_entry_pos_list, patience_list, bb_macd_entry_index_list = simple_fitness_fn(data_1m, 240, 60, 180)
# # bb_macd_entry_pos_list, patience_list, bb_macd_entry_index_list = BB_MACD_fitness_fn(data_1m, 60, 20, 60)
# bb_macd_entry_pos_list, patience_list, bb_macd_entry_index_list = BB_MACD_EMA_RSI_fitness_fn(data_1m, 20, 20, 60, 60)

# # Prepare Dataset
# data_tensor = make_dataset(
#     data_1m,
#     using_column=feature_column,
#     window_size=1,
#     entry_pos_list=bb_macd_entry_pos_list,
#     patience_list=patience_list,

# )
# entry_pos_list = np.array(bb_macd_entry_pos_list)[np.array(bb_macd_entry_pos_list) != 'hold']

# dataset_1m = []
# skip_data_cnt = 0
# for data in data_tensor:
#     if type(data[0]) == np.ndarray:
#         dataset_1m.append(torch.from_numpy(data[0]).unsqueeze(dim=0))
 
#     else:
#         skip_data_cnt += 1
# dataset_1m = torch.cat(dataset_1m, dim=0)

# # # Avoid division by zero by replacing zero denominators with a small epsilon value
# # epsilon = 1e-10
# # dataset_1m[:, :, :4] = dataset_1m[:, :, :4] / (torch.mean(dataset_1m[:, :, :4], dim=0).unsqueeze(dim=1) + epsilon)

# dataset_1m = replace_nan_with_zero(dataset_1m)

# import pickle

# data_to_save = {
#     'dataset_1m': dataset_1m.squeeze(dim=1),
#     'skip_data_cnt': skip_data_cnt,
#     'entry_pos_list': entry_pos_list,
#     'bb_macd_entry_pos_list': bb_macd_entry_pos_list,
#     'bb_macd_entry_index_list': bb_macd_entry_index_list
# }

# with open('/root/daily/bit_5/backup_feature_data/data.pkl', 'wb') as f:
#     pickle.dump(data_to_save, f)

In [3]:
# Load data from the file into separate variables
with open('/root/daily/bit_5/backup_feature_data/data.pkl', 'rb') as f:
    loaded_data = pickle.load(f)

dataset_1m = loaded_data['dataset_1m']
# dataset_1d = loaded_data['dataset_1d']
skip_data_cnt = loaded_data['skip_data_cnt']
entry_pos_list = loaded_data['entry_pos_list']
bb_macd_entry_pos_list = loaded_data['bb_macd_entry_pos_list']
bb_macd_entry_index_list = loaded_data['bb_macd_entry_index_list']

In [4]:
valid_skip_data_cnt = int(len(dataset_1m)*0.6) + skip_data_cnt
test_skip_data_cnt = int(len(dataset_1m)*0.8) + skip_data_cnt


In [5]:
# Evolution Setup
# 전역적으로 기울기 계산 비활성화
torch.set_grad_enabled(False)
torch.backends.cudnn.benchmark = True

chromosomes_size=30000
window_size=240
EPOCH = 5
gen_loop=50
best_size=30000
elite_size=6000
profit_init=1
device = 'cuda:1'
group = 30000
start_gen = 0
best_profit = None
best_chromosomes = None

prescriptor = Prescriptor(input_dim=54, 
                fc_hidden_size=16, 
                output_dim=8, 
                after_input_dim=11, 
                after_hidden_dim=16, 
                after_output_dim=5, 
                num_blocks=group).to(device).eval()

# if i == 1:
#     start_gen=0

# if i == 0:
#     prescriptor.load_state_dict(state_dict['prescriptor_state_dict'],strict=True)

total_param = sum(p.numel() for p in prescriptor.parameters())
print(f"Total parameters: {total_param}")

selection = RouletteSelection(elite_num=6000, parents_num=12000, minimize=False)
# selection = ParetoLexsortSelection(elite_num=2000, parents_num=4000,
#                                     priority=[], prior_ratio= [],
#                                     prob_method= 'softmax',minimize=False)
# crossover = DifferentialEvolutionOperator()
# crossover = UniformCrossover(num_parents=4)
# crossover = CenDE_DOBLOperator()
mutation = ChainMutation([RandomValueMutation(mut_prob=0.05), AddUniformMutation(mut_prob=0.1)])
# crossover = UniformCrossover(num_parents=1)
crossover = DifferentialEvolutionOperator()
# mutation = AddNormalMutation(mut_prob=0.1)
evolution = Evolution(
    prescriptor=prescriptor,
    selection=selection,
    crossover=crossover,
    mutation=mutation,
    group_size=group
)

best_chromosomes, best_profit = generation_valid(
    data_1m=data_1m,
    dataset_1m=dataset_1m,
    # dataset_1d=dataset_1d,
    prescriptor=prescriptor,
    evolution=evolution,
    skip_data_cnt=skip_data_cnt,
    valid_skip_data_cnt=valid_skip_data_cnt,
    test_skip_data_cnt=test_skip_data_cnt,
    chromosomes_size=chromosomes_size,
    window_size=window_size,
    gen_loop=gen_loop,
    best_size=best_size,
    elite_size=elite_size,
    profit_init=profit_init,
    entry_index_list=bb_macd_entry_index_list,
    entry_pos_list=entry_pos_list,
    best_profit=best_profit,
    best_chromosomes=best_chromosomes,
    start_gen=start_gen,
    device=device
)

Total parameters: 197370000
generation  0: 


RuntimeError: Given groups=30000, weight of size [1920000, 54, 1], expected input[128, 1440000, 1] to have 1620000 channels, but got 1440000 channels instead