In [1]:
# train.py

import os
import torch
import numpy as np
import warnings
warnings.filterwarnings(action='ignore')

from data_loader import load_data_1m
from feature_calculations import (
    resample_data, calculate_MA_data, calculate_ema_bollinger_bands, calculate_rsi,
    calculate_macd, calculate_stochastic_oscillator, calculate_adx, calculate_atr,
    calculate_obv, calculate_williams_r, base_feature_fn, cyclic_encode_fn, log_transform
)
from strategies import BB_fitness_fn, BB_MACD_fitness_fn
from dataset import make_dataset, replace_nan_with_zero
from train_functions_bi_cul_split import inference, fitness_fn, generation_valid, generation_test

from Prescriptor import Prescriptor
from Evolution.crossover import UniformCrossover, WeightedSumCrossover, DifferentialEvolutionOperator, CenDE_DOBLOperator
from Evolution.mutation import MultiplyNormalMutation, MultiplyUniformMutation, AddNormalMutation, AddUniformMutation, ChainMutation, FlipSignMutation
from Evolution.mutation import RandomValueMutation
from Evolution.selection import RouletteSelection, TournamentSelection, ParetoLexsortSelection
from Evolution import Evolution

In [25]:
# Load Data
# Number of leading 1-minute rows kept for this run.
N_ROWS_1M = 200000

# NOTE(review): absolute, machine-specific path -- consider moving it to a config cell.
data_1m = load_data_1m('/root/daily/bit/data/1min_bitusdt.pkl')
# Take an explicit copy of the slice. The frame is handed to several helpers
# further down and reassigned from their results; if any of them writes columns
# into it, a bare .iloc slice would raise pandas' SettingWithCopyWarning, which
# the blanket warnings filter at the top of the notebook would hide.
data_1m = data_1m.iloc[:N_ROWS_1M].copy()

# Resample data to 1D
data_1d = resample_data(data_1m, '1D')
# Keep the frame's index as a regular 'Close time' column, then drop the index
# in favour of a plain 0..n-1 range.
data_1d['Close time'] = data_1d.index
data_1d = data_1d.reset_index(drop=True)

# Apply Feature Calculations
# Every helper below is called with a frame and returns that frame first, followed
# by one or more extra values. The cyclic-encoding and base-feature extras are later
# concatenated into the feature column lists, so they are presumably lists of the
# column names each helper produced -- TODO confirm for the other helpers.
# The calls are order-dependent: each one receives the frame returned by the previous.

# For 1D Data (daily frame)
# NOTE(review): extra_str='_1d' presumably tags the generated column names -- confirm.
data_1d, base_feature_1d = base_feature_fn(data_1d, extra_str='_1d', alpha=10)
data_1d, cyclice_encoding_1d = cyclic_encode_fn(data_1d, 'Close time', 'day_of_week')

# For 1M Data ("1M" here is the 1-minute frame loaded from the 1min pickle, not monthly)
# Window arguments are bare integers; presumably counted in 1-minute rows -- TODO confirm.
data_1m, bb_cols, bb_cols_rel = calculate_ema_bollinger_bands(data_1m, 240)
data_1m, macd_cols = calculate_macd(data_1m, 60, 600, 240)
data_1m, base_feature = base_feature_fn(data_1m, alpha=100)
data_1m, cyclice_encoding = cyclic_encode_fn(data_1m, 'Open time')

# Two 'EMA' moving-average passes: a short one (60) and a long one (180).
data_1m, short_ma_cols, short_ma_cols_rel = calculate_MA_data(data_1m, 60, 'EMA')
data_1m, long_ma_cols, long_ma_cols_rel = calculate_MA_data(data_1m, 180, 'EMA')

# Disabled alternative configuration with shorter windows (20 / 60 EMA, 60 Bollinger):
# data_1m, short_ma_cols, short_ma_cols_rel = calculate_MA_data(data_1m, 20, 'EMA')
# data_1m, long_ma_cols, long_ma_cols_rel = calculate_MA_data(data_1m, 60, 'EMA')
# data_1m, _, _ = calculate_ema_bollinger_bands(data_1m, 60)

# Prepare Feature Columns
# Raw columns earmarked for removal (not referenced again in this cell).
drop_column = [
    'Open time',
    'Close time',
    'Quote asset volume',
    'Ignore',
    'Number of trades',
    'Taker buy base asset volume',
    'Taker buy quote asset volume',
]

# Raw volume / trade-count columns placed at the front of both feature lists.
test_column = [
    'Quote asset volume',
    'Number of trades',
    'Taker buy base asset volume',
    'Taker buy quote asset volume',
]

# Final column order: raw columns, then cyclic encodings, then base features.
# Excluding obv and atr
feature_column = [*test_column, *cyclice_encoding, *base_feature]
feature_column_1d = [*test_column, *cyclice_encoding_1d, *base_feature_1d]




# Entry signals: BB_MACD_fitness_fn's three results are unpacked as entry
# positions, patience values and entry indices.
# Disabled alternatives:
# bb_entry_pos_list, patience_list, bb_entry_index_list = BB_fitness_fn(data_1m)
# bb_macd_entry_pos_list, patience_list, bb_macd_entry_index_list = BB_MACD_fitness_fn(data_1m, 60, 20, 60)
(
    bb_macd_entry_pos_list,
    patience_list,
    bb_macd_entry_index_list,
) = BB_MACD_fitness_fn(data_1m, 240, 60, 180)

# Prepare Dataset
# Windows of 240 rows (1-minute frame) and 60 rows (daily frame) are requested;
# the stacking step further down relies on exactly these lengths.
data_tensor = make_dataset(
    data_1m,
    data_1d,
    using_column=feature_column,
    using_column_1d=feature_column_1d,
    window_size=240,
    window_size_1d=60,
    entry_pos_list=bb_macd_entry_pos_list,
    patience_list=patience_list,
    use_1d_data=True,
)

# Keep every entry label except 'hold', as a NumPy array.
_entry_pos_arr = np.array(bb_macd_entry_pos_list)
entry_pos_list = _entry_pos_arr[_entry_pos_arr != 'hold']

# Collect only the samples whose two windows are complete (240 rows for the
# 1-minute window, 60 rows for the daily window); count the rest as skipped.
# NOTE(review): skipped samples are counted but entry_pos_list is not filtered
# to match -- confirm the two stay aligned whenever skip_data_cnt > 0.
windows_1m, windows_1d = [], []
skip_data_cnt = 0
for data in data_tensor:
    if len(data[0]) != 240 or len(data[1]) != 60:
        skip_data_cnt += 1
        continue
    windows_1m.append(torch.from_numpy(data[0]))
    windows_1d.append(torch.from_numpy(data[1]))

# Stack along a new leading sample axis, then zero out NaNs.
dataset_1m = replace_nan_with_zero(torch.stack(windows_1m, dim=0))
dataset_1d = replace_nan_with_zero(torch.stack(windows_1d, dim=0))

def _scale_by_last_step(windows, n_channels=4):
    """Scale the leading feature channels of each sample by their last-step value.

    For every sample (axis 0), the first ``n_channels`` features (axis 2) are
    divided by that sample's value at the final time step (axis 1), so the last
    step of those channels becomes 1 wherever its value is non-zero. The tensor
    is modified in place and also returned.

    A zero last-step value would turn the division into inf (x/0) or NaN (0/0).
    Because NaN cleaning has already run before this step, such values would
    otherwise survive into the dataset; they are mapped to 0 here.

    NOTE(review): which features sit at indices 0..n_channels-1 depends on the
    column ordering used by make_dataset -- confirm these are the intended ones.
    """
    head = windows[:, :, :n_channels]
    scaled = head / head[:, -1].unsqueeze(dim=1)
    windows[:, :, :n_channels] = torch.nan_to_num(scaled, nan=0.0, posinf=0.0, neginf=0.0)
    return windows


dataset_1m = _scale_by_last_step(dataset_1m)
dataset_1d = _scale_by_last_step(dataset_1d)

100%|██████████| 200000/200000 [00:22<00:00, 8705.25it/s]


In [36]:
# Inspect the dimensions of the stacked 1-minute dataset tensor.
dataset_1m.shape

torch.Size([1394, 240, 14])

In [37]:
# Inspect the dimensions of the stacked daily dataset tensor.
dataset_1d.shape

torch.Size([1394, 60, 14])