# Import libraries, set config

In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import sys
sys.path.append('..')

import json
import joblib
from collections import defaultdict
import numpy as np
import pandas as pd
from glob import glob
from optimizer.optimizer import Optimizer
from os import environ
from datetime import timedelta
from tqdm.auto import tqdm

# Set environment variable
environ["ENV"] = "optimize"

from config.config import ConfigFactory
from indicators import indicators

import lightgbm as lgb

from eli5.sklearn import PermutationImportance
from shaphypetune import BoostBoruta

from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.model_selection import GroupKFold, StratifiedGroupKFold, GroupShuffleSplit, train_test_split
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.metrics import log_loss, mean_squared_error, precision_score

import torch

from colorama import Style, Fore


class CFG:
    ''' Notebook-wide settings for dataset construction and model training. '''
    # Relative price move that defines the classification target: in create_train_df a signal
    # is labelled 1 when price reaches close * cls_target_ratio in the trade direction before
    # it moves to close * (2 - cls_target_ratio) against it
    cls_target_ratio = 1.031
    # Which training set to use: 'both' concatenates buy and sell, 'buy' uses buy only,
    # any other value falls through to sell only
    ttype = 'both'
    # Only signals whose `pattern` is in this list are kept in the final dataframe
    patterns_to_filter = ['MACD', 'STOCH_RSI']
    # NOTE(review): the flags below are not used in the visible part of the notebook;
    # presumably they toggle feature selection and NN / LightGBM training and set the
    # cross-validation repeats / folds - confirm against the training cells
    select_features = False
    train_NN = False
    train_LGBM = True
    n_repeats = 1
    n_folds = 5

# Let pandas render up to 500 columns / rows, so the wide feature frames built below are not truncated
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)

# Register tqdm with pandas (adds the progress_* variants such as progress_apply)
tqdm.pandas()

# Load MACD buy data

In [3]:
# Timeframes of the signal candles and of the higher-timeframe indicators.
# These names (and opt_limit) are reused by the sell cell below.
work_timeframe = '1h'
higher_timeframe = '4h'
opt_limit = 100000

# Flags forwarded to the optimizer: `clean` goes to the Optimizer constructor, `load` to optimize().
# NOTE(review): presumably they control wiping previous statistics and (re)loading candle data - confirm in Optimizer
load = True
clean = True

ttype = 'buy'
pattern = ['MACD']
# indicator_list is the same list object as pattern (an alias, not a copy)
indicator_list = pattern
indicator_list_higher = ['Trend', 'MACD']

print(f'Timeframe is {work_timeframe}/{higher_timeframe}, trade type is {ttype}')

# Get configs
# Built from the current environment (ENV was set to "optimize" in the import cell),
# then overridden with the indicator lists and timeframes chosen above
configs = ConfigFactory.factory(environ).configs
configs['Indicator_list'] = indicator_list
configs['Higher_TF_indicator_list'] = indicator_list_higher
configs['Timeframes']['work_timeframe'] = work_timeframe
configs['Timeframes']['higher_timeframe'] = higher_timeframe

# Parameter grid for the optimizer: one value per MACD parameter, i.e. a single combination
optim_dict = {
                'MACD': {
                          "fastperiod": [14],
                          "slowperiod": [28],
                          "signalperiod": [11]
                        }
             }

# Run the optimizer for the buy side.
# NOTE(review): the create_train_df cell later reads 'signal_stat/buy_stat_1h.pkl';
# presumably that file is produced by this call ('ml' mode) - confirm in Optimizer.optimize
opt = Optimizer(pattern, optim_dict, clean, **configs)
stat = opt.optimize(pattern, ttype, opt_limit, load, 'ml')

Timeframe is 1h/4h, trade type is buy
Number of combinations is 1


100%|██████████| 1/1 [01:04<00:00, 64.80s/it]


# Load MACD sell data

In [4]:
# Same pipeline as the buy cell, for the sell side. It depends on work_timeframe,
# higher_timeframe and opt_limit defined in the buy cell, so that cell must run first.
# NOTE(review): load / clean are switched off here, presumably because the buy cell
# already loaded the candle data and cleaned the previous statistics - confirm in Optimizer
load = False
clean = False

ttype = 'sell'
pattern = ['MACD']
# indicator_list is the same list object as pattern (an alias, not a copy)
indicator_list = pattern
indicator_list_higher = ['Trend', 'MACD']

print(f'Timeframe is {work_timeframe}/{higher_timeframe}, trade type is {ttype}')

# Get configs
# Rebuilt from the environment and overridden with the indicator lists and timeframes
configs = ConfigFactory.factory(environ).configs
configs['Indicator_list'] = indicator_list
configs['Higher_TF_indicator_list'] = indicator_list_higher
configs['Timeframes']['work_timeframe'] = work_timeframe
configs['Timeframes']['higher_timeframe'] = higher_timeframe

# Parameter grid for the optimizer: one value per MACD parameter, i.e. a single combination
optim_dict = {
                'MACD': {
                          "fastperiod": [14],
                          "slowperiod": [28],
                          "signalperiod": [11]
                        }
             }

# Run the optimizer for the sell side.
# NOTE(review): the create_train_df cell later reads 'signal_stat/sell_stat_1h.pkl';
# presumably that file is produced by this call ('ml' mode) - confirm in Optimizer.optimize
opt = Optimizer(pattern, optim_dict, clean, **configs)
stat = opt.optimize(pattern, ttype, opt_limit, load, 'ml')

Timeframe is 1h/4h, trade type is sell
Number of combinations is 1


100%|██████████| 1/1 [01:57<00:00, 117.25s/it]


# Prepare data

In [5]:
# Set environment variable
# ENV is switched from "optimize" (set in the import cell) to "1h_4h" before the configs are rebuilt.
# NOTE(review): presumably ENV selects which config set ConfigFactory loads - confirm in config.config
environ["ENV"] = "1h_4h"

# Get configs
# These configs are the ones passed to the indicator classes in add_indicators
configs = ConfigFactory.factory(environ).configs

def get_file(ticker):
    ''' Find candle files by ticker name, file names can be in different formats.

    The naming schemes are tried in order: "<ticker>", "<base>-<quote>" and
    "<base>-<quote>-SWAP", where <quote> is the last four characters of the ticker.
    A scheme is used only if both its 1h and its 4h pickle exist.

    Returns a (df_1h, df_4h) tuple, or (None, None) if no scheme has both files.
    '''
    base, quote = ticker[:-4], ticker[-4:]
    name_variants = (f'{ticker}', f'{base}-{quote}', f'{base}-{quote}-SWAP')

    for name in name_variants:
        try:
            # 1h is read first; a missing file for either timeframe rejects the whole scheme
            frames = tuple(
                pd.read_pickle(f'../optimizer/ticker_dataframes/{name}_{timeframe}.pkl')
                for timeframe in ('1h', '4h')
            )
        except FileNotFoundError:
            continue
        return frames

    return None, None

def add_indicators(df, ttype, configs):
    ''' Pass the candle dataframe through the RSI, STOCH, Trend, MACD and ATR indicators.

    Each indicator class is built with (ttype, configs) and applied in that order through
    get_indicator(df, '', '', 0); the dataframe it returns is fed to the next indicator.
    Returns the dataframe produced by the last indicator.
    '''
    # SMA is deliberately not part of the list (it is disabled in this pipeline)
    for indicator_name in ('RSI', 'STOCH', 'Trend', 'MACD', 'ATR'):
        indicator = getattr(indicators, indicator_name)(ttype, configs)
        df = indicator.get_indicator(df, '', '', 0)
    return df

def _label_signal(candles, t, ttype, target_offset):
    ''' Compute the classification target of a single signal.

    The candles are inspected hour by hour for up to target_offset hours after the signal time t.
    For ttype = buy the target is 1 if a candle high rises above close * CFG.cls_target_ratio
    before any candle low drops below close * (2 - CFG.cls_target_ratio); for ttype = sell the
    two levels swap roles. If the opposite level is reached first (it is checked first within
    a candle) or no level is reached during the period, the target is 0.

    Returns 1 or 0, or None if the signal can't be labelled because the signal candle or one
    of the inspected following candles is not present exactly once.
    '''
    candle_times = candles['time']
    close_price = candles.loc[candle_times == t, 'close'].values
    if close_price.size != 1:
        return None

    upper_level = close_price[0] * CFG.cls_target_ratio
    lower_level = close_price[0] * (2 - CFG.cls_target_ratio)

    for hours_ahead in range(1, target_offset + 1):
        time_next = t + timedelta(hours=hours_ahead)
        next_candle = candles.loc[candle_times == time_next, ['high', 'low']]
        if len(next_candle) != 1:
            return None

        broke_up = next_candle['high'].iloc[0] > upper_level
        broke_down = next_candle['low'].iloc[0] < lower_level

        # the adverse move is checked first, so a candle that touches both levels counts as a loss
        if (ttype == 'buy' and broke_down) or (ttype == 'sell' and broke_up):
            return 0
        if (ttype == 'buy' and broke_up) or (ttype == 'sell' and broke_down):
            return 1

    return 0


def create_train_df(df, ttype, configs, target_offset, first, last, step):
    ''' Create train dataset from signal statistics and ticker candle data.

    df            - signal statistics, must contain the 'ticker', 'time' and 'pattern' columns
    ttype         - trade type of the signals, 'buy' or 'sell'
    configs       - configs passed to the indicator classes in add_indicators
    target_offset - number of hours after the signal that are inspected to compute the target
    first, last, step - previous candles to collect, taken at
                        range(first, last + step, step) hours before the signal

    Returns a dataframe with one row per labelled signal: the columns of the signal candle,
    the columns of every previous candle suffixed with '_prev_<hours>', and the 'ticker',
    'pattern', 'target' and 'ttype' columns. Previous candles missing from the history leave
    NaN values in the row, so the caller is expected to drop incomplete rows. Tickers without
    candle files and signals that can't be labelled are skipped. An empty dataframe is
    returned if no signal could be used.
    '''
    collected_rows = []

    for ticker in tqdm(df['ticker'].unique()):
        # get signals with current ticker
        signal_df = df[df['ticker'] == ticker]

        # load candle history of this ticker
        tmp_df_1h, _ = get_file(ticker)
        if tmp_df_1h is None:
            continue

        # add indicators; tickers whose indicators fail with a TypeError are skipped
        try:
            tmp_df_1h = add_indicators(tmp_df_1h, ttype, configs)
        except TypeError:
            continue

        candle_times = tmp_df_1h['time']
        signals = zip(signal_df['time'].to_list(), signal_df['pattern'].to_list())

        for t, pattern in signals:
            # label first: signals that can't be labelled need no feature row at all
            target = _label_signal(tmp_df_1h, t, ttype, target_offset)
            if target is None:
                continue

            row = tmp_df_1h.loc[candle_times == t, :].reset_index(drop=True)

            # add historical data for current ticker
            for hours_back in range(first, last + step, step):
                time_prev = t + timedelta(hours=-hours_back)
                row_prev = tmp_df_1h.loc[candle_times == time_prev, :].reset_index(drop=True)
                row_prev.columns = [c + f'_prev_{hours_back}' for c in row_prev.columns]
                # the first column (the timestamp of the previous candle) is not kept
                row = pd.concat([row, row_prev.iloc[:, 1:]], axis=1)
                # assigned inside the loop so that both columns keep their position
                # right after the first block of previous-candle columns
                row['ticker'] = ticker
                row['pattern'] = pattern

            row['target'] = target
            row['ttype'] = ttype
            collected_rows.append(row)

    if not collected_rows:
        return pd.DataFrame()

    # a single concat instead of growing the dataset row by row
    return pd.concat(collected_rows)

# for how long time (in hours) we want to predict
target_offset = 96
# first previous data point to collect for model training (value represents number of hours before signal point)
first = 4
# last previous data point to collect for model training (value represents number of hours before signal point)
last = 192
# step of previous data points collecting (total number of points to collect is (last - first + step) / step)
step = 4

def build_train_set(trade_type, configs, target_offset, first, last, step):
    ''' Build the train dataset of one trade type ('buy' or 'sell').

    Reads the signal statistics from signal_stat/<trade_type>_stat_1h.pkl, turns them into
    train rows with create_train_df, drops the rows that contain NaN values and returns
    the result sorted by time with a fresh index.
    '''
    # dataset with the signal statistics
    signal_stat = pd.read_pickle(f'signal_stat/{trade_type}_stat_1h.pkl')
    # dataset for model train
    train_set = create_train_df(signal_stat, trade_type, configs, target_offset, first, last, step)
    # every row produced by create_train_df carries the requested trade type,
    # so no cross-filtering between the buy and sell datasets is needed
    return train_set.dropna().sort_values('time').reset_index(drop=True)

# Buy
train_buy = build_train_set('buy', configs, target_offset, first, last, step)

# Sell
train_sell = build_train_set('sell', configs, target_offset, first, last, step)

# train_buy.to_pickle(f'signal_stat/train_buy_{last}.pkl')
# train_sell.to_pickle(f'signal_stat/train_sell_{last}.pkl')


  0%|          | 0/317 [00:00<?, ?it/s]

The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.
The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.
The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.
The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.
The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.
The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty

  0%|          | 0/334 [00:00<?, ?it/s]

The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.
The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.
The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.
The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.
The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.
The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty

# Check pattern / target distribution

In [6]:
# Number of signals per trade type and target / pattern distribution.
# The counts in the comments are from the run shown in the output of this cell (cls_target_ratio = 1.031).
display(train_buy['ttype'].value_counts()) # 1707
display(train_sell['ttype'].value_counts()) # 3431
display(train_buy[['target', 'pattern']].value_counts()) # 1.031 -- 1093 / 614
display(train_sell[['target', 'pattern']].value_counts()) # 1.031 -- 1974 / 1457

buy    1707
Name: ttype, dtype: int64

sell    3431
Name: ttype, dtype: int64

target  pattern
1       MACD       1093
0       MACD        614
dtype: int64

target  pattern
1       MACD       1974
0       MACD       1457
dtype: int64

# Check target correctness

In [7]:
# i = 654

# x = train_sell.loc[train_sell.target == 0, ['ticker', 'ttype', 'pattern', 'time', 'close', 'target']]
# y = x.iloc[i]
# low_price, high_price = y['close'] * (2 - CFG.cls_target_ratio), y['close'] * CFG.cls_target_ratio
# print(y['ticker'], y['time'], y['ttype'])

# tmp_df_1h = pd.read_pickle(f'../optimizer/ticker_dataframes/{y["ticker"]}_1h.pkl')
# # tmp_df_1h = pd.read_pickle(f'../optimizer/ticker_dataframes/{y["ticker"][:-4]}-{y["ticker"][-4:]}_1h.pkl')
# # tmp_df_1h = pd.read_pickle(f'../optimizer/ticker_dataframes/{y["ticker"][:-4]}-{y["ticker"][-4:]}-SWAP_1h.pkl')

# tmp_df_1h['low_price'] = low_price
# tmp_df_1h['high_price'] = high_price
# idx = tmp_df_1h[tmp_df_1h['time'] == y['time']].index[0]

# tmp_df_1h = tmp_df_1h.iloc[idx:idx+target_offset+1][['time', 'close', 'high', 'high_price', 'low', 'low_price']]

# if y['ttype'] == 'buy':
#     tmp_df_1h['signal'] = tmp_df_1h['high'] > tmp_df_1h['high_price']
#     tmp_df_1h['anti_signal'] = tmp_df_1h['low'] < tmp_df_1h['low_price']
# else:
#     tmp_df_1h['signal'] = tmp_df_1h['low'] < tmp_df_1h['low_price']
#     tmp_df_1h['anti_signal'] = tmp_df_1h['high'] > tmp_df_1h['high_price']

# tmp_df_1h

# Concat buy and sell datasets

In [8]:
# last previous data point to collect for model training (value represents number of hours before signal point)
# (here it is only referenced by the commented-out cache paths)
last = 192

# Pick the dataset(s) requested by CFG.ttype: 'buy', 'both', anything else means sell only.
# Cached alternative: pd.read_pickle(f'signal_stat/train_buy_{last}.pkl') and
# pd.read_pickle(f'signal_stat/train_sell_{last}.pkl')
if CFG.ttype == 'buy':
    df = train_buy.copy()
elif CFG.ttype == 'both':
    df = pd.concat([train_buy, train_sell])
else:
    df = train_sell.copy()

# keep only the signals whose pattern is listed in CFG.patterns_to_filter
df = df.loc[df['pattern'].isin(CFG.patterns_to_filter)]

display(df.head())
display(df.shape)


Unnamed: 0,time,open,high,low,close,volume,rsi,stoch_slowk,stoch_slowd,stoch_slowk_dir,stoch_slowd_dir,stoch_diff,linear_reg,linear_reg_angle,macd,macdsignal,macdhist,macd_dir,macdsignal_dir,atr,close_smooth,open_prev_4,high_prev_4,low_prev_4,close_prev_4,volume_prev_4,rsi_prev_4,stoch_slowk_prev_4,stoch_slowd_prev_4,stoch_slowk_dir_prev_4,stoch_slowd_dir_prev_4,stoch_diff_prev_4,linear_reg_prev_4,linear_reg_angle_prev_4,macd_prev_4,macdsignal_prev_4,macdhist_prev_4,macd_dir_prev_4,macdsignal_dir_prev_4,atr_prev_4,close_smooth_prev_4,ticker,pattern,open_prev_8,high_prev_8,low_prev_8,close_prev_8,volume_prev_8,rsi_prev_8,stoch_slowk_prev_8,stoch_slowd_prev_8,stoch_slowk_dir_prev_8,stoch_slowd_dir_prev_8,stoch_diff_prev_8,linear_reg_prev_8,linear_reg_angle_prev_8,macd_prev_8,macdsignal_prev_8,macdhist_prev_8,macd_dir_prev_8,macdsignal_dir_prev_8,atr_prev_8,close_smooth_prev_8,open_prev_12,high_prev_12,low_prev_12,close_prev_12,volume_prev_12,rsi_prev_12,stoch_slowk_prev_12,stoch_slowd_prev_12,stoch_slowk_dir_prev_12,stoch_slowd_dir_prev_12,stoch_diff_prev_12,linear_reg_prev_12,linear_reg_angle_prev_12,macd_prev_12,macdsignal_prev_12,macdhist_prev_12,macd_dir_prev_12,macdsignal_dir_prev_12,atr_prev_12,close_smooth_prev_12,open_prev_16,high_prev_16,low_prev_16,close_prev_16,volume_prev_16,rsi_prev_16,stoch_slowk_prev_16,stoch_slowd_prev_16,stoch_slowk_dir_prev_16,stoch_slowd_dir_prev_16,stoch_diff_prev_16,linear_reg_prev_16,linear_reg_angle_prev_16,macd_prev_16,macdsignal_prev_16,macdhist_prev_16,macd_dir_prev_16,macdsignal_dir_prev_16,atr_prev_16,close_smooth_prev_16,open_prev_20,high_prev_20,low_prev_20,close_prev_20,volume_prev_20,rsi_prev_20,stoch_slowk_prev_20,stoch_slowd_prev_20,stoch_slowk_dir_prev_20,stoch_slowd_dir_prev_20,stoch_diff_prev_20,linear_reg_prev_20,linear_reg_angle_prev_20,macd_prev_20,macdsignal_prev_20,macdhist_prev_20,macd_dir_prev_20,macdsignal_dir_prev_20,atr_prev_20,close_smooth_prev_20,open_prev_24,high_prev_24,low_prev_24,close_prev_24,volum
e_prev_24,rsi_prev_24,stoch_slowk_prev_24,stoch_slowd_prev_24,stoch_slowk_dir_prev_24,stoch_slowd_dir_prev_24,stoch_diff_prev_24,linear_reg_prev_24,linear_reg_angle_prev_24,macd_prev_24,macdsignal_prev_24,macdhist_prev_24,macd_dir_prev_24,macdsignal_dir_prev_24,atr_prev_24,close_smooth_prev_24,open_prev_28,high_prev_28,low_prev_28,close_prev_28,volume_prev_28,rsi_prev_28,stoch_slowk_prev_28,stoch_slowd_prev_28,stoch_slowk_dir_prev_28,stoch_slowd_dir_prev_28,stoch_diff_prev_28,linear_reg_prev_28,linear_reg_angle_prev_28,macd_prev_28,macdsignal_prev_28,macdhist_prev_28,macd_dir_prev_28,macdsignal_dir_prev_28,atr_prev_28,close_smooth_prev_28,open_prev_32,high_prev_32,low_prev_32,close_prev_32,volume_prev_32,rsi_prev_32,stoch_slowk_prev_32,stoch_slowd_prev_32,stoch_slowk_dir_prev_32,stoch_slowd_dir_prev_32,stoch_diff_prev_32,linear_reg_prev_32,linear_reg_angle_prev_32,macd_prev_32,macdsignal_prev_32,macdhist_prev_32,macd_dir_prev_32,macdsignal_dir_prev_32,atr_prev_32,close_smooth_prev_32,open_prev_36,high_prev_36,low_prev_36,close_prev_36,volume_prev_36,rsi_prev_36,stoch_slowk_prev_36,stoch_slowd_prev_36,stoch_slowk_dir_prev_36,stoch_slowd_dir_prev_36,stoch_diff_prev_36,linear_reg_prev_36,linear_reg_angle_prev_36,macd_prev_36,macdsignal_prev_36,macdhist_prev_36,macd_dir_prev_36,macdsignal_dir_prev_36,atr_prev_36,close_smooth_prev_36,open_prev_40,high_prev_40,low_prev_40,close_prev_40,volume_prev_40,rsi_prev_40,stoch_slowk_prev_40,stoch_slowd_prev_40,stoch_slowk_dir_prev_40,stoch_slowd_dir_prev_40,stoch_diff_prev_40,linear_reg_prev_40,linear_reg_angle_prev_40,macd_prev_40,macdsignal_prev_40,macdhist_prev_40,macd_dir_prev_40,macdsignal_dir_prev_40,atr_prev_40,close_smooth_prev_40,open_prev_44,high_prev_44,low_prev_44,close_prev_44,volume_prev_44,rsi_prev_44,stoch_slowk_prev_44,stoch_slowd_prev_44,stoch_slowk_dir_prev_44,stoch_slowd_dir_prev_44,stoch_diff_prev_44,linear_reg_prev_44,linear_reg_angle_prev_44,macd_prev_44,macdsignal_prev_44,macdhist_prev_44,macd_dir_prev_44,m
acdsignal_dir_prev_44,atr_prev_44,close_smooth_prev_44,open_prev_48,high_prev_48,low_prev_48,close_prev_48,volume_prev_48,rsi_prev_48,stoch_slowk_prev_48,...,linear_reg_angle_prev_144,macd_prev_144,macdsignal_prev_144,macdhist_prev_144,macd_dir_prev_144,macdsignal_dir_prev_144,atr_prev_144,close_smooth_prev_144,open_prev_148,high_prev_148,low_prev_148,close_prev_148,volume_prev_148,rsi_prev_148,stoch_slowk_prev_148,stoch_slowd_prev_148,stoch_slowk_dir_prev_148,stoch_slowd_dir_prev_148,stoch_diff_prev_148,linear_reg_prev_148,linear_reg_angle_prev_148,macd_prev_148,macdsignal_prev_148,macdhist_prev_148,macd_dir_prev_148,macdsignal_dir_prev_148,atr_prev_148,close_smooth_prev_148,open_prev_152,high_prev_152,low_prev_152,close_prev_152,volume_prev_152,rsi_prev_152,stoch_slowk_prev_152,stoch_slowd_prev_152,stoch_slowk_dir_prev_152,stoch_slowd_dir_prev_152,stoch_diff_prev_152,linear_reg_prev_152,linear_reg_angle_prev_152,macd_prev_152,macdsignal_prev_152,macdhist_prev_152,macd_dir_prev_152,macdsignal_dir_prev_152,atr_prev_152,close_smooth_prev_152,open_prev_156,high_prev_156,low_prev_156,close_prev_156,volume_prev_156,rsi_prev_156,stoch_slowk_prev_156,stoch_slowd_prev_156,stoch_slowk_dir_prev_156,stoch_slowd_dir_prev_156,stoch_diff_prev_156,linear_reg_prev_156,linear_reg_angle_prev_156,macd_prev_156,macdsignal_prev_156,macdhist_prev_156,macd_dir_prev_156,macdsignal_dir_prev_156,atr_prev_156,close_smooth_prev_156,open_prev_160,high_prev_160,low_prev_160,close_prev_160,volume_prev_160,rsi_prev_160,stoch_slowk_prev_160,stoch_slowd_prev_160,stoch_slowk_dir_prev_160,stoch_slowd_dir_prev_160,stoch_diff_prev_160,linear_reg_prev_160,linear_reg_angle_prev_160,macd_prev_160,macdsignal_prev_160,macdhist_prev_160,macd_dir_prev_160,macdsignal_dir_prev_160,atr_prev_160,close_smooth_prev_160,open_prev_164,high_prev_164,low_prev_164,close_prev_164,volume_prev_164,rsi_prev_164,stoch_slowk_prev_164,stoch_slowd_prev_164,stoch_slowk_dir_prev_164,stoch_slowd_dir_prev_164,stoch_diff_prev_164,li
near_reg_prev_164,linear_reg_angle_prev_164,macd_prev_164,macdsignal_prev_164,macdhist_prev_164,macd_dir_prev_164,macdsignal_dir_prev_164,atr_prev_164,close_smooth_prev_164,open_prev_168,high_prev_168,low_prev_168,close_prev_168,volume_prev_168,rsi_prev_168,stoch_slowk_prev_168,stoch_slowd_prev_168,stoch_slowk_dir_prev_168,stoch_slowd_dir_prev_168,stoch_diff_prev_168,linear_reg_prev_168,linear_reg_angle_prev_168,macd_prev_168,macdsignal_prev_168,macdhist_prev_168,macd_dir_prev_168,macdsignal_dir_prev_168,atr_prev_168,close_smooth_prev_168,open_prev_172,high_prev_172,low_prev_172,close_prev_172,volume_prev_172,rsi_prev_172,stoch_slowk_prev_172,stoch_slowd_prev_172,stoch_slowk_dir_prev_172,stoch_slowd_dir_prev_172,stoch_diff_prev_172,linear_reg_prev_172,linear_reg_angle_prev_172,macd_prev_172,macdsignal_prev_172,macdhist_prev_172,macd_dir_prev_172,macdsignal_dir_prev_172,atr_prev_172,close_smooth_prev_172,open_prev_176,high_prev_176,low_prev_176,close_prev_176,volume_prev_176,rsi_prev_176,stoch_slowk_prev_176,stoch_slowd_prev_176,stoch_slowk_dir_prev_176,stoch_slowd_dir_prev_176,stoch_diff_prev_176,linear_reg_prev_176,linear_reg_angle_prev_176,macd_prev_176,macdsignal_prev_176,macdhist_prev_176,macd_dir_prev_176,macdsignal_dir_prev_176,atr_prev_176,close_smooth_prev_176,open_prev_180,high_prev_180,low_prev_180,close_prev_180,volume_prev_180,rsi_prev_180,stoch_slowk_prev_180,stoch_slowd_prev_180,stoch_slowk_dir_prev_180,stoch_slowd_dir_prev_180,stoch_diff_prev_180,linear_reg_prev_180,linear_reg_angle_prev_180,macd_prev_180,macdsignal_prev_180,macdhist_prev_180,macd_dir_prev_180,macdsignal_dir_prev_180,atr_prev_180,close_smooth_prev_180,open_prev_184,high_prev_184,low_prev_184,close_prev_184,volume_prev_184,rsi_prev_184,stoch_slowk_prev_184,stoch_slowd_prev_184,stoch_slowk_dir_prev_184,stoch_slowd_dir_prev_184,stoch_diff_prev_184,linear_reg_prev_184,linear_reg_angle_prev_184,macd_prev_184,macdsignal_prev_184,macdhist_prev_184,macd_dir_prev_184,macdsignal_dir_prev_184,at
r_prev_184,close_smooth_prev_184,open_prev_188,high_prev_188,low_prev_188,close_prev_188,volume_prev_188,rsi_prev_188,stoch_slowk_prev_188,stoch_slowd_prev_188,stoch_slowk_dir_prev_188,stoch_slowd_dir_prev_188,stoch_diff_prev_188,linear_reg_prev_188,linear_reg_angle_prev_188,macd_prev_188,macdsignal_prev_188,macdhist_prev_188,macd_dir_prev_188,macdsignal_dir_prev_188,atr_prev_188,close_smooth_prev_188,open_prev_192,high_prev_192,low_prev_192,close_prev_192,volume_prev_192,rsi_prev_192,stoch_slowk_prev_192,stoch_slowd_prev_192,stoch_slowk_dir_prev_192,stoch_slowd_dir_prev_192,stoch_diff_prev_192,linear_reg_prev_192,linear_reg_angle_prev_192,macd_prev_192,macdsignal_prev_192,macdhist_prev_192,macd_dir_prev_192,macdsignal_dir_prev_192,atr_prev_192,close_smooth_prev_192,target,ttype
0,2022-12-31 15:00:00,145.9,146.1,145.8,145.9,987.752,53.009345,56.7718,52.945269,0.090417,0.057447,3.798186,14.683128,-5.54682,0.056229,0.050168,0.006061,0.215245,-0.006322,0.688059,145.7875,145.7,146.1,145.7,145.7,1903.836,50.059389,46.567718,44.094001,0.021193,-0.036528,0.110287,14.192272,-6.132961,0.051372,0.055049,-0.003677,0.0,0.004292,0.732651,145.625,XMRUSDT,MACD,145.8,145.8,145.4,145.5,376.951,47.257239,48.265306,55.963719,-0.114934,-0.098015,-6.997354,13.557113,-7.501018,0.052452,0.051774,0.000677,-0.169113,0.089785,0.785305,145.491667,146.0,146.2,145.1,145.6,2853.032,48.937855,76.479592,82.431973,-0.043146,0.000392,-2.598891,12.56259,-7.132906,0.112628,0.029734,0.082894,0.0,-0.540355,0.853049,145.466667,146.2,146.4,146.0,146.2,1344.549,58.508826,84.070295,76.173469,0.090709,0.096571,6.09858,12.399971,-4.016298,0.07021,-0.070136,0.140346,-1.03871,-0.215621,0.893081,145.391667,145.9,146.1,145.6,146.0,1574.559,56.624824,59.546485,49.927199,0.189311,0.125029,6.83753,12.354929,-5.288779,-0.100741,-0.164636,0.063896,-0.251482,-0.008322,0.916694,145.391667,145.3,145.6,144.0,144.7,9660.217,42.830224,35.676692,35.002785,0.037157,0.024478,0.985798,11.277749,-9.127296,-0.239328,-0.153281,-0.086048,0.0,0.173674,0.934629,145.5,144.7,145.2,144.4,144.8,1887.148,41.753422,32.807018,34.110276,-0.042184,-0.037332,-2.055138,9.412903,-8.247267,-0.187566,-0.067972,-0.119593,0.290716,1.336661,0.906863,145.695833,145.1,145.9,145.1,145.9,1346.509,51.832395,41.97995,38.721805,-0.016553,-0.09788,-2.778551,8.17287,-3.069163,-0.063568,0.010539,-0.074107,0.488023,-0.450739,0.890359,145.895833,145.5,145.9,145.2,145.7,1423.886,48.233616,53.467001,60.178223,-0.090037,-0.075892,-5.928721,7.74131,-3.18979,-0.004091,0.08669,-0.090781,-0.820875,-0.15545,0.907901,146.041667,146.1,146.4,144.8,145.2,2838.098,40.889721,78.142675,77.859271,0.06368,0.080836,4.832857,7.403734,-5.985498,0.094497,0.166991,-0.072494,-0.162175,-0.02982,0.909401,146.170833,146.4,146.7,146.1,146.5,6172.556,57.80805,61.6
85824,57.065012,0.088428,0.053506,4.113199,7.306483,-1.048946,0.160057,0.184365,-0.024307,0.0,-0.050522,0.911522,146.108333,145.4,146.1,144.0,145.4,6667.431,44.416016,48.571429,...,3.500058,0.285485,0.188871,0.096614,0.0,0.150437,0.698125,143.4,143.4,144.2,143.4,144.0,1749.959,59.041876,54.720525,54.864825,-0.017617,-0.009434,-0.974026,11.327935,2.509342,0.194963,0.102639,0.092324,0.0,0.289092,0.731739,143.066667,143.2,143.7,143.1,143.5,502.424,55.044665,54.85761,58.346469,-0.072962,-0.058369,-4.684209,12.651844,-0.070152,0.138696,0.024489,0.114207,0.0,4.071796,0.769009,142.841667,143.3,143.8,143.3,143.8,498.503,58.923362,70.453932,70.372532,-0.013473,-0.01334,-1.080198,14.305408,0.742376,0.103781,-0.091333,0.195114,1.702612,-0.234316,0.836497,142.65,143.0,144.2,142.8,143.9,7255.366,61.843454,74.107342,71.673008,0.047311,0.04501,3.384592,16.217936,0.33253,-0.037418,-0.246803,0.209385,-0.361511,-0.110834,0.877207,142.579167,142.8,143.1,142.6,142.7,1230.62,50.291331,60.952381,62.292769,-0.017457,0.016477,-0.425739,17.453175,-3.888671,-0.23473,-0.382404,0.147674,-0.121528,-0.062391,0.903139,142.508333,141.9,142.4,141.9,142.2,2153.721,45.421487,60.146194,54.522611,0.089174,0.126711,4.833291,18.407364,-8.413548,-0.391458,-0.487389,0.095931,0.0,-0.03952,0.946749,142.379167,142.9,143.3,142.4,142.4,1446.92,45.98228,37.357079,30.343239,0.134883,-0.040044,1.889245,18.506907,-7.082343,-0.440604,-0.579794,0.13919,0.0,-0.040726,0.997311,142.170833,142.4,142.8,142.2,142.3,1254.632,43.838488,34.52381,43.770053,-0.179092,-0.155085,-9.473839,18.552169,-6.204994,-0.542884,-0.681369,0.138485,0.0,-0.038372,0.991733,142.433333,143.1,143.3,142.3,142.3,1295.499,42.727128,73.827984,78.78176,-0.025942,0.022231,-0.535107,18.897398,-7.468074,-0.59942,-0.800559,0.201139,0.0,-0.048972,1.07331,143.05,142.6,142.9,142.1,142.7,3480.73,44.517012,73.528131,67.41059,0.138578,0.187805,8.288347,19.636617,-9.463848,-0.859028,-0.969259,0.11023,0.0,-0.012879,1.10623,143.475,142.0,142.6,141.8,142.2,3240.625
,40.169447,40.434177,32.206476,0.248851,0.267609,6.119312,19.255751,-12.602491,-1.157222,-0.993656,-0.163567,0.0,0.056608,1.139408,143.85,140.4,141.5,140.0,141.0,4577.945,30.982643,14.697904,13.119195,-0.055079,-0.206656,-2.806782,18.034413,-17.533125,-1.228202,-0.762369,-0.465833,0.158853,0.157152,1.135257,144.495833,1,buy
1,2022-12-31 19:00:00,3.642,3.646,3.634,3.635,39534.0,61.23856,82.150627,81.754145,0.012704,0.034124,1.521756,7.329679,4.561903,0.007166,0.003109,0.004057,0.183504,0.802354,0.018756,3.610583,3.621,3.629,3.621,3.627,24050.13,60.274768,74.90512,68.549946,0.095811,0.069631,5.084139,6.594358,2.173371,0.003224,-0.000174,0.003398,1.192079,-0.502097,0.019694,3.603208,APEUSDT,MACD,3.612,3.625,3.612,3.614,60452.58,54.850962,58.041532,54.186657,0.042686,0.009265,1.591678,6.964507,0.356699,-0.000235,-0.002571,0.002335,-0.478815,-0.119365,0.02131,3.598917,3.602,3.611,3.595,3.599,32108.81,48.359576,52.302184,55.192744,-0.015749,-0.019262,-1.217842,7.466732,-2.161252,-0.002496,-0.004051,0.001555,-0.154038,-0.071976,0.02282,3.598875,3.603,3.608,3.596,3.603,37748.78,50.077566,60.009627,60.070096,-0.009777,0.016165,-0.251687,8.060153,-1.570645,-0.004353,-0.005237,0.000883,0.0,-0.012235,0.02328,3.603,3.588,3.603,3.588,3.602,28972.73,49.021493,59.03099,53.78584,0.117733,0.124379,5.322988,8.352061,-4.419928,-0.005679,-0.0054,-0.000279,0.0,0.026766,0.02467,3.605792,3.602,3.606,3.588,3.597,75787.41,46.39006,35.822511,34.578781,0.02731,0.019279,0.906806,7.676696,-5.317724,-0.006527,-0.004733,-0.001793,0.0,0.134734,0.026647,3.6075,3.581,3.61,3.576,3.603,149372.9,46.903072,32.351014,35.466189,-0.121932,-0.126641,-5.850488,6.701126,-5.015058,-0.00555,-0.002612,-0.002938,0.411048,0.509372,0.026065,3.61475,3.605,3.614,3.601,3.612,62000.44,47.759199,52.935221,58.098797,-0.083844,-0.045845,-4.800565,5.817319,-3.999689,-0.002136,-0.000495,-0.001642,-2.239694,-0.150285,0.026074,3.621583,3.617,3.628,3.615,3.626,101310.93,51.768373,69.755702,62.427015,0.154018,0.202917,8.34761,5.300887,-0.547143,0.000688,0.000545,0.000143,-9.633641,0.04074,0.026803,3.624792,3.619,3.633,3.602,3.624,171870.15,51.764028,35.906533,29.04295,0.137195,-0.013727,1.9562,5.902361,-0.776043,-0.001203,0.000589,-0.001792,-0.20726,-0.354912,0.028072,3.620167,3.607,3.607,3.58,3.585,238649.94,36.878287,30.451573,37.917545,-0.171265
,-0.156635,-8.22409,6.239999,-3.802912,-0.000854,0.003129,-0.003984,-0.703492,-0.112717,0.026834,3.621667,3.641,3.641,3.631,3.636,53822.21,54.5635,64.843238,...,-7.532495,-0.006837,-0.005779,-0.001059,0.0,0.068447,0.024636,3.590833,3.558,3.573,3.542,3.57,119020.23,42.164262,25.708139,22.079407,0.064137,0.021268,0.898531,9.478541,-6.865082,-0.007996,-0.004046,-0.00395,0.152517,0.3711,0.024421,3.59375,3.583,3.583,3.567,3.569,32435.11,39.114392,20.096904,20.246649,-0.048097,-0.13523,-1.873938,9.010742,-4.38965,-0.004334,-0.000847,-0.003487,0.652016,0.903286,0.025104,3.596833,3.604,3.609,3.595,3.595,37490.05,46.890541,30.459559,39.344159,-0.174958,-0.142076,-7.951033,9.234636,0.894798,0.00083,0.001234,-0.000404,-0.279984,0.015994,0.026462,3.599958,3.613,3.616,3.593,3.608,69396.3,51.49795,64.074475,68.951229,-0.0514,-0.008322,-2.691256,10.225285,1.78313,0.003071,0.000935,0.002136,0.0,-5.560359,0.028952,3.603708,3.613,3.624,3.613,3.615,40688.39,55.12882,70.410166,67.022078,0.062742,0.088553,4.095887,10.793646,5.767514,0.001332,-0.001359,0.002691,0.0,-0.201119,0.028195,3.596542,3.59,3.616,3.585,3.611,57772.4,54.304341,53.51254,45.257524,0.137578,0.064111,4.628305,10.798185,3.561659,-0.002429,-0.002847,0.000418,-0.109472,0.049332,0.02985,3.589333,3.595,3.596,3.583,3.593,37694.31,48.597579,36.10119,41.016834,-0.131357,-0.1377,-6.924108,11.637587,1.570517,-0.003558,-0.002224,-0.001335,0.0,0.265762,0.031094,3.594958,3.594,3.617,3.58,3.581,102351.11,44.751011,64.953441,70.265384,-0.037741,0.020967,-1.304698,12.939478,2.094845,-0.001678,-0.00066,-0.001018,0.984095,0.031353,0.032375,3.607792,3.639,3.647,3.628,3.631,107808.15,56.400766,67.222523,57.008704,0.251303,0.295023,10.925393,14.270378,5.057308,0.000599,-0.001115,0.001715,-0.811025,0.179156,0.033393,3.614792,3.569,3.592,3.562,3.583,64755.56,46.485695,25.507239,19.840366,0.290408,0.169808,4.07331,15.108565,-0.095589,-0.008016,0.000413,-0.008429,0.132794,-0.547993,0.034742,3.617208,3.578,3.593,3.53,3.538,212910.07,35.052997,9
.456241,17.282526,-0.339349,-0.234948,-7.803922,16.949594,-2.117886,-0.003226,0.008729,-0.011955,-1.326156,-0.157737,0.03544,3.619125,3.62,3.635,3.612,3.618,108215.29,49.288382,38.895594,44.378397,-0.103953,-0.053375,-4.358438,18.776155,6.972738,0.013083,0.015791,-0.002709,-0.140132,-0.011339,0.03424,3.614208,1,buy
2,2023-01-01 11:00:00,0.1221,0.1221,0.121,0.1214,271447.1,54.286723,75.980296,73.911589,0.087652,0.137396,6.740455,9.209135,3.094464,0.00036,0.000253,0.000108,0.0,0.134075,0.000898,0.120979,0.1216,0.1218,0.1211,0.1215,90040.7,57.095933,49.374574,42.781799,0.221519,0.206529,7.239507,8.972262,3.620076,0.000263,0.000146,0.000117,0.931982,0.199899,0.000906,0.120558,WOOUSDT,MACD,0.12,0.1205,0.12,0.1201,216361.5,47.87406,21.140195,26.87018,-0.211609,-0.203882,-8.206331,8.997829,-2.623875,2.4e-05,9.1e-05,-6.7e-05,-0.441566,-0.095223,0.000871,0.120196,0.1206,0.1208,0.1206,0.1206,26463.1,51.913853,53.816023,62.80922,-0.128442,-0.094689,-8.743764,9.08457,-0.425122,0.000176,0.00012,5.6e-05,-0.102008,0.1978,0.000899,0.120229,0.121,0.1216,0.121,0.1215,306626.5,60.632989,86.305819,85.488107,0.032211,0.091648,3.97119,10.218542,3.057348,0.000263,3.6e-05,0.000227,0.333765,-1.961797,0.00093,0.120217,0.1205,0.1213,0.1205,0.1212,372653.7,60.414265,66.466387,54.104068,0.282606,0.324053,11.300397,10.339625,2.368512,2.4e-05,-0.000146,0.00017,-0.720515,-0.125219,0.000981,0.119896,0.1196,0.1201,0.1195,0.1196,227695.5,48.133088,20.780882,17.913419,-0.007669,-0.171224,-2.24683,11.419198,-1.835926,-0.000244,-0.000226,-1.7e-05,0.0,0.043168,0.001025,0.119675,0.1189,0.1193,0.1188,0.1192,110309.2,43.643928,32.11469,42.058503,-0.200069,-0.165341,-10.530715,11.747866,-5.274942,-0.000219,-0.000186,-3.3e-05,0.263859,0.000154,0.00108,0.119633,0.1205,0.1205,0.1198,0.1199,347358.5,47.909711,73.16792,79.647495,-0.057873,-0.014314,-3.693824,11.706513,-2.602784,-0.000104,-0.000201,9.7e-05,0.0,-0.091506,0.001174,0.119862,0.1201,0.1208,0.1199,0.1208,477984.2,54.038961,85.536637,78.60496,0.08593,0.096088,5.941601,12.552845,-1.889218,-0.000171,-0.000295,0.000124,-0.22494,-0.050766,0.00128,0.120138,0.1196,0.12,0.1192,0.12,390291.6,49.260863,60.708194,51.126768,0.189398,0.159066,7.492739,13.232464,-4.728635,-0.000418,-0.00034,-7.8e-05,0.0,0.103121,0.00136,0.120417,0.1194,0.1201,0.1183,0.1195,1047904.7,45.226061,3
3.213903,30.625912,0.105883,0.076616,2.738969,13.08532,-5.053718,-0.000478,-0.000203,-0.000274,0.0,10.996967,0.001405,0.120992,0.1186,0.1189,0.1171,0.1175,1308096.8,33.128846,21.435318,...,-8.603067,-0.000339,-0.0004,6.1e-05,0.0,-0.03628,0.000722,0.114004,0.1145,0.1147,0.114,0.114,117727.6,44.22272,77.277433,80.168589,-0.000591,0.037533,1.091886,14.167356,-7.593928,-0.000335,-0.000472,0.000137,0.0,-0.055768,0.000767,0.114229,0.1144,0.1145,0.1138,0.1142,102739.8,46.087858,72.108844,65.816327,0.118252,0.111081,6.424939,13.881085,-8.285648,-0.000509,-0.000583,7.4e-05,0.0,-0.013035,0.000797,0.114517,0.114,0.1141,0.1135,0.1136,92029.3,39.943153,48.998049,43.155267,0.113144,0.102111,4.183138,13.220915,-10.466829,-0.000715,-0.000597,-0.000119,0.0,0.067487,0.00082,0.114837,0.1122,0.1128,0.1119,0.1127,248255.8,26.469615,32.403632,28.589468,0.235906,0.194788,4.642903,10.896227,-18.223914,-0.000723,-0.000432,-0.000291,0.208402,0.146785,0.000819,0.115408,0.1151,0.1152,0.1147,0.1149,82696.4,37.673085,16.831291,14.892416,0.127003,-0.034012,1.108658,7.577075,-5.077425,-0.000394,-0.000262,-0.000132,0.0,0.157999,0.000643,0.11595,0.1149,0.1149,0.1145,0.1145,196557.2,28.477047,15.459254,23.355746,-0.203578,-0.084456,-4.457612,6.577027,-7.19831,-0.000321,-0.00013,-0.000191,0.302987,0.530522,0.000669,0.116238,0.116,0.1162,0.1158,0.1158,78255.9,41.088817,31.197479,31.944444,-0.07239,-0.087991,-3.291317,5.71534,-1.517883,-0.000108,-1.5e-05,-9.3e-05,0.587206,-2.117241,0.000692,0.116483,0.1161,0.117,0.1161,0.1165,131027.6,49.854484,42.016807,45.378151,-0.080563,-0.081428,-4.40843,6.201463,0.757125,-3.8e-05,4.4e-05,-8.2e-05,0.676906,-0.237167,0.000705,0.116663,0.1169,0.1172,0.1167,0.1168,42542.3,52.436901,60.204082,60.79932,0.006886,0.025733,0.63581,6.397121,1.444392,9.3e-05,0.000112,-1.8e-05,0.0,-0.038888,0.000702,0.116737,0.1164,0.1168,0.1164,0.1166,103969.0,50.307737,58.053221,52.410543,0.08094,0.065042,3.633407,6.952066,-0.250889,7.2e-05,0.000135,-6.4e-05,-0.169478,-0.081176,0.000757,0.1
16654,0.1167,0.1169,0.1165,0.1168,56286.6,52.494609,43.308271,41.760652,0.011515,-0.052184,-0.450514,7.697569,0.167132,0.000128,0.000184,-5.6e-05,0.0,-0.06436,0.000805,0.116683,0.1167,0.1168,0.1159,0.1162,271415.0,46.442756,47.636002,56.594427,-0.092214,-0.026285,-4.031156,8.611952,-1.04641,0.000169,0.000246,-7.7e-05,-0.131526,-0.036094,0.000844,0.116646,1,buy
3,2023-01-02 15:00:00,246.8,247.6,246.8,247.1,5653.787,64.061825,83.941481,81.322513,0.039338,0.067904,3.64379,11.312941,5.384608,0.417035,0.029113,0.387922,0.521933,-0.860837,1.047558,244.883333,246.4,247.5,246.3,247.0,12176.814,64.744893,67.546171,57.176767,0.185416,0.146906,8.400007,11.361695,4.065029,-0.007256,-0.262463,0.255208,-0.507226,-0.069419,1.039996,244.470833,BNBUSDT,MACD,243.9,243.9,243.0,243.4,4734.857,42.611713,36.233211,37.129683,-0.06331,-0.097225,-2.921614,11.318861,-9.040849,-0.442103,-0.313105,-0.128998,0.0,0.125288,0.947196,244.229167,244.5,244.6,243.5,243.6,6272.074,36.645058,47.200108,58.552149,-0.141031,-0.074808,-7.735245,8.933053,-5.453936,-0.20171,-0.19361,-0.0081,0.243535,-0.045187,0.804283,244.579167,245.3,245.9,245.2,245.3,3273.398,53.604652,74.357,72.315698,0.025596,0.057045,2.557502,8.991664,1.242247,-0.134242,-0.245593,0.111351,-0.189956,-0.060583,0.788318,244.8875,244.6,245.0,244.4,244.9,4306.663,49.782758,61.095981,52.259381,0.190752,0.200766,8.20219,10.046965,-2.497009,-0.329224,-0.304645,-0.024579,0.0,0.055537,0.78163,245.145833,244.4,244.7,244.2,244.3,3644.056,40.999017,29.432106,24.31021,0.277677,0.299249,5.177232,10.042224,-4.129958,-0.398628,-0.230789,-0.167839,0.0,0.289617,0.819678,245.554167,244.1,244.1,243.4,243.5,5504.355,30.066464,8.743831,9.659713,-0.075464,-0.11529,-0.71403,9.743101,-6.954567,-0.397095,-0.051314,-0.345781,0.36052,-1.701212,0.857653,245.779167,245.3,245.3,243.7,243.8,9096.266,30.271073,11.841396,19.404622,-0.30399,-0.220521,-7.684406,9.000986,-5.222482,-0.058858,0.212075,-0.270933,-0.800122,-0.136115,0.901106,246.025,246.3,246.7,245.5,245.7,4233.967,45.681201,41.017498,47.039907,-0.120539,-0.082962,-5.820132,9.66342,1.349768,0.288312,0.349941,-0.061628,-0.130258,-0.004943,0.912133,246.108333,247.0,247.2,246.7,246.9,2442.541,60.879395,61.001344,62.19182,-0.006526,-0.012323,-0.734127,9.658929,6.388101,0.431795,0.338453,0.093341,0.0,0.069454,0.889924,245.954167,247.0,247.3,246.8,246.9,4362.137,61.819577,6
6.060868,64.586214,0.024336,0.059524,2.344363,8.963609,6.811361,0.40854,0.249808,0.158732,0.148302,0.159286,0.944117,245.658333,245.6,246.0,245.0,246.0,11488.523,56.989073,53.392857,...,3.323821,0.053668,-0.017826,0.071494,-1.533697,-0.361763,0.966699,243.170833,243.4,243.4,242.5,242.6,5683.681,43.954466,29.102025,32.557222,-0.121141,-0.099779,-4.441752,16.740978,2.802367,-0.034426,-0.08236,0.047934,-1.921386,-0.160085,0.966987,243.070833,243.4,243.7,242.9,243.6,6105.981,53.045147,45.27008,46.07594,-0.017303,0.001841,-0.605569,17.564608,3.693103,-0.027839,-0.164012,0.136173,-0.375021,-0.12842,0.970337,243.075,244.3,246.1,242.9,243.6,14458.676,53.749148,46.194153,41.855991,0.130259,0.097684,4.322623,18.381043,5.27264,-0.148858,-0.27266,0.123802,-0.22268,-0.046255,0.998177,243.2125,242.7,242.9,242.4,242.6,1985.14,43.319609,32.789116,30.004618,0.043634,0.012632,0.73275,19.224283,-6.316297,-0.333478,-0.311383,-0.022096,0.0,0.022609,0.873001,243.175,242.4,242.8,242.2,242.7,5017.853,43.05006,30.858201,28.239475,0.055037,-0.019006,0.680578,19.071398,-8.592513,-0.309738,-0.2829,-0.026838,0.0,0.010279,0.90967,243.154167,243.2,243.5,243.0,243.1,4023.226,45.058373,29.483198,34.670643,-0.150956,-0.163158,-6.977924,18.799443,-5.898483,-0.259675,-0.27515,0.015475,0.0,-0.010811,0.925118,243.329167,243.2,243.3,242.5,242.9,4278.233,42.323957,59.643838,67.082788,-0.060184,-0.007697,-2.759208,19.090621,-8.653893,-0.246975,-0.284489,0.037514,0.0,-0.045963,0.968295,243.608333,244.5,244.8,243.9,244.1,3992.282,51.464965,69.183673,62.619048,0.151914,0.183777,8.509506,19.264414,-4.486581,-0.204231,-0.353545,0.149313,-0.220997,-0.060026,1.022804,243.895833,243.2,243.3,242.3,242.7,5738.978,40.943674,35.796703,33.623321,0.020893,-0.033511,-0.264122,20.265305,-10.000175,-0.528601,-0.424481,-0.10412,0.0,0.069766,1.00173,244.029167,242.7,242.7,241.4,241.8,3642.54,28.071573,38.347793,39.422274,-0.014251,0.000728,-0.35816,19.517583,-14.131646,-0.573623,-0.306866,-0.266757,0.18085,0.233747,0.963505,
244.304167,243.2,243.6,242.4,243.5,4906.432,37.609091,39.593214,38.302441,0.051821,-0.004743,1.097889,17.522181,-12.263613,-0.251863,-0.126372,-0.12549,0.394049,0.187933,0.936972,244.675,244.8,245.0,244.5,244.7,2685.726,46.752455,39.595838,43.073896,-0.091754,-0.084805,-4.377306,15.933739,-4.808189,-0.054825,-0.07114,0.016315,0.394717,-0.071718,0.754032,244.741667,1,buy
4,2023-01-02 15:00:00,0.1077,0.1081,0.1076,0.108,325371.0,68.903445,84.71439,78.244813,0.086191,0.110524,6.523183,14.551155,5.765866,0.000291,-3.6e-05,0.000327,20.305795,-0.425514,0.000634,0.106063,0.1063,0.1079,0.1063,0.1073,834695.0,64.064269,57.103785,48.494709,0.144214,0.073001,5.338828,15.047255,4.809751,-0.000116,-0.000264,0.000148,-0.281209,-0.038885,0.000636,0.105704,CHRUSDT,MACD,0.1054,0.1054,0.1049,0.105,95466.0,39.698497,37.741758,41.42674,-0.083011,-0.10195,-4.09768,15.125053,-10.623273,-0.000355,-0.000288,-6.8e-05,0.0,0.060488,0.000536,0.105721,0.1062,0.1062,0.1049,0.1051,529683.0,33.862045,56.730769,64.771062,-0.070537,0.007257,-3.034188,12.956182,-12.136373,-0.000228,-0.000229,1e-06,0.0,-0.026047,0.000484,0.106021,0.106,0.1064,0.1058,0.1061,148765.0,46.753117,64.505495,56.614338,0.14922,0.190951,7.265611,12.045384,-7.264854,-0.000224,-0.000258,3.4e-05,0.0,-0.015362,0.000463,0.1062,0.1059,0.1062,0.1058,0.1061,185934.0,46.350302,34.2118,25.594584,0.343565,0.248322,5.671987,11.817325,-6.654115,-0.000328,-0.000266,-6.2e-05,0.0,0.093479,0.00046,0.106433,0.1056,0.1057,0.1055,0.1056,124767.0,32.217549,13.122711,11.413309,0.150256,0.076631,1.359672,10.339724,-9.025904,-0.000335,-0.00017,-0.000164,0.100989,0.386547,0.000476,0.106771,0.106,0.1061,0.1058,0.1059,112256.0,34.239132,8.398744,10.34188,-0.193888,-0.106138,-2.096634,8.597022,-7.305207,-0.0002,-2.8e-05,-0.000172,0.374032,-2.120267,0.000505,0.107088,0.1067,0.1068,0.1064,0.1064,153759.0,39.737793,13.877551,14.973545,-0.149066,-0.147538,-3.144369,7.849081,-4.054463,-5e-05,9.9e-05,-0.000149,1.190258,-0.186626,0.000539,0.107329,0.1069,0.1071,0.1065,0.1067,144841.0,43.074018,23.809524,26.031746,-0.1282,-0.140688,-4.761905,7.743216,-3.72407,8.1e-05,0.000213,-0.000132,-0.281274,-0.087907,0.000578,0.107371,0.1076,0.1078,0.1076,0.1076,194822.0,55.651327,43.809524,42.526455,0.024783,-0.022519,0.505451,7.888942,1.467945,0.000265,0.000292,-2.7e-05,0.0,-0.015477,0.000597,0.107342,0.1079,0.108,0.1077,0.1077,139094.0,
58.116681,44.736652,51.197903,-0.095898,-0.074358,-4.849051,8.760454,1.470709,0.000333,0.000307,2.7e-05,0.0,0.024454,0.000643,0.10715,0.1072,0.1075,0.1072,0.1075,243137.0,56.644522,63.336092,...,16.368288,0.000608,0.000452,0.000157,0.0,0.089261,0.000554,0.107304,0.1083,0.1083,0.1078,0.108,353127.0,65.208585,90.186071,87.98016,0.023142,0.029271,2.20459,13.307237,11.52819,0.000466,0.000315,0.000151,0.102926,0.128046,0.000482,0.106817,0.1076,0.1077,0.1073,0.1075,121903.0,64.996359,81.07797,76.71289,0.05649,0.036667,3.652683,11.297682,9.90748,0.000313,0.000194,0.000119,0.157724,0.155418,0.00047,0.106438,0.1073,0.1075,0.1067,0.107,345166.0,60.38182,68.501439,68.347531,9.7e-05,-0.014031,-0.474955,10.193827,6.571251,0.000184,0.00011,7.3e-05,0.310205,0.115715,0.000487,0.106358,0.1065,0.1066,0.1062,0.1063,118864.0,52.452134,73.59944,70.742297,0.05008,0.071293,3.678168,10.572879,-0.040008,9.4e-05,7.9e-05,1.6e-05,0.0,0.049773,0.000464,0.1063,0.1065,0.1067,0.1064,0.1065,250501.0,57.084718,58.710211,49.517443,0.297536,0.312982,10.370512,11.832613,-0.169168,7.7e-05,6.5e-05,1.2e-05,0.772237,-0.023838,0.000485,0.106179,0.1061,0.1062,0.106,0.106,88019.0,49.664349,20.061115,19.74649,-0.079222,-0.185298,-3.279685,12.827491,-0.526291,1.8e-05,8.1e-05,-6.3e-05,-0.183289,-0.132072,0.000474,0.106029,0.1058,0.1059,0.1053,0.1053,468667.0,39.409099,35.243397,48.366794,-0.204839,-0.142409,-11.05934,14.266838,-3.582748,7.2e-05,0.000142,-7e-05,-0.314741,-0.020927,0.00048,0.105979,0.1069,0.1072,0.1066,0.1067,157713.0,58.581323,77.631066,81.072283,-0.032768,0.00283,-1.727412,15.696979,5.738545,0.000254,0.000134,0.00012,0.0,0.367304,0.000476,0.106021,0.1063,0.1066,0.1062,0.1066,244820.0,62.039405,81.670084,75.492877,0.075403,0.086873,5.245511,16.178081,0.850579,0.00012,2.5e-05,9.6e-05,0.569781,0.393245,0.000449,0.105846,0.1059,0.1065,0.1058,0.1061,775271.0,57.902637,58.617131,51.098334,0.212297,0.221455,8.357495,18.294732,-1.34262,-3e-05,-3.5e-05,5e-06,-0.269113,0.313528,0.000445,0.105737,0.1053,0.
1054,0.1051,0.1054,181578.0,43.406877,28.011204,26.92577,-0.033988,-0.091012,-2.476917,18.948797,-12.779389,-6.9e-05,-2e-06,-6.6e-05,-19.816006,-0.687628,0.00041,0.1058,0.1054,0.1057,0.1054,0.1057,231757.0,48.149971,35.912698,40.145503,-0.123218,-0.097596,-6.175989,18.378101,-7.4336,7e-06,4.2e-05,-3.5e-05,-0.52019,-0.095311,0.000385,0.105829,1,buy


(5138, 985)

# Train_test_split by ticker group

In [9]:
from sklearn.model_selection import GroupShuffleSplit 

# Hold out roughly 10% of the tickers as a test set. Splitting by ticker keeps
# every row of a given ticker on one side only; the seed is fixed so the split
# is reproducible. Only the first of the generated splits is consumed.
splitter = GroupShuffleSplit(test_size=0.1, n_splits=2, random_state=7)

split = splitter.split(df, groups=df['ticker'])
train_inds, test_inds = next(split)

# Take explicit copies: later cells add columns to train_df, and assigning into
# a bare slice of df raises SettingWithCopyWarning.
train_df = df.iloc[train_inds].copy()
test_df = df.iloc[test_inds].copy()

# Sanity check: no ticker may appear on both sides of the split.
assert set(train_df['ticker']).isdisjoint(test_df['ticker'])

# PyTorch

### Create dataset

In [10]:
test_size = 0.2
# Fixed seed so the train/validation partition (and therefore the scaler
# statistics and every NN metric below) is identical on each run.
split_seed = 7

# Everything except the label and the identifier columns is a model input.
x_data = train_df.drop(['target', 'time', 'ticker', 'pattern', 'ttype'], axis=1)
y_data = train_df['target']
# NOTE(review): this is a row-level split, so rows of one ticker can land in
# both train and validation - confirm that is acceptable for the NN experiment.
x_train, x_valid, y_train, y_valid = train_test_split(
    x_data, y_data, test_size=test_size, shuffle=True, random_state=split_seed)
input_channels = x_data.shape[1]
output_size = len(set(y_data))

# Fit the scaler on the training rows only and reuse its statistics for the
# validation rows. The scaled arrays go straight into tensors, so no column
# assignment on the split slices is needed.
scaler = StandardScaler()
x_train = torch.tensor(scaler.fit_transform(x_train), dtype=torch.float32)
x_valid = torch.tensor(scaler.transform(x_valid), dtype=torch.float32)
y_train = torch.tensor(y_train.values, dtype=torch.float32)
y_valid = torch.tensor(y_valid.values, dtype=torch.float32)

display(type(x_train), type(y_train))

torch.Tensor

torch.Tensor

### Find available device

In [11]:
# Use the first CUDA GPU when PyTorch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

### Build model

In [12]:
class SigModel(torch.nn.Module):
    """Feed-forward binary classifier ending in a sigmoid.

    Four hidden blocks (Linear -> ReLU -> Dropout(0.25)) of widths
    64, 128, 96 and 32 are followed by a single-unit Linear layer and a
    Sigmoid, so the forward pass yields one value in [0, 1] per input row.
    """

    def __init__(self, input_channels):
        super().__init__()
        # Consecutive pairs of this list are the (in, out) sizes of the hidden layers.
        widths = [input_channels, 64, 128, 96, 32]

        self.layers = torch.nn.Sequential()
        for idx, (n_in, n_out) in enumerate(zip(widths[:-1], widths[1:]), start=1):
            self.layers.add_module(f'lin{idx}', torch.nn.Linear(n_in, n_out))
            self.layers.add_module(f'relu{idx}', torch.nn.ReLU())
            self.layers.add_module(f'do{idx}', torch.nn.Dropout(p=0.25))

        # Output head: one unit squashed by a sigmoid.
        self.layers.add_module('lin5', torch.nn.Linear(widths[-1], 1))
        self.layers.add_module('sigmoid', torch.nn.Sigmoid())

    def forward(self, input):
        """Run `input` through the stacked layers and return the sigmoid outputs."""
        return self.layers(input)

# Train model

In [13]:
from torch import nn
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau, CosineAnnealingLR

# train function
def train_epoch(model, train_loader, criterion, optimizer, scheduler):
    """Run one full-batch optimisation step followed by a validation pass.

    Parameters
    ----------
    model : torch.nn.Module
        Network whose output has a trailing dimension of size 1.
    train_loader : tuple
        ``(x_train, x_valid, y_train, y_valid)`` tensors; despite the name
        this is a plain tuple, not a DataLoader.
    criterion : callable
        Loss taking ``(predictions, targets)``.
    optimizer : torch.optim.Optimizer
        Optimizer bound to ``model``'s parameters.
    scheduler
        Learning-rate scheduler whose ``step`` accepts the validation loss.

    Returns
    -------
    tuple
        ``(train_loss, train_acc, val_loss, val_acc)`` as 0-d tensors. The
        model is left in evaluation mode.
    """
    # put the model in train mode
    model.train()

    x_train, x_valid, y_train, y_valid = train_loader

    # Drop only the trailing size-1 dimension. A bare squeeze() would also
    # collapse the batch dimension of a one-row batch, leaving a 0-d tensor
    # that no longer matches the shape of the targets.
    train_preds = model(x_train).squeeze(-1)
    # calculate train loss
    train_loss = criterion(train_preds, y_train)
    train_acc = (train_preds.round() == y_train).float().mean()

    # reset gradients so they do not accumulate across calls
    optimizer.zero_grad()
    # calculate gradient
    train_loss.backward()
    # update weights
    optimizer.step()

    # put the model in evaluation mode
    model.eval()

    with torch.no_grad():
        val_preds = model(x_valid).squeeze(-1)
        val_loss = criterion(val_preds, y_valid)
        val_acc = (val_preds.round() == y_valid).float().mean()

    # let the scheduler adjust the learning rate from the validation loss
    scheduler.step(val_loss)

    # Detach so callers that store the loss do not keep the autograd graph alive.
    return train_loss.detach(), train_acc, val_loss, val_acc

# Train the neural network (only when enabled in CFG)
if CFG.train_NN:
    model = SigModel(input_channels).to(device)

    # Number of epochs
    epochs = 100000
    # Progress is reported about 20 times per run. The interval is clamped to
    # at least 1 so that a small `epochs` value cannot produce a modulo by zero.
    log_every = max(1, epochs // 20)

    # Send data to the device
    x_train, x_valid = x_train.to(device), x_valid.to(device)
    y_train, y_valid = y_train.to(device), y_valid.to(device)
    # train_epoch expects the four tensors packed in this order
    train_loader = x_train, x_valid, y_train, y_valid

    # Empty loss lists to track values
    epoch_count, train_loss_values, valid_loss_values = [], [], []

    criterion = nn.BCELoss()
    learning_rate = 1e-6
    optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
    # Halve the learning rate when the validation loss stops improving
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2000, threshold=1e-2)

    # Loop through the data
    for epoch in tqdm(range(epochs)):
        train_loss, train_acc, val_loss, val_acc = train_epoch(model, train_loader, criterion, optimizer, scheduler)

        # Print progress and record the losses for the plot below
        if epoch % log_every == 0:
            print(f'Epoch: {epoch:4.0f} | Train Loss: {train_loss:.5f}, Train Acc: {train_acc:.5f}\
                Validation Loss: {val_loss:.5f}, Val Acc: {val_acc:.5f}\
                    LR: {optimizer.state_dict()["param_groups"][0]["lr"]}')

            epoch_count.append(epoch)
            train_loss_values.append(train_loss.cpu().detach().numpy())
            valid_loss_values.append(val_loss.cpu().detach().numpy())


# Plot NN train results

In [14]:
import matplotlib.pyplot as plt

# Draw the recorded training/validation losses (only when the NN was trained).
if CFG.train_NN:
    for curve, curve_label in ((train_loss_values, 'Training Loss'),
                               (valid_loss_values, 'Validation Loss')):
        plt.plot(epoch_count, curve, label=curve_label)
    plt.title('Training & Validation Loss Curves')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

# Select features

In [15]:
# Hex colour palette.
palette = [
    '#302c36', '#037d97', '#E4591E', '#C09741',
    '#EC5B6D', '#90A6B1', '#6ca957', '#D8E3E2',
]

# Bright ANSI colour prefixes for console messages, plus the reset sequence.
blk, red, blu = (Style.BRIGHT + colour for colour in (Fore.BLACK, Fore.RED, Fore.BLUE))
res = Style.RESET_ALL

def lgbm_tuning(df, permut=False, boruta=False):
    """Cross-validate LightGBM on `df` and accumulate feature-importance tables.

    The module-level `task_type` selects the setup: 'cls' uses
    LGBMClassifier, StratifiedGroupKFold and log-loss on `df['target']`;
    any other value uses LGBMRegressor, GroupKFold and RMSE on the relative
    difference between 'target' and 'close'. Model hyper-parameters are read
    from the module-level `params`; repeat and fold counts from `CFG`. Folds
    are grouped by the 'ticker' column.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs 'target' and 'ticker' columns (and 'close' for regression).
        'time', 'target', 'ticker', 'pattern' and 'ttype' are not used as features.
    permut : bool
        Also accumulate eli5 permutation importances on each validation fold.
    boruta : bool
        Also accumulate BoostBoruta rankings on each fold.

    Returns
    -------
    tuple
        ``(perm_df_, feature_importances_, boruta_df_, mean_score)``. The
        permutation and Boruta frames stay empty when the matching flag is
        off (Boruta also when every fit failed). Importances are summed over
        all folds and repeats; `mean_score` is the mean out-of-fold score
        over the repeats.
    """
    features = [c for c in df.columns if c not in ['time', 'target', 'ticker', 'pattern', 'ttype']]
    groups = df['ticker']

    outer_cv_score = [] # store all cv scores of outer loop inference

    perm_df_ = pd.DataFrame()
    feature_importances_ = pd.DataFrame()
    boruta_df_ = pd.DataFrame()
    
    for i in range(CFG.n_repeats):
        print(f'Repeat {blu}#{i+1}')
        
        if task_type == 'cls':
            y_fold = df['target']
            # Shift the seed by the repeat index: with one fixed seed every
            # repeat would reuse exactly the same folds. Repeat 0 keeps the
            # original seed.
            kf = StratifiedGroupKFold(n_splits=CFG.n_folds, shuffle=True, random_state=180820231 + i)
            eval_metric = 'logloss'
        else:
            # Regression target: relative move of 'target' against 'close'
            y_fold = (df['target'] - df['close']) / df['close']
            kf = GroupKFold(n_splits=CFG.n_folds)
            eval_metric = 'mse'

        X, y = df[features], y_fold
        oof = np.zeros(len(df))
        models_ = [] # Used to store models trained in the inner loop.
        
        # Folds are built so that rows of one ticker never span train and validation
        for fold, (train_idx, val_idx) in enumerate(kf.split(X, y, groups)):
            # Split the dataset according to the fold indexes.
            X_train = X.iloc[train_idx]
            X_val = X.iloc[val_idx]
            y_train = y.iloc[train_idx]
            y_val = y.iloc[val_idx]

            if task_type == 'cls':
                clf = lgb.LGBMClassifier(**params)
            else:
                clf = lgb.LGBMRegressor(**params)
            clf.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_val, y_val)], 
                    eval_metric=eval_metric, 
                    callbacks=[lgb.log_evaluation(100)])

            models_.append(clf)

            if task_type == 'cls':
                val_preds = clf.predict_proba(X_val)[:,1]
                val_score = log_loss(y_val, val_preds)
            else:
                val_preds = clf.predict(X_val)
                # RMSE via an explicit square root: the `squared` keyword of
                # mean_squared_error is not available in every scikit-learn release
                val_score = np.sqrt(mean_squared_error(y_val, val_preds))
            
            oof[val_idx] = val_preds
            best_iter = clf.best_iteration_

            print(f'Fold: {blu}{fold + 1:>3}{res}| loss: {blu}{val_score:.5f}{res}| Best iteration: {blu}{best_iter:>4}{res}')

            # permutation importance
            if permut:
                perm = PermutationImportance(clf, scoring=None, n_iter=1, 
                                             random_state=42, cv=None, refit=False).fit(X_val, y_val)

                # sort by feature name so the per-fold frames align when summed
                perm_importance_df = pd.DataFrame({'importance': perm.feature_importances_}, 
                                                    index=X_val.columns).sort_index()

                if perm_df_.shape[0] == 0:
                    perm_df_ = perm_importance_df.copy()
                else:
                    perm_df_ += perm_importance_df

            # gboost feature importance; ordered by feature name (descending) so
            # every fold yields the same row order and 'Value' can be added positionally
            f_i = pd.DataFrame(sorted(zip(clf.feature_importances_, X.columns), 
                                      reverse=True, key=lambda x: x[1]), 
                                columns=['Value','Feature'])

            if feature_importances_.shape[0] == 0:
                feature_importances_ = f_i.copy()
            else:
                feature_importances_['Value'] += f_i['Value']
                    
            # BORUTA importance
            if boruta:
                model = BoostBoruta(clf, importance_type='shap_importances', train_importance=False)
                try:
                    model.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_val, y_val)], 
                            eval_metric=eval_metric, 
                            callbacks=[lgb.log_evaluation(100)])
                except RuntimeError as err:
                    # Best effort: a failed Boruta run only drops this fold's
                    # ranking, but say so instead of skipping silently
                    print(f'BoostBoruta failed on fold {fold + 1}, ranking skipped: {err}')
                    continue
                
                boruta_importance_df = pd.DataFrame({'importance': model.ranking_}, 
                                                        index=X_train.columns).sort_index()
                if boruta_df_.shape[0] == 0:
                    boruta_df_ = boruta_importance_df.copy()
                else:
                    boruta_df_ += boruta_importance_df

        if task_type == 'cls':
            outer_cv = log_loss(y, oof)
        else:
            outer_cv = np.sqrt(mean_squared_error(y, oof))
        
        outer_cv_score.append(outer_cv)

    # NOTE: the label says log_loss, but for regression the value printed is an RMSE
    print(f'{red} Outer Holdout avg score: {res} log_loss: {red}{np.mean(outer_cv_score):.5f}{res}')
    print(f'{"*" * 50}\n')
    
    if permut:
        perm_df_ = perm_df_.sort_values('importance', ascending=False)
        perm_df_ = perm_df_.reset_index().rename({'index': 'Feature'}, axis=1)
        
    if boruta and boruta_df_.shape[0] > 0:
        boruta_df_ = boruta_df_.sort_values('importance')
        boruta_df_ = boruta_df_.reset_index().rename({'index': 'Feature'}, axis=1)
                                    
    feature_importances_ = feature_importances_.sort_values('Value', ascending=False).reset_index(drop=True)
    
    return perm_df_, feature_importances_, boruta_df_, np.mean(outer_cv_score)


# Problem type for the feature-selection run: 'cls' (classification) or anything else (regression).
task_type = 'cls'

# LightGBM hyper-parameters shared by both problem types.
# NOTE(review): `subsample` is set without `subsample_freq`; LightGBM's docs say
# bagging only takes effect with a non-zero frequency - confirm this is intended.
# ('early_stopping_round': 100 was tried and is currently disabled.)
params = dict(
    n_estimators=1000,
    learning_rate=0.02,
    max_depth=10,
    subsample=0.7,
    colsample_bytree=0.85,
    num_leaves=24,
    verbosity=-1,
    importance_type='gain',
    max_bin=255,
    reg_alpha=1e-6,
    reg_lambda=1e-8,
)

# Booster and objective depend on the problem type.
params.update(
    {'boosting_type': 'dart', 'objective': 'binary'}
    if task_type == 'cls'
    else {'boosting_type': 'gbdt', 'objective': 'regression'}
)

# Run the (slow) importance computation only when feature selection is enabled.
if CFG.select_features:
    (perm_df_,
     feature_importances_,
     boruta_df_,
     outer_cv_score) = lgbm_tuning(df, boruta=True, permut=True)

# Combine importances and save them

In [16]:
if CFG.select_features:
    # Rank features within each importance source (rank 1 = most important).
    perm_df_['rank'] = perm_df_['importance'].rank(ascending=False)
    feature_importances_['rank'] = feature_importances_['Value'].rank(ascending=False)
    rank_frames = [perm_df_[['Feature', 'rank']], feature_importances_[['Feature', 'rank']]]

    # lgbm_tuning hands back an empty frame when every Boruta fit failed; in
    # that case there is no 'importance' column to rank, so leave Boruta out.
    if not boruta_df_.empty:
        # Boruta rankings are sorted ascending upstream, so a smaller value ranks first
        boruta_df_['rank'] = boruta_df_['importance'].rank()
        rank_frames.append(boruta_df_[['Feature', 'rank']])

    # Sum the per-source ranks per feature; the smallest total comes first.
    fi = pd.concat(rank_frames)
    fi = fi.groupby('Feature')['rank'].sum().sort_values().reset_index()
    fi.to_csv(f'feature_importance_{CFG.ttype}.csv')
else:
    # Reuse the ranking saved by an earlier selection run.
    fi = pd.read_csv(f'feature_importance_{CFG.ttype}.csv')

# Train, evaluate and save the LightGBM model

In [21]:
def model_train(df, train_df, test_df, features, task_type, how, n_folds, low_bound, high_bound, train_test): 
    """Fit a classifier in one of three modes and return ``(predictions, model)``.

    Modes (selected by `train_test`):

    * ``'fold'`` - StratifiedGroupKFold on `train_df`, grouped by 'ticker'.
      Returns out-of-fold probabilities of class 1 with shape
      ``(len(train_df), 1)`` and the model of the LAST fold only.
    * ``'full'`` - fit LightGBM on all of `df`. Returns an all-zero array of
      shape ``(len(df), 1)`` and the fitted model.
    * anything else - fit LightGBM on `train_df`, predict `test_df`. Returns
      class-1 probabilities of shape ``(len(test_df), 1)`` and the model.

    Parameters
    ----------
    df, train_df, test_df : pandas.DataFrame
        Full data and its train/test parts; each needs `features` and 'target'
        (`train_df` also 'ticker').
    features : list of str
        Columns used as model inputs.
    task_type
        Accepted for interface compatibility; not used by this function.
    how : str
        'lgbm' or 'lreg'. Only consulted in 'fold' mode; the other modes
        always train LightGBM.
    n_folds : int
        Number of CV folds in 'fold' mode.
    low_bound, high_bound : float
        Thresholds forwarded to `confident_score` for the per-fold report.
    train_test : str
        Mode selector, see above.

    Raises
    ------
    ValueError
        In 'fold' mode when `how` is neither 'lgbm' nor 'lreg'.

    LightGBM hyper-parameters are read from the module-level `params`.
    """
    X, groups = train_df[features], train_df['ticker']
    y = train_df['target']
    
    if train_test == 'fold':
        # Fail early with a clear message instead of hitting an unbound
        # `model` inside the first fold.
        if how not in ('lgbm', 'lreg'):
            raise ValueError(f"Unknown model type {how!r}; expected 'lgbm' or 'lreg'")

        oof = np.zeros([train_df['target'].shape[0], 1])
        
        kf = StratifiedGroupKFold(n_splits=n_folds, shuffle=True, random_state=24082023)

        # Encode tickers as numbers and flatten: the splitter documents
        # `groups` as a 1-D array of length n_samples.
        oe_enc = OrdinalEncoder()
        groups = oe_enc.fit_transform(groups.values.reshape(-1, 1)).ravel()

        print(f"Training with {len(features)} features")
        
        for fold, (fit_idx, val_idx) in enumerate(kf.split(X, y, groups)):
            print(f'Fold #{fold + 1}')
            # Split the dataset according to the fold indexes.
            X_train = X.iloc[fit_idx]
            X_val = X.iloc[val_idx]
            y_train = y.iloc[fit_idx]
            y_val = y.iloc[val_idx]
            
            if how == 'lgbm':
                model = lgb.LGBMClassifier(**params)
                model.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_val, y_val)], 
                          eval_metric='logloss', callbacks = [lgb.log_evaluation(100)])
            else:  # how == 'lreg'
                model = LogisticRegression(C=0.1, max_iter=100000)
                model.fit(X_train, y_train)

            val_preds = model.predict_proba(X_val)
            val_score = log_loss(y_val, val_preds)
            prec_score, prec_obj_pct = confident_score(y_val, val_preds[:,1], low_bound, high_bound)
            print(f'Logloss: {val_score}, Confident objects precision: {prec_score}, % of confident objects: {prec_obj_pct}')
            # keep only the probability of the positive class
            oof[val_idx, 0] = val_preds[:,1]

        return oof, model
    elif train_test == 'full':
        print('Train on full data')
        X, y = df[features], df['target']
        model = lgb.LGBMClassifier(**params)
        model.fit(X, y, eval_set=[(X, y)], eval_metric='logloss', callbacks = [lgb.log_evaluation(100)])
        # nothing is held out in this mode, so the predictions are a zero placeholder
        return np.zeros([df.shape[0], 1]), model
    else:
        # fit model on the train part and predict on test
        print("Test fold")
        X_test, y_test = test_df[features], test_df['target']
        model = lgb.LGBMClassifier(**params)
        model.fit(X, y, eval_set=[(X, y), (X_test, y_test)], eval_metric='logloss', callbacks = [lgb.log_evaluation(100)])
        oof_test = np.zeros([test_df['target'].shape[0], 1])
        oof_test[:,0] = model.predict_proba(X_test)[:,1]
        return oof_test, model

def prepare_features(fi, feature_num):
    ''' Pick the top `feature_num` features and order them by lag.

    A name of the form ``<base>_prev_<N>`` with N > 0 is a lagged feature and
    is stored under the integer key N as ``<base>``. Every other name is
    stored unchanged under key 0. This includes names that merely end in
    digits without the ``_prev_`` marker, and a lag of 0.

    Parameters
    ----------
    fi : pandas.DataFrame
        Importance table with a 'Feature' column, best features first.
    feature_num : int
        How many rows from the top of `fi` to keep.

    Returns
    -------
    features : list of str
        Full feature names: un-lagged ones first, then lags in ascending order.
    feature_dict : dict
        ``{lag: [names...]}`` sorted by lag, plus the key 'features' holding
        the same list as `features`.
    '''
    top_features = fi['Feature'].head(feature_num)
    feature_dict = defaultdict(list)

    for f in top_features:
        parts = f.split('_')
        lag_token = parts[-1]
        # Only split names that can be rebuilt exactly as '<base>_prev_<lag>':
        # a non-empty base, the 'prev' marker and a canonical positive integer.
        is_lagged = (
            len(parts) >= 3
            and parts[-2] == 'prev'
            and lag_token.isdecimal()
            and lag_token == str(int(lag_token))
            and int(lag_token) > 0
        )
        if is_lagged:
            feature_dict[int(lag_token)].append('_'.join(parts[:-2]))
        else:
            feature_dict[0].append(f)

    # Sort by lag before the string key 'features' is added (int and str keys do not compare)
    feature_dict = dict(sorted(feature_dict.items()))

    features = []
    for lag, names in feature_dict.items():
        if lag > 0:
            features.extend(f'{name}_prev_{lag}' for name in names)
        else:
            features.extend(names)

    feature_dict['features'] = features # + ['Pattern_Trend', 'STOCH_RSI']

    return features, feature_dict

def confident_score(y, oof, low_bound, high_bound):
    ''' Precision on the objects predicted with high confidence.

    Objects whose probability in `oof` exceeds `high_bound` are labelled 1 and
    scored against `y`. Returns that precision together with the share of
    such objects among all of `y`. `low_bound` only writes zeros into entries
    that are already zero, so it has no effect on either returned value. '''
    confident_mask = oof > high_bound

    predicted = np.zeros_like(oof)
    predicted[confident_mask] = 1
    predicted[oof < low_bound] = 0

    # Column view of the labels so the mask works for both 1-D and (n, 1) `oof`
    y_column = y.values.reshape(-1, 1)
    y_confident = y_column[confident_mask]
    predicted_confident = predicted[confident_mask]

    precision = precision_score(y_confident, predicted_confident)
    confident_share = y_confident.shape[0] / y.shape[0]
    return precision, confident_share

# Run mode: one of fold, test, full, inference
train_test = 'full'
# Number of top-ranked features passed to the model
feature_num = 160

# Probability thresholds handed to confident_score
low_bound = 0.34
high_bound = 0.66

# LightGBM hyper-parameters for the final classifier.
# Currently disabled alternatives: 'early_stopping_round': 50,
# 'is_unbalance': True, 'class_weight': 'balanced'.
params = dict(
    boosting_type='dart',
    n_estimators=1500,
    learning_rate=0.02,
    max_depth=10,
    colsample_bytree=0.75,
    subsample=0.85,
    subsample_freq=1,
    num_leaves=25,
    verbosity=-1,
    max_bin=255,
    reg_alpha=1e-6,
    reg_lambda=1e-8,
    objective='binary',
    metric='average_precision',
)

if CFG.train_LGBM:
    # Feature list comes from the saved importance ranking
    fi = pd.read_csv(f'feature_importance_{CFG.ttype}.csv')
    features, feature_dict = prepare_features(fi, feature_num)

    if train_test == 'inference':
        # Score the test part with a previously saved model.
        # NOTE: joblib.load unpickles the file - only load artefacts produced by this notebook.
        model = joblib.load('lgbm.pkl')
        X_test, y_test = test_df[features], test_df['target']
        oof = np.nan_to_num(model.predict_proba(X_test)[:,1])
        test_val_score = log_loss(y_test, oof)
        test_conf_prec_score, test_conf_obj_pct = confident_score(y_test, oof, low_bound, high_bound)
        print(f'Loaded model test Logloss: {test_val_score}, Loaded model test confident objects precision: {test_conf_prec_score}, Total % of test confident objects: {test_conf_obj_pct}')
    else:
        # Every other mode trains a model first
        oof, model = model_train(
            df, train_df, test_df, features,
            task_type=task_type, how='lgbm', n_folds=5,
            low_bound=low_bound, high_bound=high_bound,
            train_test=train_test,
        )

        if train_test == 'fold':
            # Report the out-of-fold quality on the training part
            y = train_df['target']
            oof_val_score = log_loss(y, oof)
            oof_conf_prec_score, oof_conf_obj_pct = confident_score(y, oof, low_bound, high_bound)
            print(f'Total fold Logloss: {oof_val_score}, Total confident objects precision: {oof_conf_prec_score}, Total % of confident objects: {oof_conf_obj_pct}')
        elif train_test == 'test':
            # Report the hold-out quality, then persist the model
            y_test = test_df['target']
            test_val_score = log_loss(y_test, oof)
            test_conf_prec_score, test_conf_obj_pct = confident_score(y_test, oof, low_bound, high_bound)
            print(f'Total test Logloss: {test_val_score}, Total test confident objects precision: {test_conf_prec_score}, Total % of test confident objects: {test_conf_obj_pct}')
            joblib.dump(model, 'lgbm.pkl')
        elif train_test == 'full':
            # Persist the model together with the feature dictionary it was trained on
            joblib.dump(model, 'lgbm.pkl')
            with open(f'features_{CFG.ttype}.json', 'w') as f:
                json.dump(feature_dict, f)
    

Train on full data
[100]	training's binary_logloss: 0.60329	training's average_precision: 0.916053
[200]	training's binary_logloss: 0.571638	training's average_precision: 0.940393
[300]	training's binary_logloss: 0.536997	training's average_precision: 0.96016
[400]	training's binary_logloss: 0.5061	training's average_precision: 0.972267
[500]	training's binary_logloss: 0.475203	training's average_precision: 0.983647
[600]	training's binary_logloss: 0.462208	training's average_precision: 0.988328
[700]	training's binary_logloss: 0.439926	training's average_precision: 0.992147
[800]	training's binary_logloss: 0.42499	training's average_precision: 0.994568
[900]	training's binary_logloss: 0.404499	training's average_precision: 0.996757
[1000]	training's binary_logloss: 0.390277	training's average_precision: 0.997851
[1100]	training's binary_logloss: 0.373714	training's average_precision: 0.998671
[1200]	training's binary_logloss: 0.36366	training's average_precision: 0.999049
[1300]	train

Total fold Logloss: 0.6171721885231537, Total confident objects precision: 0.7375923970432946, Total % of confident objects: 0.4104008667388949

Total test Logloss: 0.5738583154065954, Total test confident objects precision: 0.7880184331797235, Total % of test confident objects: 0.4149139579349904

# Count predictions according to pattern

In [18]:
# Class balance of the target in the ticker-grouped training split.
train_df['target'].value_counts()

1    2750
0    1865
Name: target, dtype: int64

In [19]:
if train_test == 'fold':
    # In 'fold' mode `oof` is an (n, 1) column of out-of-fold probabilities.
    # Flatten the boolean flag to 1-D and attach it with assign() instead of
    # writing into train_df in place (train_df is a slice of df, so in-place
    # assignment triggers SettingWithCopyWarning).
    train_df = train_df.assign(oof=np.ravel(oof >= high_bound))
    # Per pattern: share of rows at or above the confidence threshold, and row count
    display(train_df.groupby('pattern')['oof'].agg(['mean', 'count']))