In [5]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import os
import sys
import gc
from enum import IntEnum
sys.path.append("./utils")

from generate_features import *
from get_targets import *
from dnn_utils import *

from stockstats import StockDataFrame

from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier

from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_auc_score
from sklearn.metrics import fbeta_score, recall_score
from sklearn.model_selection import TimeSeriesSplit

import optuna

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# File-system locations (train CSV, sample CSV, saved model file — per the variable names).
# NOTE(review): none of these three paths is referenced in the cells visible in this
# part of the notebook — confirm they are still needed.
TRAIN_PATH = "data/prices/apple_1m_prices_train.csv"
SAMPLE_PATH = "data/prices/apple_1m_cb_dnn_sample.csv"
MODEL_PATH = "models/apple_1m_cb_buy_signal.cb"

In [3]:
# Share of rows (taken from the end of the data) that the hold-out split cell reserves for testing.
TEST_RATIO = 0.2
# NOTE(review): the constants below are not referenced in the visible cells; the
# Optuna objective samples its own time_lag / ewm values instead — confirm usage.
EWM_PERIODS = 10
EWM_ALPHA = 0.3

BETA = 0.3

TIME_LAG = 9
THRESHOLD = 0.3

In [4]:
# Read the BTC frame (first CSV column becomes the index) and discard the 100
# columns named "0" .. "99" (presumably the embeddings, going by the file name).
AFS_RESULT_BTC_PATH = 'data_crypto_2021/afs_all_btc_embeddings.csv'
train_df = (
    pd.read_csv(AFS_RESULT_BTC_PATH, index_col=0)
    .drop(columns=[str(i) for i in range(100)])
)
train_df

Unnamed: 0,open,high,low,close,volume
0,29484.39,29535.95,29484.39,29518.58,420109.235827
1,29459.58,29459.58,29429.67,29439.16,356581.967102
2,29446.25,29450.00,29439.07,29441.25,51637.618692
3,29353.67,29396.07,29338.14,29370.50,170835.156823
4,29398.82,29418.94,29375.71,29377.43,191779.310775
...,...,...,...,...,...
106043,46468.80,46468.80,46420.09,46420.09,135536.969821
106044,46389.97,46420.87,46385.63,46420.87,11566.314954
106045,46434.34,46434.34,46421.01,46428.48,7935.098945
106046,46368.00,46368.00,46354.76,46356.31,96443.304904


In [15]:
def cv_objective(trial):
    """Optuna objective: mean ROC AUC of a LightGBM classifier over time-series CV folds.

    Every trial samples the target parameters (``time_lag``, ``window``,
    ``factor``) and the EWM parameters (``ewm_periods``, ``ewm_alpha``),
    rebuilds features and targets from the notebook-level ``train_df`` and
    scores a class-balanced ``LGBMClassifier`` on each ``TimeSeriesSplit`` fold.

    Args:
        trial: Optuna trial used to sample the hyper-parameters.

    Returns:
        Mean of the per-fold ROC AUC values. A fold whose training *or* test
        labels contain fewer than two classes contributes a score of 0.
    """
    NUM_FOLDS = 5
    time_series_cv = TimeSeriesSplit(n_splits=NUM_FOLDS)

    time_lag = trial.suggest_int("time_lag", 25, 50)
    window = trial.suggest_int("window", 17, 30)
    factor = trial.suggest_float("factor", 1., 10.)
    ewm_period = trial.suggest_int("ewm_periods", 10, 30)
    ewm_alpha = trial.suggest_float("ewm_alpha", 0.1, 0.9)

    # Feature / target builders come from the project's utils modules.
    trial_train_df = indicators_with_rowwise_ewm(train_df, ewm_alpha, ewm_period)
    targets = get_buy_signal_with_std_threshold_target(trial_train_df, time_lag, window, factor)
    # Rows with any NaN are dropped after features and targets are joined side by side.
    trial_train_df = pd.concat([trial_train_df, targets], axis=1).dropna().reset_index(drop=True)

    X = trial_train_df.drop(["target"], axis=1).to_numpy()
    y = trial_train_df["target"].to_numpy()

    scores = []
    for train_index, test_index in time_series_cv.split(X):
        X_train = X[train_index]
        y_train = y[train_index]

        X_test = X[test_index]
        y_test = y[test_index]

        # Both sides need two classes: the classifier cannot learn from a single
        # class, and roc_auc_score raises ValueError when y_true has only one.
        # (The original condition tested y_train twice and never looked at y_test.)
        if len(np.unique(y_train)) < 2 or len(np.unique(y_test)) < 2:
            scores.append(0)
            continue

        lc = LGBMClassifier(class_weight="balanced")
        lc.fit(X_train, y_train)

        probas = lc.predict_proba(X_test)
        score = roc_auc_score(y_test, probas[:, 1])

        print("Current score:", score)
        scores.append(score)

        # Release the fold copies before the next (larger) fold is materialised.
        del X_train, y_train, X_test, y_test
        gc.collect()

    del trial_train_df, targets, X, y
    gc.collect()
    return np.mean(scores)

In [16]:
# Search for the parameters that maximise cv_objective; the run is bounded by a 360 s timeout.
study = optuna.create_study(direction="maximize")
study.optimize(func=cv_objective, timeout=360)

[32m[I 2022-04-06 19:20:35,125][0m A new study created in memory with name: no-name-6490f24a-2451-4ddf-ac6a-b1fcc187c884[0m


Current score: 0.752304478391435
Current score: 0.7754528813470517
Current score: 0.6391332281268778
Current score: 0.6455111320399052
Current score: 0.7290956267520242


[32m[I 2022-04-06 19:20:48,514][0m Trial 0 finished with value: 0.7082994693314588 and parameters: {'time_lag': 46, 'window': 23, 'factor': 9.614543300725265, 'ewm_periods': 19, 'ewm_alpha': 0.3449321313195505}. Best is trial 0 with value: 0.7082994693314588.[0m


Current score: 0.5678555739910527
Current score: 0.5522664034363287
Current score: 0.5583874938221206
Current score: 0.5561560353571856
Current score: 0.5554094635208549


[32m[I 2022-04-06 19:21:00,792][0m Trial 1 finished with value: 0.5580149940255085 and parameters: {'time_lag': 49, 'window': 28, 'factor': 1.9512043885354684, 'ewm_periods': 12, 'ewm_alpha': 0.38165063003514765}. Best is trial 0 with value: 0.7082994693314588.[0m


Current score: 0.7277854154070575
Current score: 0.7540424179090348
Current score: 0.6698380489432008
Current score: 0.648203991461188
Current score: 0.7327034773826024


[32m[I 2022-04-06 19:21:13,609][0m Trial 2 finished with value: 0.7065146702206168 and parameters: {'time_lag': 38, 'window': 30, 'factor': 7.052698607021825, 'ewm_periods': 24, 'ewm_alpha': 0.4032984211007633}. Best is trial 0 with value: 0.7082994693314588.[0m


Current score: 0.6615145287531449
Current score: 0.6713154277753378
Current score: 0.6343231439880188
Current score: 0.5938463015904437
Current score: 0.6360722735758835


[32m[I 2022-04-06 19:21:27,741][0m Trial 3 finished with value: 0.6394143351365658 and parameters: {'time_lag': 34, 'window': 26, 'factor': 3.736065864957747, 'ewm_periods': 23, 'ewm_alpha': 0.6506583905111216}. Best is trial 0 with value: 0.7082994693314588.[0m


Current score: 0.6875671541833168
Current score: 0.7385548363635905
Current score: 0.6614831830499587
Current score: 0.6422280269228058
Current score: 0.7213163681637624


[32m[I 2022-04-06 19:21:43,008][0m Trial 4 finished with value: 0.690229913736687 and parameters: {'time_lag': 35, 'window': 25, 'factor': 8.016538792015167, 'ewm_periods': 28, 'ewm_alpha': 0.25766839904187944}. Best is trial 0 with value: 0.7082994693314588.[0m


Current score: 0.6309384660779236
Current score: 0.6017186332199964
Current score: 0.5697684399867963
Current score: 0.5589288126060248
Current score: 0.5896405411081742


[32m[I 2022-04-06 19:21:57,141][0m Trial 5 finished with value: 0.5901989785997831 and parameters: {'time_lag': 44, 'window': 30, 'factor': 2.782103263434121, 'ewm_periods': 13, 'ewm_alpha': 0.46746224039596107}. Best is trial 0 with value: 0.7082994693314588.[0m


Current score: 0.6810080017187277
Current score: 0.7126415670246659
Current score: 0.6544055917226124
Current score: 0.6181373563819242


[32m[I 2022-04-06 19:22:10,416][0m Trial 6 finished with value: 0.6664498968817686 and parameters: {'time_lag': 36, 'window': 30, 'factor': 4.92944609636468, 'ewm_periods': 29, 'ewm_alpha': 0.46790463896497225}. Best is trial 0 with value: 0.7082994693314588.[0m


Current score: 0.6660569675609126
Current score: 0.6934241538098109
Current score: 0.7098401739660701
Current score: 0.6550207333075762
Current score: 0.6376325469970632
Current score: 0.696689291278173


[32m[I 2022-04-06 19:22:23,096][0m Trial 7 finished with value: 0.6785213798717387 and parameters: {'time_lag': 35, 'window': 26, 'factor': 5.898600655640477, 'ewm_periods': 16, 'ewm_alpha': 0.7644559367234185}. Best is trial 0 with value: 0.7082994693314588.[0m


Current score: 0.6794803619179759
Current score: 0.7268225715414688
Current score: 0.6291589238205488
Current score: 0.6520444887660224
Current score: 0.6971357104961265


[32m[I 2022-04-06 19:22:36,691][0m Trial 8 finished with value: 0.6769284113084285 and parameters: {'time_lag': 39, 'window': 21, 'factor': 7.528544141193784, 'ewm_periods': 11, 'ewm_alpha': 0.186906731899599}. Best is trial 0 with value: 0.7082994693314588.[0m


Current score: 0.7536445628600861
Current score: 0.804558457350877
Current score: 0.6741553983906128
Current score: 0.6853422014902701
Current score: 0.7743050941582583


[32m[I 2022-04-06 19:22:50,074][0m Trial 9 finished with value: 0.7384011428500209 and parameters: {'time_lag': 32, 'window': 26, 'factor': 9.854089481401637, 'ewm_periods': 12, 'ewm_alpha': 0.1947972785571248}. Best is trial 9 with value: 0.7384011428500209.[0m


Current score: 0.7092977140323179
Current score: 0.712666003792594
Current score: 0.7130657214262737
Current score: 0.6387397727057886
Current score: 0.7656512895629193


[32m[I 2022-04-06 19:23:04,687][0m Trial 10 finished with value: 0.7078841003039787 and parameters: {'time_lag': 28, 'window': 17, 'factor': 9.86458139921952, 'ewm_periods': 17, 'ewm_alpha': 0.12326177945045491}. Best is trial 9 with value: 0.7384011428500209.[0m


Current score: 0.702052839896056
Current score: 0.7717459308072004
Current score: 0.6578980418806
Current score: 0.6668310705678128
Current score: 0.7536398511344125


[32m[I 2022-04-06 19:23:17,951][0m Trial 11 finished with value: 0.7104335468572163 and parameters: {'time_lag': 27, 'window': 22, 'factor': 9.907412276307472, 'ewm_periods': 19, 'ewm_alpha': 0.2971418590630972}. Best is trial 9 with value: 0.7384011428500209.[0m


Current score: 0.68908629314932
Current score: 0.725686507909985
Current score: 0.6663040062575358
Current score: 0.6046395314367271
Current score: 0.7183366956300732


[32m[I 2022-04-06 19:23:31,112][0m Trial 12 finished with value: 0.6808106068767282 and parameters: {'time_lag': 26, 'window': 21, 'factor': 8.324843336639724, 'ewm_periods': 22, 'ewm_alpha': 0.2514922376477817}. Best is trial 9 with value: 0.7384011428500209.[0m


Current score: 0.7211509019329744
Current score: 0.776072129379686
Current score: 0.6494822223940774
Current score: 0.6399771642947437
Current score: 0.7333961546247835


[32m[I 2022-04-06 19:23:48,840][0m Trial 13 finished with value: 0.704015714525253 and parameters: {'time_lag': 30, 'window': 22, 'factor': 9.087956219995661, 'ewm_periods': 15, 'ewm_alpha': 0.6350775400820741}. Best is trial 9 with value: 0.7384011428500209.[0m


Current score: 0.6981743166663347
Current score: 0.6766996879628531
Current score: 0.6401769843268654
Current score: 0.6024307865826949
Current score: 0.6672456590475757


[32m[I 2022-04-06 19:24:05,760][0m Trial 14 finished with value: 0.6569454869172648 and parameters: {'time_lag': 31, 'window': 19, 'factor': 6.6863199183926145, 'ewm_periods': 10, 'ewm_alpha': 0.1002943360655042}. Best is trial 9 with value: 0.7384011428500209.[0m


Current score: 0.7076860786780246
Current score: 0.744187356631743
Current score: 0.6726237780783175
Current score: 0.6817220610506474


[32m[I 2022-04-06 19:24:21,666][0m Trial 15 finished with value: 0.7125386194607433 and parameters: {'time_lag': 25, 'window': 24, 'factor': 8.616664110399592, 'ewm_periods': 20, 'ewm_alpha': 0.2943553733522599}. Best is trial 9 with value: 0.7384011428500209.[0m


Current score: 0.7564738228649844
Current score: 0.744565430629874
Current score: 0.8124613741389649
Current score: 0.6957447735473703
Current score: 0.6467907888801644
Current score: 0.7162490393752059


[32m[I 2022-04-06 19:24:44,070][0m Trial 16 finished with value: 0.7231622813143159 and parameters: {'time_lag': 31, 'window': 25, 'factor': 8.618223170452103, 'ewm_periods': 14, 'ewm_alpha': 0.5673451163161305}. Best is trial 9 with value: 0.7384011428500209.[0m


Current score: 0.6990475892585487
Current score: 0.7137010406472988
Current score: 0.6461545529452912
Current score: 0.625723707685286
Current score: 0.651977701698431


[32m[I 2022-04-06 19:25:01,049][0m Trial 17 finished with value: 0.6673209184469712 and parameters: {'time_lag': 32, 'window': 27, 'factor': 4.945766580917706, 'ewm_periods': 14, 'ewm_alpha': 0.6020751611692736}. Best is trial 9 with value: 0.7384011428500209.[0m


Current score: 0.7324126140981895
Current score: 0.7279687292321885
Current score: 0.6495930554184097
Current score: 0.6544732357956539
Current score: 0.7104040200737103


[32m[I 2022-04-06 19:25:18,313][0m Trial 18 finished with value: 0.6949703309236304 and parameters: {'time_lag': 40, 'window': 28, 'factor': 6.481302269575949, 'ewm_periods': 17, 'ewm_alpha': 0.8266806447440312}. Best is trial 9 with value: 0.7384011428500209.[0m


Current score: 0.7579158299208354
Current score: 0.754438096419449
Current score: 0.6792532740871069
Current score: 0.6354957726052067
Current score: 0.6986607791592696


[32m[I 2022-04-06 19:25:37,183][0m Trial 19 finished with value: 0.7051527504383734 and parameters: {'time_lag': 42, 'window': 24, 'factor': 8.98523167906152, 'ewm_periods': 10, 'ewm_alpha': 0.5560157934144083}. Best is trial 9 with value: 0.7384011428500209.[0m


Current score: 0.7585805925991909
Current score: 0.7851341618050025
Current score: 0.6636392231832307
Current score: 0.6423683250099166
Current score: 0.7466129866427385


[32m[I 2022-04-06 19:26:01,072][0m Trial 20 finished with value: 0.7192670578480158 and parameters: {'time_lag': 29, 'window': 28, 'factor': 7.802219899062141, 'ewm_periods': 13, 'ewm_alpha': 0.7042952598327846}. Best is trial 9 with value: 0.7384011428500209.[0m


Current score: 0.7542601483327294
Current score: 0.7760611791274435
Current score: 0.6745494514967315
Current score: 0.656216827618864
Current score: 0.7280074893149826


[32m[I 2022-04-06 19:26:16,516][0m Trial 21 finished with value: 0.7178190191781503 and parameters: {'time_lag': 30, 'window': 28, 'factor': 7.7710382344523605, 'ewm_periods': 13, 'ewm_alpha': 0.7376321903086547}. Best is trial 9 with value: 0.7384011428500209.[0m


Current score: 0.7463633497022818
Current score: 0.7977430098506202
Current score: 0.6683838291614527
Current score: 0.6679143129276782
Current score: 0.730780157254722


[32m[I 2022-04-06 19:26:35,828][0m Trial 22 finished with value: 0.7222369317793509 and parameters: {'time_lag': 33, 'window': 26, 'factor': 8.968421229947563, 'ewm_periods': 15, 'ewm_alpha': 0.8806429557570271}. Best is trial 9 with value: 0.7384011428500209.[0m


In [17]:
# Display the parameter set of the best trial recorded by the study.
study.best_params

{'time_lag': 32,
 'window': 26,
 'factor': 9.854089481401637,
 'ewm_periods': 12,
 'ewm_alpha': 0.1947972785571248}

In [7]:
# Separate the "target" label column from the remaining feature columns.
# NOTE(review): the frame displayed by the data-loading cell has no "target" column
# and a different row count than the shapes shown below this cell, so this cell
# relies on `train_df` having been rebuilt in a cell that is not visible here —
# confirm it survives Restart & Run All.
y = train_df["target"].to_numpy()
X = train_df.drop(columns=["target"]).to_numpy()

(X.shape, y.shape)

((322915, 168), (322915,))

In [8]:
def get_sample_weights(y: np.ndarray, class_weights: np.ndarray) -> np.ndarray:
    """Expand per-class weights into one weight per sample.

    Args:
        y: Label array; each distinct value is cast to ``int`` and used as an
            index into ``class_weights``.
        class_weights: Weights indexed by integer class label.

    Returns:
        Float array of ``len(y)`` where entry ``i`` holds the weight of ``y[i]``'s class.
    """
    sample_weights = np.zeros(len(y))
    for label in np.unique(y):
        belongs_to_label = y == label
        sample_weights[belongs_to_label] = class_weights[int(label)]

    return sample_weights

In [13]:
# Hold-out split without shuffling: the trailing TEST_RATIO share of the rows is the test set.
test_size = int(len(X) * TEST_RATIO)

X_train, X_test = X[:-test_size], X[-test_size:]
y_train, y_test = y[:-test_size], y[-test_size:]

# "balanced" class weights are computed separately for each split. On the training
# side the weight of the second class (in np.unique order) is additionally multiplied by 6.
train_weights = compute_class_weight(
    class_weight="balanced",
    classes=np.unique(y_train),
    y=y_train,
) * np.array([1, 6])
test_weights = compute_class_weight(
    class_weight="balanced",
    classes=np.unique(y_test),
    y=y_test,
)

In [14]:
# Fit a CatBoost classifier on the training rows using the class weights computed above.
cb_classifier = CatBoostClassifier(class_weights=train_weights)
cb_classifier.fit(X=X_train, y=y_train)

Learning rate set to 0.110391
0:	learn: 0.6144765	total: 127ms	remaining: 2m 6s
1:	learn: 0.5539155	total: 248ms	remaining: 2m 3s
2:	learn: 0.5088539	total: 387ms	remaining: 2m 8s
3:	learn: 0.4742508	total: 525ms	remaining: 2m 10s
4:	learn: 0.4479226	total: 643ms	remaining: 2m 7s
5:	learn: 0.4266896	total: 776ms	remaining: 2m 8s
6:	learn: 0.4122655	total: 906ms	remaining: 2m 8s
7:	learn: 0.4002079	total: 1.04s	remaining: 2m 9s
8:	learn: 0.3902273	total: 1.18s	remaining: 2m 9s
9:	learn: 0.3815652	total: 1.28s	remaining: 2m 7s
10:	learn: 0.3744436	total: 1.38s	remaining: 2m 3s
11:	learn: 0.3693137	total: 1.47s	remaining: 2m 1s
12:	learn: 0.3654737	total: 1.57s	remaining: 1m 59s
13:	learn: 0.3622783	total: 1.67s	remaining: 1m 57s
14:	learn: 0.3594213	total: 1.76s	remaining: 1m 55s
15:	learn: 0.3570889	total: 1.86s	remaining: 1m 54s
16:	learn: 0.3548723	total: 1.98s	remaining: 1m 54s
17:	learn: 0.3526718	total: 2.08s	remaining: 1m 53s
18:	learn: 0.3512076	total: 2.18s	remaining: 1m 52s
19:

157:	learn: 0.3140042	total: 18s	remaining: 1m 36s
158:	learn: 0.3138508	total: 18.2s	remaining: 1m 36s
159:	learn: 0.3137033	total: 18.3s	remaining: 1m 36s
160:	learn: 0.3135590	total: 18.5s	remaining: 1m 36s
161:	learn: 0.3133974	total: 18.7s	remaining: 1m 36s
162:	learn: 0.3132161	total: 18.9s	remaining: 1m 36s
163:	learn: 0.3130525	total: 19.1s	remaining: 1m 37s
164:	learn: 0.3128482	total: 19.2s	remaining: 1m 37s
165:	learn: 0.3126965	total: 19.3s	remaining: 1m 36s
166:	learn: 0.3125375	total: 19.4s	remaining: 1m 36s
167:	learn: 0.3123964	total: 19.5s	remaining: 1m 36s
168:	learn: 0.3122174	total: 19.6s	remaining: 1m 36s
169:	learn: 0.3120243	total: 19.7s	remaining: 1m 36s
170:	learn: 0.3118891	total: 19.8s	remaining: 1m 35s
171:	learn: 0.3117008	total: 19.9s	remaining: 1m 35s
172:	learn: 0.3115253	total: 20s	remaining: 1m 35s
173:	learn: 0.3113038	total: 20.2s	remaining: 1m 35s
174:	learn: 0.3110723	total: 20.3s	remaining: 1m 35s
175:	learn: 0.3109461	total: 20.4s	remaining: 1m 3

313:	learn: 0.2886246	total: 38.7s	remaining: 1m 24s
314:	learn: 0.2884581	total: 38.8s	remaining: 1m 24s
315:	learn: 0.2883426	total: 39s	remaining: 1m 24s
316:	learn: 0.2882267	total: 39.1s	remaining: 1m 24s
317:	learn: 0.2881541	total: 39.2s	remaining: 1m 24s
318:	learn: 0.2879552	total: 39.3s	remaining: 1m 23s
319:	learn: 0.2877571	total: 39.4s	remaining: 1m 23s
320:	learn: 0.2876422	total: 39.5s	remaining: 1m 23s
321:	learn: 0.2875111	total: 39.6s	remaining: 1m 23s
322:	learn: 0.2873455	total: 39.7s	remaining: 1m 23s
323:	learn: 0.2871577	total: 39.8s	remaining: 1m 23s
324:	learn: 0.2870292	total: 39.9s	remaining: 1m 22s
325:	learn: 0.2868678	total: 40s	remaining: 1m 22s
326:	learn: 0.2867328	total: 40.1s	remaining: 1m 22s
327:	learn: 0.2865867	total: 40.2s	remaining: 1m 22s
328:	learn: 0.2865039	total: 40.3s	remaining: 1m 22s
329:	learn: 0.2863298	total: 40.5s	remaining: 1m 22s
330:	learn: 0.2861582	total: 40.7s	remaining: 1m 22s
331:	learn: 0.2859912	total: 40.8s	remaining: 1m 2

470:	learn: 0.2676612	total: 57.3s	remaining: 1m 4s
471:	learn: 0.2675608	total: 57.5s	remaining: 1m 4s
472:	learn: 0.2674326	total: 57.6s	remaining: 1m 4s
473:	learn: 0.2673219	total: 57.7s	remaining: 1m 4s
474:	learn: 0.2671918	total: 57.8s	remaining: 1m 3s
475:	learn: 0.2671117	total: 57.9s	remaining: 1m 3s
476:	learn: 0.2669926	total: 58.1s	remaining: 1m 3s
477:	learn: 0.2668500	total: 58.2s	remaining: 1m 3s
478:	learn: 0.2667068	total: 58.3s	remaining: 1m 3s
479:	learn: 0.2666002	total: 58.4s	remaining: 1m 3s
480:	learn: 0.2664782	total: 58.5s	remaining: 1m 3s
481:	learn: 0.2663183	total: 58.6s	remaining: 1m 3s
482:	learn: 0.2662239	total: 58.8s	remaining: 1m 2s
483:	learn: 0.2660919	total: 58.9s	remaining: 1m 2s
484:	learn: 0.2659245	total: 59s	remaining: 1m 2s
485:	learn: 0.2657740	total: 59.1s	remaining: 1m 2s
486:	learn: 0.2656178	total: 59.3s	remaining: 1m 2s
487:	learn: 0.2654745	total: 59.4s	remaining: 1m 2s
488:	learn: 0.2653715	total: 59.5s	remaining: 1m 2s
489:	learn: 0.

630:	learn: 0.2496160	total: 1m 15s	remaining: 44.1s
631:	learn: 0.2495420	total: 1m 15s	remaining: 44s
632:	learn: 0.2494364	total: 1m 15s	remaining: 43.9s
633:	learn: 0.2493500	total: 1m 15s	remaining: 43.7s
634:	learn: 0.2492649	total: 1m 15s	remaining: 43.6s
635:	learn: 0.2491673	total: 1m 15s	remaining: 43.5s
636:	learn: 0.2490838	total: 1m 16s	remaining: 43.4s
637:	learn: 0.2489639	total: 1m 16s	remaining: 43.2s
638:	learn: 0.2488644	total: 1m 16s	remaining: 43.1s
639:	learn: 0.2487714	total: 1m 16s	remaining: 43s
640:	learn: 0.2486520	total: 1m 16s	remaining: 42.9s
641:	learn: 0.2485199	total: 1m 16s	remaining: 42.8s
642:	learn: 0.2484545	total: 1m 16s	remaining: 42.6s
643:	learn: 0.2483611	total: 1m 16s	remaining: 42.5s
644:	learn: 0.2482477	total: 1m 17s	remaining: 42.4s
645:	learn: 0.2481335	total: 1m 17s	remaining: 42.3s
646:	learn: 0.2480265	total: 1m 17s	remaining: 42.2s
647:	learn: 0.2479166	total: 1m 17s	remaining: 42.1s
648:	learn: 0.2478285	total: 1m 17s	remaining: 42s

787:	learn: 0.2345448	total: 1m 36s	remaining: 26s
788:	learn: 0.2344402	total: 1m 36s	remaining: 25.8s
789:	learn: 0.2343484	total: 1m 36s	remaining: 25.7s
790:	learn: 0.2342752	total: 1m 36s	remaining: 25.6s
791:	learn: 0.2341975	total: 1m 36s	remaining: 25.5s
792:	learn: 0.2340535	total: 1m 37s	remaining: 25.4s
793:	learn: 0.2339761	total: 1m 37s	remaining: 25.2s
794:	learn: 0.2338787	total: 1m 37s	remaining: 25.1s
795:	learn: 0.2337896	total: 1m 37s	remaining: 25s
796:	learn: 0.2337047	total: 1m 37s	remaining: 24.9s
797:	learn: 0.2335637	total: 1m 37s	remaining: 24.7s
798:	learn: 0.2334870	total: 1m 37s	remaining: 24.6s
799:	learn: 0.2333927	total: 1m 37s	remaining: 24.5s
800:	learn: 0.2333058	total: 1m 38s	remaining: 24.4s
801:	learn: 0.2332098	total: 1m 38s	remaining: 24.2s
802:	learn: 0.2331117	total: 1m 38s	remaining: 24.1s
803:	learn: 0.2330247	total: 1m 38s	remaining: 24s
804:	learn: 0.2329028	total: 1m 38s	remaining: 23.9s
805:	learn: 0.2328349	total: 1m 38s	remaining: 23.7s

943:	learn: 0.2210287	total: 1m 58s	remaining: 7.04s
944:	learn: 0.2209903	total: 1m 58s	remaining: 6.91s
945:	learn: 0.2209373	total: 1m 58s	remaining: 6.79s
946:	learn: 0.2208617	total: 1m 59s	remaining: 6.66s
947:	learn: 0.2208023	total: 1m 59s	remaining: 6.53s
948:	learn: 0.2207075	total: 1m 59s	remaining: 6.41s
949:	learn: 0.2205943	total: 1m 59s	remaining: 6.28s
950:	learn: 0.2205424	total: 1m 59s	remaining: 6.16s
951:	learn: 0.2204613	total: 1m 59s	remaining: 6.03s
952:	learn: 0.2203898	total: 1m 59s	remaining: 5.91s
953:	learn: 0.2202818	total: 1m 59s	remaining: 5.78s
954:	learn: 0.2201938	total: 1m 59s	remaining: 5.65s
955:	learn: 0.2201179	total: 2m	remaining: 5.53s
956:	learn: 0.2200360	total: 2m	remaining: 5.4s
957:	learn: 0.2199401	total: 2m	remaining: 5.28s
958:	learn: 0.2198684	total: 2m	remaining: 5.15s
959:	learn: 0.2198028	total: 2m	remaining: 5.03s
960:	learn: 0.2197544	total: 2m	remaining: 4.9s
961:	learn: 0.2196487	total: 2m	remaining: 4.77s
962:	learn: 0.2195691	t

<catboost.core.CatBoostClassifier at 0x213579605e0>

In [15]:
# Score the hold-out rows: print the sample-weighted balanced accuracy, then
# show the raw confusion matrix as the cell output.
predictions = cb_classifier.predict(X_test)
print(
    balanced_accuracy_score(
        y_test,
        predictions,
        sample_weight=get_sample_weights(y_test, test_weights),
    )
)
confusion_matrix(y_test, predictions)

0.5455449816798632


array([[  738,  6097],
       [  975, 56773]], dtype=int64)

In [16]:
# Class probability estimates of the fitted CatBoost model for the hold-out rows.
probas = cb_classifier.predict_proba(X_test)

In [64]:
# Rank the features by CatBoost importance, highest first, as (importance, column name) pairs.
a = sorted(
    zip(
        cb_classifier.get_feature_importance(),
        train_df.drop(columns=["target"]).columns,
    ),
    reverse=True,
)
a

[(3.7549248096132044, 'macdh'),
 (2.7835582991571197, 'kdjk_6'),
 (2.459433975268147, 'chop_14_ewm'),
 (2.152971959057527, 'rsv_6_ewm'),
 (1.9781197184204957, 'chop_14'),
 (1.4524342687859593, 'chop_86'),
 (1.4491147303585075, 'mfi_174'),
 (1.3641875795983818, 'volume_174_ema'),
 (1.3514565917278643, 'adxr'),
 (1.3434289107438182, 'wt2_ewm'),
 (1.3062096811820483, 'adx'),
 (1.3029184701620442, 'volume_363_mstd_ewm'),
 (1.2851374267851565, 'high_delta'),
 (1.2520462619667483, 'mfi_363'),
 (1.2505039623583618, 'chop_363_ewm'),
 (1.2484091581783208, 'cr-ma3'),
 (1.2300037390735163, 'macdh_ewm'),
 (1.2269897374674616, 'chop_174'),
 (1.1409240831728789, 'change'),
 (1.126096112294118, 'volume_86_mstd_ewm'),
 (1.0790880961058746, 'chop_174_ewm'),
 (1.075002281910072, 'volume_174_mstd'),
 (1.0436083719043394, 'volume_363_ema'),
 (1.016942673199603, 'chop_6'),
 (0.96440278938113, 'vr_86'),
 (0.9587927155235945, 'volume_363_mstd'),
 (0.9420113105624761, 'chop_363'),
 (0.9049972878035286, 'atr_1

In [19]:
import torch
from torch import nn

In [56]:
class Model(nn.Module):
    """Linear classifier: dropout (p=0.3) on the inputs followed by one linear layer.

    Args:
        input_size: Number of input features per sample.
        num_classes: Number of output scores produced per sample.
    """

    def __init__(self, input_size: int, num_classes: int):
        super().__init__()

        # Kept as a Sequential under the attribute name `seq`, with the linear
        # layer at index 1, so the parameter names in the state dict stay the same.
        layers = [
            nn.Dropout(p=0.3),
            nn.Linear(in_features=input_size, out_features=num_classes),
        ]
        self.seq = nn.Sequential(*layers)

    def forward(self, X):
        """Return the raw (un-normalised) class scores for the batch ``X``."""
        return self.seq(X)

In [57]:
# Two-class linear model sized to the feature count, trained with Adam and cross-entropy.
# NOTE(review): `device` is not defined in the visible cells — presumably it comes
# from one of the star-imports at the top; confirm.
model = Model(input_size=X_train.shape[1], num_classes=2).to(device)
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

In [58]:
# Wrap the train / hold-out arrays as tensors in StockDataset and serve them in
# batches of 200; no shuffle argument is passed to the loaders.
# NOTE(review): StockDataset and DataLoader are not defined in the visible cells
# (presumably star-imported from dnn_utils) — confirm. torch.tensor keeps the
# NumPy dtype of X_train / y_train, so any dtype conversion must happen downstream.
train_dataset = StockDataset(torch.tensor(X_train), torch.tensor(y_train))
val_dataset = StockDataset(torch.tensor(X_test), torch.tensor(y_test))

train_loader = DataLoader(train_dataset, batch_size=200)
val_loader = DataLoader(val_dataset, batch_size=200)

In [59]:
# Run 5 epochs of training with validation via the project's train_model helper
# (not defined in the visible cells).
# NOTE(review): the captured output of this cell reports `nan` for both the train and
# validation loss after epoch 1 — check the inputs for NaN/inf values or unscaled
# features before trusting this run; TODO confirm root cause.
train_model(
    model=model,
    optimizer=optimizer,
    criterion=criterion,
    train_loader=train_loader,
    validation_loader=val_loader,
    num_epoch=5
)

////////////////////////////////////////
// Epoch: 1
// Train loss: nan
// Validation loss: nan
////////////////////////////////////////


KeyboardInterrupt: 