In [None]:
import os
# This variable is required by torch.use_deterministic_algorithms(True), which improves reproducibility when
# training on a GPU with PyTorch.
# For details see:
# https://docs.nvidia.com/cuda/cublas/index.html
# set a debug environment variable CUBLAS_WORKSPACE_CONFIG to :16:8 (may limit overall performance) or
# :4096:8 (will increase library footprint in GPU memory by approximately 24MiB).
os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'

In [None]:
# для backtesting необходим даунгрейд библиотеки bokeh
!pip uninstall panel -y --quiet
!pip uninstall bokeh -y --quiet
!pip install bokeh==3.1.0 --quiet

[0m[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
holoviews 1.20.2 requires panel>=1.0, which is not installed.[0m[31m
[0m

In [None]:
# list the installed packages
!pip freeze

absl-py==1.4.0
accelerate==1.7.0
aiofiles==24.1.0
aiohappyeyeballs==2.6.1
aiohttp==3.11.15
aiosignal==1.3.2
alabaster==1.0.0
albucore==0.0.24
albumentations==2.0.8
ale-py==0.11.1
altair==5.5.0
annotated-types==0.7.0
antlr4-python3-runtime==4.9.3
anyio==4.9.0
argon2-cffi==23.1.0
argon2-cffi-bindings==21.2.0
array_record==0.7.2
arviz==0.21.0
astropy==7.1.0
astropy-iers-data==0.2025.5.26.0.37.21
astunparse==1.6.3
atpublic==5.1
attrs==25.3.0
audioread==3.0.1
autograd==1.8.0
babel==2.17.0
backcall==0.2.0
backports.tarfile==1.2.0
backtesting==0.6.4
beautifulsoup4==4.13.4
betterproto==2.0.0b6
bigframes==2.4.0
bigquery-magics==0.9.0
bleach==6.2.0
blinker==1.9.0
blis==1.3.0
blobfile==3.0.0
blosc2==3.3.4
bokeh==3.1.0
Bottleneck==1.4.2
bqplot==0.12.45
branca==0.8.1
build==1.2.2.post1
CacheControl==0.14.3
cachetools==5.5.2
catalogue==2.0.10
certifi==2025.4.26
cffi==1.17.1
chardet==5.2.0
charset-normalizer==3.4.2
chex==0.1.89
clarabel==0.11.0
click==8.2.1
cloudpathlib==0.21.1
cloudpickle==3.1.1
cma

In [None]:
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.ensemble import (AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier,
                              StackingClassifier, VotingClassifier)
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
import tensorflow as tf
from tensorflow.keras import layers
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from tqdm.auto import tqdm
import yfinance as yf
import warnings

warnings.filterwarnings('ignore')



In [None]:
!pip install catboost


Collecting catboost
  Downloading catboost-1.2.8-cp311-cp311-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.8-cp311-cp311-manylinux2014_x86_64.whl (99.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.2/99.2 MB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.8


In [None]:
from sklearn.ensemble import VotingClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier

In [None]:
pip install cuml-cu11 --extra-index-url=https://pypi.nvidia.com  # Для CUDA 11.x


Looking in indexes: https://pypi.org/simple, https://pypi.nvidia.com
Collecting cuml-cu11
  Downloading https://pypi.nvidia.com/cuml-cu11/cuml_cu11-25.4.0-cp311-cp311-manylinux_2_28_x86_64.whl (9.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.8/9.8 MB[0m [31m48.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting cuda-python<12.0a0,>=11.8.5 (from cuml-cu11)
  Downloading cuda_python-11.8.7-py3-none-any.whl.metadata (14 kB)
Collecting cudf-cu11==25.4.* (from cuml-cu11)
  Downloading https://pypi.nvidia.com/cudf-cu11/cudf_cu11-25.4.0-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m132.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting cupy-cuda11x>=12.0.0 (from cuml-cu11)
  Downloading cupy_cuda11x-13.4.1-cp311-cp311-manylinux2014_x86_64.whl.metadata (2.7 kB)
Collecting cuvs-cu11==25.4.* (from cuml-cu11)
  Downloading https://pypi.nvidia.com/cuvs-cu11/cuvs_cu11-25

Зафиксируем seed для воспроизводимости результатов

In [None]:
# Notebook-wide random seed; passed to seed_everything() and to train_test_split(random_state=...).
SEED = 777

In [None]:
def seed_everything(seed: int = 42) -> None:
    """Seed the random number generators used in this notebook and make PyTorch deterministic.

    Args:
        seed: Value written to PYTHONHASHSEED and passed to the ``random``,
            NumPy and PyTorch (CPU and CUDA) seeding functions.

    Returns:
        None. Prints the seed that was applied.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Python, NumPy and PyTorch each keep their own generator state, so each is seeded in turn.
    for apply_seed in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        apply_seed(seed)

    # cuDNN flags: deterministic mode on, benchmark mode off.
    cudnn_backend = torch.backends.cudnn
    cudnn_backend.deterministic = True
    cudnn_backend.benchmark = False
    torch.use_deterministic_algorithms(True)

    print(f"Using {seed} seed")

In [None]:
# Apply the notebook-wide seed before the data is split and the models are trained.
seed_everything(SEED)

Using 777 seed


In [None]:
   # Open the Colab upload dialog; the next cell reads the uploaded file as 'out.csv'.
   from google.colab import files
   uploaded = files.upload()

Saving out.csv to out.csv


In [None]:
# Load the uploaded CSV and derive calendar features from the 'time' column.
raw = pd.read_csv('out.csv')
timestamps = pd.to_datetime(raw['time'])

data = (
    raw
    # Calendar features are appended after the original columns, in this order.
    .assign(
        hour=timestamps.dt.hour,
        day_of_week=timestamps.dt.dayofweek,
        day_of_month=timestamps.dt.day,
        month=timestamps.dt.month,
    )
    .set_index('time')
    # Skip the first row.
    .iloc[1:, :]
    # Re-add the label under a new name so it becomes the last column, then drop the old one.
    .assign(target_1=lambda frame: frame['target'])
    .drop(columns=['target'])
)
data

Unnamed: 0_level_0,open,high,low,close,volume,open_ratio_1,open_log_diff_1,open_momentum_5,open_roc_5,open_ema_5,...,sol_doji,sol_engulfing,sol_rolling_vol_10,sol_range,sol_range_pct,hour,day_of_week,day_of_month,month,target_1
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2025-03-18 09:58:00+00:00,82886.27,82886.27,82801.39,82801.39,0.024308,1.000061,0.000061,-129.72,-0.156259,82920.913300,...,0,0,0.001241,0.09,0.000721,9,1,18,3,0
2025-03-18 10:06:00+00:00,82838.16,82857.22,82838.16,82857.22,0.154500,0.999033,-0.000968,27.20,0.032846,82865.354281,...,0,0,0.001333,0.12,0.000962,10,1,18,3,0
2025-03-18 10:15:00+00:00,82709.48,82717.51,82613.55,82695.77,1.297885,0.998875,-0.001125,-136.60,-0.164884,82776.654587,...,0,0,0.001454,0.32,0.002577,10,1,18,3,1
2025-03-18 10:18:00+00:00,82623.03,82623.03,82592.84,82592.84,0.040534,0.999089,-0.000912,-154.62,-0.186790,82691.406174,...,0,0,0.001461,0.14,0.001129,10,1,18,3,1
2025-03-18 10:24:00+00:00,82700.72,82729.15,82700.72,82727.88,0.066341,1.000438,0.000438,77.69,0.094029,82673.583529,...,0,0,0.001627,0.17,0.001369,10,1,18,3,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-05-26 18:00:00+00:00,109165.78,109165.78,109086.68,109086.68,0.064524,1.000193,0.000193,107.91,0.098947,109139.389645,...,0,0,0.000901,0.10,0.000576,18,0,26,5,0
2025-05-26 18:02:00+00:00,109090.40,109090.41,109041.09,109041.09,0.029468,0.999309,-0.000691,-38.38,-0.035169,109123.059763,...,100,0,0.000731,0.01,0.000058,18,0,26,5,1
2025-05-26 18:08:00+00:00,109137.79,109137.79,109092.93,109126.68,0.310429,0.999538,-0.000462,-27.99,-0.025640,109141.733904,...,0,0,0.001537,0.06,0.000344,18,0,26,5,1
2025-05-26 18:09:00+00:00,109128.68,109128.69,109067.91,109073.21,0.111587,0.999917,-0.000083,38.28,0.035090,109137.382603,...,0,0,0.001525,0.16,0.000918,18,0,26,5,1


In [None]:
# Split the frame into a feature matrix X and a label vector y.
# The label column is located by name instead of the hard-coded position 132, so the split
# stays correct if feature columns are added or removed upstream.
dataset = data.values
target_position = data.columns.get_loc('target_1')

# Slicing from `dataset` keeps X and y in the same dtype as the full array.
X = np.delete(dataset, target_position, axis=1)
y = dataset[:, target_position]


In [None]:
# Hold out 15% of the rows for evaluation; the split is seeded with SEED.
# NOTE(review): the rows look time-ordered; a shuffled split may leak future
# information into training — confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.15,
    random_state=SEED,
)

# GPU-based boosting models

In [None]:
# Model initialisation: three gradient-boosting classifiers configured for GPU training.
# Each one receives SEED so the notebook's fixed seed also reaches the models.
# XGBoost >= 2.0 deprecates tree_method='gpu_hist'; the GPU is selected with
# device='cuda' together with tree_method='hist'.
xgb = XGBClassifier(tree_method='hist', device='cuda', n_estimators=200, random_state=SEED)
cat = CatBoostClassifier(verbose=0, task_type='GPU', random_seed=SEED)
lgbm = LGBMClassifier(device='gpu', random_state=SEED)

In [None]:
# Soft-voting ensemble over the three boosting models.
base_models = [('xgb', xgb), ('cat', cat), ('lgbm', lgbm)]
ensemble = VotingClassifier(estimators=base_models, voting='soft')

In [None]:
# Train the ensemble on the training split, then predict labels for the held-out rows.
fitted_ensemble = ensemble.fit(X_train, y_train)
y_pred = fitted_ensemble.predict(X_test)

[LightGBM] [Info] Number of positive: 13571, number of negative: 13176
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 31192
[LightGBM] [Info] Number of data points in the train set: 26747, number of used features: 132
[LightGBM] [Info] Using GPU Device: Tesla T4, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 126 dense feature groups (3.27 MB) transferred to GPU in 0.005047 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.507384 -> initscore=0.029538
[LightGBM] [Info] Start training from score 0.029538


In [None]:
# Display the fitted ensemble as the cell output.
ensemble

In [None]:
# Report test accuracy for every fitted base model and for the voting ensemble.
# Iterating over the fitted estimators keeps the printed model and the scored model
# in sync, so each line reports the accuracy of the estimator it names.
report_lines = [
    f"Accuracy of {fitted_model}: {accuracy_score(y_test, fitted_model.predict(X_test)):.4f}"
    for fitted_model in ensemble.estimators_
]
report_lines.append(f"Accuracy of Voting Classifier : {accuracy_score(y_test, y_pred):.4f}")
print("\n".join(report_lines))

Accuracy of XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              multi_strategy=None, n_estimators=200, n_jobs=None,
              num_parallel_tree=None, random_state=None, ...): 0.7090
Accuracy of <catboost.core.CatBoostClassifier object at 0x78b6f81dfa10>: 0.6838
Accuracy of LGBMClassifier(device='gpu'): 0.6838
Accuracy of Voting Classifier : 0.7200


Сравним точность ансамбля с точностью каждой базовой модели

===============