In [1]:
import os
import sys
import pandas as pd
import warnings
import optuna
import joblib
# Silence warning categories that would otherwise flood the training output.
# NOTE(review): ignoring every RuntimeWarning/FutureWarning is broad and can
# hide genuine numerical problems — consider narrowing by module or message.
warnings.filterwarnings("ignore", category=RuntimeWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning, message="X does not have valid feature names")
warnings.filterwarnings("ignore", category=UserWarning, module='lightgbm')

# Only show optuna messages at WARNING level or above.
optuna.logging.set_verbosity(optuna.logging.WARNING)

# Put the parent of the current working directory on the import path
# (presumably where the local `ml` package lives — this depends on the
# notebook being launched from its own folder). The membership check keeps
# the cell idempotent: re-running it no longer appends duplicate entries.
_project_root = os.path.dirname(os.getcwd())
if _project_root not in sys.path:
    sys.path.append(_project_root)

from ml import Trainer, TrainArgs

In [2]:
def _read_timestamped_csv(csv_path):
    """Read ``csv_path`` with its 'timestamp' column as the index, parsing dates."""
    return pd.read_csv(csv_path, index_col='timestamp', parse_dates=True)


# One ETH/USDT 1d file per exchange; every file is read with the same options.
# NOTE(review): the hyperliquid file name carries a 20150425-20250425 range,
# while the other two carry 20150427-20250427 — confirm this is intended.
binance_path = '../data/1d_timeframe/binance/binance_ETH_USDT_1d_20150427_20250427.csv'
bybit_path = '../data/1d_timeframe/bybit/bybit_ETH_USDT_1d_20150427_20250427.csv'
hyperliquid_path = '../data/1d_timeframe/hyperliquid/hyperliquid_ETH_USDT_1d_20150425_20250425.csv'

binance = _read_timestamped_csv(binance_path)
bybit = _read_timestamped_csv(bybit_path)
hyperliquid = _read_timestamped_csv(hyperliquid_path)


In [3]:
def train(args, data, d):
    """Train models with ``Trainer`` and dump each returned entry to ``d``.

    Parameters
    ----------
    args
        Configuration object handed to ``Trainer`` (the notebook passes a
        ``TrainArgs`` instance).
    data
        Training input forwarded unchanged to ``Trainer.train`` (the notebook
        passes a pandas DataFrame).
    d : str
        Output directory. It is created, including missing parents, if it
        does not exist yet.

    Returns
    -------
    None
        Results are written to disk only: one ``model_<key>.joblib`` file per
        key of the mapping returned by ``Trainer.train``.
    """
    # Create the target directory before the (slow) training run, so a missing
    # folder cannot make joblib.dump fail after all the work is already done.
    os.makedirs(d, exist_ok=True)

    trainer = Trainer(args)
    models = trainer.train(data)
    for window, model_dict in models.items():
        path = os.path.join(d, f'model_{window}.joblib')
        joblib.dump(model_dict, path)


In [4]:
# Settings shared by every exchange run in this notebook. Their exact meaning
# is defined by ml.TrainArgs / ml.Trainer, which are not visible from here.
_train_settings = {
    "LOOK_BACK_WINDOWS": [5, 10, 20, 30, 60],
    "TARGET_VOLATILITY": 0.9,
    "HIGH_CORRELATION_THRESHOLD": 0.95,
    # NOTE(review): presumably the train / validation cut-off dates — confirm.
    "ML_TRAIN_END_DATE": "2023-01-01",
    "ML_VALID_END_DATE": "2024-01-01",
    "N_CV_SPLITS": 2,
    "FORWARD_LOOK_PERIOD": 40,
}
args = TrainArgs(**_train_settings)

In [10]:
# Train on the Binance frame and write the model files to ../dump/binance.
train(args=args, data=binance, d='../dump/binance')

  0%|          | 0/100 [00:00<?, ?it/s]

Evaluating on validation set to find threshold...

--- Test Set Evaluation Metrics (Threshold = 0.4647) ---
Accuracy:  0.3830
Precision: 0.2258
Recall:    0.5833
F1 Score:  0.3256
AUC:       0.4095
Log Loss:  0.6934
----------------------------------------------------
--- ML Model Training Complete ---


  0%|          | 0/100 [00:00<?, ?it/s]

Evaluating on validation set to find threshold...

--- Test Set Evaluation Metrics (Threshold = 0.5559) ---
Accuracy:  0.7222
Precision: 0.5000
Recall:    0.8000
F1 Score:  0.6154
AUC:       0.7769
Log Loss:  0.7127
----------------------------------------------------
--- ML Model Training Complete ---


  0%|          | 0/100 [00:00<?, ?it/s]

Evaluating on validation set to find threshold...

--- Test Set Evaluation Metrics (Threshold = 0.5708) ---
Accuracy:  0.7500
Precision: 0.3333
Recall:    0.5000
F1 Score:  0.4000
AUC:       0.7750
Log Loss:  0.7771
----------------------------------------------------
--- ML Model Training Complete ---


  0%|          | 0/100 [00:00<?, ?it/s]

Evaluating on validation set to find threshold...

--- Test Set Evaluation Metrics (Threshold = 0.7143) ---
Accuracy:  0.2857
Precision: 0.2857
Recall:    1.0000
F1 Score:  0.4444
AUC:       0.5000
Log Loss:  0.9910
----------------------------------------------------
--- ML Model Training Complete ---


  0%|          | 0/100 [00:00<?, ?it/s]

Evaluating on validation set to find threshold...

--- Test Set Evaluation Metrics (Threshold = 0.5556) ---
Accuracy:  0.6667
Precision: 0.6667
Recall:    1.0000
F1 Score:  0.8000
AUC:       0.5000
Log Loss:  0.6622
----------------------------------------------------
--- ML Model Training Complete ---


In [11]:
# Train on the Bybit frame and write the model files to ../dump/bybit.
train(args=args, data=bybit, d='../dump/bybit')

  0%|          | 0/100 [00:00<?, ?it/s]

Evaluating on validation set to find threshold...

--- Test Set Evaluation Metrics (Threshold = 0.4269) ---
Accuracy:  0.5745
Precision: 0.0000
Recall:    0.0000
F1 Score:  0.0000
AUC:       0.3726
Log Loss:  0.6288
----------------------------------------------------
--- ML Model Training Complete ---


  0%|          | 0/100 [00:00<?, ?it/s]

Evaluating on validation set to find threshold...

--- Test Set Evaluation Metrics (Threshold = 0.5388) ---
Accuracy:  0.6667
Precision: 0.4286
Recall:    0.6000
F1 Score:  0.5000
AUC:       0.6692
Log Loss:  0.6835
----------------------------------------------------
--- ML Model Training Complete ---


  0%|          | 0/100 [00:00<?, ?it/s]

Evaluating on validation set to find threshold...

--- Test Set Evaluation Metrics (Threshold = 0.6364) ---
Accuracy:  0.1667
Precision: 0.1667
Recall:    1.0000
F1 Score:  0.2857
AUC:       0.5000
Log Loss:  0.9183
----------------------------------------------------
--- ML Model Training Complete ---


  0%|          | 0/100 [00:00<?, ?it/s]

Evaluating on validation set to find threshold...

--- Test Set Evaluation Metrics (Threshold = 0.7143) ---
Accuracy:  0.2857
Precision: 0.2857
Recall:    1.0000
F1 Score:  0.4444
AUC:       0.5000
Log Loss:  0.9910
----------------------------------------------------
--- ML Model Training Complete ---


  0%|          | 0/100 [00:00<?, ?it/s]

Evaluating on validation set to find threshold...

--- Test Set Evaluation Metrics (Threshold = 0.4000) ---
Accuracy:  0.6667
Precision: 0.6667
Recall:    1.0000
F1 Score:  0.8000
AUC:       0.5000
Log Loss:  0.7811
----------------------------------------------------
--- ML Model Training Complete ---


In [5]:
# Train on the Hyperliquid frame and write the model files to ../dump/hyperliquid.
# NOTE(review): the output recorded for this cell reports AUC = 0.5000 and
# Recall = 1.0000 for every window, which suggests the models may be scoring
# all samples identically — verify before relying on these dumps.
train(args=args, data=hyperliquid, d='../dump/hyperliquid')

  0%|          | 0/100 [00:00<?, ?it/s]

Evaluating on validation set to find threshold...

--- Test Set Evaluation Metrics (Threshold = 0.4459) ---
Accuracy:  0.2553
Precision: 0.2553
Recall:    1.0000
F1 Score:  0.4068
AUC:       0.5000
Log Loss:  0.6459
----------------------------------------------------
--- ML Model Training Complete ---


  0%|          | 0/100 [00:00<?, ?it/s]

Evaluating on validation set to find threshold...

--- Test Set Evaluation Metrics (Threshold = 0.5405) ---
Accuracy:  0.2778
Precision: 0.2778
Recall:    1.0000
F1 Score:  0.4348
AUC:       0.5000
Log Loss:  0.7326
----------------------------------------------------
--- ML Model Training Complete ---


  0%|          | 0/100 [00:00<?, ?it/s]

Evaluating on validation set to find threshold...

--- Test Set Evaluation Metrics (Threshold = 0.5882) ---
Accuracy:  0.1667
Precision: 0.1667
Recall:    1.0000
F1 Score:  0.2857
AUC:       0.5000
Log Loss:  0.8279
----------------------------------------------------
--- ML Model Training Complete ---


  0%|          | 0/100 [00:00<?, ?it/s]

Evaluating on validation set to find threshold...

--- Test Set Evaluation Metrics (Threshold = 0.8000) ---
Accuracy:  0.2857
Precision: 0.2857
Recall:    1.0000
F1 Score:  0.4444
AUC:       0.5000
Log Loss:  1.2134
----------------------------------------------------
--- ML Model Training Complete ---


  0%|          | 0/100 [00:00<?, ?it/s]

Evaluating on validation set to find threshold...

--- Test Set Evaluation Metrics (Threshold = 0.5714) ---
Accuracy:  0.6667
Precision: 0.6667
Recall:    1.0000
F1 Score:  0.8000
AUC:       0.5000
Log Loss:  0.6555
----------------------------------------------------
--- ML Model Training Complete ---
