In [1]:
from managers.history_manager import HistoryManager
from managers.model_manager import ModelManager
from binance.client import Client
from dotenv import load_dotenv
import os

# Pull variables from a local .env file into the process environment.
load_dotenv()

# API credentials are read from the environment; os.getenv yields None for
# any variable that is not set.
client = Client(
    api_key=os.getenv("BINANCE_API_KEY"),
    api_secret=os.getenv("BINANCE_API_SECRET"),
)

# History/feature source for BTCUSDT 4h data starting 1 Jan 2023.
# NOTE(review): timelag=12 is presumably a lag/lookback length in bars — confirm
# against HistoryManager.
hm = HistoryManager(client=client, start_str="1 Jan, 2023", symbol="BTCUSDT", interval="4h", timelag=12)

# dataset_dual unpacks to the predictor frame plus one target per task
# (classification and regression).
X, y_cls, y_reg = hm.dataset_dual

# One manager holding a classifier and a regressor ("hgb" for both), built on
# every column of X, with a fixed seed for repeatability.
mm = ModelManager(
    predictor_cols=list(X.columns),
    cls_name="hgb",
    reg_name="hgb",
    random_state=42,
)


In [2]:
# Display the manager's last_closed_open_time_ms value.
# NOTE(review): per the name this is the open time (epoch milliseconds) of the
# most recent closed candle; the recorded value equals the last index entry
# reported by hm.df_features.info() — confirm against HistoryManager.
hm.last_closed_open_time_ms

1759060800000

In [3]:
# Summarize the feature frame: index range, column names, non-null counts and
# dtypes. Useful as a quick check that no feature column contains missing values.
hm.df_features.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5977 entries, 1673006400000 to 1759060800000
Data columns (total 41 columns):
 #   Column                               Non-Null Count  Dtype  
---  ------                               --------------  -----  
 0   EMA                                  5977 non-null   float64
 1   MINUSDM                              5977 non-null   float64
 2   PLUSDM                               5977 non-null   float64
 3   CLOSE                                5977 non-null   float64
 4   CLOSEL1                              5977 non-null   float64
 5   CLOSEL2                              5977 non-null   float64
 6   PATT_3OUT                            5977 non-null   float64
 7   PATT_CMB                             5977 non-null   float64
 8   RSI                                  5977 non-null   float64
 9   MACD                                 5977 non-null   float64
 10  MACD_SIGNAL                          5977 non-null   float64
 11  MACD_HIST     

In [4]:
# List every column of the feature frame. The recorded output ends with
# 'UP_DOWN' and 'RET_NEXT'.
# NOTE(review): those two look like the classification / regression targets
# rather than predictors — confirm they are excluded from X.
hm.df_features.columns

Index(['EMA', 'MINUSDM', 'PLUSDM', 'CLOSE', 'CLOSEL1', 'CLOSEL2', 'PATT_3OUT',
       'PATT_CMB', 'RSI', 'MACD', 'MACD_SIGNAL', 'MACD_HIST', 'ADX', 'ATR',
       'NATR', 'BB_UPPER', 'BB_MIDDLE', 'BB_LOWER', 'BB_WIDTH', 'OBV', 'MFI',
       'AD', 'ADOSC', 'STOCH_K', 'STOCH_D', 'fng_ordinal_smooth',
       'fng_ordinal_smooth_int', 'unique_addresses_used_smooth',
       'confirmed_tx_per_day_smooth', 'output_value_per_day_btc_smooth',
       'avg_tx_per_block_smooth', 'median_confirmation_time_min_smooth',
       'avg_confirmation_time_min_smooth', 'total_hash_rate_ths_smooth',
       'network_difficulty_smooth', 'miners_revenue_usd_smooth',
       'total_tx_fees_btc_smooth', 'blockchain_size_smooth',
       'avg_block_size_smooth', 'UP_DOWN', 'RET_NEXT'],
      dtype='object')

In [5]:
# Train both models with a 20% test split and report the metrics that
# train_dual returns.
metrics = mm.train_dual(X, y_cls, y_reg, test_size=0.2)
print(f"Holdout metrics: {metrics}")

# Settings shared by both importance runs. kind="auto" leaves the method choice
# to ModelManager (n_repeats suggests permutation importance — TODO confirm).
importance_opts = {"kind": "auto", "n_repeats": 10}

# Classifier feature importances, scored by accuracy; show the first 15 rows.
imp_cls = mm.feature_importances(which="cls", scoring="accuracy", **importance_opts)
print(imp_cls.head(15))

# Same check for the regressor, scored by negative mean absolute error.
imp_reg = mm.feature_importances(which="reg", scoring="neg_mean_absolute_error", **importance_opts)
print(imp_reg.head(15))

Holdout metrics: {'cls_acc': 0.4866220735785953, 'reg_mae': 0.00689503005802104, 'reg_mape_like': 2.006588952794208}
                             feature  importance_mean  importance_std
0                                RSI         0.004013        0.012898
1                        MACD_SIGNAL         0.003930        0.007299
2           total_tx_fees_btc_smooth         0.003679        0.002155
3                                ATR         0.002007        0.005599
4                              ADOSC         0.001923        0.011118
5                           PATT_CMB         0.001839        0.000729
6                              CLOSE         0.001421        0.006368
7                          BB_MIDDLE         0.001171        0.002429
8                            CLOSEL2         0.000753        0.007060
9   avg_confirmation_time_min_smooth         0.000753        0.002349
10           avg_tx_per_block_smooth         0.000334        0.000766
11         miners_revenue_usd_smooth       

In [6]:
# Majority-class ("null") accuracy: the score obtained by always predicting the
# more frequent label. Assumes y_cls is a 0/1 indicator so that its mean is the
# share of class 1 — TODO confirm against HistoryManager.
p1 = y_cls.mean()
null_acc = max(p1, 1 - p1)
print("Null accuracy:", null_acc)

# The classifier's accuracy is measured on the test split only, so the
# like-for-like baseline is the class balance of that same split rather than of
# the full sample (the two can differ when the label mix drifts over time).
# NOTE(review): mm._yc_te is taken to be the holdout classification labels
# stored by train_dual, supporting .mean() like y_cls — confirm.
p1_holdout = mm._yc_te.mean()
null_acc_holdout = max(p1_holdout, 1 - p1_holdout)
print("Null accuracy (holdout):", null_acc_holdout)


Null accuracy: 0.5176509954826837


In [7]:
from sklearn.metrics import roc_auc_score, balanced_accuracy_score, matthews_corrcoef
# Hard labels and class-1 probabilities from the fitted classifier pipeline on
# the stored test predictors.
y_hat = mm.cls_pipe.predict(mm._X_te)
p_hat = mm.cls_pipe.predict_proba(mm._X_te)[:, 1]

# Each metric is scored against the stored test labels: AUC uses the
# probabilities, the other two use the hard labels. Printed in order.
for label, scorer, estimate in (
    ("AUC:", roc_auc_score, p_hat),
    ("Balanced Acc:", balanced_accuracy_score, y_hat),
    ("MCC:", matthews_corrcoef, y_hat),
):
    print(label, scorer(mm._yc_te, estimate))


AUC: 0.4894319960354236
Balanced Acc: 0.4844006798016592
MCC: -0.03142754977400782


In [8]:
import numpy as np
# Compare the regressor's test-set MAE with a naive forecast that always
# predicts 0.0; "skill" is the relative MAE reduction (negative = worse than naive).
y_true = mm._yr_te.values
y_pred = mm.reg_pipe.predict(mm._X_te)

mae_model = np.abs(y_true - y_pred).mean()
mae_naive = np.abs(y_true - 0.0).mean()
skill = 1 - mae_model / mae_naive

print("MAE model:", mae_model, "MAE naive:", mae_naive, "Skill:", skill)


MAE model: 0.00689503005802104 MAE naive: 0.006521660001752204 Skill: -0.0572507699218483
