In [None]:
from xgboost import XGBClassifier
from ETL import ETL
import os
import pickle

In [None]:
# Identity of this parameter bundle: `name` is used as the pickle file stem
# when the bundle is saved, and `number` is stored alongside the settings.
name = 'AAVEUSDT1'
number = 1

# The settings are listed flat for readability and then wrapped into the
# {"param": {"value": ...}} layout that the rest of the notebook indexes.
inputs = {
    key: {"value": setting}
    for key, setting in {
        "gamma": 11,
        "device": "cuda",
        "n_jobs": -1,
        "max_bin": 512,
        "leverage": 4,
        "n_splits": 34,
        "max_depth": 15,
        "objective": "binary:logistic",
        "predictor": "gpu_predictor",
        "reg_alpha": 1,
        "stop_loss": 3,
        "subsample": 0.5,
        "test_size": 720,
        "look_ahead": 108,
        "max_leaves": 13,
        "model_name": "XGB",
        "reg_lambda": 12,
        "trade_mode": "long",
        "eval_metric": "auc",
        "take_profit": 4,
        "tree_method": "hist",
        "n_estimators": 200,
        "random_state": 42,
        "learning_rate": 0.1,
        "target_symbol": "AAVEUSDT",
        "max_delta_step": 5,
        "max_train_size": 51840,
        "n_rand_features": None,
        "save_model_mode": None,
        "colsample_bynode": 0.4719540277905477,
        "colsample_bytree": 0.8659709124945025,
        "min_child_weight": 8,
        "scale_pos_weight": 10.65278391644559,
        "colsample_bylevel": 0.41992723239336704,
        "early_stopping_rounds": None,
    }.items()
}

In [None]:
# Bundle the identifiers and the hyper-parameter dict into one picklable record.
inputs_params = {'name': name, 'number': number, 'inputs': inputs}

# Create the target folder first: opening a file for writing inside a missing
# directory raises FileNotFoundError (e.g. on a fresh checkout).
os.makedirs("model_inputs", exist_ok=True)
with open(f"model_inputs/{name}.pkl", "wb") as f:
    pickle.dump(inputs_params, f)

In [None]:
def load_all_pickles_from_dir(directory):
    """Load every ``.pkl`` file found directly inside ``directory``.

    Args:
        directory: Path of the folder to scan (not recursive).

    Returns:
        dict mapping each file's stem (the filename without its ``.pkl``
        extension, e.g. ``"AAVEUSDT1"`` for ``AAVEUSDT1.pkl``) to the
        unpickled object. Files that fail to unpickle are reported on stdout
        and skipped.

    Raises:
        OSError: If ``directory`` cannot be listed or a file cannot be opened.
    """
    data_dict = {}
    # os.listdir returns entries in arbitrary order; sorting makes the order
    # of the returned dict reproducible between runs and machines.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(".pkl"):
            continue
        file_path = os.path.join(directory, filename)
        # Key by the stem. The previous `filename.split()[0]` split on
        # whitespace, so it kept the ".pkl" suffix and cut names containing
        # spaces at the first space.
        key = os.path.splitext(filename)[0]
        with open(file_path, "rb") as f:
            try:
                # NOTE(security): pickle.load can execute arbitrary code;
                # only point this at directories whose contents you trust.
                data_dict[key] = pickle.load(f)
            except Exception as e:
                print(f"Error loading {filename}: {e}")
    return data_dict

# Read back every parameter bundle stored in the inputs folder and show
# which entries were found.
directory_path = "model_inputs"
all_pickles = load_all_pickles_from_dir(directory_path)

print("Loaded files:", [*all_pickles])


In [None]:
def ETL_fit_save(all_pickles, till_date, train_rows=180 * 288):
    """Fit one classifier per saved parameter bundle and persist it to ``models/``.

    For each bundle this runs ``ETL`` for the bundle's target symbol, keeps the
    trailing ``train_rows`` rows, fits the classifier returned by
    ``assign_params`` on every column except ``symbol`` and ``target``, and
    writes two files:

    * ``models/xgb_model_till_{till_date}-{symbol}_{number}_simple.json`` -
      the fitted model, via ``save_model``.
    * ``models/xgb_model_till_{till_date}-{symbol}_{number}_simple_info.pkl`` -
      a pickled dict with the target settings and the input column dtypes.

    Args:
        all_pickles: Mapping of bundle key -> ``{'number': ..., 'inputs': ...}``
            where ``inputs`` has the ``{"param": {"value": ...}}`` layout.
        till_date: String embedded in the output file names.
        train_rows: Number of trailing rows of the ETL output to train on;
            must be positive. Defaults to ``180 * 288``, i.e. 180 days of
            5-minute bars (288 bars per day).

    Returns:
        None. Results are written to disk.
    """
    # Create the output folder up front so a missing directory cannot cause a
    # failure only after an expensive fit has finished.
    os.makedirs("models", exist_ok=True)

    # Iterate over the bundles themselves; the keys are not needed, and this
    # avoids rebinding the loop variable to the classifier further down.
    for bundle in all_pickles.values():
        # load params
        number = bundle['number']
        inputs = bundle['inputs']

        # initiate trg params
        target_symbol = inputs['target_symbol']["value"]
        trade_mode = inputs['trade_mode']["value"]
        trg_look_ahead = int(inputs['look_ahead']["value"])
        trg_take_profit = float(inputs['take_profit']["value"])
        trg_stop_loss = float(inputs['stop_loss']["value"])
        # Data lives in a sibling "Framework data" folder next to the working directory.
        C5M_data_path = f"{os.path.dirname(os.getcwd())}/Framework data/{target_symbol}"
        dataset_path = f"{C5M_data_path}/dataset.parquet"
        # NOTE(review): fixed to None rather than read from
        # inputs['n_rand_features'] - presumably intentional; confirm.
        n_rand_features = None
        target_col = f"trg_clf_{trade_mode}_{target_symbol}_M{trg_look_ahead}_TP{trg_take_profit}_SL{trg_stop_loss}"
        base_time_frame = 5

        # ETL and slice df
        df = ETL(dataset_path,
                 C5M_data_path,
                 trade_mode,
                 target_symbol,
                 trg_look_ahead,
                 trg_take_profit,
                 trg_stop_loss,
                 n_rand_features,
                 target_col,
                 base_time_frame)

        # Keep only the most recent `train_rows` rows.
        df = df[-train_rows:].reset_index(drop=True)
        print(f'df_shape: {df.shape}')

        # fit
        X = df.drop(["symbol", "target"], axis=1)
        y = df["target"]
        classifier = assign_params(inputs)
        classifier.fit(X, y)

        # save
        # Record the dtype of every feature column the model was fitted on.
        input_cols = dict(df[classifier.feature_names_in_].dtypes)
        output_stem = f"models/xgb_model_till_{till_date}-{target_symbol}_{number}_simple"
        classifier.save_model(f"{output_stem}.json")

        info = {
            'target_symbol': target_symbol,
            'trade_mode': 'buy' if trade_mode == 'long' else 'sell',
            'look_ahead': trg_look_ahead,
            'take_profit': trg_take_profit,
            'stop_loss': trg_stop_loss,
            'input_cols': input_cols,
        }

        with open(f"{output_stem}_info.pkl", "wb") as f:
            pickle.dump(info, f)

def assign_params(inputs):
    """Build an unfitted ``XGBClassifier`` from a parameter bundle.

    Args:
        inputs: Dict in the ``{"param": {"value": ...}}`` layout. The keys read
            are the ones passed to the constructor below; other keys are ignored.

    Returns:
        A configured, unfitted ``XGBClassifier``.

    Notes:
        * ``device``, ``n_jobs`` and ``tree_method`` are fixed to ``'cpu'``,
          ``1`` and ``'hist'`` regardless of what ``inputs`` contains -
          presumably so the saved model trains without a GPU; confirm.
        * ``gamma``, ``reg_alpha``, ``reg_lambda``, ``min_child_weight`` and
          ``max_delta_step`` are real-valued in XGBoost, so they are cast with
          ``float`` - ``int`` would silently truncate e.g. ``gamma=0.5`` to 0.
          Integer-valued inputs are numerically unchanged.
    """
    def value_of(key):
        # Unwrap the {"value": ...} envelope around each setting.
        return inputs[key]["value"]

    model = XGBClassifier(
            colsample_bylevel=float(value_of('colsample_bylevel')),
            colsample_bynode=float(value_of('colsample_bynode')),
            colsample_bytree=float(value_of('colsample_bytree')),
            device='cpu',
            eval_metric=value_of('eval_metric'),
            gamma=float(value_of('gamma')),
            learning_rate=float(value_of('learning_rate')),
            max_bin=int(value_of('max_bin')),
            max_delta_step=float(value_of('max_delta_step')),
            max_depth=int(value_of('max_depth')),
            max_leaves=int(value_of('max_leaves')),
            min_child_weight=float(value_of('min_child_weight')),
            n_estimators=int(value_of('n_estimators')),
            n_jobs=1,
            objective=value_of('objective'),
            random_state=int(value_of('random_state')),
            reg_alpha=float(value_of('reg_alpha')),
            reg_lambda=float(value_of('reg_lambda')),
            scale_pos_weight=float(value_of('scale_pos_weight')),
            subsample=float(value_of('subsample')),
            tree_method='hist'
        )
    return model


In [None]:
# Train and persist a model for every loaded bundle; the date string is
# embedded in the output file names.
ETL_fit_save(all_pickles, till_date='2025-09-30')