In [1]:
from IPython.utils import io
from IPython import get_ipython
with io.capture_output() as captured:  
   get_ipython().run_line_magic('run', '2.Feature_Extraction.ipynb')

# Imports

In [2]:
import os
import numpy as np
import pandas as pd

import cupy as cp
import optuna
from catboost import CatBoostClassifier, Pool
import lightgbm as lgb

# Config

In [3]:
from config import run_config

run_config()

# High Accuracy Pipeline (XGBoost/CatBoost)

In [None]:
def objective_catboost(trial):
    params = {
        "iterations": trial.suggest_int("iterations", 100, 1000),
        "learning_rate": trial.suggest_float("learning_rate", 1e-3, 1e-1, log=True),
        "depth": trial.suggest_int("depth", 4, 10),
        "l2_leaf_reg": trial.suggest_float("l2_leaf_reg", 1e-8, 100.0, log=True),        
        "task_type": trial.suggest_categorical("task_type", ["GPU" if cp.cuda.is_available() else "CPU"]),

        # "bootstrap_type": trial.suggest_categorical("bootstrap_type", ["Bayesian"]),
        # "random_strength": trial.suggest_float("random_strength", 1e-8, 10.0, log=True),
        # "bagging_temperature": trial.suggest_float("bagging_temperature", 0.0, 10.0),
        # "od_type": trial.suggest_categorical("od_type", ["IncToDec", "Iter"]),
        # "od_wait": trial.suggest_int("od_wait", 10, 50),

        "verbose": False
    }
    model = CatBoostClassifier(**params)
    model.fit(X_train, y_train, eval_set=(X_val, y_val), early_stopping_rounds=50, verbose=False)
    return model.score(X_val, y_val)

study = optuna.create_study(
    study_name="catboost_hyperopt",
    # storage="sqlite:///data/optuna/catboost_hyperopt.db", 
    # load_if_exists=True,
    direction="maximize",
    # sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective_catboost, n_trials=20)

best_model_catboost = CatBoostClassifier(**study.best_params, verbose=False)
best_model_catboost.fit(X_train, y_train)

[I 2025-04-16 22:30:27,331] A new study created in memory with name: catboost_hyperopt
[W 2025-04-16 22:30:27,376] Trial 7 failed with parameters: {'iterations': 520, 'learning_rate': 0.007499018972067938, 'depth': 8, 'l2_leaf_reg': 3.744916014717621e-05, 'task_type': 'GPU'} because of the following error: CatBoostError("catboost/cuda/cuda_lib/cuda_manager.cpp:201: Condition violated: `State == nullptr'").
Traceback (most recent call last):
  File "e:\College\4- Senior 2\Semester 2\Pattern\Project\venv\lib\site-packages\optuna\study\_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
  File "C:\Users\ahmed\AppData\Local\Temp\ipykernel_17544\172854637.py", line 18, in objective_catboost
    model.fit(X_train, y_train, eval_set=(X_val, y_val), early_stopping_rounds=50, verbose=False)
  File "e:\College\4- Senior 2\Semester 2\Pattern\Project\venv\lib\site-packages\catboost\core.py", line 5245, in fit
    self._fit(X, y, cat_features, text_features, embedding_features,

CatBoostError: catboost/cuda/cuda_lib/devices_provider.h:190: Error: device already requested 0

# High Speed Pipeline (LightGBM)

In [None]:
def objective_lightgbm(trial):
    params = {
        "objective": "multiclass",
        "num_class": len(set(y_train)),
        "metric": "multi_logloss",
        "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.3, log=True),
        "num_leaves": trial.suggest_int("num_leaves", 20, 150),
        "max_depth": trial.suggest_int("max_depth", 3, 12),
        "n_jobs": -1,
        "verbosity": -1,
    }
    model = lgb.LGBMClassifier(**params)
    model.fit(X_train, y_train, eval_set=[(X_val, y_val)])
    return model.score(X_val, y_val)

study = optuna.create_study(
    study_name="lightgbm_hyperopt",
    # storage="sqlite:///data/optuna/lightgbm_hyperopt.db",
    # load_if_exists=True,
    direction="maximize",
    # sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective_lightgbm, n_trials=50, n_jobs=-1)

best_model_lgb = lgb.LGBMClassifier(**study.best_params, verbosity=-1)
best_model_lgb.fit(X_train, y_train)

[I 2025-04-16 22:27:02,536] A new study created in memory with name: lightgbm_hyperopt
[I 2025-04-16 22:27:03,027] Trial 0 finished with value: 0.8235294117647058 and parameters: {'learning_rate': 0.015914266280346534, 'num_leaves': 134, 'max_depth': 3}. Best is trial 0 with value: 0.8235294117647058.
[I 2025-04-16 22:27:03,460] Trial 1 finished with value: 0.8725490196078431 and parameters: {'learning_rate': 0.029443512309972247, 'num_leaves': 88, 'max_depth': 5}. Best is trial 1 with value: 0.8725490196078431.
[I 2025-04-16 22:27:03,520] Trial 2 finished with value: 0.7450980392156863 and parameters: {'learning_rate': 0.00229732928485254, 'num_leaves': 81, 'max_depth': 6}. Best is trial 1 with value: 0.8725490196078431.
[I 2025-04-16 22:27:03,592] Trial 11 finished with value: 0.7450980392156863 and parameters: {'learning_rate': 0.0010513502792759203, 'num_leaves': 107, 'max_depth': 6}. Best is trial 1 with value: 0.8725490196078431.
[I 2025-04-16 22:27:03,667] Trial 3 finished with 

In [24]:
best_model_lgb.score(X_test, y_test), best_model_catboost.score(X_test, y_test)



(0.9333333333333333, 0.9555555555555556)

In [None]:
classification_report(y_test, y_pred)