# 🔍 Hyperparameter Tuning with Optuna
Tune the hyperparameters of a LightGBM classifier on the engineered features.

In [None]:
!pip install optuna lightgbm pandas scikit-learn


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import optuna
from lightgbm import LGBMClassifier

# Load the labeled events; the 'date' column is parsed to datetime on read.
df = pd.read_csv('../data/labeled_events_enhanced.csv', parse_dates=['date'])

# Drop 'neutral' rows. The explicit .copy() makes the filtered frame an
# independent object, so the column assignments below do not trigger
# pandas' SettingWithCopyWarning.
df = df[df['event_label'] != 'neutral'].copy()

# Encode the remaining labels as integer category codes for the classifier.
# NOTE(review): a row with a missing event_label would pass the filter above
# and receive code -1 -- confirm the CSV has no missing labels.
df['event_label'] = df['event_label'].astype('category')
df['target'] = df['event_label'].cat.codes

# Columns used as model inputs.
features = [
    'open', 'close', 'high', 'low', 'volume', 'avg_sentiment', 'tx_spike',
    'daily_return', 'volatility', 'sentiment_volatility', 'tweet_count',
    'whale_tx_count', 'bot_tx_flag', 'rsi', 'bollinger_upper',
    'bollinger_lower', 'daily_return_lag1', 'volume_lag1', 'avg_sentiment_lag1'
]

X = df[features]
y = df['target']

# Stratified 80/20 split with a fixed seed so the partition is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, test_size=0.2, random_state=42
)


In [None]:
def objective(trial):
    """Return the validation accuracy of one sampled LightGBM configuration.

    Hyperparameters are drawn from ``trial``, the model is fitted on part of
    the training data, and it is scored on a validation hold-out carved out
    of ``X_train``/``y_train``. The test split is not read here, so it stays
    unseen during the search and can still give an unbiased final estimate.

    Args:
        trial: The Optuna trial used to sample hyperparameters.

    Returns:
        Accuracy on the validation hold-out.
    """
    params = {
        # The range spans more than two orders of magnitude, so sample it
        # on a log scale instead of uniformly.
        'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.3, log=True),
        'n_estimators': trial.suggest_int('n_estimators', 50, 300),
        'max_depth': trial.suggest_int('max_depth', 3, 12),
        'num_leaves': trial.suggest_int('num_leaves', 20, 300),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        # LightGBM only applies row subsampling when subsample_freq > 0;
        # without it the sampled 'subsample' value has no effect.
        'subsample_freq': 1,
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'random_state': 42
    }

    # Fixed seed: every trial is evaluated on the same fit/validation split,
    # so trial scores are directly comparable.
    X_fit, X_val, y_fit, y_val = train_test_split(
        X_train, y_train, stratify=y_train, test_size=0.2, random_state=42
    )

    model = LGBMClassifier(**params)
    model.fit(X_fit, y_fit)
    preds = model.predict(X_val)
    return accuracy_score(y_val, preds)

# Seed the sampler so the search is repeatable, in line with the fixed
# random_state=42 already used for the data split and the model.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
# Run 30 trials of the objective.
study.optimize(objective, n_trials=30)


In [None]:
# Report the best trial found by the study: header line, then the trial.
print("Best trial:", study.best_trial, sep="\n")
