In [None]:
import optuna
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, f1_score
from sklearn.preprocessing import LabelEncoder

# Load the Iris dataset and pull out the feature matrix and the target vector
iris = load_iris()
X, y = iris.data, iris.target

# Report the relative frequency of each class label
class_balance = pd.Series(y).value_counts(normalize=True)
print(f"Class balance: \n{class_balance}")

# Hold out 20% of the samples as a test set (fixed seed for a reproducible split)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 3-fold cross-validation splitter
kf = KFold(n_splits=3)

def evaluate(model, X_train, y_train, X_test, y_test):
    """Fit ``model`` on the training split and score it on both splits.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``.
        X_train: Training features.
        y_train: Training labels.
        X_test: Test features.
        y_test: Test labels.

    Returns:
        Dict with the keys ``accuracy_train``, ``accuracy_test``,
        ``f1_train`` and ``f1_test`` (in that order); the F1 scores are
        macro-averaged.
    """
    model.fit(X_train, y_train)

    # Pair each split's true labels with the fitted model's predictions.
    splits = {
        "train": (y_train, model.predict(X_train)),
        "test": (y_test, model.predict(X_test)),
    }

    # One scorer per metric family; the dict order fixes the output key order.
    scorers = {
        "accuracy": accuracy_score,
        "f1": lambda truth, preds: f1_score(truth, preds, average='macro'),
    }

    return {
        f"{metric}_{split}": scorer(truth, preds)
        for metric, scorer in scorers.items()
        for split, (truth, preds) in splits.items()
    }

def objective_xgboost(trial):
    """Optuna objective: cross-validated accuracy of an XGBoost classifier.

    The value returned to Optuna is the mean K-fold accuracy computed on the
    training split only (using the module-level ``kf`` splitter). Selecting
    hyperparameters on the held-out test set would leak test information
    into the tuning and make the reported test score optimistic, so the
    test split is used purely for reporting.

    Args:
        trial: The ``optuna`` trial used to sample hyperparameters and to
            store the per-trial metrics as user attributes.

    Returns:
        Mean cross-validated accuracy on the training split (to be maximized).
    """
    # Suggest hyperparameters
    params = {
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.1),
        "max_depth": trial.suggest_int("max_depth", 5, 20),
        "n_estimators": trial.suggest_int("n_estimators", 100, 200),
        "min_child_weight": trial.suggest_int("min_child_weight", 1, 6),
    }

    model = XGBClassifier(
        **params,
        random_state=42,
        objective='multi:softmax',
        num_class=3,
    )

    # Model-selection score: K-fold CV restricted to the training data.
    # cross_val_score fits clones, so ``model`` itself is still unfitted here.
    cv_scores = cross_val_score(model, X_train, y_train, cv=kf, scoring="accuracy")
    cv_accuracy = float(cv_scores.mean())

    # Train/test metrics are recorded for inspection only, never optimized.
    metrics = evaluate(model, X_train, y_train, X_test, y_test)

    # Set additional attributes to trial
    for key, value in metrics.items():
        trial.set_user_attr(key, value)
    trial.set_user_attr("accuracy_cv", cv_accuracy)

    return cv_accuracy

# Create the optuna study
study = optuna.create_study(direction="maximize")
study.optimize(objective_xgboost, n_trials=10)

# Keep the five best completed trials, ranked by objective value (highest
# first, matching direction="maximize"). get_trials() alone returns trials in
# run order, so slicing it directly would just pick the first five trials.
completed_trials = study.get_trials(deepcopy=False, states=[optuna.trial.TrialState.COMPLETE])
best_trials = sorted(completed_trials, key=lambda t: t.value, reverse=True)[:5]

# Collect the results into a dataframe, one row per trial. Building the frame
# from records in one go avoids growing an empty frame cell by cell.
results_df = pd.DataFrame(
    [
        {
            'Trial': trial.number,
            **trial.user_attrs,
            'Hyperparameters': str(trial.params),
        }
        for trial in best_trials
    ]
)

print(results_df)
