In [19]:
import optuna
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.feature_selection import (
    SelectKBest,
    mutual_info_classif,
)
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import log_loss
from functools import partial

In [6]:
# Load the breast-cancer dataset as pandas objects (as_frame=True): the
# functions below index X and y positionally with .iloc, so they need
# pandas inputs rather than NumPy arrays.
X, y = load_breast_cancer(return_X_y=True, as_frame=True)

In [20]:
# Shared 10-fold stratified splitter. The fixed integer random_state matters:
# the mutual-information cache is keyed by fold position, so every call to
# cv.split(X, y) must yield the same folds in the same order.
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

def precompute_mutual_information(X, y, cv):
    """Compute mutual-information scores once per cross-validation fold.

    Scores are estimated from the training rows of each fold only, so the
    held-out rows never influence feature ranking.

    Args:
        X: Feature table supporting positional indexing via ``.iloc``.
        y: Target values supporting positional indexing via ``.iloc``.
        cv: Splitter exposing ``split(X, y)`` that yields
            ``(train_indices, test_indices)`` pairs.

    Returns:
        dict mapping the fold's position in ``cv.split(X, y)`` (0-based) to
        the value returned by ``mutual_info_classif`` for that fold's
        training rows.
    """
    return {
        fold_number: mutual_info_classif(
            X.iloc[train_rows], y.iloc[train_rows], random_state=42
        )
        for fold_number, (train_rows, _held_out_rows) in enumerate(cv.split(X, y))
    }

def objective(trial, X, y, cv, mi_dict):
    """Optuna objective: mean cross-validated log loss for one (k, C) pair.

    For every fold the ``k`` highest-scoring features are kept, using the
    cached scores in ``mi_dict`` instead of re-estimating mutual information,
    then a scaler + L2 logistic-regression pipeline is fit on the training
    rows and scored on the held-out rows.

    Args:
        trial: Optuna trial used to sample ``k`` (integer in
            ``[1, X.shape[1]]``) and ``C`` (log-uniform in ``[1e-5, 1e5]``).
        X: Feature table supporting positional indexing via ``.iloc``.
        y: Target values supporting positional indexing via ``.iloc``.
        cv: Splitter exposing ``split(X, y)``; it must produce the same folds,
            in the same order, as when ``mi_dict`` was built.
        mi_dict: Mapping from fold position to that fold's precomputed
            feature scores.

    Returns:
        The arithmetic mean of the per-fold log losses.
    """
    n_features = X.shape[1]
    k = trial.suggest_int("k", 1, n_features)
    C = trial.suggest_float("C", 1e-5, 1e5, log=True)

    logistic = LogisticRegression(random_state=42, max_iter=300, C=C, penalty="l2")
    model = Pipeline(
        steps=[
            ("scaler", StandardScaler()),
            ("estimator", logistic),
        ]
    )

    def cached_scorer(fold_number):
        # SelectKBest calls score_func(X, y); both arguments are ignored
        # because the scores for this fold were already computed.
        def score_func(_X, _y):
            return mi_dict[fold_number]

        return score_func

    def fold_loss(fold_number, train_rows, test_rows):
        selector = SelectKBest(cached_scorer(fold_number), k=k)
        train_features = selector.fit_transform(X.iloc[train_rows], y.iloc[train_rows])
        test_features = selector.transform(X.iloc[test_rows])

        # The same pipeline object is refit from scratch on every fold.
        model.fit(train_features, y.iloc[train_rows])
        return log_loss(y.iloc[test_rows], model.predict_proba(test_features))

    losses = [
        fold_loss(fold_number, train_rows, test_rows)
        for fold_number, (train_rows, test_rows) in enumerate(cv.split(X, y))
    ]
    return sum(losses) / len(losses)

# Estimate the per-fold mutual-information scores once up front; the same
# dictionary is handed to every trial through the partial below.
mi_dict = precompute_mutual_information(X, y, cv)

# Seed the sampler explicitly. Without a seed each run explores a different
# sequence of (k, C) values, so the reported best parameters could not be
# reproduced after a kernel restart even though the CV split, the MI
# estimates and the classifier are all pinned to 42. TPESampler is the
# sampler Optuna uses by default, so only the seeding changes.
study = optuna.create_study(
    direction="minimize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(partial(objective, X=X, y=y, cv=cv, mi_dict=mi_dict), n_trials=100)

print(f"Best parameters: {study.best_params}")
print(f"Best score: {study.best_value}")

[I 2025-03-16 01:38:18,821] A new study created in memory with name: no-name-961b9880-f6ed-4c1f-bcba-d72e1319a022
[I 2025-03-16 01:38:18,926] Trial 0 finished with value: 0.27319204966316624 and parameters: {'k': 6, 'C': 0.0063490024569489425}. Best is trial 0 with value: 0.27319204966316624.
[I 2025-03-16 01:38:19,021] Trial 1 finished with value: 0.08145161605462603 and parameters: {'k': 20, 'C': 7.236643437811068}. Best is trial 1 with value: 0.08145161605462603.
[I 2025-03-16 01:38:19,108] Trial 2 finished with value: 0.08390416675044451 and parameters: {'k': 29, 'C': 0.19751394782146522}. Best is trial 1 with value: 0.08145161605462603.
[I 2025-03-16 01:38:19,184] Trial 3 finished with value: 0.38430100163626524 and parameters: {'k': 11, 'C': 0.001164789039640344}. Best is trial 1 with value: 0.08145161605462603.
[I 2025-03-16 01:38:19,288] Trial 4 finished with value: 0.12895622533451806 and parameters: {'k': 10, 'C': 72.235457880887}. Best is trial 1 with value: 0.08145161605462

Best parameters: {'k': 22, 'C': 6.830933024885389}
Best score: 0.06900212997132803
