In [1]:
import pandas as pd
from sklearn.datasets import load_iris  # changed from breast_cancer to iris
from sklearn.linear_model import LogisticRegression
from mlxtend.feature_selection import ExhaustiveFeatureSelector
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import accuracy_score, f1_score, make_scorer

In [2]:
# Load Iris dataset
X, y = load_iris(return_X_y=True)
X = pd.DataFrame(X, columns=load_iris().feature_names)

In [3]:
# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)

In [4]:
# Define model
model = LogisticRegression(max_iter=5000, multi_class='auto', solver='liblinear')

In [5]:
# Define stratified cross-validation
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

In [6]:
scorer = make_scorer(f1_score, average='macro')

In [9]:
# Automated feature-count tuning
best_score = 0
best_k = None
best_selector = None

for k in range(1, 5):  # adjust max features for Iris dataset
    print(f"\nRunning Exhaustive Search for k={k}")
    efs = ExhaustiveFeatureSelector(
        model,
        min_features=k,
        max_features=k,
        scoring=scorer,
        print_progress=True,
        cv=cv,
        n_jobs=-1
    )
    efs.fit(X_train, y_train)

    if efs.best_score_ > best_score:
        best_score = efs.best_score_
        best_k = k
        best_selector = efs


Running Exhaustive Search for k=1


Features: 4/4


Running Exhaustive Search for k=2


Features: 1/1


Running Exhaustive Search for k=3

Running Exhaustive Search for k=4


In [10]:
best_idx = list(best_selector.best_idx_)
best_features = X.columns[best_idx]
print(f"\nBest k: {best_k}")
print("Best features:", list(best_features))
print(f"Best CV F1-Score: {best_score:.4f}")


Best k: 4
Best features: ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
Best CV F1-Score: 0.9624


In [11]:
model.fit(X_train[best_features], y_train)
y_pred = model.predict(X_test[best_features])
test_f1 = f1_score(y_test, y_pred, average='macro')
print(f"Test F1-Score with {len(best_features)} features: {test_f1:.4f}")

Test F1-Score with 4 features: 0.8949


