In [5]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.feature_selection import RFE
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

In [2]:
X, y = load_breast_cancer(return_X_y=True)
feature_names = load_breast_cancer().feature_names

In [8]:
rfc = RandomForestClassifier(n_estimators=100, random_state=42)
rfe = RFE(estimator=rfc, n_features_to_select=10, step=1)
rfe = rfe.fit(X, y)

In [9]:
# Print selected features
selected_features = [
    feature_names[i] for i in range(len(feature_names)) if rfe.support_[i]
]
print("Selected features:", selected_features)

Selected features: [np.str_('mean perimeter'), np.str_('mean area'), np.str_('mean concavity'), np.str_('mean concave points'), np.str_('worst radius'), np.str_('worst texture'), np.str_('worst perimeter'), np.str_('worst area'), np.str_('worst concavity'), np.str_('worst concave points')]


In [12]:
scores = cross_val_score(rfc, X[:, rfe.support_], y, cv=5)
print(f"Mean cross-validation score: {scores.mean():.4f}")

Mean cross-validation score: 0.9649


In [13]:
scores

array([0.93859649, 0.95614035, 0.98245614, 0.97368421, 0.97345133])