# Feature Selection using f_regression

In [8]:
from sklearn.datasets import load_breast_cancer
from sklearn.feature_selection import f_regression

# Load the Breast Cancer dataset and unpack the feature matrix and target
data = load_breast_cancer()
X = data.data
y = data.target

# Score each feature against the target with f_regression; it returns one
# F-score and one p-value per feature (the p-values are not used below).
# NOTE(review): if y is a class label, f_classif is the conventional scorer
# for this kind of target — confirm that f_regression is intended.
f_scores, p_values = f_regression(X, y)

# Treat the magnitude of each F-score as that feature's importance
feature_importance = abs(f_scores)

# argsort() orders ascending, so reverse it to get most-important-first
sorted_indices = feature_importance.argsort()[::-1]

# Report the ten highest-scoring features as "<name>: <score>"
for idx in sorted_indices[:10]:
    name = data.feature_names[idx]
    score = feature_importance[idx]
    print(f"{name}: {score:.2f}")

worst concave points: 964.39
worst perimeter: 897.94
mean concave points: 861.68
worst radius: 860.78
mean perimeter: 697.24
worst area: 661.60
mean radius: 646.98
mean area: 573.06
mean concavity: 533.79
worst concavity: 436.69


# Feature Selection using RFE (Recursive Feature Elimination)

In [18]:
from sklearn.datasets import load_breast_cancer
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression

# Load the Breast Cancer dataset and unpack the feature matrix and target
data = load_breast_cancer()
X = data.data
y = data.target

# Base model that RFE refits while eliminating features; the iteration cap
# is raised to 2000 via max_iter.
estimator = LogisticRegression(max_iter=2000)

# Build the selector (keep 10 features, drop 1 per step) and fit it in one go.
# NOTE(review): X is passed in unscaled; if the elimination ranks features by
# coefficient magnitude, standardizing X first may change which features are
# kept — confirm whether scaling is wanted here.
selector = RFE(estimator=estimator, n_features_to_select=10, step=1).fit(X, y)

# support_ is consumed below as a per-feature truthy flag (despite the
# "indices" in the variable name): True-ish entries mark retained features.
selected_feature_indices = selector.support_

# Pair every feature name with its flag and keep the retained ones
selected_feature_names = [
    feature_name
    for feature_name, is_kept in zip(data.feature_names, selected_feature_indices)
    if is_kept
]
print("Selected features:", selected_feature_names)

Selected features: ['mean radius', 'mean compactness', 'mean concavity', 'texture error', 'worst radius', 'worst smoothness', 'worst compactness', 'worst concavity', 'worst concave points', 'worst symmetry']
