# In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import RFE
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Build a reproducible synthetic binary-classification problem:
# 1000 samples, 25 features (5 informative, 2 redundant).
X, y = make_classification(
    n_samples=1000,
    n_features=25,
    n_informative=5,
    n_redundant=2,
    n_classes=2,
    random_state=42,
)

# Hold out 30% of the samples for evaluation; the fixed seed keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Base estimator that RFE refits at every elimination round; the solver is
# allowed up to 500 iterations.
model = LogisticRegression(max_iter=500)

# Recursive feature elimination: drop one feature per round (step=1) until
# only 5 remain. fit() returns the fitted selector itself, so the
# construction and the fit can be chained.
rfe = RFE(estimator=model, n_features_to_select=5, step=1).fit(X_train, y_train)

# Predict on the held-out set; RFE restricts the input to the selected
# features before delegating to the underlying estimator.
y_pred = rfe.predict(X_test)

# Fraction of test samples classified correctly.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy:.2f}')

# Bar chart of the rank RFE assigned to each feature. Rank 1 marks a
# selected feature; larger ranks belong to features that were eliminated
# earlier, so taller bars mean *less* important features.
ranking = rfe.ranking_

plt.figure(figsize=(10, 6))
plt.bar(x=range(X.shape[1]), height=ranking)
plt.title("Feature Importance Ranking")
plt.xlabel('Feature Index')
plt.ylabel('Importance Rank')
plt.show()

# Report which columns RFE kept: support_ is a boolean mask over the
# features, and flatnonzero yields the positions of its True entries.
selected_features_indices = np.flatnonzero(rfe.support_)
print(f'Selected features indices: {selected_features_indices}')


# This script does the following:
# - Generates a synthetic dataset: it creates a dataset with 25 features, of which 5 are informative and 2 are redundant.
# - Splits the dataset: it divides the dataset into a training set (70%) and a test set (30%).
# - Initializes and fits RFE: it uses a logistic regression model for feature ranking and selection, specifying that 5 features be selected.
# - Evaluates the model: it calculates and prints the accuracy of the model on the test set.
# - Plots feature importance: it creates a bar plot of the rank RFE assigned to each feature; rank 1 marks a selected feature, and higher ranks mean the feature was eliminated earlier.
# - Identifies selected features: it prints the indices of the features that RFE selected as most important.