# In [None]:
import pandas as pd
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler  # Import StandardScaler

# Load the raw data. "prep.csv" is resolved against the current working
# directory; substitute a full path if the file lives elsewhere.
dataset = pd.read_csv("prep.csv", index_col=None)

# One-hot encode the categorical columns as 0/1 integers, dropping the first
# level of each so the indicator columns are not redundant.
df1 = pd.get_dummies(dataset, drop_first=True, dtype=int)
print(df1.head())

# Split into target and predictors.
# NOTE(review): "classification_yes" is presumably the indicator column that
# get_dummies derives from a "classification" column -- confirm against the CSV.
dep = df1["classification_yes"]
indep = df1.drop(columns="classification_yes")

# Standardise the predictors, then wrap the result in a DataFrame again so
# the original column names stay attached to the scaled values.
scaler = StandardScaler()
indep_scaled = pd.DataFrame(scaler.fit_transform(indep), columns=indep.columns)

# Estimator that will score candidate feature subsets during selection.
rf_classifier = RandomForestClassifier()

def backward_selection(indep, dep, n, estimator, *, cv=5, scoring=None, n_jobs=1):
    """
    Select ``n`` features by backward sequential elimination.

    Builds a scikit-learn ``SequentialFeatureSelector`` with
    ``direction='backward'`` around ``estimator``, fits it on
    ``indep``/``dep``, and returns the reduced feature matrix together with
    the names of the columns that were kept.

    Args:
        indep (pd.DataFrame): Independent variables. Must be a DataFrame (or
            expose ``.columns``), because the selected feature names are read
            from ``indep.columns``.
        dep (pd.Series): Dependent variable.
        n (int): Number of features to select; passed to the selector as
            ``n_features_to_select``.
        estimator: An instance of a scikit-learn estimator used by the
            selector to evaluate feature subsets.
        cv: Cross-validation setting forwarded to the selector. Defaults to 5.
        scoring: Scoring setting forwarded to the selector. Defaults to
            ``None``, i.e. whatever the selector uses when no scoring is given.
        n_jobs: Parallelism setting forwarded to the selector. Defaults to 1.

    Returns:
        tuple: ``(x_new, selected_features)`` where ``x_new`` is the result of
        the fitted selector's ``transform(indep)`` and ``selected_features``
        is the ``pd.Index`` of retained column names.
    """
    selector = SequentialFeatureSelector(
        estimator=estimator,
        n_features_to_select=n,
        direction='backward',
        scoring=scoring,
        cv=cv,
        n_jobs=n_jobs,
    )
    selector.fit(indep, dep)

    x_new = selector.transform(indep)
    # get_support() yields a boolean mask aligned with the input columns,
    # so indexing the column Index with it gives the kept feature names.
    selected_features = indep.columns[selector.get_support()]
    return x_new, selected_features

# Run backward elimination on the scaled predictors, keeping 3 features and
# scoring candidate subsets with the random-forest instance created earlier.
x_new, selected_features = backward_selection(
    indep_scaled,
    dep,
    n=3,
    estimator=rf_classifier,
)
print("Selected Features:", selected_features)
print("New feature matrix (x_new):\n", x_new)