In [1]:

# RFE Regression Example

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load dataset
df = pd.read_csv("prep.csv")

# Convert categorical variables
df = pd.get_dummies(df, drop_first=True)

# Define features and target
target_column = df.columns[-1]
X = df.drop(target_column, axis=1)
y = df[target_column]

# Split data
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scale data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Apply RFE
model = LinearRegression()
rfe = RFE(model, n_features_to_select=5)
X_train_rfe = rfe.fit_transform(X_train_scaled, y_train)
X_test_rfe = rfe.transform(X_test_scaled)

print("Selected features:", rfe.get_support())

# Train regression model
model.fit(X_train_rfe, y_train)
y_pred = model.predict(X_test_rfe)

print("MSE:", mean_squared_error(y_test, y_pred))
print("R2 Score:", r2_score(y_test, y_pred))


Selected features: [False False False False False False False False False  True False False
 False False  True  True False False False False False  True  True False
 False False False]
MSE: 0.06540980411840389
R2 Score: 0.7381971567493528
