In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler 
from mlxtend.feature_selection import SequentialFeatureSelector as SFS 
from sklearn.linear_model import LogisticRegression 

#Import Data
hl = pd.read_excel('modified.xlsx')

#Drop unnecessay column 
hl = hl.drop(columns=(['Unnamed: 0']), axis =1)

#Seperate features into categorical and numerical
categorical_features = ['Gender', 'Married', 'Dependents', 'Education', 'Self_Employed', 'Property_Area','Credit_History','Loan_Amount_Term']

numerical_features =  ['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount']

#Convert categorical features into indicator variables(0 or 1)
home_encoded = pd.get_dummies(hl, drop_first=True)

#Drop all null values from data set
home_encoded = home_encoded.dropna()

#Save modified data set to new excel spread sheet
home_encoded.to_excel("SFS.xlsx")

#Setting X to all descriptive features except Loan_Status_Y
X = home_encoded.drop(columns= 'Loan_Status_Y')

#Setting y to Target Feature
y = home_encoded['Loan_Status_Y']

#Splitting Dataset into Test and Train
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.20,stratify =y,random_state =1)

#Instantiating function to Transform features by scaling each featur into a given range
scaler = MinMaxScaler() 
X_train = scaler.fit_transform(X_train) 
X_test = scaler.transform(X_test)

#Adds featurs to form a feature subset in a greedy fashion
sfs = SFS(LogisticRegression(solver='lbfgs', max_iter=300),
          k_features=5, 
          forward=True, 
          floating = False, 
          verbose=2,
          scoring = 'accuracy', 
          cv = 0)

sfs.fit(X, y) 
sfs.k_feature_names_


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  14 out of  14 | elapsed:    0.0s finished

[2023-09-10 23:22:39] Features: 1/5 -- score: 0.8147448015122873[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  13 out of  13 | elapsed:    0.0s finished

[2023-09-10 23:22:40] Features: 2/5 -- score: 0.8147448015122873[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  12 out of  12 | elapsed:    0.1s finished

[2023-09-10 23:22:40] Features: 3/5 -- score: 0.8147448015122873[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 

('ApplicantIncome',
 'CoapplicantIncome',
 'LoanAmount',
 'Credit_History',
 'Property_Area_Urban')