In [None]:
# Feature Selection: Recursive Feature Elimination (RFE)

In [2]:
# Import Modules
import pandas as pd
from sklearn.model_selection import train_test_split 
import time
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
import pickle
import matplotlib.pyplot as plt
import warnings
# Suppress all warnings
warnings.filterwarnings('ignore')

In [5]:
# Recursive Feature Elimination
def rfeFeature(indep_X,dep_Y,n):
    """Run RFE once per candidate estimator and collect the reduced feature sets.

    Parameters
    ----------
    indep_X
        Feature matrix, passed unchanged to ``RFE.fit`` and ``RFE.transform``.
    dep_Y
        Target values, passed unchanged to ``RFE.fit``.
    n
        Forwarded to ``RFE`` as ``n_features_to_select``.

    Returns
    -------
    list
        One transformed feature matrix per estimator, in this order:
        LinearRegression, SVR (linear kernel), DecisionTreeRegressor,
        RandomForestRegressor.
    """
    from sklearn.feature_selection import RFE
    from sklearn.linear_model import LinearRegression
    from sklearn.svm import SVR
    from sklearn.tree import DecisionTreeRegressor
    from sklearn.ensemble import RandomForestRegressor

    # Estimators whose coefficients / importances drive the elimination.
    # The tree-based ones are created with random_state=0.
    estimators = (
        LinearRegression(),
        SVR(kernel='linear'),
        DecisionTreeRegressor(random_state=0),
        RandomForestRegressor(n_estimators=10, random_state=0),
    )

    selected_feature_sets = []
    for estimator in estimators:
        print(f"Running RFE for: {estimator.__class__.__name__}")
        selector = RFE(estimator=estimator, n_features_to_select=n)
        # Fit on the supplied data, then keep only the retained columns.
        reduced = selector.fit(indep_X, dep_Y).transform(indep_X)
        selected_feature_sets.append(reduced)
    return selected_feature_sets
    
# Train/test split followed by standard scaling
def split_scalar(indep_X,dep_Y):
    """Split the data 75/25 and standardise the feature parts.

    The split uses ``test_size=0.25`` and ``random_state=0``. The scaler is
    fitted on the training features only and then applied to the test
    features; the target parts are returned as produced by the split.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)`` with both feature parts scaled.
    """
    pieces = train_test_split(indep_X, dep_Y, test_size=0.25, random_state=0)
    features_train, features_test, target_train, target_test = pieces

    scaler = StandardScaler()
    scaled_train = scaler.fit_transform(features_train)
    # Reuse the statistics learned from the training part for the test part.
    scaled_test = scaler.transform(features_test)
    return scaled_train, scaled_test, target_train, target_test
    
# R2 score of a fitted regressor
def r2_prediction(regressor,X_test,y_test):
    """Return the R^2 score of ``regressor`` on the given evaluation data.

    Parameters
    ----------
    regressor
        Object exposing ``predict``; it is called with ``X_test``.
    X_test
        Features to predict on.
    y_test
        Reference targets, passed to ``r2_score`` as the first argument.

    Returns
    -------
    The value returned by ``sklearn.metrics.r2_score(y_test, predictions)``.
    """
    from sklearn.metrics import r2_score

    return r2_score(y_test, regressor.predict(X_test))

# Linear regression model
def Linear(X_train,y_train,X_test,y_test=None):
    """Fit a ``LinearRegression`` and return its R^2 score on the test data.

    Parameters
    ----------
    X_train, y_train
        Training features and targets passed to ``fit``.
    X_test
        Features the fitted model is evaluated on.
    y_test : optional
        Reference targets for ``X_test``. When omitted, the module-level
        ``y_test`` is used so existing three-argument calls keep working.

    Returns
    -------
    The score returned by ``r2_prediction``.

    Raises
    ------
    NameError
        If ``y_test`` is omitted and no module-level ``y_test`` exists.
    """
    from sklearn.linear_model import LinearRegression

    if y_test is None:
        # Legacy behaviour: the function used to read a notebook-global
        # `y_test` implicitly. Keep that as a fallback, but prefer the
        # explicit argument so the score cannot silently use stale state.
        try:
            y_test = globals()['y_test']
        except KeyError:
            raise NameError("name 'y_test' is not defined") from None

    # Fit ordinary least squares on the training set
    regressor = LinearRegression()
    regressor.fit(X_train, y_train)
    return r2_prediction(regressor, X_test, y_test)
    
# SVR model with a linear kernel
def svm_linear(X_train,y_train,X_test,y_test=None):
    """Fit an ``SVR(kernel='linear')`` and return its R^2 score on the test data.

    Parameters
    ----------
    X_train, y_train
        Training features and targets passed to ``fit``.
    X_test
        Features the fitted model is evaluated on.
    y_test : optional
        Reference targets for ``X_test``. When omitted, the module-level
        ``y_test`` is used so existing three-argument calls keep working.

    Returns
    -------
    The score returned by ``r2_prediction``.

    Raises
    ------
    NameError
        If ``y_test`` is omitted and no module-level ``y_test`` exists.
    """
    from sklearn.svm import SVR

    if y_test is None:
        # Legacy behaviour: the function used to read a notebook-global
        # `y_test` implicitly. Keep that as a fallback, but prefer the
        # explicit argument so the score cannot silently use stale state.
        try:
            y_test = globals()['y_test']
        except KeyError:
            raise NameError("name 'y_test' is not defined") from None

    regressor = SVR(kernel = 'linear')
    regressor.fit(X_train, y_train)
    return r2_prediction(regressor, X_test, y_test)
    
# SVR model with a non-linear (RBF) kernel
def svm_NL(X_train,y_train,X_test,y_test=None):
    """Fit an ``SVR(kernel='rbf')`` and return its R^2 score on the test data.

    Parameters
    ----------
    X_train, y_train
        Training features and targets passed to ``fit``.
    X_test
        Features the fitted model is evaluated on.
    y_test : optional
        Reference targets for ``X_test``. When omitted, the module-level
        ``y_test`` is used so existing three-argument calls keep working.

    Returns
    -------
    The score returned by ``r2_prediction``.

    Raises
    ------
    NameError
        If ``y_test`` is omitted and no module-level ``y_test`` exists.
    """
    from sklearn.svm import SVR

    if y_test is None:
        # Legacy behaviour: the function used to read a notebook-global
        # `y_test` implicitly. Keep that as a fallback, but prefer the
        # explicit argument so the score cannot silently use stale state.
        try:
            y_test = globals()['y_test']
        except KeyError:
            raise NameError("name 'y_test' is not defined") from None

    regressor = SVR(kernel = 'rbf')
    regressor.fit(X_train, y_train)
    return r2_prediction(regressor, X_test, y_test)
     
# Decision Tree model
def Decision(X_train,y_train,X_test,y_test=None):
    """Fit a ``DecisionTreeRegressor(random_state=0)`` and return its R^2 score.

    Parameters
    ----------
    X_train, y_train
        Training features and targets passed to ``fit``.
    X_test
        Features the fitted model is evaluated on.
    y_test : optional
        Reference targets for ``X_test``. When omitted, the module-level
        ``y_test`` is used so existing three-argument calls keep working.

    Returns
    -------
    The score returned by ``r2_prediction``.

    Raises
    ------
    NameError
        If ``y_test`` is omitted and no module-level ``y_test`` exists.
    """
    from sklearn.tree import DecisionTreeRegressor

    if y_test is None:
        # Legacy behaviour: the function used to read a notebook-global
        # `y_test` implicitly. Keep that as a fallback, but prefer the
        # explicit argument so the score cannot silently use stale state.
        try:
            y_test = globals()['y_test']
        except KeyError:
            raise NameError("name 'y_test' is not defined") from None

    # Fit the decision tree on the training set
    regressor = DecisionTreeRegressor(random_state = 0)
    regressor.fit(X_train, y_train)
    return r2_prediction(regressor, X_test, y_test)
     
# Random forest model
def random(X_train,y_train,X_test,y_test=None):
    """Fit a 10-tree ``RandomForestRegressor`` and return its R^2 score.

    The forest is created with ``n_estimators=10`` and ``random_state=0``.

    Parameters
    ----------
    X_train, y_train
        Training features and targets passed to ``fit``.
    X_test
        Features the fitted model is evaluated on.
    y_test : optional
        Reference targets for ``X_test``. When omitted, the module-level
        ``y_test`` is used so existing three-argument calls keep working.

    Returns
    -------
    The score returned by ``r2_prediction``.

    Raises
    ------
    NameError
        If ``y_test`` is omitted and no module-level ``y_test`` exists.
    """
    from sklearn.ensemble import RandomForestRegressor

    if y_test is None:
        # Legacy behaviour: the function used to read a notebook-global
        # `y_test` implicitly. Keep that as a fallback, but prefer the
        # explicit argument so the score cannot silently use stale state.
        try:
            y_test = globals()['y_test']
        except KeyError:
            raise NameError("name 'y_test' is not defined") from None

    # Fit the random forest on the training set
    regressor = RandomForestRegressor(n_estimators = 10, random_state = 0)
    regressor.fit(X_train, y_train)
    return r2_prediction(regressor, X_test, y_test)
    
# Comparison table of model scores per RFE estimator
def rfe_regression(acclog, accsvml, accdes, accrf):
    """Arrange four score sequences into a 4x4 comparison table.

    Each argument supplies one column of the table; position ``k`` of every
    sequence lands in row ``k``. Rows are labelled 'Linear', 'SVR',
    'DecisionTree', 'RandomForest' and columns 'Linear', 'SVMl',
    'DecisionTree', 'RandomForest'.

    Returns
    -------
    pandas.DataFrame
        The filled table. Only the first four entries of each sequence are read.
    """
    row_labels = ['Linear', 'SVR', 'DecisionTree', 'RandomForest']
    # Column label -> the score sequence that fills that column.
    column_scores = {
        'Linear': acclog,
        'SVMl': accsvml,
        'DecisionTree': accdes,
        'RandomForest': accrf,
    }

    table = pd.DataFrame(index=row_labels, columns=list(column_scores))
    for position, row_label in enumerate(table.index):
        for column_label, scores in column_scores.items():
            table.loc[row_label, column_label] = scores[position]
    return table

In [8]:
# Load the dataset from prep.csv (no column is used as the index)
dataset=pd.read_csv("prep.csv",index_col=None)

# Bind df2 to the loaded frame. This is an alias, not a copy; df2 is
# rebound to a new frame by get_dummies on the next statement.
df2=dataset

# One-hot encode with drop_first=True, then cast every column to int
df2 = pd.get_dummies(df2, drop_first=True).astype(int)

# Separate the features (every column except 'classification_yes') from the target.
# The double brackets make dep_Y a one-column DataFrame rather than a Series.
indep_X=df2.drop(columns=['classification_yes'])
dep_Y=df2[['classification_yes']]

# Run RFE with each estimator defined in rfeFeature, keeping 5 features per run
rfelist=rfeFeature(indep_X,dep_Y,5)       

# Per-model R2 scores; each list receives one entry per RFE feature set
acclin=[]
accsvml=[]
accsvmnl=[]
accdes=[]
accrf=[]

#print(rfelist)


# For each RFE-selected feature set, split/scale the data and score every regression model.
# NOTE(review): RFE above was fitted on the full data before this split, so the
# feature selection has already seen the rows that end up in the test part.
for i in rfelist: 
    # Split into train and test parts and scale the features.
    # The model helpers below are called without a y_test argument, so the
    # y_test assigned here at notebook level is what they score against.
    X_train, X_test, y_train, y_test=split_scalar(i,dep_Y)  
    r2_lin=Linear(X_train,y_train,X_test)
    acclin.append(r2_lin)
    
    r2_sl=svm_linear(X_train,y_train,X_test)    
    accsvml.append(r2_sl)
    
    r2_NL=svm_NL(X_train,y_train,X_test)
    accsvmnl.append(r2_NL)
    
    r2_d=Decision(X_train,y_train,X_test)
    accdes.append(r2_d)
    
    r2_r=random(X_train,y_train,X_test)
    accrf.append(r2_r)
    
    
# Build the comparison table: rows follow the order of rfelist, columns are the scored models.
# NOTE(review): accsvmnl (RBF SVR scores) is collected above but not passed here,
# so those scores do not appear in the table.
result=rfe_regression(acclin,accsvml,accdes,accrf)

result

Running RFE for: LinearRegression
Running RFE for: SVR
Running RFE for: DecisionTreeRegressor
Running RFE for: RandomForestRegressor


Unnamed: 0,Linear,SVMl,DecisionTree,RandomForest
Linear,0.620124,0.457136,0.77924,0.780135
SVR,0.604508,0.456871,0.776474,0.776745
DecisionTree,0.696888,0.658658,0.913194,0.917951
RandomForest,0.696888,0.658658,0.913194,0.917951


# Result

In [7]:
# Results table labelled as the run with 3 selected features (n = 3).
# NOTE(review): this cell only re-displays `result`; the output below presumably
# comes from an earlier run with n = 3. Re-run with that value to confirm.
result

Unnamed: 0,Linear,SVMl,DecisionTree,RandomForest
Linear,0.441961,0.262153,0.441961,0.441816
SVR,0.441961,0.262153,0.441961,0.441816
DecisionTree,0.639735,0.613769,0.910482,0.921179
RandomForest,0.639735,0.613769,0.910482,0.921179


In [9]:
# Results table for the run with 5 selected features (n = 5)
result

Unnamed: 0,Linear,SVMl,DecisionTree,RandomForest
Linear,0.620124,0.457136,0.77924,0.780135
SVR,0.604508,0.456871,0.776474,0.776745
DecisionTree,0.696888,0.658658,0.913194,0.917951
RandomForest,0.696888,0.658658,0.913194,0.917951
