In [None]:
# SelectKBest (chi-square) feature selection followed by regression model comparison

In [1]:
# Import Modules
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split 
import time
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
import pickle
import matplotlib.pyplot as plt
import warnings
# Suppress all warnings
warnings.filterwarnings('ignore')

In [2]:
# SelectKBest feature selection: keep the n best-scoring features (k = n)
def selectkbest(indep_X,dep_Y,n):
    """Reduce ``indep_X`` to its ``n`` best features according to the chi-square score.

    A ``SelectKBest`` selector configured with ``chi2`` is fitted on
    ``indep_X`` against ``dep_Y``; the same ``indep_X`` is then transformed
    so that only the selected columns remain.

    Returns the transformed feature matrix produced by the selector.
    """
    selector = SelectKBest(score_func=chi2, k=n)
    # Score every feature against the target.
    selector.fit(indep_X, dep_Y)
    # Keep only the columns the selector picked.
    reduced_features = selector.transform(indep_X)
    return reduced_features

# Standard Scaler Function
def split_scalar(indep_X,dep_Y):
    """Split the data 75/25 and standardise the features.

    The split uses ``test_size = 0.25`` and ``random_state = 0``. A
    ``StandardScaler`` is fitted on the training features only and then
    applied to both the training and the test features; the targets are
    returned untouched.

    Returns ``(X_train, X_test, y_train, y_test)``.
    """
    parts = train_test_split(indep_X, dep_Y, test_size = 0.25, random_state = 0)
    features_train, features_test, target_train, target_test = parts

    # Learn mean/variance from the training part only, then reuse it for the test part.
    scaler = StandardScaler()
    scaler.fit(features_train)
    scaled_train = scaler.transform(features_train)
    scaled_test = scaler.transform(features_test)

    return scaled_train, scaled_test, target_train, target_test
    
# R2 Score prediction
def r2_prediction(regressor,X_test,y_test):
    """Return the R2 score of ``regressor`` on the given test data.

    ``regressor.predict(X_test)`` is compared against ``y_test`` with
    ``sklearn.metrics.r2_score``.
    """
    predictions = regressor.predict(X_test)
    from sklearn.metrics import r2_score as score_fn
    return score_fn(y_test, predictions)

#Linear model
def Linear(X_train,y_train,X_test,y_test=None):
    """Fit a ``LinearRegression`` model and return its R2 score on the test data.

    Parameters
    ----------
    X_train, y_train
        Training features and targets passed to ``LinearRegression.fit``.
    X_test
        Features the fitted model is scored on.
    y_test : optional
        True targets for ``X_test``. When omitted, the notebook-level
        variable ``y_test`` is used, which is what this function silently
        relied on before; existing three-argument calls keep working.

    Returns
    -------
    The R2 score computed by ``r2_prediction``.

    Raises
    ------
    NameError
        If ``y_test`` is not passed and no notebook-level ``y_test`` exists.
    """
    from sklearn.linear_model import LinearRegression
    if y_test is None:
        # Backward-compatible fallback to the global the original code read implicitly.
        try:
            y_test = globals()['y_test']
        except KeyError:
            raise NameError("y_test was not passed and no notebook-level 'y_test' is defined") from None
    # Fit ordinary least squares on the training set.
    regressor = LinearRegression()
    regressor.fit(X_train, y_train)
    return r2_prediction(regressor, X_test, y_test)
    
#svm_linear model    
def svm_linear(X_train,y_train,X_test,y_test=None):
    """Fit an ``SVR`` with a linear kernel and return its R2 score on the test data.

    Parameters
    ----------
    X_train, y_train
        Training features and targets passed to ``SVR.fit``.
    X_test
        Features the fitted model is scored on.
    y_test : optional
        True targets for ``X_test``. When omitted, the notebook-level
        variable ``y_test`` is used, which is what this function silently
        relied on before; existing three-argument calls keep working.

    Returns
    -------
    The R2 score computed by ``r2_prediction``.

    Raises
    ------
    NameError
        If ``y_test`` is not passed and no notebook-level ``y_test`` exists.
    """
    from sklearn.svm import SVR
    if y_test is None:
        # Backward-compatible fallback to the global the original code read implicitly.
        try:
            y_test = globals()['y_test']
        except KeyError:
            raise NameError("y_test was not passed and no notebook-level 'y_test' is defined") from None
    regressor = SVR(kernel = 'linear')
    regressor.fit(X_train, y_train)
    return r2_prediction(regressor, X_test, y_test)
    
#svm_nonlinear model     
def svm_NL(X_train,y_train,X_test,y_test=None):
    """Fit an ``SVR`` with an RBF kernel and return its R2 score on the test data.

    Parameters
    ----------
    X_train, y_train
        Training features and targets passed to ``SVR.fit``.
    X_test
        Features the fitted model is scored on.
    y_test : optional
        True targets for ``X_test``. When omitted, the notebook-level
        variable ``y_test`` is used, which is what this function silently
        relied on before; existing three-argument calls keep working.

    Returns
    -------
    The R2 score computed by ``r2_prediction``.

    Raises
    ------
    NameError
        If ``y_test`` is not passed and no notebook-level ``y_test`` exists.
    """
    from sklearn.svm import SVR
    if y_test is None:
        # Backward-compatible fallback to the global the original code read implicitly.
        try:
            y_test = globals()['y_test']
        except KeyError:
            raise NameError("y_test was not passed and no notebook-level 'y_test' is defined") from None
    regressor = SVR(kernel = 'rbf')
    regressor.fit(X_train, y_train)
    return r2_prediction(regressor, X_test, y_test)
     
# Decision Tree model
def Decision(X_train,y_train,X_test,y_test=None):
    """Fit a ``DecisionTreeRegressor`` and return its R2 score on the test data.

    The tree is built with ``random_state = 0`` so repeated runs give the
    same model.

    Parameters
    ----------
    X_train, y_train
        Training features and targets passed to ``DecisionTreeRegressor.fit``.
    X_test
        Features the fitted model is scored on.
    y_test : optional
        True targets for ``X_test``. When omitted, the notebook-level
        variable ``y_test`` is used, which is what this function silently
        relied on before; existing three-argument calls keep working.

    Returns
    -------
    The R2 score computed by ``r2_prediction``.

    Raises
    ------
    NameError
        If ``y_test`` is not passed and no notebook-level ``y_test`` exists.
    """
    from sklearn.tree import DecisionTreeRegressor
    if y_test is None:
        # Backward-compatible fallback to the global the original code read implicitly.
        try:
            y_test = globals()['y_test']
        except KeyError:
            raise NameError("y_test was not passed and no notebook-level 'y_test' is defined") from None
    # Fit the decision tree on the training set.
    regressor = DecisionTreeRegressor(random_state = 0)
    regressor.fit(X_train, y_train)
    return r2_prediction(regressor, X_test, y_test)
     
# Random forest model
def random(X_train,y_train,X_test,y_test=None):
    """Fit a ``RandomForestRegressor`` and return its R2 score on the test data.

    The forest uses ``n_estimators = 10`` and ``random_state = 0``.

    Parameters
    ----------
    X_train, y_train
        Training features and targets passed to ``RandomForestRegressor.fit``.
    X_test
        Features the fitted model is scored on.
    y_test : optional
        True targets for ``X_test``. When omitted, the notebook-level
        variable ``y_test`` is used, which is what this function silently
        relied on before; existing three-argument calls keep working.

    Returns
    -------
    The R2 score computed by ``r2_prediction``.

    Raises
    ------
    NameError
        If ``y_test`` is not passed and no notebook-level ``y_test`` exists.
    """
    from sklearn.ensemble import RandomForestRegressor
    if y_test is None:
        # Backward-compatible fallback to the global the original code read implicitly.
        try:
            y_test = globals()['y_test']
        except KeyError:
            raise NameError("y_test was not passed and no notebook-level 'y_test' is defined") from None
    # Fit the random forest on the training set.
    regressor = RandomForestRegressor(n_estimators = 10, random_state = 0)
    regressor.fit(X_train, y_train)
    return r2_prediction(regressor, X_test, y_test)
    
#table for compare model  
def selectk_regression(acclin,accsvml,accsvmnl,accdes,accrf):
    """Build the one-row comparison table of R2 scores.

    Each argument is an indexable sequence of scores for one model. The
    table has a single row labelled ``'ChiSquare'`` and one column per
    model; the row at position ``p`` is filled with element ``p`` of every
    sequence (so only element 0 is read).

    Returns the filled ``pandas.DataFrame``.
    """
    # Column label -> score sequence, in display order.
    scores_by_model = {
        'Linear': acclin,
        'SVMl': accsvml,
        'SVMnl': accsvmnl,
        'DecisionTree': accdes,
        'RandomForest': accrf,
    }

    # Start from an empty frame so the row/column labels are fixed up front.
    table = pd.DataFrame(index=['ChiSquare'], columns=list(scores_by_model))

    for position, row_label in enumerate(table.index):
        for model_name, scores in scores_by_model.items():
            table.at[row_label, model_name] = scores[position]

    return table

In [9]:
# Load dataset
dataset=pd.read_csv("prep.csv",index_col=None)

# Number of features kept by the chi-square filter.
# Change this value and re-run the cell to produce the K=4 ... K=7 tables shown under "Result".
K_FEATURES=7

# One-hot encode the categorical columns (first level dropped) and cast everything to int.
# get_dummies returns a new frame, so `dataset` itself is left unchanged.
df2 = pd.get_dummies(dataset, drop_first=True).astype(int)

# Split into independent X (all features) and dependent Y
indep_X=df2.drop(columns=['classification_yes'])
dep_Y=df2[['classification_yes']]

# Select the K best features.
# NOTE(review): the selector is fitted on the full dataset before the train/test split,
# so the test rows influence which features are chosen - confirm this is acceptable.
kbest=selectkbest(indep_X,dep_Y,K_FEATURES)

# Split into train and test sets (features are standardised inside split_scalar)
X_train, X_test, y_train, y_test=split_scalar(kbest,dep_Y)

# Fit every regression model once on the selected features.
# The previous `for i in kbest:` loop iterated over every row of `kbest` and retrained
# the same five models on the same split each time, yet selectk_regression only reads
# element 0 of each list - so a single pass gives the same table at a fraction of the cost.
acclin=[Linear(X_train,y_train,X_test)]
accsvml=[svm_linear(X_train,y_train,X_test)]
accsvmnl=[svm_NL(X_train,y_train,X_test)]
accdes=[Decision(X_train,y_train,X_test)]
accrf=[random(X_train,y_train,X_test)]

result=selectk_regression(acclin,accsvml,accsvmnl,accdes,accrf)

result

Unnamed: 0,Linear,SVMl,SVMnl,DecisionTree,RandomForest
ChiSquare,0.657273,0.643479,0.902678,0.739583,0.933594


# Result

In [4]:
# K=4
result

Unnamed: 0,Linear,SVMl,SVMnl,DecisionTree,RandomForest
ChiSquare,0.303584,0.253857,0.387241,0.045139,0.486111


In [6]:
# K=5
result

Unnamed: 0,Linear,SVMl,SVMnl,DecisionTree,RandomForest
ChiSquare,0.562265,0.555947,0.75309,0.739583,0.811198


In [8]:
# K=6
result

Unnamed: 0,Linear,SVMl,SVMnl,DecisionTree,RandomForest
ChiSquare,0.607607,0.593177,0.844561,0.782986,0.924479


In [10]:
# K=7
result

Unnamed: 0,Linear,SVMl,SVMnl,DecisionTree,RandomForest
ChiSquare,0.657273,0.643479,0.902678,0.739583,0.933594
