In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split 
import time
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
import pickle
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier

In [22]:
def cm_prediction(classifier, X_test, y_test):
    """Score an already-fitted classifier on a held-out split.

    Parameters
    ----------
    classifier : object exposing ``predict``
        The fitted model to evaluate; it is returned unchanged.
    X_test : features passed to ``classifier.predict``.
    y_test : true labels the predictions are compared against.

    Returns
    -------
    tuple
        ``(classifier, accuracy, classification_report, confusion_matrix)``.
    """
    predictions = classifier.predict(X_test)

    # The metric helpers are imported locally, as in the rest of this cell.
    from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

    matrix = confusion_matrix(y_test, predictions)
    score = accuracy_score(y_test, predictions)
    summary = classification_report(y_test, predictions)

    return classifier, score, summary, matrix

def logistic(X_train, y_train, X_test, y_test):
    """Fit a logistic-regression classifier and evaluate it on the test split.

    Returns the tuple produced by ``cm_prediction``: the fitted model, its
    accuracy, the classification report and the confusion matrix.
    """
    model = LogisticRegression(random_state=0)
    model.fit(X_train, y_train)
    return cm_prediction(model, X_test, y_test)

def svm_linear(X_train, y_train, X_test, y_test):
    """Fit a linear-kernel SVC and evaluate it on the test split.

    Returns the tuple produced by ``cm_prediction``: the fitted model, its
    accuracy, the classification report and the confusion matrix.
    """
    model = SVC(kernel='linear', random_state=0)
    model.fit(X_train, y_train)
    return cm_prediction(model, X_test, y_test)

def svm_NL(X_train, y_train, X_test, y_test):
    """Fit a non-linear (RBF-kernel) SVC and evaluate it on the test split.

    Returns the tuple produced by ``cm_prediction``: the fitted model, its
    accuracy, the classification report and the confusion matrix.
    """
    model = SVC(kernel='rbf', random_state=0)
    model.fit(X_train, y_train)
    return cm_prediction(model, X_test, y_test)

def Navie(X_train, y_train, X_test, y_test):
    """Fit a Gaussian naive-Bayes classifier and evaluate it on the test split.

    Returns the tuple produced by ``cm_prediction``: the fitted model, its
    accuracy, the classification report and the confusion matrix.
    """
    model = GaussianNB()
    model.fit(X_train, y_train)
    return cm_prediction(model, X_test, y_test)

def knn(X_train, y_train, X_test, y_test):
    """Fit a 5-nearest-neighbours classifier and evaluate it on the test split.

    Returns the tuple produced by ``cm_prediction``: the fitted model, its
    accuracy, the classification report and the confusion matrix.
    """
    # Minkowski distance with p=2 is the ordinary Euclidean distance.
    model = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2)
    model.fit(X_train, y_train)
    return cm_prediction(model, X_test, y_test)

def Decision(X_train, y_train, X_test, y_test):
    """Fit an entropy-criterion decision tree and evaluate it on the test split.

    Returns the tuple produced by ``cm_prediction``: the fitted model, its
    accuracy, the classification report and the confusion matrix.
    """
    model = DecisionTreeClassifier(criterion='entropy', random_state=0)
    model.fit(X_train, y_train)
    return cm_prediction(model, X_test, y_test)

def random(X_train, y_train, X_test, y_test):
    """Fit a 10-tree, entropy-criterion random forest and evaluate it on the test split.

    Returns the tuple produced by ``cm_prediction``: the fitted model, its
    accuracy, the classification report and the confusion matrix.
    """
    model = RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=0)
    model.fit(X_train, y_train)
    return cm_prediction(model, X_test, y_test)

def rfeFeature(indep_X, dep_Y, n):
    """Run recursive feature elimination (RFE) once per base estimator.

    Four estimators drive the elimination in turn: logistic regression, a
    linear-kernel SVC, a random forest and a decision tree. Each RFE pass is
    fitted on ``indep_X`` / ``dep_Y`` and keeps ``n`` features.

    Parameters
    ----------
    indep_X : feature matrix handed to ``RFE.fit`` and ``RFE.transform``.
    dep_Y : target values handed to ``RFE.fit``.
    n : number of features each RFE pass keeps (``n_features_to_select``).

    Returns
    -------
    list
        One transformed feature matrix per estimator, in the order
        logistic regression, SVC, random forest, decision tree.
    """
    # NOTE(review): indep_X is used exactly as passed in; no scaling happens
    # inside this function.
    estimators = [
        LogisticRegression(solver='lbfgs'),
        SVC(kernel='linear', random_state=0),
        RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=0),
        DecisionTreeClassifier(criterion='gini', max_features='sqrt', splitter='best', random_state=0),
    ]

    reduced_sets = []
    for estimator in estimators:
        print(estimator)  # echo which estimator drives this elimination pass
        selector = RFE(estimator=estimator, n_features_to_select=n)
        selector.fit(indep_X, dep_Y)
        reduced_sets.append(selector.transform(indep_X))

    return reduced_sets

def rfe_Classification(acclog, accsvml, accsvmnl, accknn, accnav, accdes, accrf):
    """Arrange per-classifier accuracy lists into a comparison table.

    Each argument is an indexable sequence of accuracies for one classifier;
    positions 0..3 correspond to the RFE estimators 'Logistic', 'SVC',
    'Random' and 'DecisionTree', in that order. Elements beyond the fourth
    are ignored.

    Parameters
    ----------
    acclog, accsvml, accsvmnl, accknn, accnav, accdes, accrf : sequence
        Accuracies for the 'Logistic', 'SVMl', 'SVMnl', 'KNN', 'Navie',
        'Decision' and 'Random' columns respectively.

    Returns
    -------
    pandas.DataFrame
        4 rows (RFE estimator) x 7 columns (classifier) holding the scores.

    Raises
    ------
    IndexError
        If any sequence has fewer than four elements.
    """
    row_labels = ['Logistic', 'SVC', 'Random', 'DecisionTree']
    scores_by_column = {
        'Logistic': acclog,
        'SVMl': accsvml,
        'SVMnl': accsvmnl,
        'KNN': accknn,
        'Navie': accnav,
        'Decision': accdes,
        'Random': accrf,
    }

    rfedataframe = pd.DataFrame(index=row_labels, columns=list(scores_by_column))

    # Write each cell with a single .loc[row, column] assignment. The former
    # chained form `frame[column][row] = value` assigns into an intermediate
    # object; with pandas Copy-on-Write that never reaches the frame and the
    # table stays all-NaN.
    for position, row in enumerate(row_labels):
        for column, scores in scores_by_column.items():
            rfedataframe.loc[row, column] = scores[position]

    return rfedataframe

def split_scalar(indep_X, dep_Y):
    """Split into train/test sets (75/25, ``random_state=0``) and standardise the features.

    The scaler is fitted on the training features only and then applied to
    both splits; the targets are returned untouched.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)`` with both feature sets scaled.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        indep_X, dep_Y, test_size=0.25, random_state=0
    )

    scaler = StandardScaler()
    scaler.fit(X_train)

    return scaler.transform(X_train), scaler.transform(X_test), y_train, y_test

# Load the preprocessed data and one-hot encode the categorical columns.
dataset1 = pd.read_csv("prep.csv", index_col=None)
df2 = pd.get_dummies(dataset1, drop_first=True)

# Separate the features from the target column. `axis` used to be passed
# positionally (`drop(label, 1)`); pandas deprecated that form and pandas 2.x
# rejects it, so the column is named explicitly with `columns=`.
indep_X = df2.drop(columns='classification_yes')
dep_Y = df2['classification_yes']

# Step 1: one reduced feature matrix per RFE base estimator (6 features kept).
rfelist = rfeFeature(indep_X, dep_Y, 6)

# Step 2 and 3: Train classification models on each feature set
acclog = []
accsvml = []
accsvmnl = []
accknn = []
accnav = []
accdes = []
accrf = []

# Each training function is paired with the list that collects its accuracy,
# in the same order the models were previously trained one by one.
model_runs = [
    (logistic, acclog),
    (svm_linear, accsvml),
    (svm_NL, accsvmnl),
    (knn, accknn),
    (Navie, accnav),
    (Decision, accdes),
    (random, accrf),
]

for i in rfelist:
    X_train, X_test, y_train, y_test = split_scalar(i, dep_Y)

    for trainer, scores in model_runs:
        classifier, Accuracy, report, cm = trainer(X_train, y_train, X_test, y_test)
        scores.append(Accuracy)


  indep_X = df2.drop('classification_yes', 1)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer t

LogisticRegression()
SVC(kernel='linear', random_state=0)
RandomForestClassifier(criterion='entropy', n_estimators=10, random_state=0)
DecisionTreeClassifier(max_features='sqrt', random_state=0)


In [23]:
# Gather the accuracy lists into one table: rows are the RFE base estimators,
# columns are the classifiers trained on each reduced feature set.
result=rfe_Classification(acclog,accsvml,accsvmnl,accknn,accnav,accdes,accrf)

In [15]:
result
# Accuracy table for RFE keeping 3 features (presumably from a run of the
# pipeline cell with n=3 — confirm; the execution counts are out of order).

Unnamed: 0,Logistic,SVMl,SVMnl,KNN,Navie,Decision,Random
Logistic,0.94,0.94,0.94,0.94,0.94,0.94,0.94
SVC,0.87,0.87,0.87,0.87,0.87,0.87,0.87
Random,0.94,0.94,0.94,0.94,0.9,0.91,0.92
DecisionTree,0.98,0.98,0.98,0.98,0.79,0.97,0.97


In [18]:
result
# Accuracy table for RFE keeping 4 features (presumably from a run of the
# pipeline cell with n=4 — confirm; the execution counts are out of order).

Unnamed: 0,Logistic,SVMl,SVMnl,KNN,Navie,Decision,Random
Logistic,0.95,0.95,0.95,0.95,0.95,0.95,0.95
SVC,0.96,0.96,0.96,0.96,0.96,0.96,0.96
Random,0.97,0.97,0.97,0.97,0.87,0.95,0.97
DecisionTree,0.98,0.98,0.92,0.98,0.81,0.98,0.98


In [21]:
result
# Accuracy table for RFE keeping 5 features (presumably from a run of the
# pipeline cell with n=5 — confirm; the execution counts are out of order).

Unnamed: 0,Logistic,SVMl,SVMnl,KNN,Navie,Decision,Random
Logistic,0.98,0.98,0.98,0.98,0.98,0.98,0.98
SVC,0.99,0.99,0.99,0.99,0.99,0.99,0.99
Random,0.97,0.97,0.98,0.97,0.91,0.96,0.98
DecisionTree,0.95,0.98,0.93,0.94,0.85,0.97,0.98


In [24]:
result
# Accuracy table for RFE keeping 6 features (presumably the
# rfeFeature(indep_X, dep_Y, 6) run in the pipeline cell — confirm).

Unnamed: 0,Logistic,SVMl,SVMnl,KNN,Navie,Decision,Random
Logistic,0.98,0.98,0.98,0.98,0.98,0.99,0.98
SVC,0.99,0.99,0.99,0.99,0.99,0.99,0.99
Random,0.98,0.98,0.99,0.96,0.92,0.95,0.98
DecisionTree,0.96,0.96,0.97,0.97,0.85,0.97,0.96
