In [33]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, ExtraTreesClassifier
from sklearn.tree import DecisionTreeClassifier
   
def featureImportanceFeature(indep_X, dep_Y, n):
    """Pick the ``n`` most important columns of ``indep_X`` per tree-based model.

    Four classifiers are built and processed in this order: RandomForest,
    DecisionTree, GradientBoosting, ExtraTrees. Each one is fitted on
    ``(indep_X, dep_Y)``, its ``feature_importances_`` are ranked in
    descending order, and the ``n`` highest-ranked columns are extracted.

    Parameters
    ----------
    indep_X : feature table
        Must expose ``.columns`` and support selection with a list of column
        labels (it is indexed as ``indep_X[top_features]``).
    dep_Y : target labels passed straight to ``model.fit``.
    n : int
        Number of top-ranked columns kept per model.

    Returns
    -------
    list
        One entry per model, in the order listed above; each entry is the
        ``.values`` of ``indep_X`` restricted to that model's top columns.

    Notes
    -----
    Prints each estimator and the names of its selected columns.
    The models are fitted on every row that is passed in.
    """
    estimators = (
        RandomForestClassifier(n_estimators=100, random_state=42),
        DecisionTreeClassifier(random_state=42),
        GradientBoostingClassifier(n_estimators=100, random_state=42),
        ExtraTreesClassifier(n_estimators=100, random_state=42),
    )

    selected_matrices = []
    for estimator in estimators:
        print(estimator)
        estimator.fit(indep_X, dep_Y)

        # Pair every column with its importance, most important first.
        ranking = pd.DataFrame(
            {'Feature': indep_X.columns, 'Importance': estimator.feature_importances_}
        )
        ranking = ranking.sort_values(by='Importance', ascending=False)

        top_features = ranking['Feature'].head(n).tolist()
        print(f"Top {n} features for {type(estimator).__name__}: {top_features}")

        # Keep only the selected columns, as a plain array.
        selected_matrices.append(indep_X[top_features].values)

    return selected_matrices


def split_scalar(indep_X, dep_Y):
    """Split into train/test sets and standardise the features.

    The data is split 75/25 with ``random_state=0``. A ``StandardScaler`` is
    fitted on the training features only and then applied to both the
    training and the test features, so the test set never influences the
    scaling statistics.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)`` where the two feature sets are
        scaled and the two label sets are returned as produced by the split.
    """
    train_X, test_X, train_y, test_y = train_test_split(
        indep_X, dep_Y, test_size=0.25, random_state=0
    )

    scaler = StandardScaler()
    scaler.fit(train_X)

    return scaler.transform(train_X), scaler.transform(test_X), train_y, test_y
    
def cm_prediction(classifier, X_test, y_test=None):
    """Score a fitted classifier on the test set.

    Parameters
    ----------
    classifier : fitted estimator exposing ``predict``.
    X_test : test features handed to ``classifier.predict``.
    y_test : true labels for ``X_test``, optional.
        When omitted, the notebook-level global ``y_test`` is used. This keeps
        existing two-argument callers working, but passing the labels
        explicitly is preferred because it removes the dependency on hidden
        kernel state.

    Returns
    -------
    tuple
        ``(classifier, Accuracy, report, X_test, y_test, cm)`` where
        ``Accuracy`` comes from ``accuracy_score``, ``report`` from
        ``classification_report`` and ``cm`` from ``confusion_matrix``.

    Raises
    ------
    NameError
        If ``y_test`` is not passed and no global ``y_test`` exists.
    """
    from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

    if y_test is None:
        # Legacy path: the original implementation read ``y_test`` implicitly
        # from module scope. Resolve it explicitly and fail with a clear hint.
        try:
            y_test = globals()["y_test"]
        except KeyError:
            raise NameError(
                "name 'y_test' is not defined; pass y_test to cm_prediction "
                "or define it at notebook level"
            ) from None

    y_pred = classifier.predict(X_test)

    cm = confusion_matrix(y_test, y_pred)
    Accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)

    return classifier, Accuracy, report, X_test, y_test, cm
    
def logistic(X_train, y_train, X_test):
    """Fit a logistic-regression classifier and score it on ``X_test``.

    The model is built with ``random_state=0``, fitted on the training data
    and handed to ``cm_prediction``. ``cm_prediction`` is called without
    labels, so the true test labels come from the notebook-level ``y_test``.

    Returns
    -------
    tuple
        ``(classifier, Accuracy, report, X_test, y_test, cm)`` exactly as
        produced by ``cm_prediction``.
    """
    from sklearn.linear_model import LogisticRegression

    model = LogisticRegression(random_state=0)
    model.fit(X_train, y_train)
    return cm_prediction(model, X_test)
    
def svm_linear(X_train, y_train, X_test):
    """Fit a linear-kernel SVC and score it on ``X_test``.

    The model is built with ``kernel='linear'`` and ``random_state=0``,
    fitted on the training data and handed to ``cm_prediction``.
    ``cm_prediction`` is called without labels, so the true test labels come
    from the notebook-level ``y_test``.

    Returns
    -------
    tuple
        ``(classifier, Accuracy, report, X_test, y_test, cm)`` exactly as
        produced by ``cm_prediction``.
    """
    from sklearn.svm import SVC

    model = SVC(kernel='linear', random_state=0)
    model.fit(X_train, y_train)
    return cm_prediction(model, X_test)
    
def svm_NL(X_train, y_train, X_test):
    """Fit a non-linear (RBF-kernel) SVC and score it on ``X_test``.

    The model is built with ``kernel='rbf'`` and ``random_state=0``, fitted
    on the training data and handed to ``cm_prediction``. ``cm_prediction``
    is called without labels, so the true test labels come from the
    notebook-level ``y_test``.

    Returns
    -------
    tuple
        ``(classifier, Accuracy, report, X_test, y_test, cm)`` exactly as
        produced by ``cm_prediction``.
    """
    from sklearn.svm import SVC

    model = SVC(kernel='rbf', random_state=0)
    model.fit(X_train, y_train)
    return cm_prediction(model, X_test)

def Navie(X_train, y_train, X_test):
    """Fit a Gaussian naive-Bayes classifier and score it on ``X_test``.

    The model is fitted on the training data and handed to
    ``cm_prediction``. ``cm_prediction`` is called without labels, so the
    true test labels come from the notebook-level ``y_test``.

    Returns
    -------
    tuple
        ``(classifier, Accuracy, report, X_test, y_test, cm)`` exactly as
        produced by ``cm_prediction``.
    """
    from sklearn.naive_bayes import GaussianNB

    model = GaussianNB()
    model.fit(X_train, y_train)
    return cm_prediction(model, X_test)
    
def knn(X_train, y_train, X_test):
    """Fit a k-nearest-neighbours classifier and score it on ``X_test``.

    The model uses 5 neighbours and the Minkowski metric with ``p=2``. It is
    fitted on the training data and handed to ``cm_prediction``.
    ``cm_prediction`` is called without labels, so the true test labels come
    from the notebook-level ``y_test``.

    Returns
    -------
    tuple
        ``(classifier, Accuracy, report, X_test, y_test, cm)`` exactly as
        produced by ``cm_prediction``.
    """
    from sklearn.neighbors import KNeighborsClassifier

    model = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2)
    model.fit(X_train, y_train)
    return cm_prediction(model, X_test)
    
def Decision(X_train, y_train, X_test):
    """Fit an entropy-criterion decision tree and score it on ``X_test``.

    The model is built with ``criterion='entropy'`` and ``random_state=0``,
    fitted on the training data and handed to ``cm_prediction``.
    ``cm_prediction`` is called without labels, so the true test labels come
    from the notebook-level ``y_test``.

    Returns
    -------
    tuple
        ``(classifier, Accuracy, report, X_test, y_test, cm)`` exactly as
        produced by ``cm_prediction``.
    """
    from sklearn.tree import DecisionTreeClassifier

    model = DecisionTreeClassifier(criterion='entropy', random_state=0)
    model.fit(X_train, y_train)
    return cm_prediction(model, X_test)


def random(X_train, y_train, X_test):
    """Fit a 10-tree, entropy-criterion random forest and score it on ``X_test``.

    The model is built with ``n_estimators=10``, ``criterion='entropy'`` and
    ``random_state=0``, fitted on the training data and handed to
    ``cm_prediction``. ``cm_prediction`` is called without labels, so the
    true test labels come from the notebook-level ``y_test``.

    NOTE(review): this function's name shadows the standard-library
    ``random`` module for any later ``import random``-free usage in the
    notebook; it is kept because callers refer to it by this name.

    Returns
    -------
    tuple
        ``(classifier, Accuracy, report, X_test, y_test, cm)`` exactly as
        produced by ``cm_prediction``.
    """
    from sklearn.ensemble import RandomForestClassifier

    model = RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=0)
    model.fit(X_train, y_train)
    return cm_prediction(model, X_test)
    
def fi_classification(acclog, accsvml, accsvmnl, accknn, accnav, accdes, accrf):
    """Tabulate classifier accuracies per feature-selection model.

    Each argument is a sequence of accuracies for one classifier, ordered by
    feature-selection model: RandomForest, DecisionTree, GradientBoosting,
    ExtraTrees. Only the first four entries of each sequence are read.

    Parameters
    ----------
    acclog, accsvml, accsvmnl, accknn, accnav, accdes, accrf : sequence
        Accuracies for the 'Logistic', 'SVMl', 'SVMnl', 'KNN', 'Navie',
        'Decision' and 'Random' columns respectively.

    Returns
    -------
    pandas.DataFrame
        Rows are the four feature-selection models, columns are the seven
        classifiers.

    Raises
    ------
    IndexError
        If any sequence has fewer than four entries.
    """
    scores_by_column = {
        'Logistic': acclog,
        'SVMl': accsvml,
        'SVMnl': accsvmnl,
        'KNN': accknn,
        'Navie': accnav,
        'Decision': accdes,
        'Random': accrf,
    }

    fidataframe = pd.DataFrame(
        index=['RandomForest', 'DecisionTree', 'GradientBoosting', 'ExtraTrees'],
        columns=list(scores_by_column),
    )

    for number, idex in enumerate(fidataframe.index):
        for column, scores in scores_by_column.items():
            # Use a single .loc write. Chained indexing (df[col][row] = v)
            # assigns into a temporary under pandas Copy-on-Write and would
            # leave the frame unfilled.
            fidataframe.loc[idex, column] = scores[number]

    return fidataframe

In [34]:
# Load the preprocessed dataset from the working directory.
dataset1 = pd.read_csv("prep.csv", index_col=None)

# One-hot encode the categorical columns; drop_first drops one dummy level
# per category. get_dummies returns a new frame, so dataset1 stays untouched.
df2 = pd.get_dummies(dataset1, drop_first=True)

# Split into features and target. `columns=` replaces the positional axis
# argument (`drop(label, 1)`), which warns on pandas 1.3+ and raises
# TypeError on pandas 2.0+.
indep_X = df2.drop(columns='classification_yes')
dep_Y = df2['classification_yes']


  indep_X=df2.drop('classification_yes', 1)


In [35]:
# Display the one-hot encoded frame (feature columns plus the
# 'classification_yes' target column).
df2

Unnamed: 0,age,bp,al,su,bgr,bu,sc,sod,pot,hrmo,...,pc_normal,pcc_present,ba_present,htn_yes,dm_yes,cad_yes,appet_yes,pe_yes,ane_yes,classification_yes
0,2.000000,76.459948,3.0,0.0,148.112676,57.482105,3.077356,137.528754,4.627244,12.518156,...,0,0,0,0,0,0,1,1,0,1
1,3.000000,76.459948,2.0,0.0,148.112676,22.000000,0.700000,137.528754,4.627244,10.700000,...,1,0,0,0,0,0,1,0,0,1
2,4.000000,76.459948,1.0,0.0,99.000000,23.000000,0.600000,138.000000,4.400000,12.000000,...,1,0,0,0,0,0,1,0,0,1
3,5.000000,76.459948,1.0,0.0,148.112676,16.000000,0.700000,138.000000,3.200000,8.100000,...,1,0,0,0,0,0,1,0,1,1
4,5.000000,50.000000,0.0,0.0,148.112676,25.000000,0.600000,137.528754,4.627244,11.800000,...,1,0,0,0,0,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
394,51.492308,70.000000,0.0,0.0,219.000000,36.000000,1.300000,139.000000,3.700000,12.500000,...,1,0,0,0,0,0,1,0,0,1
395,51.492308,70.000000,0.0,2.0,220.000000,68.000000,2.800000,137.528754,4.627244,8.700000,...,1,0,0,1,1,0,1,0,1,1
396,51.492308,70.000000,3.0,0.0,110.000000,115.000000,6.000000,134.000000,2.700000,9.100000,...,1,0,0,1,1,0,0,0,0,1
397,51.492308,90.000000,0.0,0.0,207.000000,80.000000,6.800000,142.000000,5.500000,8.500000,...,1,0,0,1,1,0,1,0,1,1


In [36]:
# Keep the 3 most important columns according to each tree-based selector.
filist = featureImportanceFeature(indep_X, dep_Y, 3)

# One independent accuracy accumulator per downstream classifier:
# logistic, linear SVM, RBF SVM, k-NN, naive Bayes, decision tree, random forest.
acclog, accsvml, accsvmnl, accknn, accnav, accdes, accrf = ([] for _ in range(7))

RandomForestClassifier(random_state=42)
Top 3 features for RandomForestClassifier: ['hrmo', 'pcv', 'sc']
DecisionTreeClassifier(random_state=42)
Top 3 features for DecisionTreeClassifier: ['hrmo', 'sg_d', 'sg_c']
GradientBoostingClassifier(random_state=42)
Top 3 features for GradientBoostingClassifier: ['hrmo', 'sg_d', 'al']
ExtraTreesClassifier(random_state=42)
Top 3 features for ExtraTreesClassifier: ['htn_yes', 'hrmo', 'dm_yes']


In [37]:
# Show the per-selector feature matrices returned by featureImportanceFeature.
filist

[array([[12.51815562, 38.86890244,  3.07735602],
        [10.7       , 34.        ,  0.7       ],
        [12.        , 34.        ,  0.6       ],
        ...,
        [ 9.1       , 26.        ,  6.        ],
        [ 8.5       , 38.86890244,  6.8       ],
        [16.3       , 53.        ,  1.        ]]),
 array([[12.51815562,  0.        ,  1.        ],
        [10.7       ,  0.        ,  1.        ],
        [12.        ,  0.        ,  0.        ],
        ...,
        [ 9.1       ,  0.        ,  1.        ],
        [ 8.5       ,  0.        ,  0.        ],
        [16.3       ,  0.        ,  0.        ]]),
 array([[12.51815562,  0.        ,  3.        ],
        [10.7       ,  0.        ,  2.        ],
        [12.        ,  0.        ,  1.        ],
        ...,
        [ 9.1       ,  0.        ,  3.        ],
        [ 8.5       ,  0.        ,  0.        ],
        [16.3       ,  0.        ,  0.        ]]),
 array([[ 0.        , 12.51815562,  0.        ],
        [ 0.        , 10

In [38]:
# Each training helper is paired with the list that collects its accuracy.
# The order matches the columns later built by fi_classification.
evaluators = (
    (logistic, acclog),
    (svm_linear, accsvml),
    (svm_NL, accsvmnl),
    (knn, accknn),
    (Navie, accnav),
    (Decision, accdes),
    (random, accrf),
)

for selected_X in filist:
    # y_test must be bound at notebook level here: the training helpers call
    # cm_prediction without labels, and it reads y_test from global scope.
    X_train, X_test, y_train, y_test = split_scalar(selected_X, dep_Y)

    for trainer, accuracies in evaluators:
        classifier, Accuracy, report, X_test, y_test, cm = trainer(X_train, y_train, X_test)
        accuracies.append(Accuracy)

# Rows: feature selectors; columns: classifiers.
fi_result = fi_classification(acclog, accsvml, accsvmnl, accknn, accnav, accdes, accrf)


In [39]:
fi_result
# n = 3: accuracies when each selector keeps its top 3 features
# (matches the featureImportanceFeature(indep_X, dep_Y, 3) call above).

Unnamed: 0,Logistic,SVMl,SVMnl,KNN,Navie,Decision,Random
RandomForest,0.94,0.94,0.94,0.94,0.9,0.91,0.93
DecisionTree,0.99,0.96,0.96,0.99,0.78,0.99,0.99
GradientBoosting,0.98,0.94,0.96,0.98,0.87,0.99,0.96
ExtraTrees,0.96,0.94,0.97,0.95,0.8,0.96,0.94


In [22]:
fi_result
# n = 4 (presumably the top-4-features run). NOTE(review): execution count
# In [22] is out of sequence, so this output appears to come from a separate
# run with a different n - TODO confirm and regenerate via Restart & Run All.

Unnamed: 0,Logistic,SVMl,SVMnl,KNN,Navie,Decision,Random
RandomForest,0.93,0.93,0.94,0.93,0.91,0.91,0.92
DecisionTree,0.98,0.98,0.97,0.98,0.78,0.96,0.98
GradientBoosting,0.98,0.98,0.99,0.99,0.91,0.95,1.0
ExtraTrees,0.97,0.97,0.92,0.98,0.87,0.97,0.95


In [26]:
fi_result
# n = 5 (presumably the top-5-features run). NOTE(review): execution count
# In [26] is out of sequence, so this output appears to come from a separate
# run with a different n - TODO confirm and regenerate via Restart & Run All.

Unnamed: 0,Logistic,SVMl,SVMnl,KNN,Navie,Decision,Random
RandomForest,0.97,0.97,0.97,0.96,0.87,0.93,0.97
DecisionTree,0.98,0.99,0.98,0.99,0.94,0.96,0.96
GradientBoosting,0.97,0.97,0.98,1.0,0.91,0.96,0.99
ExtraTrees,0.96,0.96,0.96,0.96,0.95,0.98,0.97


In [14]:
fi_result
# n = 6 (presumably the top-6-features run). NOTE(review): execution count
# In [14] is out of sequence, so this output appears to come from a separate
# run with a different n - TODO confirm and regenerate via Restart & Run All.

Unnamed: 0,Logistic,SVMl,SVMnl,KNN,Navie,Decision,Random
RandomForest,0.98,0.98,0.99,0.97,0.93,0.96,0.97
DecisionTree,0.99,1.0,0.98,0.98,0.94,0.96,0.97
GradientBoosting,0.98,0.98,0.99,0.98,0.94,0.95,0.99
ExtraTrees,0.97,0.98,0.97,0.98,0.95,0.99,0.98
