In [1]:
import pandas as pd
import numpy as np
import warnings
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from sklearn.pipeline import Pipeline
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

In [2]:
# Install a catch-all "ignore" filter so warnings of every category are hidden.
warnings.simplefilter('ignore')

In [3]:
# Load the dataset, then split it positionally: every column except the last
# becomes the feature matrix X, and the last column becomes the target vector y.
df = pd.read_csv('./Data.csv')
X, y = df.iloc[:, :-1].values, df.iloc[:, -1].values

In [4]:
# Seed shared by the randomized estimators below so that re-running the
# notebook reproduces the same cross-validation scores.
SEED = 42

# Candidate classifiers, keyed by the label under which each score is reported.
# scikit-learn's tree-based estimators default to random_state=None and would
# otherwise give slightly different results on every run; the boosting
# libraries are seeded explicitly as well for consistency.
models = {
    'XGBoost': XGBClassifier(random_state=SEED),
    'Logistic Regression': LogisticRegression(),
    'K-NN': KNeighborsClassifier(),
    'SVC': SVC(),
    'GauusianNB': GaussianNB(),
    'Random Forest': RandomForestClassifier(random_state=SEED),
    'Decision Tree': DecisionTreeClassifier(random_state=SEED),
    'CATBoost': CatBoostClassifier(verbose=0, random_seed=SEED),
}

In [5]:
# Empty one-column table; rows are added later, indexed by model name.
results = pd.DataFrame(data=None, columns=['Score'])

In [6]:
# Extra hyper-parameters searched per model, on top of the LDA dimensionality.
# Models that are not listed here are tuned over the LDA dimensionality only.
model_param_grids = {
    'K-NN': {'model__n_neighbors': np.arange(1, 50)},
    'SVC': {'model__kernel': ['rbf', 'poly', 'sigmoid']},
    'Decision Tree': {
        'model__criterion': ['gini', 'entropy', 'log_loss'],
        'model__splitter': ['best', 'random'],
    },
    'Random Forest': {
        'model__criterion': ['gini', 'entropy', 'log_loss'],
        'model__max_features': ['sqrt', 'log2'],
        'model__class_weight': ['balanced', 'balanced_subsample'],
    },
}

# LDA can keep at most min(n_features, n_classes - 1) components. Searching
# beyond that cap only produces candidates scikit-learn rejects, so the grid
# stops at the cap instead of running up to the number of features.
max_lda_components = min(X.shape[1], np.unique(y).size - 1)
lda_components = np.arange(1, max_lda_components + 1)

for modelName in models:
    if modelName == 'XGBoost':
        # XGBoost is scored with plain 10-fold cross-validation on the raw
        # features and integer-encoded labels; it does not go through the
        # scaler/LDA pipeline or the grid search used for the other models.
        # NOTE(review): its score is therefore not directly comparable with
        # the others - confirm this is intended.
        Score = cross_val_score(
            models[modelName], X, LabelEncoder().fit_transform(y), cv=10
        ).mean()
    else:
        # Standardise -> project with LDA -> classify, tuned by 10-fold grid search.
        estimator = Pipeline([
            ('scaler', StandardScaler()),
            ('lda', LDA()),
            ('model', models[modelName]),
        ])
        param_grid = {
            'lda__n_components': lda_components,
            **model_param_grids.get(modelName, {}),
        }
        Score = GridSearchCV(estimator, param_grid, cv=10, n_jobs=-1).fit(X, y).best_score_

    # Store the best mean accuracy as a percentage string, e.g. '97.66%'.
    results.loc[modelName] = {'Score': f'{Score:.2%}'}

In [None]:
# 'Score' holds strings such as '97.66%'. Sorting those directly is
# lexicographic ('100.00%' would rank below '97.66%'), so order the rows by
# the numeric value parsed from each string instead.
results.sort_values(
    'Score',
    ascending=False,
    key=lambda col: pd.to_numeric(col.astype(str).str.rstrip('%')),
)

Unnamed: 0,Score
Random Forest,97.66%
Decision Tree,97.66%
K-NN,97.51%
SVC,97.37%
CATBoost,97.37%
Logistic Regression,96.94%
GauusianNB,96.93%
