# Classification Models Comparison

#### import required packages

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

#### load the data 

In [2]:
# read the hearing-test records from the CSV file (path is relative to the working directory)
df = pd.read_csv(filepath_or_buffer='hearing_test.csv')

# preview the first five rows
print(df.head(n=5))

    age  physical_score  test_result
0  33.0            40.7            1
1  50.0            37.2            1
2  52.0            24.7            0
3  56.0            31.0            0
4  35.0            42.9            1


#### data cleansing process

In [11]:
# y is the label column; x is every remaining column of df
y = df['test_result']
x = df.drop(columns=['test_result'])

In [33]:
# split the data into train and test sets and score every model on each split
from sklearn.model_selection import train_test_split

trials = 10
accuracies = []
for trial in range(trials):
    # split the data: 80% for training, the rest for testing. No random_state
    # is passed, so each trial works on a different random split.
    # The model helpers read x_train / x_test / y_train / y_test as globals,
    # so these names must be rebound here before the helpers are called.
    x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.8)

    # one dictionary per trial: model name -> accuracy reported by its helper
    accuracies.append({
        "Naive Bayes": nb(),
        "SVM": svm(),
        "Decision Tree": decision_tree(),
        "Random Forest": random_forest(),
        "Logistic Regression": logistic_regression(),
        "KNN": knn(),
    })


# report every trial and name the best-scoring model
for trial, info in enumerate(accuracies):
    print(f"-- Trial {trial + 1} --")

    for name, score in info.items():
        print(f"{name:<20}: {score}")

    # the helpers return formatted strings, so compare the scores numerically;
    # max() keeps the first model listed when several share the top score,
    # which also guarantees that the winner is never left blank
    winner = max(info, key=lambda name: float(info[name]))

    print()
    print(f"winner for trial-{trial + 1} is = {winner}")
    print()
    print()

-- Trial 1 --
Naive Bayes         :  91.00
SVM                 :  92.40
Decision Tree       :  89.00
Random Forest       :  89.80
Logistic Regression :  90.50
KNN                 :  91.30

winner for trial-1 is = SVM


-- Trial 2 --
Naive Bayes         :  90.90
SVM                 :  92.70
Decision Tree       :  88.70
Random Forest       :  90.30
Logistic Regression :  91.20
KNN                 :  91.80

winner for trial-2 is = SVM


-- Trial 3 --
Naive Bayes         :  90.20
SVM                 :  91.30
Decision Tree       :  86.40
Random Forest       :  88.40
Logistic Regression :  90.20
KNN                 :  90.90

winner for trial-3 is = SVM


-- Trial 4 --
Naive Bayes         :  90.40
SVM                 :  92.10
Decision Tree       :  88.30
Random Forest       :  89.40
Logistic Regression :  91.10
KNN                 :  91.30

winner for trial-4 is = SVM


-- Trial 5 --
Naive Bayes         :  91.70
SVM                 :  93.30
Decision Tree       :  88.20
Random Forest       :  

#### model building (run these cells before the comparison cell above, which calls them)

#### Logistic Regression

In [13]:
def logistic_regression():
    """Train a LogisticRegressionCV model on the current training split.

    Reads the module-level ``x_train`` / ``y_train`` and returns whatever
    ``evaluate_model`` reports for the fitted model.
    """
    from sklearn.linear_model import LogisticRegressionCV

    # build and train in one step (fit() hands back the estimator itself)
    classifier = LogisticRegressionCV().fit(x_train, y_train)

    return evaluate_model(classifier, 'Logistic Regression')

#### Naive Bayes

In [14]:
def nb():
    """Train a Gaussian naive Bayes model on the current training split.

    Reads the module-level ``x_train`` / ``y_train`` and returns whatever
    ``evaluate_model`` reports for the fitted model.
    """
    from sklearn.naive_bayes import GaussianNB

    # build and train in one step (fit() hands back the estimator itself)
    classifier = GaussianNB().fit(x_train, y_train)

    return evaluate_model(classifier, 'Naive Bayes')

#### SVM

In [15]:
def svm():
    """Train a support vector classifier (``C=2.0``) on the current training split.

    Reads the module-level ``x_train`` / ``y_train`` and returns whatever
    ``evaluate_model`` reports for the fitted model.
    """
    from sklearn.svm import SVC

    # build and train in one step (fit() hands back the estimator itself)
    classifier = SVC(C=2.0).fit(x_train, y_train)

    return evaluate_model(classifier, 'SVM')

#### KNN

In [16]:
def knn():
    """Train a k-nearest-neighbours classifier with default settings.

    Reads the module-level ``x_train`` / ``y_train`` and returns whatever
    ``evaluate_model`` reports for the fitted model.
    """
    from sklearn.neighbors import KNeighborsClassifier

    # build and train in one step (fit() hands back the estimator itself)
    classifier = KNeighborsClassifier().fit(x_train, y_train)

    return evaluate_model(classifier, 'KNN')

#### decision tree

In [17]:
def decision_tree():
    """Train a decision tree classifier with default settings.

    Reads the module-level ``x_train`` / ``y_train`` and returns whatever
    ``evaluate_model`` reports for the fitted model.
    """
    from sklearn.tree import DecisionTreeClassifier

    # build and train in one step (fit() hands back the estimator itself)
    classifier = DecisionTreeClassifier().fit(x_train, y_train)

    return evaluate_model(classifier, 'Decision Tree')

#### random forest

In [18]:
def random_forest():
    """Train a random forest classifier (``n_estimators=100``) on the training split.

    Reads the module-level ``x_train`` / ``y_train`` and returns whatever
    ``evaluate_model`` reports for the fitted model.
    """
    from sklearn.ensemble import RandomForestClassifier

    # build and train in one step (fit() hands back the estimator itself)
    classifier = RandomForestClassifier(n_estimators=100).fit(x_train, y_train)

    return evaluate_model(classifier, 'Random Forest')

#### evaluation

In [19]:
def evaluate_model(model, name):
    """Score a fitted model on the current test split.

    Args:
        model: fitted estimator exposing ``predict``.
        name: label of the model; accepted from the callers but not used here.

    Returns:
        Accuracy on the module-level ``x_test`` / ``y_test`` as a percentage,
        rendered as a string with the `` 0.2f`` format spec (e.g. ``" 91.00"``).
    """
    from sklearn.metrics import accuracy_score

    # predict the labels for x_test and turn the accuracy fraction into a percentage
    predicted = model.predict(x_test)
    percent = accuracy_score(y_test, predicted) * 100

    return format(percent, " 0.2f")