In [56]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.datasets import load_iris

In [57]:
# Load the Iris dataset: feature matrix, class labels, and a DataFrame view of
# the features labelled with the dataset's own feature names.
iris = load_iris()
X = iris.data
y = iris.target
df = pd.DataFrame(data=X, columns=iris.feature_names)

# Standardised copy of the features.
# NOTE(review): the scaler is fit on every row of X, i.e. before any
# train/test split. Any evaluation that splits X_Scaled afterwards uses
# features whose mean/variance were computed with the held-out rows included.
sc = StandardScaler()
X_Scaled = sc.fit(X).transform(X)

In [58]:
# evaluation helper and the classifiers to compare
def model_test_run(model_instance, model_name, x_train, x_test, y_train, y_test):
    """Fit a freshly built classifier and report its hold-out performance.

    Parameters
    ----------
    model_instance : callable
        Called with no arguments to build the estimator (despite the name, it
        is invoked as a factory, not used as an already-built instance). The
        result must provide ``fit`` and ``predict``.
    model_name : str
        Label used in the printed report.
    x_train, x_test
        Training and test features.
    y_train, y_test
        Training and test labels.

    Returns
    -------
    tuple
        ``(score, c_mat)``: the value of ``accuracy_score`` and the value of
        ``confusion_matrix`` for the test labels against the predictions.

    Side effects
    ------------
    Prints the score as a percentage, the confusion matrix, and trailing
    blank lines to stdout.
    """
    trailing_newlines = 2

    estimator = model_instance()
    estimator.fit(x_train, y_train)
    predictions = estimator.predict(x_test)

    # Both metrics are computed before anything is printed.
    score = accuracy_score(y_test, predictions)
    c_mat = confusion_matrix(y_test, predictions)

    report = (
        f"score for {model_name} is :- \n",
        f"score :- {score*100:.2f}%",
        f"confusion matrix :- \n{c_mat}",
        "\n" * trailing_newlines,
    )
    # One print with a newline separator emits the same bytes as printing
    # each entry on its own.
    print(*report, sep="\n")

    return score, c_mat


def _seeded_decision_tree(**kwargs):
    """Build a DecisionTreeClassifier with a fixed default ``random_state``.

    The tree permutes features randomly at each split, so without a seed its
    fit (and therefore the reported score) can change between runs. Callers
    may still pass their own ``random_state`` or any other keyword argument.
    """
    kwargs.setdefault("random_state", 10)
    return DecisionTreeClassifier(**kwargs)


# (display name, zero-argument-callable estimator factory) pairs.
# The "knn" name is significant: the evaluation loops key on it to decide
# which model receives standardised features.
models = [
    ("decision tree", _seeded_decision_tree),
    ("naive bayes", GaussianNB),
    ("knn", KNeighborsClassifier),
]

In [59]:
# hold out method for 75-25 train test split

# train_test_split returns (X_train, X_test, y_train, y_test) in that order.
# The split does not depend on the model, so it is done once, outside the loop.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=10
)

# Standardised features for KNN. The scaler is fit on the training rows only,
# so test-set statistics do not leak into the model being evaluated.
holdout_scaler = StandardScaler().fit(X_train)
X_train_scaled = holdout_scaler.transform(X_train)
X_test_scaled = holdout_scaler.transform(X_test)

for model_name, model_instance in models:
    if model_name == "knn":
        features_train, features_test = X_train_scaled, X_test_scaled
    else:
        features_train, features_test = X_train, X_test

    # Argument order matches model_test_run's signature:
    # (x_train, x_test, y_train, y_test).
    score, c_mat = model_test_run(
        model_instance, model_name, features_train, features_test, y_train, y_test
    )

score for decision tree is :- 

score :- 97.37%
confusion matrix :- 
[[11  0  0]
 [ 0 15  0]
 [ 0  1 11]]



score for naive bayes is :- 

score :- 100.00%
confusion matrix :- 
[[11  0  0]
 [ 0 15  0]
 [ 0  0 12]]



score for knn is :- 

score :- 97.37%
confusion matrix :- 
[[11  0  0]
 [ 0 14  1]
 [ 0  0 12]]





In [60]:
# hold out method for 67-33 train test split (test_size=0.33)

# train_test_split returns (X_train, X_test, y_train, y_test) in that order.
# The split does not depend on the model, so it is done once, outside the loop.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=10
)

# Standardised features for KNN. The scaler is fit on the training rows only,
# so test-set statistics do not leak into the model being evaluated.
holdout_scaler = StandardScaler().fit(X_train)
X_train_scaled = holdout_scaler.transform(X_train)
X_test_scaled = holdout_scaler.transform(X_test)

for model_name, model_instance in models:
    if model_name == "knn":
        features_train, features_test = X_train_scaled, X_test_scaled
    else:
        features_train, features_test = X_train, X_test

    # Argument order matches model_test_run's signature:
    # (x_train, x_test, y_train, y_test).
    score, c_mat = model_test_run(
        model_instance, model_name, features_train, features_test, y_train, y_test
    )

score for decision tree is :- 

score :- 90.00%
confusion matrix :- 
[[15  0  0]
 [ 0 15  4]
 [ 0  1 15]]



score for naive bayes is :- 

score :- 94.00%
confusion matrix :- 
[[15  0  0]
 [ 0 16  3]
 [ 0  0 16]]



score for knn is :- 

score :- 94.00%
confusion matrix :- 
[[15  0  0]
 [ 0 16  3]
 [ 0  0 16]]





In [None]:
# 75-25 split in Cross Validation