# Train all classifiers at once

In [None]:
import os, sys, pathlib

# Make the notebook's working directory, its parent directory, and the shared
# utilities folder (sibling of the working directory) importable.
# NOTE(review): presumably `utils` imported in a later cell lives in
# UTILS_FOLDER -- confirm against the repository layout.
UTILS_FOLDER = 'S00 - Utils'
curPath = os.getcwd()
parPath = pathlib.Path(curPath).parent
utilPath = os.path.join(parPath, UTILS_FOLDER)
for p in [curPath, str(parPath), utilPath]:
    # Notebook cells get re-run; only append paths that are not already
    # present so sys.path does not accumulate duplicate entries.
    if p not in sys.path:
        sys.path.append(p)

In [None]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from utils import plot_ds
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier

In [None]:
# Seed passed as `random_state` to train_test_split below.
# Changing this value produces a different train/test partition, so the
# reported misclassification counts and accuracies will change with it.
SPLIT_RANDOM_STATE = 3

In [None]:
# Load the iris dataset; keep feature columns 2 and 3 (the last two) as
# inputs and the class labels as the target.
iris = datasets.load_iris()
X, y = iris.data[:, 2:4], iris.target

# Hold out 30% of the samples for testing, stratified on the labels so the
# class proportions match in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=SPLIT_RANDOM_STATE,
)

# Fit the scaler on the training partition only, then apply the same
# transformation to both partitions.
sc = StandardScaler().fit(X_train)
X_train_std, X_test_std = (sc.transform(part) for part in (X_train, X_test))

In [None]:
# Classifiers to compare. Each entry pairs an estimator class ("clf") with
# the keyword arguments ("params") used to instantiate it.
models = [
    dict(
        clf=LogisticRegression,
        params=dict(solver="lbfgs", C=1, max_iter=100),
    ),
    dict(
        clf=SVC,
        params=dict(kernel="rbf", C=1.0, gamma=0.2),
    ),
    dict(
        clf=DecisionTreeClassifier,
        params=dict(criterion="gini", max_depth=4, min_samples_split=2),
    ),
    dict(
        clf=RandomForestClassifier,
        params=dict(
            criterion="gini",
            n_estimators=25,
            max_samples=None,
            max_features="sqrt",
            max_depth=None,
        ),
    ),
    dict(
        clf=KNeighborsClassifier,
        params=dict(n_neighbors=3, p=2, algorithm="auto"),
    ),
    dict(
        clf=GaussianNB,
        params=dict(),
    ),
    dict(
        clf=AdaBoostClassifier,
        params=dict(
            # None selects the default base estimator: a
            # DecisionTreeClassifier initialized with max_depth=1.
            estimator=None,
            n_estimators=500,
            learning_rate=1,
        ),
    ),
    dict(
        clf=GradientBoostingClassifier,
        params=dict(
            learning_rate=0.1,
            n_estimators=100,
            min_samples_split=2,
            min_samples_leaf=1,
            max_depth=3,
        ),
    ),
]

In [None]:
import json

# Train every configured classifier on the standardized training data and
# collect one summary record per model (misclassification counts, accuracies
# and the hyperparameters used).
results = []

for model in models:
    clf = model["clf"](**model["params"])

    # Training
    clf.fit(X_train_std, y_train)

    # Prediction (flattened so the element-wise comparison with the 1-D
    # label arrays below lines up)
    y_pred_train = clf.predict(X_train_std).flatten()
    y_pred_test = clf.predict(X_test_std).flatten()

    # Misclassifications: number of samples whose predicted label differs
    # from the true label
    miss_train = (y_train != y_pred_train).sum()
    miss_test = (y_test != y_pred_test).sum()

    # Accuracy
    acc_train = accuracy_score(y_train, y_pred_train)
    acc_test = accuracy_score(y_test, y_pred_test)

    results.append(
        {
            "model": model["clf"].__name__,
            "Train Miss": miss_train,
            "Train Acc": acc_train,
            "Test Miss": miss_test,
            "Test Acc": acc_test,
            # default=repr keeps the run alive when a hyperparameter is not
            # JSON-native (e.g. an estimator instance passed as "estimator");
            # such values are recorded by their repr instead of raising
            # TypeError after the model has already been trained.
            "params": json.dumps(model["params"], default=repr),
        }
    )

    # NOTE(review): plot_ds comes from the project's utils module; presumably
    # it plots the train/test points together with the fitted classifier --
    # confirm against its definition.
    plot_ds(X_train_std, X_test_std, y_train, y_test, clf)

In [None]:
# Build one table row per trained model from the collected result records.
df = pd.DataFrame.from_records(results)
# Show the table in the notebook output.
# NOTE(review): `display` is not imported in this file; it is expected to be
# the builtin provided by the IPython/Jupyter runtime.
display(df)
# Export the same table to results.xlsx (relative path, so it lands in the
# current working directory); index=False leaves the row index out.
df.to_excel("results.xlsx", index=False)