In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Import all the required classifiers
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
import xgboost as xgb
from catboost import CatBoostClassifier

# --- 1. Load and Prepare Data ---
from sklearn.datasets import load_iris

# Pull the feature matrix and the class labels out of the Iris bunch.
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for testing. Stratifying on the labels keeps the
# class proportions the same in both partitions; the fixed seed makes the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Standardise the features. The scaler's statistics are learned from the
# training partition ONLY, so no information about the test set leaks into
# preprocessing; the same fitted scaler is then applied to both partitions.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


# --- 2. Define Models ---
# Maps the display name used in the results table to an unfitted classifier.
# Every estimator that accepts a seed is pinned to 42 so reruns are
# reproducible; everything else is left at the library defaults unless noted.
models = {
    "Support Vector Machine": SVC(),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Random Forest": RandomForestClassifier(random_state=42),
    "AdaBoost": AdaBoostClassifier(random_state=42),
    # `use_label_encoder` is intentionally not passed: current XGBoost no
    # longer recognises it and only emits a
    # 'Parameters: { "use_label_encoder" } are not used.' warning.
    "XGBoost": xgb.XGBClassifier(eval_metric='mlogloss', random_state=42),
    # verbose=0 silences CatBoost's per-iteration training log.
    "CatBoost": CatBoostClassifier(verbose=0, random_state=42),
    "Gaussian Naive-Bayes": GaussianNB(),
    # Iteration cap raised to 1000 (see max_iter) to give the optimiser more
    # room to converge.
    "MLP Classifier": MLPClassifier(max_iter=1000, random_state=42),
}


# --- 3. Train and Evaluate ---
# Test-set metrics that are all computed the same way (weighted average over
# classes), paired with the column label they are reported under.
weighted_test_scorers = (
    ("Test Precision (Weighted)", precision_score),
    ("Test Recall (Weighted)", recall_score),
    ("Test F1-Score (Weighted)", f1_score),
)

# One dict per classifier; each becomes a row of the comparison table.
results_list = []

for name, model in models.items():
    # Fit on the scaled training partition.
    model.fit(X_train_scaled, y_train)

    # Predict on both partitions: the train/test accuracy gap is reported
    # side by side in the final table.
    y_train_pred = model.predict(X_train_scaled)
    y_test_pred = model.predict(X_test_scaled)

    # Accuracy first, then the weighted metrics, so the column order of the
    # resulting table follows the insertion order below.
    row = {
        "Classifier": name,
        "Train Accuracy": accuracy_score(y_train, y_train_pred),
        "Test Accuracy": accuracy_score(y_test, y_test_pred),
    }
    for label, scorer in weighted_test_scorers:
        row[label] = scorer(y_test, y_test_pred, average='weighted')

    results_list.append(row)

# Collect the per-classifier rows into a single table.
results_df = pd.DataFrame(results_list)

# --- 4. Display Results ---
# Title line followed by the full, untruncated table rendered as plain text.
print(
    "Performance Metrics Comparison",
    results_df.to_string(),
    sep="\n",
)

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Performance Metrics Comparison
               Classifier  Train Accuracy  Test Accuracy  Test Precision (Weighted)  Test Recall (Weighted)  Test F1-Score (Weighted)
0  Support Vector Machine        0.975000       0.966667                   0.969697                0.966667                  0.966583
1           Decision Tree        1.000000       0.933333                   0.933333                0.933333                  0.933333
2           Random Forest        1.000000       0.900000                   0.902357                0.900000                  0.899749
3                AdaBoost        1.000000       0.933333                   0.933333                0.933333                  0.933333
4                 XGBoost        1.000000       0.933333                   0.933333                0.933333                  0.933333
5                CatBoost        1.000000       0.933333                   0.933333                0.933333                  0.933333
6    Gaussian Naive-Bayes      