In [1]:
from sklearn.datasets import load_iris
import pandas as pd
from sklearn.model_selection import train_test_split

# Fetch the Iris data and assemble one DataFrame: a column per feature
# (named after iris.feature_names) plus the class label in 'target'.
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names).assign(target=iris.target)

# Separate the label vector (y) from the feature matrix (X).
y = df["target"]
X = df.drop(columns="target")

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [3]:
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

import pickle

# Candidate estimators to evaluate, each as a (name, estimator, param_grid) tuple:
#   name       - label printed for the section and used in the pickle file name
#   estimator  - unfitted scikit-learn classifier handed to GridSearchCV
#   param_grid - hyperparameter grid searched for that estimator
models = [
    ('KNN', KNeighborsClassifier(), {'n_neighbors': range(1, 21)}),
    # max_iter is a single-value grid entry, i.e. fixed at 10000 for every
    # candidate, and therefore also reported as part of best_params_.
    ('LR', LogisticRegression(), {'C': [0.01, 0.1, 1, 10, 100],'max_iter':[10000]}),
    # A fixed random_state makes the cross-validated search over the forest
    # repeatable from run to run; 42 matches the seed of the train/test split.
    ('Random_Forest', RandomForestClassifier(random_state=42), {
        'n_estimators': [10, 20, 50],
        'max_features': [ 'sqrt', 'log2'],
        'max_depth': [4, 5, 6, 7, 8],
        'criterion': ['gini', 'entropy']
    }),
    ('SVM', svm.SVC(), {
        'C': [0.01, 0.1, 1, 10, 100],
        'gamma': [0.01, 0.1, 1, 10, 100]
    })
]

# For every candidate: tune it with a cross-validated grid search on the
# training split, report metrics on both splits, and pickle the tuned model.
for name, model, param_grid in models:
    print(f"--- {name} ---")

    # 5-fold cross-validated grid search over this estimator's grid.
    grid_search = GridSearchCV(model, param_grid, cv=5)
    grid_search.fit(X_train, y_train)

    # Winning hyperparameters and their mean cross-validation score.
    best_params = grid_search.best_params_
    best_score = grid_search.best_score_

    # GridSearchCV (refit=True by default) has already refit the winning
    # configuration on all of X_train. Reusing that estimator avoids a second,
    # redundant fit and keeps any constructor arguments of the base estimator
    # that are not in the grid, which model.__class__(**best_params) would
    # silently drop.
    best_model = grid_search.best_estimator_

    # Predictions on both splits (used by the reports and confusion matrices).
    y_pred = best_model.predict(X_test)
    y_pred_trn = best_model.predict(X_train)

    # Mean accuracy on both splits.
    accuracy = best_model.score(X_test, y_test)
    accuracy_trn = best_model.score(X_train, y_train)

    # Print the results
    print("Best hyperparameters:", best_params)
    print("Best score:", best_score)

    print('--------Training-----------------')
    print("Accuracy on the train set:", accuracy_trn)
    print("Classification Report:")
    print(classification_report(y_train, y_pred_trn))
    print("Confusion Matrix:")
    print(confusion_matrix(y_train, y_pred_trn))

    print('--------Testing-----------------')
    print("Accuracy on the test set:", accuracy)
    print("Classification Report:")
    print(classification_report(y_test, y_pred))
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred))

    # Persist the tuned model as "<name>_model.pkl" in the working directory.
    # NOTE: pickle files must only ever be loaded from trusted sources.
    with open(f"{name}_model.pkl", "wb") as file:
        pickle.dump(best_model, file)
    print()


--- KNN ---
Best hyperparameters: {'n_neighbors': 3}
Best score: 0.9583333333333334
--------Training-----------------
Accuracy on the train set: 0.95
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        40
           1       0.93      0.93      0.93        41
           2       0.92      0.92      0.92        39

    accuracy                           0.95       120
   macro avg       0.95      0.95      0.95       120
weighted avg       0.95      0.95      0.95       120

Confusion Matrix:
[[40  0  0]
 [ 0 38  3]
 [ 0  3 36]]
--------Testing-----------------
Accuracy on the test set: 1.0
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00