In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')


from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,LabelEncoder

from sklearn.ensemble import RandomForestClassifier,AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier

from sklearn.metrics import accuracy_score, classification_report

### EDA

In [4]:
from sklearn.datasets import load_iris
iris = load_iris()
iris

{'data': array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2],
        [5.4, 3.9, 1.7, 0.4],
        [4.6, 3.4, 1.4, 0.3],
        [5. , 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3. , 1.4, 0.1],
        [4.3, 3. , 1.1, 0.1],
        [5.8, 4. , 1.2, 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [5.4, 3.9, 1.3, 0.4],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.1, 3.8, 1.5, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [5.1, 3.7, 1.5, 0.4],
        [4.6, 3.6, 1. , 0.2],
        [5.1, 3.3, 1.7, 0.5],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5. , 3.4, 1.6, 0.4],
        [5.2, 3.5, 1.5, 0.2],
        [5.2, 3.4, 1.4, 0.2],
        [4.7, 3.2, 1.6, 0.2],
        [4.8, 3.1, 1.6, 0.2],
        [5.4, 3.4, 1.5, 0.4],
        [5.2, 4.1, 1.5, 0.1],
  

### Spliting data

In [6]:
X = iris.data  # Features
y = iris.target  # Target labels

# Split the dataset into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

### Modeling with hyper parameter tuning

In [8]:
classifiers = {
    'Random Forest Classifier': RandomForestClassifier(n_estimators=47, random_state=34),
    'Logistic Regression': LogisticRegression(random_state=42),
    'SVM': SVC(kernel='linear'),
    'KNN': KNeighborsClassifier(n_neighbors=5),
    'Naive Bayes': GaussianNB(),
    'Decision Tree': DecisionTreeClassifier(random_state=42, max_depth=5),
    'AdaBoost': AdaBoostClassifier(n_estimators=50, random_state=42,learning_rate=0.3)
}


for name, clf in classifiers.items():
    clf.fit(X_train, y_train)  
    y_pred = clf.predict(X_test)  
    accuracy = accuracy_score(y_test, y_pred)
    print(f'{name} Accuracy: {accuracy * 100:.2f}%')
    # print("\nClassification Report:")
    # print(classification_report(y_test, y_pred))

Random Forest Classifier Accuracy: 100.00%
Logistic Regression Accuracy: 100.00%
SVM Accuracy: 100.00%
KNN Accuracy: 100.00%
Naive Bayes Accuracy: 100.00%
Decision Tree Accuracy: 100.00%
AdaBoost Accuracy: 100.00%


In [9]:
from sklearn.model_selection import GridSearchCV

# Define the hyperparameter grid
param_grids = {
    'Random Forest Classifier': {
        'n_estimators': [50, 100, 150],
        'max_depth': [None, 10, 20, 30],
        'min_samples_split': [2, 5, 10]
    },
    'Logistic Regression': {
        'C': [0.01, 0.1, 1, 10],
        'solver': ['liblinear', 'saga']
    },
    'SVM': {
        'C': [0.1, 1, 10],
        'kernel': ['linear', 'rbf'],
        'gamma': ['scale', 'auto']
    },
    'KNN': {
        'n_neighbors': [3, 5, 7, 9],
        'weights': ['uniform', 'distance']
    },
    'Naive Bayes': {},
    'Decision Tree': {
        'max_depth': [None, 5, 10, 20],
        'min_samples_split': [2, 5, 10]
    },
    'AdaBoost': {
        'n_estimators': [50, 100, 150],
        'learning_rate': [0.1, 0.3, 0.5]
    }
}

# Loop through each classifier and apply GridSearchCV
for name, clf in classifiers.items():
    print(f"Training {name} with hyperparameter tuning...")
    
    # Get the appropriate parameter grid
    param_grid = param_grids.get(name, {})
    
    # If the classifier has parameters to tune, perform GridSearchCV
    if param_grid:
        grid_search = GridSearchCV(clf, param_grid, cv=10, scoring='accuracy', n_jobs=-1, verbose=1)
        grid_search.fit(X_train, y_train)
        
        # Get the best model and evaluate
        best_clf = grid_search.best_estimator_
        y_pred = best_clf.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        print(f'{name} Best Accuracy: {accuracy * 100:.2f}% with best params: {grid_search.best_params_}')
    else:
        # If no hyperparameters to tune, just fit the model
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        print(f'{name} Accuracy: {accuracy * 100:.2f}%')

Training Random Forest Classifier with hyperparameter tuning...
Fitting 10 folds for each of 36 candidates, totalling 360 fits
Random Forest Classifier Best Accuracy: 100.00% with best params: {'max_depth': None, 'min_samples_split': 10, 'n_estimators': 150}
Training Logistic Regression with hyperparameter tuning...
Fitting 10 folds for each of 8 candidates, totalling 80 fits
Logistic Regression Best Accuracy: 100.00% with best params: {'C': 1, 'solver': 'saga'}
Training SVM with hyperparameter tuning...
Fitting 10 folds for each of 12 candidates, totalling 120 fits
SVM Best Accuracy: 100.00% with best params: {'C': 0.1, 'gamma': 'scale', 'kernel': 'linear'}
Training KNN with hyperparameter tuning...
Fitting 10 folds for each of 8 candidates, totalling 80 fits
KNN Best Accuracy: 100.00% with best params: {'n_neighbors': 3, 'weights': 'uniform'}
Training Naive Bayes with hyperparameter tuning...
Naive Bayes Accuracy: 100.00%
Training Decision Tree with hyperparameter tuning...
Fitting 1

In [10]:
#END