In [14]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier


In [15]:
# Load the Iris dataset
# load_iris() is called with its default arguments; the result is kept in
# `data`, and the following cells read its 'data', 'target' and
# 'feature_names' entries to build the working DataFrame.
from sklearn.datasets import load_iris
data = load_iris()

In [16]:
# Create a DataFrame
# The feature matrix and the target vector are stacked side by side so that
# the measurements and the class label live in one table; the label column is
# appended last under the name 'Species'.
iris_df = pd.DataFrame(
    np.column_stack((data['data'], data['target'])),
    columns=[*data['feature_names'], 'Species'],
)

In [17]:
# Rename columns for simplicity
# Short CamelCase headers replace the original feature names, and the numeric
# class codes are translated into readable species labels.
species_by_code = {
    0: 'Iris-setosa',
    1: 'Iris-versicolor',
    2: 'Iris-virginica',
}
iris_df.columns = [
    "SepalLengthCm",
    "SepalWidthCm",
    "PetalLengthCm",
    "PetalWidthCm",
    "Species",
]
iris_df['Species'] = iris_df['Species'].map(species_by_code)


In [None]:
# Exploratory Data Analysis
print("Dataset Information:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None" line.
iris_df.info()
print("\nSummary Statistics:")
print(iris_df.describe())


In [None]:
# Visualizations
# Pairwise scatter plots of every column pair, coloured by species.
sns.pairplot(data=iris_df, hue='Species')
plt.show()

In [None]:
# Check for missing values
# Count the null entries in each column and print the per-column totals.
missing_per_column = iris_df.isna().sum()
print("\nMissing Values:")
print(missing_per_column)

In [21]:
# Feature Scaling
# X holds every column except the label; y is the label column on its own.
y = iris_df['Species']
X = iris_df.drop(columns=['Species'])
# The scaler is fitted on all rows of X and then applied to those same rows.
scaler = StandardScaler()
X_scaled = scaler.fit(X).transform(X)

In [22]:
# Split the dataset
# The raw (unscaled) features are split first and the scaler is then fitted on
# the training rows only. Fitting the scaler on the full dataset before
# splitting would let the test rows influence the mean/variance used for
# preprocessing, which leaks test information into training.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
scaler = StandardScaler().fit(X_train_raw)
# Both splits are transformed with the training-set statistics.
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

In [None]:
# Model Training and Evaluation
# Estimators that accept a random_state are seeded so that Restart & Run All
# reproduces the same fitted models, reports and plots.
models = {
    "Logistic Regression": LogisticRegression(),
    "SVM": SVC(probability=True, random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Random Forest": RandomForestClassifier(random_state=42),
    "KNN": KNeighborsClassifier(),
    "Gradient Boosting": GradientBoostingClassifier(random_state=42)
}

# results maps each model name to its accuracy on the held-out test split.
results = {}
for model_name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    results[model_name] = acc
    print(f"\n{model_name}:")
    print(classification_report(y_test, y_pred))

    # Use the fitted model's class order for both the matrix and the tick
    # labels so rows/columns are guaranteed to line up with the names shown.
    class_labels = model.classes_
    fig, ax = plt.subplots()
    sns.heatmap(
        confusion_matrix(y_test, y_pred, labels=class_labels),
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=class_labels,
        yticklabels=class_labels,
        ax=ax,
    )
    ax.set_title(f"Confusion Matrix for {model_name}")
    ax.set_xlabel("Predicted")
    ax.set_ylabel("Actual")
    plt.show()
    # Release the figure so one figure per model does not accumulate in memory.
    plt.close(fig)


In [None]:
# Compare Model Performances
# One bar per model, bar height = test-set accuracy stored in `results`.
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(list(results.keys()), list(results.values()), color='skyblue')
ax.set_title("Model Performance Comparison")
ax.set_ylabel("Accuracy")
# Tilt the model names so the longer labels do not overlap.
ax.tick_params(axis='x', labelrotation=45)
plt.show()

In [None]:
# Hyperparameter Tuning for the Best Model (Example: Random Forest)
# Candidate values for each Random Forest setting explored by the search.
param_grid = dict(
    n_estimators=[50, 100, 150],
    max_depth=[None, 10, 20],
    min_samples_split=[2, 5, 10],
)
rf = RandomForestClassifier(random_state=42)
# 5-fold cross-validated search scored by accuracy on the training split;
# fit() returns the fitted search object, so it can be bound in one statement.
grid_search = GridSearchCV(
    rf, param_grid, scoring='accuracy', cv=5, verbose=1
).fit(X_train, y_train)

print("\nBest Parameters for Random Forest:", grid_search.best_params_)
print("Best Accuracy:", grid_search.best_score_)


In [None]:
# Final Evaluation
# Take the best estimator found by the grid search and report its
# per-class metrics on the held-out test split.
best_rf = grid_search.best_estimator_
y_pred_final = best_rf.predict(X_test)
final_report = classification_report(y_test, y_pred_final)
print("\nFinal Model Evaluation:")
print(final_report)
