In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, classification_report
from sklearn.ensemble import VotingClassifier, BaggingClassifier, AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import StackingClassifier
from sklearn.preprocessing import StandardScaler

In [2]:
data = load_iris()
X = data.data
y = data.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [3]:
# Define individual models
log_reg = LogisticRegression(random_state=42)
knn = KNeighborsClassifier(n_neighbors=3)
dt = DecisionTreeClassifier(random_state=42)
svc = SVC(probability=True, random_state=42)
nb = GaussianNB()

# List of classifiers for easy reference
models = [('Logistic Regression', log_reg), ('KNN', knn), ('Decision Tree', dt), ('SVC', svc), ('Naive Bayes', nb)]


In [4]:
# Soft Voting (Averaging of probabilities)
voting_clf = VotingClassifier(estimators=models, voting='soft')
voting_clf.fit(X_train, y_train)
y_pred = voting_clf.predict(X_test)
print("Averaging (Soft Voting) Accuracy:", accuracy_score(y_test, y_pred))


Averaging (Soft Voting) Accuracy: 0.9777777777777777


In [5]:
# Hard Voting (Majority voting)
voting_clf_hard = VotingClassifier(estimators=models, voting='hard')
voting_clf_hard.fit(X_train, y_train)
y_pred_hard = voting_clf_hard.predict(X_test)
print("Max Voting (Hard Voting) Accuracy:", accuracy_score(y_test, y_pred_hard))


Max Voting (Hard Voting) Accuracy: 0.9777777777777777


In [6]:
# Stacking
stacking_clf = StackingClassifier(estimators=models, final_estimator=LogisticRegression())
stacking_clf.fit(X_train, y_train)
y_pred_stack = stacking_clf.predict(X_test)
print("Stacking Accuracy:", accuracy_score(y_test, y_pred_stack))


Stacking Accuracy: 0.9777777777777777


In [7]:
# Train individual models on 70% of the training data and blend their predictions
X_train_blend, X_val_blend, y_train_blend, y_val_blend = train_test_split(X_train, y_train, test_size=0.3, random_state=42)
for name, model in models:
    model.fit(X_train_blend, y_train_blend)
    val_preds = model.predict(X_val_blend)
    print(f'{name} Accuracy on Blending:', accuracy_score(y_val_blend, val_preds))


Logistic Regression Accuracy on Blending: 0.90625
KNN Accuracy on Blending: 0.96875
Decision Tree Accuracy on Blending: 0.84375
SVC Accuracy on Blending: 0.9375
Naive Bayes Accuracy on Blending: 0.90625


In [8]:
# Bagging with Decision Tree
bagging_clf = BaggingClassifier(estimator=DecisionTreeClassifier(), n_estimators=100, random_state=42)
bagging_clf.fit(X_train, y_train)
y_pred_bag = bagging_clf.predict(X_test)
print("Bagging Accuracy:", accuracy_score(y_test, y_pred_bag))


Bagging Accuracy: 0.9777777777777777


In [9]:
# Boosting with AdaBoost
boosting_clf = AdaBoostClassifier(estimator=DecisionTreeClassifier(), n_estimators=50, random_state=42)
boosting_clf.fit(X_train, y_train)
y_pred_boost = boosting_clf.predict(X_test)
print("Boosting Accuracy:", accuracy_score(y_test, y_pred_boost))


Boosting Accuracy: 0.9777777777777777




In [10]:
# Summary of performance
print("Classification Report for Averaging:")
print(classification_report(y_test, y_pred))

print("Classification Report for Max Voting:")
print(classification_report(y_test, y_pred_hard))

print("Classification Report for Stacking:")
print(classification_report(y_test, y_pred_stack))

print("Classification Report for Bagging:")
print(classification_report(y_test, y_pred_bag))

print("Classification Report for Boosting:")
print(classification_report(y_test, y_pred_boost))


Classification Report for Averaging:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        16
           1       1.00      0.94      0.97        18
           2       0.92      1.00      0.96        11

    accuracy                           0.98        45
   macro avg       0.97      0.98      0.98        45
weighted avg       0.98      0.98      0.98        45

Classification Report for Max Voting:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        16
           1       1.00      0.94      0.97        18
           2       0.92      1.00      0.96        11

    accuracy                           0.98        45
   macro avg       0.97      0.98      0.98        45
weighted avg       0.98      0.98      0.98        45

Classification Report for Stacking:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        16
           1      