In [1]:
# Import necessary libraries
from sklearn.datasets import load_iris
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingClassifier, BaggingClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier

# Load the Iris dataset
iris = load_iris()
X_iris, y_iris = iris.data, iris.target

# Load the Wine Quality dataset
wine_quality_url = r'datasets\wine+quality\winequality-red.csv'
wine_data = pd.read_csv(wine_quality_url, delimiter=';')
X_wine = wine_data.drop('quality', axis=1)
y_wine = wine_data['quality']


In [2]:
# Split the Iris dataset
X_train_iris, X_test_iris, y_train_iris, y_test_iris = train_test_split(X_iris, y_iris, test_size=0.3, random_state=42)

# Split the Wine Quality dataset
X_train_wine, X_test_wine, y_train_wine, y_test_wine = train_test_split(X_wine, y_wine, test_size=0.3, random_state=42)


In [3]:
# Define the base models
log_clf = LogisticRegression(max_iter=1000)
rf_clf = RandomForestClassifier()
svm_clf = SVC(probability=True)

# Hard Voting Classifier (majority voting)
voting_clf_hard = VotingClassifier(estimators=[('lr', log_clf), ('rf', rf_clf), ('svc', svm_clf)], voting='hard')

# Soft Voting Classifier (average probability)
voting_clf_soft = VotingClassifier(estimators=[('lr', log_clf), ('rf', rf_clf), ('svc', svm_clf)], voting='soft')


In [4]:
# Bagging Classifier using Decision Tree
bag_clf = BaggingClassifier(DecisionTreeClassifier(), n_estimators=100, random_state=42)

# Gradient Boosting Classifier
gb_clf = GradientBoostingClassifier(n_estimators=100, random_state=42)


In [5]:
# Train and evaluate on Iris dataset
voting_clf_hard.fit(X_train_iris, y_train_iris)
y_pred_iris = voting_clf_hard.predict(X_test_iris)
print("Voting Classifier (Hard Voting) Accuracy on Iris:", accuracy_score(y_test_iris, y_pred_iris))

voting_clf_soft.fit(X_train_iris, y_train_iris)
y_pred_iris = voting_clf_soft.predict(X_test_iris)
print("Voting Classifier (Soft Voting) Accuracy on Iris:", accuracy_score(y_test_iris, y_pred_iris))

bag_clf.fit(X_train_iris, y_train_iris)
y_pred_iris = bag_clf.predict(X_test_iris)
print("Bagging Classifier Accuracy on Iris:", accuracy_score(y_test_iris, y_pred_iris))

gb_clf.fit(X_train_iris, y_train_iris)
y_pred_iris = gb_clf.predict(X_test_iris)
print("Gradient Boosting Classifier Accuracy on Iris:", accuracy_score(y_test_iris, y_pred_iris))


Voting Classifier (Hard Voting) Accuracy on Iris: 1.0
Voting Classifier (Soft Voting) Accuracy on Iris: 1.0
Bagging Classifier Accuracy on Iris: 1.0
Gradient Boosting Classifier Accuracy on Iris: 1.0


In [6]:
# Train and evaluate on Wine Quality dataset
voting_clf_hard.fit(X_train_wine, y_train_wine)
y_pred_wine = voting_clf_hard.predict(X_test_wine)
print("Voting Classifier (Hard Voting) Accuracy on Wine:", accuracy_score(y_test_wine, y_pred_wine))

voting_clf_soft.fit(X_train_wine, y_train_wine)
y_pred_wine = voting_clf_soft.predict(X_test_wine)
print("Voting Classifier (Soft Voting) Accuracy on Wine:", accuracy_score(y_test_wine, y_pred_wine))

bag_clf.fit(X_train_wine, y_train_wine)
y_pred_wine = bag_clf.predict(X_test_wine)
print("Bagging Classifier Accuracy on Wine:", accuracy_score(y_test_wine, y_pred_wine))

gb_clf.fit(X_train_wine, y_train_wine)
y_pred_wine = gb_clf.predict(X_test_wine)
print("Gradient Boosting Classifier Accuracy on Wine:", accuracy_score(y_test_wine, y_pred_wine))


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Voting Classifier (Hard Voting) Accuracy on Wine: 0.6208333333333333


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Voting Classifier (Soft Voting) Accuracy on Wine: 0.63125
Bagging Classifier Accuracy on Wine: 0.6416666666666667
Gradient Boosting Classifier Accuracy on Wine: 0.63125


In [7]:
from sklearn.model_selection import cross_val_score

# Perform 5-fold cross-validation for Iris dataset
print("Cross-Validation Results for Iris Dataset:")

cv_scores_hard_iris = cross_val_score(voting_clf_hard, X_iris, y_iris, cv=5)
print("Voting (Hard):", cv_scores_hard_iris.mean())

cv_scores_soft_iris = cross_val_score(voting_clf_soft, X_iris, y_iris, cv=5)
print("Voting (Soft):", cv_scores_soft_iris.mean())

cv_scores_bag_iris = cross_val_score(bag_clf, X_iris, y_iris, cv=5)
print("Bagging:", cv_scores_bag_iris.mean())

cv_scores_gb_iris = cross_val_score(gb_clf, X_iris, y_iris, cv=5)
print("Gradient Boosting:", cv_scores_gb_iris.mean())


Cross-Validation Results for Iris Dataset:
Voting (Hard): 0.9666666666666668
Voting (Soft): 0.9666666666666668
Bagging: 0.9600000000000002
Gradient Boosting: 0.9600000000000002


In [8]:
# Perform 5-fold cross-validation for Wine Quality dataset
print("\nCross-Validation Results for Wine Quality Dataset:")

cv_scores_hard_wine = cross_val_score(voting_clf_hard, X_wine, y_wine, cv=5)
print("Voting (Hard):", cv_scores_hard_wine.mean())

cv_scores_soft_wine = cross_val_score(voting_clf_soft, X_wine, y_wine, cv=5)
print("Voting (Soft):", cv_scores_soft_wine.mean())

cv_scores_bag_wine = cross_val_score(bag_clf, X_wine, y_wine, cv=5)
print("Bagging:", cv_scores_bag_wine.mean())

cv_scores_gb_wine = cross_val_score(gb_clf, X_wine, y_wine, cv=5)
print("Gradient Boosting:", cv_scores_gb_wine.mean())



Cross-Validation Results for Wine Quality Dataset:


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Voting (Hard): 0.559735501567398


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Voting (Soft): 0.5903644200626961
Bagging: 0.5634874608150471
Gradient Boosting: 0.5647198275862069


In [9]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingClassifier, BaggingClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_iris
import requests

# Function to load the Wine Quality dataset
def load_wine_data():
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
    data = pd.read_csv(url, sep=';')
    X = data.drop('quality', axis=1)
    y = data['quality']
    return X, y

# Function to load the Iris dataset
def load_iris_data():
    iris = load_iris()
    X = iris.data
    y = iris.target
    return X, y

# Function to train classifiers and compare their accuracies
def evaluate_classifiers(X, y):
    # Splitting the dataset into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Initializing classifiers
    dt = DecisionTreeClassifier(random_state=42)

    voting_clf = VotingClassifier(estimators=[
        ('dt', dt),
        ('bagging', BaggingClassifier(base_estimator=dt, n_estimators=10, random_state=42)),
        ('gb', GradientBoostingClassifier(n_estimators=100, random_state=42))
    ], voting='hard')

    bagging_clf = BaggingClassifier(base_estimator=dt, n_estimators=10, random_state=42)
    gb_clf = GradientBoostingClassifier(n_estimators=100, random_state=42)

    # Fit the classifiers
    voting_clf.fit(X_train, y_train)
    bagging_clf.fit(X_train, y_train)
    gb_clf.fit(X_train, y_train)

    # Predict and calculate accuracies
    models = {
        "Voting Classifier": voting_clf,
        "Bagging Classifier": bagging_clf,
        "Gradient Boosting Classifier": gb_clf
    }

    accuracies = {}
    for model_name, model in models.items():
        y_pred = model.predict(X_test)
        accuracies[model_name] = accuracy_score(y_test, y_pred)

    return accuracies

# Evaluate on Wine Quality Dataset
print("Wine Quality Dataset:")
X_wine, y_wine = load_wine_data()
wine_accuracies = evaluate_classifiers(X_wine, y_wine)
for model, accuracy in wine_accuracies.items():
    print(f"{model}: {accuracy:.4f}")

# Evaluate on Iris Dataset
print("\nIris Dataset:")
X_iris, y_iris = load_iris_data()
iris_accuracies = evaluate_classifiers(X_iris, y_iris)
for model, accuracy in iris_accuracies.items():
    print(f"{model}: {accuracy:.4f}")


Wine Quality Dataset:


TypeError: BaggingClassifier.__init__() got an unexpected keyword argument 'base_estimator'