In [1]:
from sklearn.datasets import load_iris
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingClassifier, BaggingClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier


In [2]:

#
iris = load_iris()
df=pd.DataFrame(iris.data, columns=['SW','SL','PW','PL'])
df['Species']=iris.target

X_iris = df.drop('Species',axis=1)
y_iris = df['Species']

wine_quality_url = r'datasets\wine+quality\winequality-red.csv'
wine_data = pd.read_csv(wine_quality_url, delimiter=';')
X_wine = wine_data.drop('quality', axis=1)
y_wine = wine_data['quality']


In [3]:
# Split the Iris dataset
X_train_iris, X_test_iris, y_train_iris, y_test_iris = train_test_split(X_iris, y_iris, test_size=0.3, random_state=42)

# Split the Wine Quality dataset
X_train_wine, X_test_wine, y_train_wine, y_test_wine = train_test_split(X_wine, y_wine, test_size=0.3, random_state=42)


In [4]:
# Define the base models
log_clf = LogisticRegression()
rf_clf = RandomForestClassifier()
svm_clf = SVC(probability=True)

# Hard Voting Classifier (majority voting)
voting_clf_hard = VotingClassifier(estimators=[('lr', log_clf), ('rf', rf_clf), ('svc', svm_clf)], voting='hard')

# Soft Voting Classifier (average probability)
voting_clf_soft = VotingClassifier(estimators=[('lr', log_clf), ('rf', rf_clf), ('svc', svm_clf)], voting='soft')


In [5]:
# Bagging Classifier using Decision Tree
bag_clf = BaggingClassifier(DecisionTreeClassifier(), n_estimators=100, random_state=42)

# Gradient Boosting Classifier
gb_clf = GradientBoostingClassifier(n_estimators=100, random_state=42)


In [12]:
# Train and evaluate on Iris dataset
voting_clf_hard.fit_transform(X_train_iris, y_train_iris)
y_pred_iris = voting_clf_hard.predict(X_test_iris)
print("Voting Classifier (Hard Voting) Accuracy on Iris:", accuracy_score(y_test_iris, y_pred_iris))

voting_clf_soft.fit_transform(X_train_iris, y_train_iris)
y_pred_iris = voting_clf_soft.predict(X_test_iris)
print("Voting Classifier (Soft Voting) Accuracy on Iris:", accuracy_score(y_test_iris, y_pred_iris))

bag_clf.fit(X_train_iris, y_train_iris)
y_pred_iris = bag_clf.predict(X_test_iris)
print("Bagging Classifier Accuracy on Iris:", accuracy_score(y_test_iris, y_pred_iris))

gb_clf.fit(X_train_iris, y_train_iris)
y_pred_iris = gb_clf.predict(X_test_iris)
print("Gradient Boosting Classifier Accuracy on Iris:", accuracy_score(y_test_iris, y_pred_iris))


Voting Classifier (Hard Voting) Accuracy on Iris: 1.0
Voting Classifier (Soft Voting) Accuracy on Iris: 1.0
Bagging Classifier Accuracy on Iris: 1.0
Gradient Boosting Classifier Accuracy on Iris: 1.0


In [7]:
# Train and evaluate on Wine Quality dataset
voting_clf_hard.fit(X_train_wine, y_train_wine)
y_pred_wine = voting_clf_hard.predict(X_test_wine)
print("Voting Classifier (Hard Voting) Accuracy on Wine:", accuracy_score(y_test_wine, y_pred_wine))

voting_clf_soft.fit(X_train_wine, y_train_wine)
y_pred_wine = voting_clf_soft.predict(X_test_wine)
print("Voting Classifier (Soft Voting) Accuracy on Wine:", accuracy_score(y_test_wine, y_pred_wine))

bag_clf.fit(X_train_wine, y_train_wine)
y_pred_wine = bag_clf.predict(X_test_wine)
print("Bagging Classifier Accuracy on Wine:", accuracy_score(y_test_wine, y_pred_wine))

gb_clf.fit(X_train_wine, y_train_wine)
y_pred_wine = gb_clf.predict(X_test_wine)
print("Gradient Boosting Classifier Accuracy on Wine:", accuracy_score(y_test_wine, y_pred_wine))


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Voting Classifier (Hard Voting) Accuracy on Wine: 0.6083333333333333


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Voting Classifier (Soft Voting) Accuracy on Wine: 0.6270833333333333
Bagging Classifier Accuracy on Wine: 0.6416666666666667
Gradient Boosting Classifier Accuracy on Wine: 0.63125


In [14]:
from sklearn.model_selection import cross_val_score,KFold
kf=KFold(n_splits=5,shuffle=True,random_state=42)

# Perform 5-fold cross-validation for Iris dataset
print("Cross-Validation Results for Iris Dataset:")

cv_scores_hard_iris = cross_val_score(voting_clf_hard, X_iris, y_iris, cv=kf)
print("Voting (Hard):", cv_scores_hard_iris.mean())

cv_scores_soft_iris = cross_val_score(voting_clf_soft, X_iris, y_iris, cv=5)
print("Voting (Soft):", cv_scores_soft_iris.mean())

cv_scores_bag_iris = cross_val_score(bag_clf, X_iris, y_iris, cv=5)
print("Bagging:", cv_scores_bag_iris.mean())

cv_scores_gb_iris = cross_val_score(gb_clf, X_iris, y_iris, cv=5)
print("Gradient Boosting:", cv_scores_gb_iris.mean())


Cross-Validation Results for Iris Dataset:
Voting (Hard): 0.9733333333333334


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Voting (Soft): 0.9666666666666668
Bagging: 0.9600000000000002
Gradient Boosting: 0.9600000000000002


In [None]:
# Perform 5-fold cross-validation for Wine Quality dataset
print("\nCross-Validation Results for Wine Quality Dataset:")

cv_scores_hard_wine = cross_val_score(voting_clf_hard, X_wine, y_wine, cv=5)
print("Voting (Hard):", cv_scores_hard_wine.mean())

cv_scores_soft_wine = cross_val_score(voting_clf_soft, X_wine, y_wine, cv=5)
print("Voting (Soft):", cv_scores_soft_wine.mean())

cv_scores_bag_wine = cross_val_score(bag_clf, X_wine, y_wine, cv=5)
print("Bagging:", cv_scores_bag_wine.mean())

cv_scores_gb_wine = cross_val_score(gb_clf, X_wine, y_wine, cv=5)
print("Gradient Boosting:", cv_scores_gb_wine.mean())



Cross-Validation Results for Wine Quality Dataset:


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Voting (Hard): 0.5578565830721003


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Voting (Soft): 0.5697296238244514
Bagging: 0.5634874608150471
Gradient Boosting: 0.5647198275862069
