https://www.kaggle.com/code/pavansanagapati/ensemble-learning-techniques-tutorial

In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score

In [2]:
# Load the iris dataset: the first four feature columns become X,
# the class labels become Y.
iris = load_iris()
X, Y = iris.data[:, :4], iris.target

In [3]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.20,
    random_state=42,
)

In [4]:
# Base models for the voting ensembles, as (label, estimator) pairs.
# SVC needs probability=True so that it can take part in soft voting.
# random_state is pinned on the stochastic estimators (SVC's probability
# calibration and the decision tree) so that repeated runs give the same
# scores, matching the seeded train/test split.
# NOTE(review): `multi_class` is reported as deprecated by recent
# scikit-learn releases — confirm against the installed version.
estimator = [
    ('LR', LogisticRegression(solver='lbfgs', multi_class='multinomial', max_iter=200)),
    ('SVC', SVC(gamma='auto', probability=True, random_state=42)),
    ('DTC', DecisionTreeClassifier(random_state=42)),
]

In [5]:
# Hard voting: build the ensemble from the base estimators, fit it on the
# training split, then predict the held-out samples.
hard_voting = VotingClassifier(estimators=estimator, voting='hard').fit(X_train, y_train)
y_pred = hard_voting.predict(X_test)

In [6]:
# Accuracy of the hard-voting predictions on the held-out split.
score = accuracy_score(y_test, y_pred)
# Accuracy is a fraction between 0 and 1, so it must be printed with a float
# format; the integer conversion `% d` truncated it to 0 or 1.
print("Hard Voting Score %0.2f" % score)

Hard Voting Score  1


In [7]:
from sklearn.model_selection import cross_val_score

# Cross-validate each base estimator on its own (5 folds over the full X, Y)
# and report the mean accuracy together with its standard deviation.
for label, clf in estimator:
    scores = cross_val_score(clf, X, Y, cv=5, scoring='accuracy')
    print(
        "Accuracy: %0.2f (+/- %0.2f) [%s]"
        % (scores.mean(), scores.std(), label)
    )

Accuracy: 0.97 (+/- 0.02) [LR]
Accuracy: 0.98 (+/- 0.02) [SVC]
Accuracy: 0.97 (+/- 0.04) [DTC]


In [None]:
# Soft voting: same base estimators, fitted on the training split and then
# used to predict the held-out samples.
soft_voting = VotingClassifier(estimators=estimator, voting='soft').fit(X_train, y_train)
y_pred = soft_voting.predict(X_test)

In [None]:
# Accuracy of the soft-voting predictions on the held-out split.
score = accuracy_score(y_test, y_pred)
# Accuracy is a fraction between 0 and 1, so it must be printed with a float
# format; the integer conversion `% d` truncated it to 0 or 1.
print("Soft Voting Score %0.2f" % score)

In [None]:
from sklearn.datasets import make_blobs
from matplotlib import pyplot
from pandas import DataFrame

In [None]:
# Generate a synthetic 2-D classification problem: 500 points around 3 centers.
# Note that this rebinds X (and introduces y) for the cells that follow.
X, y = make_blobs(
    n_samples=500,
    centers=3,
    n_features=2,
    cluster_std=2,
    random_state=2,
)

In [None]:
# Scatter plot of the generated points, one colour per class label.
df = DataFrame({'x': X[:, 0], 'y': X[:, 1], 'label': y})
colors = {0: 'red', 1: 'blue', 2: 'green'}
fig, ax = pyplot.subplots()
grouped = df.groupby('label')
# Draw every class on the shared axes so all groups end up in one figure.
for key, group in grouped:
    group.plot.scatter(x='x', y='y', ax=ax, label=key, color=colors[key])
pyplot.show()

In [None]:
# Split the current X, y 80/20 with a fixed seed; this rebinds the
# train/test variables used by the cells that follow.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=42,
)

In [None]:
# Refit the existing hard-voting ensemble on the current training split
# and score it on the matching test split.
hard_voting.fit(X_train, y_train)
y_pred = hard_voting.predict(X_test)
score = accuracy_score(y_test, y_pred)
# Accuracy is a fraction between 0 and 1, so it must be printed with a float
# format; the integer conversion `% d` truncated it to 0 or 1.
print("Hard Voting Score %0.2f" % score)

In [None]:
# Build a fresh soft-voting ensemble, fit it on the current training split
# and score it on the matching test split.
soft_voting = VotingClassifier(estimators=estimator, voting='soft')
soft_voting.fit(X_train, y_train)
y_pred = soft_voting.predict(X_test)
score = accuracy_score(y_test, y_pred)
# Accuracy is a fraction between 0 and 1, so it must be printed with a float
# format; the integer conversion `% d` truncated it to 0 or 1.
print("Soft Voting Score %0.2f" % score)

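Interesting examples, but I need to dig more deeply to see how these work. Or, more accurately, to see why they do not seem to be working reasonably: getting only 1 or 0 as answers is worrisome. Note that an accuracy score printed with an integer format such as `% d` is truncated to 0 or 1; a float format such as `%0.2f` shows the actual value.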