# Examples

In [1]:
from betterVotingEstimators import VotingClassifier,VotingRegressor

## Classification

In [2]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier as OriginalVotingClassifier

In [3]:
# Voting mode handed to the classifier ensembles in the cells below via `voting=voting`
voting = 'soft'

In [16]:
# Baseline: scikit-learn's stock VotingClassifier.
# `vanilla_preds` is the reference that later classification cells compare against.
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
y = np.array(['A', 'A', 'A', 'B', 'B', 'B'])

clf1 = LogisticRegression(multi_class='multinomial', random_state=1)
clf2 = RandomForestClassifier(n_estimators=50, random_state=1)
clf3 = GaussianNB()

eclf1 = OriginalVotingClassifier(
    estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)],
    voting=voting,
).fit(X, y)

vanilla_preds = eclf1.predict(X)
print(vanilla_preds)

['A' 'A' 'A' 'B' 'B' 'B']


In [17]:
# Modified VotingClassifier with all unfitted estimators - sanity check on weights:
# explicit uniform weights must give the same predictions as the vanilla ensemble,
# which was built without any weights.
clf1 = LogisticRegression(multi_class='multinomial', random_state=1)
clf2 = RandomForestClassifier(n_estimators=50, random_state=1)
clf3 = GaussianNB()
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
y = np.array(['A', 'A', 'A', 'B', 'B', 'B'])

eclf1 = VotingClassifier(
    unfitted_estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)],
    weights=[1/3, 1/3, 1/3],
    voting=voting,
)
eclf1 = eclf1.fit(X, y)

mod_preds = eclf1.predict(X)
print(mod_preds)

# assert_array_equal also verifies the shapes and reports which elements differ,
# whereas `assert all(a == b)` fails with an unhelpful error on a shape mismatch.
np.testing.assert_array_equal(vanilla_preds, mod_preds)

['A' 'A' 'A' 'B' 'B' 'B']


In [None]:
# Modified VotingClassifier with all unfitted estimators and non-uniform weights:
# GaussianNB gets 90% of the vote, the other two estimators 5% each.
clf1 = LogisticRegression(multi_class='multinomial', random_state=1)
clf2 = RandomForestClassifier(n_estimators=50, random_state=1)
clf3 = GaussianNB()
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
y = np.array(['A', 'A', 'A', 'B', 'B', 'B'])

eclf1 = VotingClassifier(
    unfitted_estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)],
    weights=[0.05, 0.05, 0.9],
    voting=voting,
)
eclf1 = eclf1.fit(X, y)

mod_preds = eclf1.predict(X)
print(mod_preds)

# The skewed weights are expected to leave the predictions on this toy data unchanged.
# assert_array_equal also verifies the shapes and reports which elements differ.
np.testing.assert_array_equal(vanilla_preds, mod_preds)

In [6]:
# Modified VotingClassifier with all fitted estimators with same data
clf1 = LogisticRegression(multi_class='multinomial', random_state=1)
clf2 = RandomForestClassifier(n_estimators=50, random_state=1)
clf3 = GaussianNB()
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
y = np.array(['A', 'A', 'A', 'B', 'B', 'B'])

# Pre-fit every estimator on the same data before handing them to the ensemble
for est in (clf1, clf2, clf3):
    est.fit(X, y)

eclf1 = VotingClassifier(
    fitted_estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)],
    voting=voting,
)
# fit() is called without data: there are no unfitted estimators to train
eclf1 = eclf1.fit()

mod_preds = eclf1.predict(X)
print(mod_preds)

# assert_array_equal also verifies the shapes and reports which elements differ,
# whereas `assert all(a == b)` fails with an unhelpful error on a shape mismatch.
np.testing.assert_array_equal(vanilla_preds, mod_preds)

['A' 'A' 'A' 'B' 'B' 'B']


In [7]:
# Modified VotingClassifier with mixed fitted and unfitted estimators
clf1 = LogisticRegression(multi_class='multinomial', random_state=1)
clf2 = RandomForestClassifier(n_estimators=50, random_state=1)
clf3 = GaussianNB()
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
y = np.array(['A', 'A', 'A', 'B', 'B', 'B'])

# Pre-fit only the first two estimators; clf3 is deliberately left unfitted
for est in (clf1, clf2):
    est.fit(X, y)

eclf1 = VotingClassifier(
    fitted_estimators=[('lr', clf1), ('rf', clf2)],
    unfitted_estimators=[('gnb', clf3)],
    voting=voting,
)
eclf1 = eclf1.fit(X, y)

mod_preds = eclf1.predict(X)
print(mod_preds)

# assert_array_equal also verifies the shapes and reports which elements differ,
# whereas `assert all(a == b)` fails with an unhelpful error on a shape mismatch.
np.testing.assert_array_equal(vanilla_preds, mod_preds)

['A' 'A' 'A' 'B' 'B' 'B']


In [8]:
# Modified VotingClassifier with all fitted estimators with different data
clf1 = LogisticRegression(multi_class='multinomial', random_state=1)
clf2 = RandomForestClassifier(n_estimators=50, random_state=1)
clf3 = GaussianNB()
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
y1 = np.array(['A', 'A', 'A', 'B', 'B', 'B'])
y2 = np.array(['B', 'A', 'C', 'C', 'B', 'Z'])
y3 = np.array(['D', 'E', 'A', 'C', 'B', 'Z'])

# Fit each estimator on its own label vector.
# The loop variable is named `labels` so it does not overwrite the notebook-level `y`.
for est, labels in zip([clf1, clf2, clf3], [y1, y2, y3]):
    est.fit(X, labels)

eclf1 = VotingClassifier(
    fitted_estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)],
    voting=voting,
)
# fit() is called without data: there are no unfitted estimators to train
eclf1 = eclf1.fit()

mod_preds = eclf1.predict(X)
print(mod_preds)

['A' 'A' 'A' 'C' 'B' 'Z']


In [9]:
# Modified VotingClassifier with mix of fitted and unfitted estimators with different data

# Define our three classifiers
clf1 = LogisticRegression(multi_class='multinomial', random_state=1)
clf2 = RandomForestClassifier(n_estimators=50, random_state=1)
clf3 = GaussianNB()
# They all train on the same X
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])

# But on different ys
y1 = np.array(['A', 'A', 'A', 'B', 'B', 'B'])
y2 = np.array(['B', 'A', 'C', 'C', 'B', 'Z'])
y3 = np.array(['D', 'E', 'A', 'C', 'B', 'Z'])

# We fit two estimators up front.
# The loop variable is named `labels` so it does not overwrite the notebook-level `y`.
for est, labels in zip([clf1, clf2], [y1, y2]):
    est.fit(X, labels)

# Instantiate the modified VotingClassifier (hard voting in this example)
eclf1 = VotingClassifier(
    fitted_estimators=[('lr', clf1), ('rf', clf2)],
    unfitted_estimators=[('gnb', clf3)],
    voting='hard'
)
# Call fit - which will fit the unfitted_estimators with X and y3
eclf1 = eclf1.fit(X, y3)

# Make our predictions
mod_preds = eclf1.predict(X)
print(mod_preds)

['A' 'A' 'A' 'C' 'B' 'Z']


## Regression

In [10]:
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import VotingRegressor as OriginalVotingRegressor
from sklearn.neighbors import KNeighborsRegressor

In [11]:
# Baseline: scikit-learn's stock VotingRegressor.
# `vanilla_preds` is the reference that the regression cells below compare against.
X = np.array([[1, 1], [2, 4], [3, 9], [4, 16], [5, 25], [6, 36]])
y = np.array([2, 6, 12, 20, 30, 42])

r1 = LinearRegression()
r2 = RandomForestRegressor(n_estimators=10, random_state=1)
r3 = KNeighborsRegressor()
er = OriginalVotingRegressor([('lr', r1), ('rf', r2), ('r3', r3)])

fitted_er = er.fit(X, y)
vanilla_preds = fitted_er.predict(X)

In [12]:
# Modified VotingRegressor with all unfitted estimators
r1 = LinearRegression()
r2 = RandomForestRegressor(n_estimators=10, random_state=1)
r3 = KNeighborsRegressor()
X = np.array([[1, 1], [2, 4], [3, 9], [4, 16], [5, 25], [6, 36]])
y = np.array([2, 6, 12, 20, 30, 42])
er = VotingRegressor([('lr', r1), ('rf', r2), ('r3', r3)])

mod_preds = er.fit(X, y).predict(X)

# Compare floats with a tolerance instead of exact equality; assert_allclose also
# verifies the shapes and reports the largest deviation on failure.
np.testing.assert_allclose(mod_preds, vanilla_preds)

In [13]:
# Modified VotingRegressor with all fitted estimators
r1 = LinearRegression()
r2 = RandomForestRegressor(n_estimators=10, random_state=1)
r3 = KNeighborsRegressor()
X = np.array([[1, 1], [2, 4], [3, 9], [4, 16], [5, 25], [6, 36]])
y = np.array([2, 6, 12, 20, 30, 42])

# Pre-fit every regressor before handing them to the ensemble
for est in (r1, r2, r3):
    est.fit(X, y)

er = VotingRegressor(fitted_estimators=[('lr', r1), ('rf', r2), ('r3', r3)])

# fit() is called without data: there are no unfitted estimators to train
mod_preds = er.fit().predict(X)

# Compare floats with a tolerance instead of exact equality; assert_allclose also
# verifies the shapes and reports the largest deviation on failure.
np.testing.assert_allclose(mod_preds, vanilla_preds)

In [14]:
# Modified VotingRegressor with mix of unfitted and fitted estimators
r1 = LinearRegression()
r2 = RandomForestRegressor(n_estimators=10, random_state=1)
r3 = KNeighborsRegressor()
X = np.array([[1, 1], [2, 4], [3, 9], [4, 16], [5, 25], [6, 36]])
y = np.array([2, 6, 12, 20, 30, 42])

# Pre-fit only the first two regressors; r3 is deliberately left unfitted
for est in (r1, r2):
    est.fit(X, y)

er = VotingRegressor(
    fitted_estimators=[('lr', r1), ('rf', r2)],
    unfitted_estimators=[('r3', r3)],
    n_jobs=-1,
)

mod_preds = er.fit(X, y).predict(X)

# Compare floats with a tolerance instead of exact equality; assert_allclose also
# verifies the shapes and reports the largest deviation on failure.
np.testing.assert_allclose(mod_preds, vanilla_preds)