Code from: https://www.analyticsvidhya.com/blog/2018/06/comprehensive-guide-for-ensemble-models/?utm_source=feedburner&utm_medium=email&utm_campaign=Feed%3A+AnalyticsVidhya+%28Analytics+Vidhya%29 (with minor changes)

## Simple Ensemble Techniques
### Max Voting Ensemble

In [60]:
from sklearn.datasets import load_iris
import numpy as np

# Load dataset and define train and test
iris = load_iris()
x = iris.data
y = iris.target

test_ratio = 0.3
instances = x.shape[0]
test_instances = int(instances * test_ratio)

index_arr = np.arange(instances)
np.random.shuffle(index_arr)

random_test_inst = index_arr[:test_instances]
random_train_inst = index_arr[test_instances:]

x_train = x[random_train_inst]
y_train = y[random_train_inst]
x_test = x[random_test_inst]
y_test = y[random_test_inst]

In [61]:
from sklearn import tree
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from scipy.stats import mode

# Train models and perform prediction
model1 = tree.DecisionTreeClassifier()
model2 = KNeighborsClassifier()
model3 = LogisticRegression()

model1.fit(x_train,y_train)
model2.fit(x_train,y_train)
model3.fit(x_train,y_train)

pred1=model1.predict(x_test)
pred2=model2.predict(x_test)
pred3=model3.predict(x_test)

# Max Voting Ensemble
final_pred = np.array([])
for i in range(0,len(x_test)):
    final_pred = np.append(final_pred, mode([pred1[i], pred2[i], pred3[i]])[0])

In [62]:
from sklearn.metrics import accuracy_score

accuracy_ens = accuracy_score(y_test, final_pred)
print(accuracy_ens)

accuracy_model1 = accuracy_score(y_test, pred1)
print(accuracy_model1)

accuracy_model2 = accuracy_score(y_test, pred2)
print(accuracy_model2)

accuracy_model3 = accuracy_score(y_test, pred3)
print(accuracy_model3)

0.9555555555555556
0.9333333333333333
0.9333333333333333
0.9777777777777777


#### Using 'VotingClassifier' module

In [63]:
from sklearn.ensemble import VotingClassifier
model1 = LogisticRegression(random_state=1)
model2 = tree.DecisionTreeClassifier(random_state=1)
model = VotingClassifier(estimators=[('lr', model1), ('dt', model2)], voting='hard')
model.fit(x_train,y_train)
model.score(x_test,y_test)

  if diff:


0.9333333333333333

### Averaging

In [65]:
model1 = tree.DecisionTreeClassifier()
model2 = KNeighborsClassifier()
model3= LogisticRegression()

model1.fit(x_train,y_train)
model2.fit(x_train,y_train)
model3.fit(x_train,y_train)

pred1=model1.predict_proba(x_test)
pred2=model2.predict_proba(x_test)
pred3=model3.predict_proba(x_test)

finalpred=(pred1+pred2+pred3)/3

### Weighted Average

In [66]:
model1 = tree.DecisionTreeClassifier()
model2 = KNeighborsClassifier()
model3= LogisticRegression()

model1.fit(x_train,y_train)
model2.fit(x_train,y_train)
model3.fit(x_train,y_train)

pred1=model1.predict_proba(x_test)
pred2=model2.predict_proba(x_test)
pred3=model3.predict_proba(x_test)

finalpred=(pred1*0.3+pred2*0.3+pred3*0.4)

## Advanced Ensemble Techniques
### Stacking

In [106]:
from sklearn.model_selection import StratifiedKFold

def Stacking(model, train, y, test, n_fold):
   folds = StratifiedKFold(n_splits=n_fold, random_state=1)
   test_pred = np.empty((0, 1), float)
   train_pred = np.empty((0, 1), float)

   for train_indices, val_indices in folds.split(train, y):
      x_train, x_val = train[train_indices], train[val_indices]
      y_train, y_val = y[train_indices], y[val_indices]

      model.fit(X=x_train, y=y_train)
      train_pred = np.append(train_pred, model.predict(x_val))
   
   test_pred = np.append(test_pred, model.predict(test))
   return test_pred.reshape(-1,1), train_pred

In [109]:
import pandas as pd
model1 = tree.DecisionTreeClassifier(random_state=1)

test_pred1, train_pred1 = Stacking(model=model1, n_fold=10, train=x_train, test=x_test, y=y_train)

train_pred1 = pd.DataFrame(train_pred1)
test_pred1 = pd.DataFrame(test_pred1)

In [111]:
model2 = KNeighborsClassifier()

test_pred2 ,train_pred2=Stacking(model=model2,n_fold=10,train=x_train,test=x_test,y=y_train)

train_pred2=pd.DataFrame(train_pred2)
test_pred2=pd.DataFrame(test_pred2)

In [112]:
# Logistic regression model on the predictions of DT and KNN
df = pd.concat([train_pred1, train_pred2], axis=1)
df_test = pd.concat([test_pred1, test_pred2], axis=1)

model = LogisticRegression(random_state=1)
model.fit(df,y_train)
model.score(df_test, y_test)

0.6222222222222222

### Blending