In [1]:
import pandas as pd 
from pandas import read_csv
from sklearn.model_selection import KFold          # For getting samples of data
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier      # Base learner will be decision tree
import warnings
warnings.filterwarnings('ignore')

## Bagging Tree Classifier

In [2]:
# Load the Pima Indians diabetes CSV; the column labels are supplied
# explicitly through `names` rather than taken from the file.
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
filename = "pima-indians-diabetes.data.csv"
dataframe = read_csv(filename, names=names)

# Work on the raw values: columns 0-7 are the inputs, column 8 is the target.
array = dataframe.values
x, y = array[:, 0:8], array[:, 8]

In [30]:
# 10-fold splitter; rows are shuffled with a fixed seed before being split.
kfold = KFold(n_splits=10, shuffle=True, random_state=7)

In [4]:
# Unfitted decision tree that will serve as the base learner
cart = DecisionTreeClassifier()

In [5]:
# Build the model using BaggingClassifier

In [6]:
num_trees = 150  # number of decision trees in the bagged ensemble

# The base learner is passed positionally on purpose: its keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 and the old name
# was removed in 1.4, so the positional form runs on both old and new releases.
model = BaggingClassifier(cart, n_estimators=num_trees, random_state=7)

In [7]:
# Cross-validate the bagged model. `kfold` defines 10 folds, so `results`
# holds 10 accuracies (one per fold); their average is taken afterwards.

results = cross_val_score(estimator=model, X=x, y=y, cv=kfold)

In [8]:
# Mean of the per-fold accuracies, expressed as a percentage

results.mean() * 100

76.3021189336979

Inference: The accuracy depends on the number of trees (`n_estimators`) we specify.

## Random Forest classifier

In [9]:
from sklearn.model_selection import KFold          # For getting samples of data
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier

In [10]:
# Inputs are the first 8 columns of `array`; the target is column 8.
x, y = array[:, 0:8], array[:, 8]

In [11]:
# Shuffled 10-fold splitter with a fixed seed (7)
kfold = KFold(n_splits=10, shuffle=True, random_state=7)

In [12]:
max_features = 3  # number of features considered when searching for each split
num_trees = 150   # number of trees in the forest

# Seed the forest with the same value (7) used by the other models and the
# KFold splitter in this notebook; without a seed the forest is rebuilt
# differently on every run and the reported accuracy cannot be reproduced.
model_r = RandomForestClassifier(
    n_estimators=num_trees,
    max_features=max_features,
    random_state=7,
)

In [13]:
# One accuracy per fold for the random forest
results = cross_val_score(estimator=model_r, X=x, y=y, cv=kfold)

In [14]:
# Average fold accuracy as a percentage
results.mean() * 100

77.21462747778537

## Adaboost Classification

In [15]:
from sklearn.model_selection import KFold          # For getting samples of data
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import AdaBoostClassifier

In [16]:
# Feature matrix (columns 0-7) and target vector (column 8)
x, y = array[:, 0:8], array[:, 8]

In [17]:
# Seeded, shuffled 10-fold cross-validation splitter
kfold = KFold(n_splits=10, shuffle=True, random_state=7)

In [18]:
# AdaBoost is a sequential ensemble technique, so only a small number of
# estimators (10) is used here to keep the training time short.
num_trees = 10

model_ad = AdaBoostClassifier(random_state=7, n_estimators=num_trees)

In [19]:
# One accuracy per fold for the AdaBoost model
results = cross_val_score(estimator=model_ad, X=x, y=y, cv=kfold)

In [20]:
# Average fold accuracy as a percentage
results.mean() * 100


76.69685577580314

## Stacking (Voting) Ensemble for Classification

In [21]:
from sklearn.model_selection import KFold          # For getting samples of data
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier      # For stacking ensemble we will use voting classifier

In [22]:
# Columns 0-7 of `array` are the predictors, column 8 is the label.
x, y = array[:, 0:8], array[:, 8]

In [23]:
# Same evaluation protocol as before: 10 shuffled folds, seed 7
kfold = KFold(n_splits=10, shuffle=True, random_state=7)

In [25]:
# The ensemble combines different kinds of base learners, so they are
# collected as a list of (name, estimator) pairs.
# The decision tree is given a fixed seed (7, the value used throughout this
# notebook) so that it no longer varies from one run to the next.

model1 = LogisticRegression(max_iter=500)
model2 = DecisionTreeClassifier(random_state=7)
model3 = SVC()
estimators = [
    ('LogisticRegression', model1),
    ('DecisionTree', model2),
    ('SVC', model3),
]

In [26]:
# Combine the base learners into the final ensemble. No `voting` argument is
# given, so VotingClassifier uses its default majority-rule ('hard') voting.

model_stack = VotingClassifier(estimators=estimators)

In [27]:
# One accuracy per fold for the combined ensemble
results = cross_val_score(estimator=model_stack, X=x, y=y, cv=kfold)

In [28]:
# Average fold accuracy as a percentage
results.mean() * 100

77.21633629528367