# Bagged Decision Trees for Classification

In [2]:
import pandas as pd
from sklearn.model_selection import KFold, cross_val_score
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

In [3]:
# Column labels handed to read_csv; passing `names` means the file is read
# without treating its first row as a header.
names = [
    'preg', 'plas', 'pres', 'skin', 'test',
    'mass', 'pedi', 'age', 'class',
]
dataframe = pd.read_csv('pima-indians-diabetes.data.csv', names=names)
array = dataframe.values

# Columns 0-7 are the model inputs, column 8 is the target.
X, Y = array[:, :8], array[:, 8]
seed = 7

In [5]:
# 10-fold cross-validation of a bagged ensemble of decision trees.
kfold = KFold(n_splits=10)
cart = DecisionTreeClassifier()
num_trees = 100
# The base learner is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4, so the positional form runs on both older and newer releases.
model = BaggingClassifier(cart, n_estimators=num_trees, random_state=seed)
results = cross_val_score(model, X, Y, cv=kfold)
# Report the mean score across the folds.
print(results.mean())

0.7720437457279563


# Random Forest Classification

In [6]:
import pandas as pd
from sklearn.model_selection import KFold, cross_val_score
from sklearn.ensemble import RandomForestClassifier

In [7]:
# Inputs are columns 0-7 of the loaded array, the target is column 8.
X = array[:,0:8]
Y = array[:,8]

num_trees = 100
max_features = 3
seed = 7
kfold = KFold(n_splits=10)
# random_state fixes the forest's internal randomness so that re-running the
# cell reproduces the same fold scores instead of a new set every time.
# NOTE(review): scores recorded before the seed was added will not match
# exactly; re-run the cell to refresh the stored output.
model = RandomForestClassifier(
    n_estimators=num_trees,
    max_features=max_features,
    random_state=seed,
)
results = cross_val_score(model, X, Y, cv=kfold)
results

array([0.63636364, 0.85714286, 0.71428571, 0.62337662, 0.77922078,
       0.83116883, 0.81818182, 0.83116883, 0.68421053, 0.76315789])

In [8]:
# Collapse the per-fold scores returned by cross_val_score into their mean.
print(results.mean())

0.7538277511961723


# AdaBoost Classification

In [9]:
import pandas as pd
from sklearn.model_selection import KFold, cross_val_score
from sklearn.ensemble import AdaBoostClassifier

In [13]:
# Read the diabetes CSV with explicit column labels and expose it as an array.
names = [
    'preg', 'plas', 'pres', 'skin', 'test',
    'mass', 'pedi', 'age', 'class',
]
dataframe = pd.read_csv('pima-indians-diabetes.data.csv', names=names)
array = dataframe.values

# Split the array: the first eight columns are inputs, the ninth is the target.
X, Y = array[:, :8], array[:, 8]

# Settings consumed by the AdaBoost cell that follows.
num_trees, seed = 30, 7

In [14]:
# Score a seeded AdaBoost ensemble with 10-fold cross-validation.
kfold = KFold(n_splits=10)
model = AdaBoostClassifier(
    n_estimators=num_trees,
    random_state=seed,
)
results = cross_val_score(estimator=model, X=X, y=Y, cv=kfold)
# Leave the per-fold scores as the cell's displayed value.
results

array([0.67532468, 0.80519481, 0.7012987 , 0.68831169, 0.75324675,
       0.80519481, 0.79220779, 0.83116883, 0.73684211, 0.81578947])

In [15]:
# Print the mean of the per-fold scores produced by cross_val_score.
print(results.mean())

0.760457963089542


# Voting Ensemble for Classification

In [16]:
import pandas as pd
from sklearn.model_selection import KFold, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier

In [17]:
# Labels for the nine CSV columns (eight inputs followed by the target).
names = [
    'preg', 'plas', 'pres', 'skin', 'test',
    'mass', 'pedi', 'age', 'class',
]
dataframe = pd.read_csv('pima-indians-diabetes.data.csv', names=names)
array = dataframe.values

# Inputs come from columns 0-7; the target is column 8.
X, Y = array[:, :8], array[:, 8]


In [18]:
kfold = KFold(n_splits=10)
seed = 7

# create the submodels
model1 = LogisticRegression(max_iter=500)
# The tree is seeded so that repeated runs of this cell give the same scores.
# NOTE(review): scores recorded before the seed was added will not match
# exactly; re-run the cell to refresh the stored output.
model2 = DecisionTreeClassifier(random_state=seed)
model3 = SVC()

# (name, estimator) pairs in the form VotingClassifier expects
estimators = [
    ('logistic', model1),
    ('cart', model2),
    ('svm', model3),
]

# create the ensemble model (VotingClassifier with its default voting scheme)
ensemble = VotingClassifier(estimators)
results = cross_val_score(ensemble, X, Y, cv=kfold)
results

array([0.64935065, 0.80519481, 0.72727273, 0.64935065, 0.80519481,
       0.80519481, 0.83116883, 0.85714286, 0.75      , 0.77631579])

In [19]:
# Summarise the cross-validation run as the mean of its per-fold scores.
print(results.mean())

0.7656185919343814
