In [17]:
# load datasets
>>> from sklearn import datasets
>>> import numpy as np
>>> iris = datasets.load_iris()
>>> X = iris.data[:, [0,1,2,3]]
>>> y = iris.target
>>> print('Class labels:', np.unique(y))

Class labels: [0 1 2]


In [18]:
# splitting and standardizing the data
>>> from sklearn.model_selection import train_test_split
>>> X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1, stratify=y)
>>> from sklearn.preprocessing import StandardScaler
>>> sc = StandardScaler()
>>> sc.fit(X_train)
>>> X_train_std = sc.transform(X_train)
>>> X_test_std = sc.transform(X_test)

In [19]:
# creating Adaboost classifier and training the data
>>> from sklearn.ensemble  import AdaBoostClassifier 
>>> ada = AdaBoostClassifier(n_estimators=500,learning_rate = 0.1, random_state=1)
>>> ada.fit(X_train_std, y_train)
>>> y_pred_1= ada.predict(X_train_std)
>>> y_pred_2 = ada.predict(X_test_std)
>>> from sklearn.metrics import accuracy_score
>>> print('train accuracy: %.2f' % accuracy_score(y_train, y_pred_1))
>>> print('test accuracy: %.2f' % accuracy_score(y_test,y_pred_2))
>>> from sklearn.metrics import confusion_matrix
>>> CM_1 = confusion_matrix(y_train,y_pred_1)
>>> print(CM_1)
>>> CM_2 = confusion_matrix(y_test,y_pred_2)
>>> print(CM_2)

train accuracy: 0.93
test accuracy: 0.93
[[35  0  0]
 [ 0 35  0]
 [ 0  7 28]]
[[15  0  0]
 [ 0 15  0]
 [ 0  3 12]]


In [20]:
# creating Adaboost classifier with Decision tree classifier at base with gini impurity and depth of 1  and training the data
>>> from sklearn.tree import DecisionTreeClassifier
>>> from sklearn.ensemble  import AdaBoostClassifier 
>>> tree = DecisionTreeClassifier(criterion='gini',max_depth = 1, random_state=1)
>>> ada = AdaBoostClassifier(base_estimator = tree,n_estimators=500,learning_rate = 0.1, random_state=1)
>>> ada.fit(X_train_std, y_train)
>>> y_pred_1= ada.predict(X_train_std)
>>> y_pred_2 = ada.predict(X_test_std)
>>> from sklearn.metrics import accuracy_score
>>> print('train accuracy: %.2f' % accuracy_score(y_train, y_pred_1))
>>> print('test accuracy: %.2f' % accuracy_score(y_test,y_pred_2))
>>> from sklearn.metrics import confusion_matrix
>>> CM_1 = confusion_matrix(y_train,y_pred_1)
>>> print(CM_1)
>>> CM_2 = confusion_matrix(y_test,y_pred_2)
>>> print(CM_2)

train accuracy: 0.93
test accuracy: 0.93
[[35  0  0]
 [ 0 35  0]
 [ 0  7 28]]
[[15  0  0]
 [ 0 15  0]
 [ 0  3 12]]


In [21]:
# creating Adaboost classifier with Decision tree classifier at base with gini impurity and depth of 2 and training the data
>>> from sklearn.tree import DecisionTreeClassifier
>>> from sklearn.ensemble  import AdaBoostClassifier 
>>> tree = DecisionTreeClassifier(criterion='gini',max_depth = 2, random_state=1)
>>> ada = AdaBoostClassifier(base_estimator = tree,n_estimators=500,learning_rate = 0.1, random_state=1)
>>> ada.fit(X_train_std, y_train)
>>> y_pred_1= ada.predict(X_train_std)
>>> y_pred_2 = ada.predict(X_test_std)
>>> from sklearn.metrics import accuracy_score
>>> print('train accuracy: %.2f' % accuracy_score(y_train, y_pred_1))
>>> print('test accuracy: %.2f' % accuracy_score(y_test,y_pred_2))
>>> from sklearn.metrics import confusion_matrix
>>> CM_1 = confusion_matrix(y_train,y_pred_1)
>>> print(CM_1)
>>> CM_2 = confusion_matrix(y_test,y_pred_2)
>>> print(CM_2)

train accuracy: 1.00
test accuracy: 0.93
[[35  0  0]
 [ 0 35  0]
 [ 0  0 35]]
[[15  0  0]
 [ 0 13  2]
 [ 0  1 14]]


In [22]:
# creating Adaboost classifier with Decision tree classifier at base with gini impurity and depth of 3 and training the data
>>> from sklearn.tree import DecisionTreeClassifier
>>> from sklearn.ensemble  import AdaBoostClassifier 
>>> tree = DecisionTreeClassifier(criterion='gini',max_depth = 3, random_state=1)
>>> ada = AdaBoostClassifier(base_estimator = tree,n_estimators=500,learning_rate = 0.1, random_state=1)
>>> ada.fit(X_train_std, y_train)
>>> y_pred_1= ada.predict(X_train_std)
>>> y_pred_2 = ada.predict(X_test_std)
>>> from sklearn.metrics import accuracy_score
>>> print('train accuracy: %.2f' % accuracy_score(y_train, y_pred_1))
>>> print('test accuracy: %.2f' % accuracy_score(y_test,y_pred_2))
>>> from sklearn.metrics import confusion_matrix
>>> CM_1 = confusion_matrix(y_train,y_pred_1)
>>> print(CM_1)
>>> CM_2 = confusion_matrix(y_test,y_pred_2)
>>> print(CM_2)

train accuracy: 1.00
test accuracy: 0.93
[[35  0  0]
 [ 0 35  0]
 [ 0  0 35]]
[[15  0  0]
 [ 0 13  2]
 [ 0  1 14]]


In [23]:
# creating Adaboost classifier with Decision tree classifier at base with gini impurity and depth of 4 and training the data
>>> from sklearn.tree import DecisionTreeClassifier
>>> from sklearn.ensemble  import AdaBoostClassifier 
>>> tree = DecisionTreeClassifier(criterion='gini',max_depth = 4, random_state=1)
>>> ada = AdaBoostClassifier(base_estimator = tree,n_estimators=500,learning_rate = 0.1, random_state=1)
>>> ada.fit(X_train_std, y_train)
>>> y_pred_1= ada.predict(X_train_std)
>>> y_pred_2 = ada.predict(X_test_std)
>>> from sklearn.metrics import accuracy_score
>>> print('train accuracy: %.2f' % accuracy_score(y_train, y_pred_1))
>>> print('test accuracy: %.2f' % accuracy_score(y_test,y_pred_2))
>>> from sklearn.metrics import confusion_matrix
>>> CM_1 = confusion_matrix(y_train,y_pred_1)
>>> print(CM_1)
>>> CM_2 = confusion_matrix(y_test,y_pred_2)
>>> print(CM_2)

train accuracy: 1.00
test accuracy: 0.96
[[35  0  0]
 [ 0 35  0]
 [ 0  0 35]]
[[15  0  0]
 [ 0 14  1]
 [ 0  1 14]]


In [24]:
# creating Adaboost classifier with Decision tree classifier at base with gini impurity and depth of 5 and training the data
>>> from sklearn.tree import DecisionTreeClassifier
>>> from sklearn.ensemble  import AdaBoostClassifier 
>>> tree = DecisionTreeClassifier(criterion='gini',max_depth = 5, random_state=1)
>>> ada = AdaBoostClassifier(base_estimator = tree,n_estimators=500,learning_rate = 0.1, random_state=1)
>>> ada.fit(X_train_std, y_train)
>>> y_pred_1= ada.predict(X_train_std)
>>> y_pred_2 = ada.predict(X_test_std)
>>> from sklearn.metrics import accuracy_score
>>> print('train accuracy: %.2f' % accuracy_score(y_train, y_pred_1))
>>> print('test accuracy: %.2f' % accuracy_score(y_test,y_pred_2))
>>> from sklearn.metrics import confusion_matrix
>>> CM_1 = confusion_matrix(y_train,y_pred_1)
>>> print(CM_1)
>>> CM_2 = confusion_matrix(y_test,y_pred_2)
>>> print(CM_2)

train accuracy: 1.00
test accuracy: 0.98
[[35  0  0]
 [ 0 35  0]
 [ 0  0 35]]
[[15  0  0]
 [ 0 15  0]
 [ 0  1 14]]


In [25]:
# creating Adaboost classifier with Decision tree classifier at base with gini impurity and depth of 6 and training the data
>>> from sklearn.tree import DecisionTreeClassifier
>>> from sklearn.ensemble  import AdaBoostClassifier 
>>> tree = DecisionTreeClassifier(criterion='gini',max_depth = 6, random_state=1)
>>> ada = AdaBoostClassifier(base_estimator = tree,n_estimators=500,learning_rate = 0.1, random_state=1)
>>> ada.fit(X_train_std, y_train)
>>> y_pred_1= ada.predict(X_train_std)
>>> y_pred_2 = ada.predict(X_test_std)
>>> from sklearn.metrics import accuracy_score
>>> print('train accuracy: %.2f' % accuracy_score(y_train, y_pred_1))
>>> print('test accuracy: %.2f' % accuracy_score(y_test,y_pred_2))
>>> from sklearn.metrics import confusion_matrix
>>> CM_1 = confusion_matrix(y_train,y_pred_1)
>>> print(CM_1)
>>> CM_2 = confusion_matrix(y_test,y_pred_2)
>>> print(CM_2)

train accuracy: 1.00
test accuracy: 0.98
[[35  0  0]
 [ 0 35  0]
 [ 0  0 35]]
[[15  0  0]
 [ 0 15  0]
 [ 0  1 14]]


In [26]:
# creating Adaboost classifier with Decision tree classifier at base with entropy impurity and depth of 1  and training the data
>>> from sklearn.tree import DecisionTreeClassifier
>>> from sklearn.ensemble  import AdaBoostClassifier 
>>> tree = DecisionTreeClassifier(criterion='entropy',max_depth = 1, random_state=1)
>>> ada = AdaBoostClassifier(base_estimator = tree,n_estimators=500,learning_rate = 0.1, random_state=1)
>>> ada.fit(X_train_std, y_train)
>>> y_pred_1= ada.predict(X_train_std)
>>> y_pred_2 = ada.predict(X_test_std)
>>> from sklearn.metrics import accuracy_score
>>> print('train accuracy: %.2f' % accuracy_score(y_train, y_pred_1))
>>> print('test accuracy: %.2f' % accuracy_score(y_test,y_pred_2))
>>> from sklearn.metrics import confusion_matrix
>>> CM_1 = confusion_matrix(y_train,y_pred_1)
>>> print(CM_1)
>>> CM_2 = confusion_matrix(y_test,y_pred_2)
>>> print(CM_2)

train accuracy: 0.93
test accuracy: 0.93
[[35  0  0]
 [ 0 35  0]
 [ 0  7 28]]
[[15  0  0]
 [ 0 15  0]
 [ 0  3 12]]


In [27]:
# creating Adaboost classifier with Decision tree classifier at base with entropy impurity and depth of 2 and training the data
>>> from sklearn.tree import DecisionTreeClassifier
>>> from sklearn.ensemble  import AdaBoostClassifier 
>>> tree = DecisionTreeClassifier(criterion='entropy',max_depth = 2, random_state=1)
>>> ada = AdaBoostClassifier(base_estimator = tree,n_estimators=500,learning_rate = 0.1, random_state=1)
>>> ada.fit(X_train_std, y_train)
>>> y_pred_1= ada.predict(X_train_std)
>>> y_pred_2 = ada.predict(X_test_std)
>>> from sklearn.metrics import accuracy_score
>>> print('train accuracy: %.2f' % accuracy_score(y_train, y_pred_1))
>>> print('test accuracy: %.2f' % accuracy_score(y_test,y_pred_2))
>>> from sklearn.metrics import confusion_matrix
>>> CM_1 = confusion_matrix(y_train,y_pred_1)
>>> print(CM_1)
>>> CM_2 = confusion_matrix(y_test,y_pred_2)
>>> print(CM_2)

train accuracy: 1.00
test accuracy: 0.96
[[35  0  0]
 [ 0 35  0]
 [ 0  0 35]]
[[15  0  0]
 [ 0 14  1]
 [ 0  1 14]]


In [28]:
# creating Adaboost classifier with Decision tree classifier at base with entropy impurity and depth of 3 and training the data
>>> from sklearn.tree import DecisionTreeClassifier
>>> from sklearn.ensemble  import AdaBoostClassifier 
>>> tree = DecisionTreeClassifier(criterion='entropy',max_depth = 3, random_state=1)
>>> ada = AdaBoostClassifier(base_estimator = tree,n_estimators=500,learning_rate = 0.1, random_state=1)
>>> ada.fit(X_train_std, y_train)
>>> y_pred_1= ada.predict(X_train_std)
>>> y_pred_2 = ada.predict(X_test_std)
>>> from sklearn.metrics import accuracy_score
>>> print('train accuracy: %.2f' % accuracy_score(y_train, y_pred_1))
>>> print('test accuracy: %.2f' % accuracy_score(y_test,y_pred_2))
>>> from sklearn.metrics import confusion_matrix
>>> CM_1 = confusion_matrix(y_train,y_pred_1)
>>> print(CM_1)
>>> CM_2 = confusion_matrix(y_test,y_pred_2)
>>> print(CM_2)

train accuracy: 1.00
test accuracy: 0.96
[[35  0  0]
 [ 0 35  0]
 [ 0  0 35]]
[[15  0  0]
 [ 0 14  1]
 [ 0  1 14]]


In [29]:
# creating Adaboost classifier with Decision tree classifier at base with entropy impurity and depth of 4 and training the data
>>> from sklearn.tree import DecisionTreeClassifier
>>> from sklearn.ensemble  import AdaBoostClassifier 
>>> tree = DecisionTreeClassifier(criterion='entropy',max_depth = 4, random_state=1)
>>> ada = AdaBoostClassifier(base_estimator = tree,n_estimators=500,learning_rate = 0.1, random_state=1)
>>> ada.fit(X_train_std, y_train)
>>> y_pred_1= ada.predict(X_train_std)
>>> y_pred_2 = ada.predict(X_test_std)
>>> from sklearn.metrics import accuracy_score
>>> print('train accuracy: %.2f' % accuracy_score(y_train, y_pred_1))
>>> print('test accuracy: %.2f' % accuracy_score(y_test,y_pred_2))
>>> from sklearn.metrics import confusion_matrix
>>> CM_1 = confusion_matrix(y_train,y_pred_1)
>>> print(CM_1)
>>> CM_2 = confusion_matrix(y_test,y_pred_2)
>>> print(CM_2)

train accuracy: 1.00
test accuracy: 0.98
[[35  0  0]
 [ 0 35  0]
 [ 0  0 35]]
[[15  0  0]
 [ 0 15  0]
 [ 0  1 14]]


In [30]:
# creating Adaboost classifier with Decision tree classifier at base with entropy impurity and depth of 5 and training the data
>>> from sklearn.tree import DecisionTreeClassifier
>>> from sklearn.ensemble  import AdaBoostClassifier 
>>> tree = DecisionTreeClassifier(criterion='entropy',max_depth = 5, random_state=1)
>>> ada = AdaBoostClassifier(base_estimator = tree,n_estimators=500,learning_rate = 0.1, random_state=1)
>>> ada.fit(X_train_std, y_train)
>>> y_pred_1= ada.predict(X_train_std)
>>> y_pred_2 = ada.predict(X_test_std)
>>> from sklearn.metrics import accuracy_score
>>> print('train accuracy: %.2f' % accuracy_score(y_train, y_pred_1))
>>> print('test accuracy: %.2f' % accuracy_score(y_test,y_pred_2))
>>> from sklearn.metrics import confusion_matrix
>>> CM_1 = confusion_matrix(y_train,y_pred_1)
>>> print(CM_1)
>>> CM_2 = confusion_matrix(y_test,y_pred_2)
>>> print(CM_2)

train accuracy: 1.00
test accuracy: 0.98
[[35  0  0]
 [ 0 35  0]
 [ 0  0 35]]
[[15  0  0]
 [ 0 15  0]
 [ 0  1 14]]


In [31]:
# creating Adaboost classifier with Decision tree classifier at base with entropy impurity and depth of 6 and training the data
>>> from sklearn.tree import DecisionTreeClassifier
>>> from sklearn.ensemble  import AdaBoostClassifier 
>>> tree = DecisionTreeClassifier(criterion='entropy',max_depth = 6, random_state=1)
>>> ada = AdaBoostClassifier(base_estimator = tree,n_estimators=500,learning_rate = 0.1, random_state=1)
>>> ada.fit(X_train_std, y_train)
>>> y_pred_1= ada.predict(X_train_std)
>>> y_pred_2 = ada.predict(X_test_std)
>>> from sklearn.metrics import accuracy_score
>>> print('train accuracy: %.2f' % accuracy_score(y_train, y_pred_1))
>>> print('test accuracy: %.2f' % accuracy_score(y_test,y_pred_2))
>>> from sklearn.metrics import confusion_matrix
>>> CM_1 = confusion_matrix(y_train,y_pred_1)
>>> print(CM_1)
>>> CM_2 = confusion_matrix(y_test,y_pred_2)
>>> print(CM_2)

train accuracy: 1.00
test accuracy: 0.96
[[35  0  0]
 [ 0 35  0]
 [ 0  0 35]]
[[15  0  0]
 [ 0 15  0]
 [ 0  2 13]]


Comparison between Decision tree and AdaBoost classifiers

1. In comparison with the Decision tree classifier, the training accuracy of the AdaBoost classifier is 100% in all cases except the one where the maximum depth of the base classifier (Decision tree) is 1. This supports the key concept behind boosting, which is to let the weak learners subsequently learn from misclassified training samples to improve the performance of the ensemble.
2. The AdaBoost classifier fits the test data well even when the depth of the base classifier (Decision tree with gini impurity) is 1. The test accuracy generally increases as we increase the depth of the base classifier (from 0.93 at depths 1-3 to 0.98 at depths 5-6). However, compared to the Decision tree classifier, the AdaBoost classifier doesn't fit the test data as well in the remaining cases, where the depth ranges from 2 to 6.
3. Also, with entropy impurity for the base classifier, the test accuracy is high when the depth is 1. However, the train accuracy doesn't increase steadily as we increase the depth of the base classifier: it jumps from 0.93 at depth 1 to 1.00 at depth 2 and stays there.
4. In contrast with the gini impurity case, with entropy impurity the test accuracy of the AdaBoost classifier is similar to that of the Decision tree classifier.
5. In almost all cases, the model doesn't fit the test data as well as it fits the training data, which indicates overfitting. This is because the base classifier we used turns out to be a complex one.