In [1]:
# Import All the required packages from sklearn
import numpy as np
from sklearn import model_selection
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB

In [2]:
#Load data 
iris = load_iris()
X = iris.data
Y = iris.target

In [16]:
#Split data in training and testing set 
X_train, X_test, Y_train, Y_test= model_selection.train_test_split( X, Y, test_size=0.30, random_state=1 )


In [4]:

#Create random sub sample to train multiple models
seed = 7
kfold = model_selection.KFold(n_splits=10, random_state=seed)

In [5]:
#Define a decision tree classifier
cart = DecisionTreeClassifier()
num_trees = 100


In [6]:
#Create classification model for bagging
model = BaggingClassifier(base_estimator=cart, n_estimators=num_trees, random_state=seed)


In [7]:
#Train different models and print their accuracy
results = model_selection.cross_val_score(model, X_fit, y_fit,cv=kfold)
for i in range(len(results)):
    print("Model: "+str(i)+" Accuracy is: "+str(results[i]))
    
print("Mean Accuracy is: "+str(results.mean()))

model.fit(X_fit, y_fit)
pred_label = model.predict(X_eval)
nnz = np.shape(y_test)[0] - np.count_nonzero(pred_label - y_test)
acc = 100*nnz/np.shape(y_test)[0]
print('accuracy is: '+str(acc))

#############################################

Model: 0 Accuracy is: 1.0
Model: 1 Accuracy is: 1.0
Model: 2 Accuracy is: 1.0
Model: 3 Accuracy is: 0.9090909090909091
Model: 4 Accuracy is: 1.0
Model: 5 Accuracy is: 1.0
Model: 6 Accuracy is: 0.9
Model: 7 Accuracy is: 1.0
Model: 8 Accuracy is: 1.0
Model: 9 Accuracy is: 0.7
Mean Accuracy is: 0.9509090909090908
accuracy is: 95.55555555555556


In [8]:

#Define a decision tree classifier
KNN = KNeighborsClassifier()
#base classifier is KNN
#Create classification model for bagging
model = BaggingClassifier(KNeighborsClassifier(),
			max_samples=0.5, max_features=0.5)
#0.5 is 50%

#Train different models and print their accuracy
results = model_selection.cross_val_score(model, X_fit, y_fit,cv=kfold)
for i in range(len(results)):
    print("Model KNN: "+str(i)+" Accuracy is: "+str(results[i]))
    
print("Mean Accuracy is: "+str(results.mean()))

model.fit(X_fit, y_fit)
pred_label = model.predict(X_eval)
nnz = np.shape(y_test)[0] - np.count_nonzero(pred_label - y_test)
acc = 100*nnz/np.shape(y_test)[0]
print('accuracy SVC is: '+str(acc))


Model KNN: 0 Accuracy is: 1.0
Model KNN: 1 Accuracy is: 0.9090909090909091
Model KNN: 2 Accuracy is: 0.9090909090909091
Model KNN: 3 Accuracy is: 0.9090909090909091
Model KNN: 4 Accuracy is: 1.0
Model KNN: 5 Accuracy is: 0.9
Model KNN: 6 Accuracy is: 0.8
Model KNN: 7 Accuracy is: 1.0
Model KNN: 8 Accuracy is: 1.0
Model KNN: 9 Accuracy is: 1.0
Mean Accuracy is: 0.9427272727272727
accuracy SVC is: 95.55555555555556


In [9]:
#Define a decision tree classifier
nb = GaussianNB()


#Create classification model for bagging
model = BaggingClassifier(cart,
			max_samples=0.5, max_features=0.5)

#Train different models and print their accuracy
results = model_selection.cross_val_score(model, X_fit, y_fit,cv=kfold)
for i in range(len(results)):
    print("Model NB: "+str(i)+" Accuracy is: "+str(results[i]))
    
print("Mean Accuracy is: "+str(results.mean()))

model.fit(X_fit, y_fit)
pred_label = model.predict(X_eval)
nnz = np.shape(y_test)[0] - np.count_nonzero(pred_label - y_test)
acc = 100*nnz/np.shape(y_test)[0]
print('accuracy SVC is: '+str(acc))

#############################################

Model NB: 0 Accuracy is: 1.0
Model NB: 1 Accuracy is: 0.8181818181818182
Model NB: 2 Accuracy is: 0.9090909090909091
Model NB: 3 Accuracy is: 0.8181818181818182
Model NB: 4 Accuracy is: 1.0
Model NB: 5 Accuracy is: 1.0
Model NB: 6 Accuracy is: 0.9
Model NB: 7 Accuracy is: 1.0
Model NB: 8 Accuracy is: 0.9
Model NB: 9 Accuracy is: 0.8
Mean Accuracy is: 0.9145454545454547
accuracy SVC is: 95.55555555555556


In [10]:
#Define a Logistic Regression
lr = LogisticRegression()


#Create classification model for bagging
model = BaggingClassifier(cart,max_samples=0.5, max_features=0.5)

#Train different models and print their accuracy
results = model_selection.cross_val_score(model, X_fit, y_fit,cv=kfold)
for i in range(len(results)):
    print("Model Logistic: "+str(i)+" Accuracy is: "+str(results[i]))
    
    
print("Mean Accuracy is: "+str(results.mean()))

model.fit(X_fit, y_fit)
pred_label = model.predict(X_eval)
nnz = np.shape(y_test)[0] - np.count_nonzero(pred_label - y_test)
acc = 100*nnz/np.shape(y_test)[0]
print('accuracy SVC is: '+str(acc))

Model Logistic: 0 Accuracy is: 1.0
Model Logistic: 1 Accuracy is: 1.0
Model Logistic: 2 Accuracy is: 0.8181818181818182
Model Logistic: 3 Accuracy is: 0.9090909090909091
Model Logistic: 4 Accuracy is: 1.0
Model Logistic: 5 Accuracy is: 0.9
Model Logistic: 6 Accuracy is: 0.9
Model Logistic: 7 Accuracy is: 1.0
Model Logistic: 8 Accuracy is: 1.0
Model Logistic: 9 Accuracy is: 0.9
Mean Accuracy is: 0.9427272727272727
accuracy SVC is: 97.77777777777777


#  Voting CLassifier

In [11]:
from sklearn.ensemble import VotingClassifier

In [12]:
model=VotingClassifier(estimators=[('lr',lr),('nb',nb),('KNN',KNN)],voting='hard')
     #weights=[0.3,0.7]
    #can ve different models with different parametersnt
    
    #do fit, predict and confusion matric for model and pri

In [13]:
results=model_selection.cross_val_score(model, X, Y,cv=kfold)



In [14]:
print(results.mean())

0.9466666666666667


# Ada Boost

In [27]:
from sklearn.ensemble import AdaBoostClassifier

In [28]:
Ada=AdaBoostClassifier(n_estimators=10)
#n_estimators==number of iterations
#learning_rate=1
model = Ada.fit(X_train, Y_train)

y_pred = model.predict(X_test)

In [29]:
from sklearn.metrics import accuracy_score
print("Accuracy:",accuracy_score(Y_test, y_pred))

Accuracy: 0.9555555555555556


In [31]:
Ada.estimator_errors_  #error for each iterations

array([0.3047619 , 0.05616791, 0.33336094, 0.01614931, 0.33370025,
       0.00177932, 0.33335032, 0.00172832, 0.33344449, 0.00057907])