# Ensemble methods. Exercises


In this section we have only one exercise:

1. Find the best three classifier in the stacking method using the classifiers from scikit-learn package, such as:


* Linear regression,
* Nearest Neighbors,
* Linear SVM,
* Decision Tree,
* Naive Bayes,
* QDA.

In [1]:
%store -r data_set
%store -r labels
%store -r test_data_set
%store -r test_labels
%store -r unique_labels

## Exercise 1: Find the best three classifier in the stacking method

In [2]:
import numpy as np
from sklearn.metrics import accuracy_score

from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

import itertools

In [3]:
def build_classifiers():
    
    # fill this part
    linear_regression = LinearRegression()
    linear_regression.fit(data_set, labels)
    
    neighbors = KNeighborsClassifier(3)
    neighbors.fit(data_set, labels)
    
    svm = SVC(gamma='auto')
    svm.fit(data_set, labels)
    
    decision_tree = DecisionTreeClassifier()
    decision_tree.fit(data_set, labels)
    
    naive_bayes = GaussianNB()
    naive_bayes.fit(data_set, labels)
    
    qda = QuadraticDiscriminantAnalysis()
    qda.fit(data_set, labels)

    return linear_regression, neighbors, svm, decision_tree, naive_bayes, qda

In [10]:
def build_stacked_classifier(classifiers):
    classifiers_three_list = list(itertools.combinations(classifiers, 3))
    best_accuracy = 0
    best_classifiers_three = []
    best_stacked_classifier = None
    
    for classifiers_three in classifiers_three_list:
        output = []
        for classifier in classifiers_three:
            output.append(classifier.predict(data_set))
        output = np.array(output).reshape((130,3))
    
        # stacked classifier part:
        stacked_classifiers_list = (DecisionTreeClassifier(), KNeighborsClassifier(3), 
                                    SVC(gamma='auto'))
        
        for stacked_classifier in stacked_classifiers_list:
            #stacked_classifier = None # set here
            stacked_classifier.fit(output.reshape((130,3)), labels.reshape((130,)))
            stacked_predicted = stacked_classifier.predict(output)
            accuracy = accuracy_score(labels, stacked_predicted)
            #print(stacked_predicted, labels, accuracy)
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_classifiers_three = classifiers_three
                best_stacked_classifier = stacked_classifier
                
    test_set = []
    for classifier in best_classifiers_three:
        test_set.append(classifier.predict(test_data_set))
    test_set = np.array(test_set).reshape((len(test_set[0]),3))
    predicted = best_stacked_classifier.predict(test_set)
    
    print("BEST CLASSIFIERS:")
    for classifier in best_classifiers_three:
        print(classifier)
        
    print("BEST STACKED CLASSIFIER:")
    print(best_stacked_classifier)
    
    return predicted

In [16]:
classifiers = build_classifiers()
predicted = build_stacked_classifier(classifiers)
accuracy = accuracy_score(test_labels, predicted)
print(accuracy)

BEST CLASSIFIERS:
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False)
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=3, p=2,
           weights='uniform')
DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')
BEST STACKED CLASSIFIER:
DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')
0