# Ensemble methods. Exercises


In this section we have only one exercise:

1. Find the best three classifier in the stacking method using the classifiers from scikit-learn package, such as:


* Linear regression,
* Nearest Neighbors,
* Linear SVM,
* Decision Tree,
* Naive Bayes,
* QDA.

In [1]:
%store -r data_set
%store -r labels
%store -r test_data_set
%store -r test_labels
%store -r unique_labels

## Exercise 1: Find the best three classifier in the stacking method

In [2]:
import numpy as np
from sklearn.metrics import accuracy_score

from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

import re

In [3]:
def build_classifiers():
    
    regression = LinearRegression()
    
    neighbors = KNeighborsClassifier()
    
    svc = SVC(gamma='auto')
    
    tree = DecisionTreeClassifier()
    
    bayes = GaussianNB()
    
    qda = QuadraticDiscriminantAnalysis()
   
    return regression, neighbors, svc, tree, bayes, qda

In [4]:
def build_stacked_classifier(classifiers,stacked_classifier):
    
    output = []
    for classifier in classifiers:
        classifier.fit(data_set,labels)
        output.append(classifier.predict(data_set))
    output = np.array(output).reshape((130,len(classifiers)))
    
    # stacked classifier part:
    stacked_classifier.fit(output, labels.reshape((130,)))
    test_set = []
    for classifier in classifiers:
        test_set.append(classifier.predict(test_data_set))
    test_set = np.array(test_set).reshape((len(test_set[0]),len(classifiers)))
    predicted=np.array([int(round(i)) for i in stacked_classifier.predict(test_set)])
    
    return predicted

In [5]:
def classifier_name(name):
    if name=='LinearRegression':
        return 'Regression'
    if name=='SVC':
        return 'SVC'
    if name=='DecisionTreeClassifier':
        return 'Tree'
    if name=='GaussianNB':
        return 'Bayes'
    if name=='KNeighborsClassifier':
        return 'KNeighbours'
    if name=='QuadraticDiscriminantAnalysis':
        return 'QDA'

In [6]:
def classifier_names(classifiers_three,stacked_classifier):
    three = [classifier_name(re.split('\(',str(classifier))[0]) for classifier in classifiers_three]
    stacked = classifier_name(re.split('\(',str(stacked_classifier))[0])
    return str(three)+' '+stacked

In [10]:
import itertools

classifiers = build_classifiers()

results=[]

for subset in itertools.combinations(classifiers,4):
    for stacked_classifier in subset:
        classifiers_three=list(subset)
        classifiers_three.remove(stacked_classifier)
        
        predicted=build_stacked_classifier(classifiers_three,stacked_classifier)
        accuracy=accuracy_score(test_labels,predicted)
        names=classifier_names(classifiers_three,stacked_classifier)
        results.append([accuracy,names])
        
results=sorted(results)

print('Accuracy\t[Classifiers] Stacked classifier\n')
for result in results:
    print(str(result[0])+'\t'+result[1])

Accuracy	[Classifiers] Stacked classifier

0.0	['KNeighbours', 'Bayes', 'QDA'] Regression
0.0	['KNeighbours', 'SVC', 'Bayes'] Regression
0.0	['KNeighbours', 'SVC', 'Bayes'] Tree
0.0	['KNeighbours', 'SVC', 'QDA'] Regression
0.0	['KNeighbours', 'SVC', 'Tree'] Regression
0.0	['KNeighbours', 'Tree', 'Bayes'] QDA
0.0	['KNeighbours', 'Tree', 'Bayes'] Regression
0.0	['KNeighbours', 'Tree', 'Bayes'] SVC
0.0	['KNeighbours', 'Tree', 'QDA'] Regression
0.0	['Regression', 'Bayes', 'QDA'] KNeighbours
0.0	['Regression', 'KNeighbours', 'Bayes'] SVC
0.0	['Regression', 'KNeighbours', 'Tree'] SVC
0.0	['Regression', 'SVC', 'Bayes'] KNeighbours
0.0	['Regression', 'SVC', 'QDA'] KNeighbours
0.0	['Regression', 'SVC', 'Tree'] KNeighbours
0.0	['Regression', 'Tree', 'Bayes'] KNeighbours
0.0	['Regression', 'Tree', 'Bayes'] SVC
0.0	['Regression', 'Tree', 'QDA'] KNeighbours
0.0	['SVC', 'Bayes', 'QDA'] KNeighbours
0.0	['SVC', 'Bayes', 'QDA'] Regression
0.0	['SVC', 'Tree', 'Bayes'] KNeighbours
0.0	['SVC', 'Tree', 'Ba

