## Exercise 1: Find the best three classifier in the stacking method

In [55]:
import numpy as np
from sklearn.metrics import accuracy_score

from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.datasets import load_iris
from itertools import combinations

iris = load_iris()

data_set = iris.data[0:len(iris.target)-20,:]
labels = iris.target[0:len(iris.target)-20]
unique_labels = np.unique(iris.target)

test_data_set = iris.data[-20:,:]
test_labels = iris.target[-20:]

In [56]:
def build_classifiers():
    listOfClasifiers = [LinearRegression(), KNeighborsClassifier(), SVC(), DecisionTreeClassifier(), GaussianNB(),QuadraticDiscriminantAnalysis()]
    return [clasifier.fit(data_set, labels) for clasifier in listOfClasifiers]

In [57]:
def build_stacked_classifier(classifiers):
    output = []
    for classifier in classifiers:
        output.append(classifier.predict(data_set))
    output = np.array(output).reshape((130,3))
    
    # stacked classifier part:
    stacked_classifier = DecisionTreeClassifier() # set here
    stacked_classifier.fit(output.reshape((130,3)), labels.reshape((130,)))
    test_set = []
    for classifier in classifiers:
        test_set.append(classifier.predict(test_data_set))
    test_set = np.array(test_set).reshape((len(test_set[0]),3))
    predicted = stacked_classifier.predict(test_set)
    return predicted

In [58]:
classifiers = build_classifiers()
combinations_results = []
for combination in combinations(classifiers, 3):
  predicted = build_stacked_classifier(combination)
  accuracy = accuracy_score(test_labels, predicted)
  combinations_results.append([combination, accuracy])

maximumValue = max([j[1], index] for index, j in enumerate(combinations_results))
bestResult = myResults[maximumValue[1]]
print(bestResult)

[(KNeighborsClassifier(), DecisionTreeClassifier(), QuadraticDiscriminantAnalysis()), 0.85]


## Exercise 2:

In [61]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier

# prepare data set

def generate_data(sample_number, feature_number, label_number):
    data_set = np.random.random_sample((sample_number, feature_number))
    labels = np.random.choice(label_number, sample_number)
    return data_set, labels

labels = 2
dimension = 2
test_set_size = 1000
train_set_size = 5000
train_set, train_labels = generate_data(train_set_size, dimension, labels)
test_set, test_labels = generate_data(test_set_size, dimension, labels)

# init weights
number_of_iterations = 10
weights = np.ones((test_set_size,)) / test_set_size


def train_model(classifier, weights):
    return classifier.fit(X=test_set, y=test_labels, sample_weight=weights)

def calculate_error(model):
    predicted = model.predict(test_set)
    I=calculate_accuracy_vector(predicted, test_labels)
    Z=np.sum(I)
    return (1+Z)/1.0

In [64]:
def calc_accuracy(predicted, labels):
    result = []

    for i in range(len(predicted)):
      result.append(0) if predicted[i] == labels[i] else result.append(1)

    return result

def set_new_weights(model):
    accuracy_vector = np.array(calc_accuracy(model.predict(test_set), test_labels))
    return (accuracy_vector + 1) / accuracy_vector.sum()

In [65]:
classifier = DecisionTreeClassifier(max_depth=1, random_state=1)
classifier.fit(X=train_set, y=train_labels)
alphas = []
classifiers = []
for iteration in range(number_of_iterations):
    model = train_model(classifier, weights)
    weights = set_new_weights(model)
    classifiers.append(model)

print(weights)


validate_x, validate_label = generate_data(1, dimension, labels)

[0.00208768 0.00208768 0.00208768 0.00208768 0.00417537 0.00417537
 0.00417537 0.00417537 0.00417537 0.00208768 0.00417537 0.00208768
 0.00208768 0.00208768 0.00417537 0.00417537 0.00208768 0.00417537
 0.00208768 0.00208768 0.00417537 0.00417537 0.00208768 0.00417537
 0.00208768 0.00417537 0.00417537 0.00208768 0.00208768 0.00417537
 0.00208768 0.00208768 0.00417537 0.00208768 0.00417537 0.00208768
 0.00417537 0.00417537 0.00208768 0.00417537 0.00417537 0.00208768
 0.00208768 0.00208768 0.00208768 0.00208768 0.00208768 0.00417537
 0.00208768 0.00208768 0.00208768 0.00417537 0.00208768 0.00417537
 0.00417537 0.00417537 0.00417537 0.00208768 0.00208768 0.00208768
 0.00208768 0.00208768 0.00208768 0.00417537 0.00417537 0.00417537
 0.00208768 0.00417537 0.00417537 0.00417537 0.00417537 0.00208768
 0.00417537 0.00417537 0.00417537 0.00208768 0.00417537 0.00417537
 0.00417537 0.00208768 0.00417537 0.00417537 0.00208768 0.00417537
 0.00208768 0.00417537 0.00417537 0.00208768 0.00417537 0.0020

In [50]:
validate_x, validate_label = generate_data(1, dimension, labels)

In [66]:
def get_prediction(x):
    output = []
    predicted = []
    
    for classifier in classifiers:
        output.append(classifier.predict(x))
        
    output = np.array(output)
    
    for i in range(len(x)):
        classified = output[:, i]
        counts = np.bincount(classified)
        predicted.append(np.argmax(counts))
        
    return predicted

In [67]:
prediction = get_prediction(validate_x)[0]

print(prediction)

1
