In [1]:
import numpy as np

from scipy import stats
from sklearn.utils import resample
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import f1_score

In [2]:
def load_data(path):
    """Read the four CSV splits of one dataset.

    Parameters
    ----------
    path : str
        Format string with a single ``{}`` placeholder. The placeholder is
        filled with ``'x_tr'``, ``'x_tst'``, ``'y_tr'`` and ``'y_tst'`` in
        turn to obtain the file names.

    Returns
    -------
    tuple of numpy.ndarray
        ``(x_train, x_test, y_train, y_test)``, in that order.
    """
    split_names = ('x_tr', 'x_tst', 'y_tr', 'y_tst')
    # Every file is comma-separated and its first row is skipped on read.
    return tuple(
        np.loadtxt(path.format(split), delimiter=',', skiprows=1)
        for split in split_names
    )

In [3]:
# Load all three datasets; each one is an (x_train, x_test, y_train, y_test) tuple.

data1, data2, data3 = (
    load_data(template)
    for template in ('csv/data_{}.csv', 'csv/data2_{}.csv', 'csv/data3_{}.csv')
)

# Bagging

## Task 1 - Ensemble kNN

In [4]:
class Ensemble:
    """Bagging ensemble that combines classifiers by unweighted majority vote.

    Parameters
    ----------
    classifiers : sequence
        Estimator objects exposing ``fit(x, y)`` and ``predict(x)``.
        They are fitted in place by :meth:`fit`.
    """

    def __init__(self, classifiers):
        self.classifiers = classifiers

    def fit(self, x, y):
        """Fit every classifier on its own bootstrap sample of ``(x, y)``.

        Parameters
        ----------
        x : array-like
            Training samples.
        y : array-like
            Training labels, aligned with ``x``.
        """
        for clf in self.classifiers:
            # A fresh resample() call per classifier gives each model a
            # different bootstrap draw of the training data.
            clf.fit(*resample(x, y))

    def predict(self, x):
        """Return the majority-vote label for every sample in ``x``.

        Parameters
        ----------
        x : array-like
            Samples to classify.

        Returns
        -------
        numpy.ndarray
            1-D array with one label per sample. Ties are resolved the way
            ``scipy.stats.mode`` resolves them (smallest label wins).
        """
        # Rows are classifiers, columns are samples.
        predictions = np.array([clf.predict(x) for clf in self.classifiers])
        # Vote along the classifier axis. Older SciPy returns the mode with
        # shape (1, n_samples) while SciPy >= 1.11 drops the reduced axis and
        # returns (n_samples,); indexing with [0] therefore yields a single
        # scalar on newer versions. ravel() gives (n_samples,) in both cases.
        return np.ravel(stats.mode(predictions, axis=0).mode)

In [5]:
# One kNN model per neighbourhood size, combined by plain majority vote.
classifiers = [
    KNeighborsClassifier(n_neighbors=n)
    for n in (1, 3, 5, 7, 9, 11, 15)
]
ensemble = Ensemble(classifiers)

# Train and evaluate on the third dataset.
x_train, x_test, y_train, y_test = data3
ensemble.fit(x_train, y_train)
predicted = ensemble.predict(x_test)

f1 = f1_score(y_test, predicted, average='micro')
print('Ensemble F1 score:', f1)

Ensemble F1 score: 0.9357638888888888


## Task 2 - Weighted Ensemble kNN

In [6]:
class WeigtedEnsemble(Ensemble):
    """Ensemble whose votes are weighted by each classifier's validation score.

    The class name keeps its original spelling so existing references to it
    continue to work.

    Attributes set by :meth:`fit`:
        w -- 1-D float array holding one micro-averaged F1 weight per classifier.
    """

    def fit(self, x, y):
        """Fit the classifiers and compute one vote weight per classifier.

        20% of ``(x, y)`` is held out as a validation set. Each classifier is
        fitted on a bootstrap sample of the remaining data and its
        micro-averaged F1 score on the held-out part becomes its weight.

        Parameters
        ----------
        x : array-like
            Training samples.
        y : array-like
            Training labels, aligned with ``x``.
        """
        x, x_val, y, y_val = train_test_split(x, y, test_size=0.2)
        self.w = np.empty(len(self.classifiers))
        for i, clf in enumerate(self.classifiers):
            clf.fit(*resample(x, y))
            y_pred = clf.predict(x_val)
            self.w[i] = f1_score(y_val, y_pred, average='micro')

    def predict(self, x):
        """Return the weighted-vote label for every sample in ``x``.

        Labels are cast to ``int`` and tallied with ``np.bincount``, so they
        must be non-negative integers (possibly stored as floats).

        Parameters
        ----------
        x : array-like
            Samples to classify.

        Returns
        -------
        numpy.ndarray
            1-D float array with one label per sample.
        """
        def weigted_vote(votes):
            # The np.int / np.float aliases were removed in NumPy 1.24; the
            # builtins they pointed to behave identically here.
            return np.bincount(votes.astype(int), weights=self.w).argmax()

        # Rows are classifiers, columns are samples; transpose to iterate
        # over the votes cast for each sample.
        predictions = np.array([clf.predict(x) for clf in self.classifiers])
        return np.fromiter((weigted_vote(votes) for votes in predictions.T), float)

In [7]:
# Same kNN line-up as the plain ensemble, but votes are weighted by validation F1.
classifiers = [
    KNeighborsClassifier(n_neighbors=n)
    for n in (1, 3, 5, 7, 9, 11, 15)
]
w_ensemble = WeigtedEnsemble(classifiers)

# Train and evaluate on the third dataset.
x_train, x_test, y_train, y_test = data3
w_ensemble.fit(x_train, y_train)
predicted = w_ensemble.predict(x_test)

f1 = f1_score(y_test, predicted, average='micro')
print('Weighted ensemble F1 score:', f1)

Weighted ensemble F1 score: 0.9340277777777778
