In [31]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn.metrics import accuracy_score
import math
import numpy as np
import scipy.stats
import copy
import random
import operator

In [17]:
class Chromossome:
    def __init__(self, algorithm, **hyperparameter_range):
        self.hyperparameter_range = hyperparameter_range
        self.classifier = algorithm()
        self.mutate()
        self.fitness = 0
        
    def fit(self, X, y):
        self.classifier.fit(X, y)
        
    def predict(self, X):
        return self.classifier.predict(X)
    
    def mutate(self, n_positions=None):
        param = {}        
        if not n_positions or n_positions>len(self.hyperparameter_range):
            n_positions = len(self.hyperparameter_range)
        mutation_positions = random.sample(range(0, len(self.hyperparameter_range)), n_positions)
        i = 0
        for hyperparameter, h_range in self.hyperparameter_range.items():
            if i in mutation_positions:
                if isinstance(h_range[0], str):
                    param[hyperparameter] = random.choice(h_range)
                elif isinstance(h_range[0], float):
                    param[hyperparameter] = random.uniform(h_range[0], h_range[1])
                else:
                    param[hyperparameter] = random.randint(h_range[0], h_range[1])
            i+= 1
        
        self.classifier.set_params(**param)

In [36]:
class DiversityEnsembleClassifier:
    def __init__(self, algorithms, population_size = 100, max_epochs = 100, random_state=None):
        self.population_size = population_size
        self.max_epochs = max_epochs
        self.population = []
        self.random_state = random_state
        random.seed(random_state)
        for algorithm, hyperparameters in algorithms.items():
            for i in range(0, math.ceil(population_size/len(algorithms.keys()))):
                self.population.append(Chromossome(algorithm, **hyperparameters))
        
    def fit(self, X, y):
        print('Running epoch ', end='')
        
        max_diversity = float('-inf')
        max_diversity_population = []
        
        for epoch in range(self.max_epochs):
            X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.1, random_state=epoch)
            random.seed(self.random_state)
            print(epoch, end='...')
            predictions = np.empty([2*self.population_size, y_val.shape[0]])
            
            for i in range(0, self.population_size):
                new_chromossome = copy.deepcopy(self.population[i])
                new_chromossome.mutate(1)
                self.population.append(new_chromossome)
                
            for i in range(2*self.population_size):
                chromossome = self.population[i]
                chromossome.fit(X_train, y_train)
                predictions[i] = np.logical_and(chromossome.predict(X_val), y_val)
                 
            distances = np.zeros(2*self.population_size)
            pop_fitness = predictions.sum(axis=1)
            target_chromossome = np.argmin(pop_fitness)
            new_population = [self.population[target_chromossome]]            
            
            for i in range(0, self.population_size-1):
                distances[target_chromossome] = float('-inf')
                d_i = np.logical_xor(predictions, predictions[target_chromossome]).sum(axis=1)
                d_i[d_i == 0] = -50
                distances += d_i
                target_chromossome = np.argmax(distances)
                new_population.append(self.population[target_chromossome])
                self.population[target_chromossome].fitness += pop_fitness[target_chromossome]
                
            diversity = distances[distances>float('-inf')].sum()
            print(diversity)
            self.population =new_population
            if diversity > max_diversity:
                max_diversity = diversity   
                max_diversity_population = copy.copy(self.population)
            
        self.population = max_diversity_population
        for chromossome in self.population:
            chromossome.fit(X, y)
            
    def predict(self, X):
        predictions = np.empty((self.population_size, len(X)))
        y = np.empty(len(X))
        for chromossome in range(0, self.population_size):
            predictions[chromossome] = self.population[chromossome].predict(X)
        for i in range(0, len(X)):
            pred = {}
            for j in range(0, self.population_size):
                if predictions[j][i] in pred:
                    pred[predictions[j][i]] += self.population[j].fitness
                else: 
                    pred[predictions[j][i]]  = self.population[j].fitness
            y[i] = max(pred.items(), key=operator.itemgetter(1))[0]
        return y
        """
        predictions = np.empty([self.population_size, X.shape[0]])
        for i in range(0, self.population_size):
            predictions[i] = self.population[i].predict(X)
        return scipy.stats.mode(predictions, axis=0).mode[0]
        """

In [37]:
wine = datasets.load_wine()

In [38]:
X_train, X_test, y_train, y_test = train_test_split(wine.data, wine.target, test_size=0.2, random_state=40)

In [39]:
alg = {
            KNeighborsClassifier: {'n_neighbors':[1, 107]},
            SVC: {'kernel' : ['linear', 'poly', 'rbf', 'sigmoid'], 
                  'degree' : [0, 3]
                  },
            DecisionTreeClassifier: {'min_samples_leaf':[1, 100], 'max_depth':[3, 20]}
      }
dec = DiversityEnsembleClassifier(alg, population_size=50, max_epochs=200, random_state=42)
dec.fit(X_train,  y_train)
print(accuracy_score(y_train, dec.predict(X_train)))

Running epoch 0...-50439.0
1...-71502.0
2...-46661.0
3...-93738.0
4...-39187.0
5...-82875.0
6...-22587.0
7...-20846.0
8...-16321.0
9...-36516.0
10...-14405.0
11...-17235.0
12...-19930.0
13...-18121.0
14...-39718.0
15...-31919.0
16...-13623.0
17...-26532.0
18...-16799.0
19...-25929.0
20...-19921.0
21...-10789.0
22...-34269.0
23...-63240.0
24...-16098.0
25...-16336.0
26...-17698.0
27...-21075.0
28...-85527.0
29...-15300.0
30...-14524.0
31...-11596.0
32...-21451.0
33...-21799.0
34...-13972.0
35...-34311.0
36...-39539.0
37...-17675.0
38...-39514.0
39...-24204.0
40...-13646.0
41...-37372.0
42...-27774.0
43...-15623.0
44...-32556.0
45...-40800.0
46...-46480.0
47...-29823.0
48...-48501.0
49...-23411.0
50...-25095.0
51...-20049.0
52...-24059.0
53...-31438.0
54...-35317.0
55...-9155.0
56...-56061.0
57...-12399.0
58...-36873.0
59...-21922.0
60...-20191.0
61...-20011.0
62...-19717.0
63...-15104.0
64...-13317.0
65...-24274.0
66...-11008.0
67...-26049.0
68...-22455.0
69...-22615.0
70...-24291.0
71.

In [None]:
#Inspect the classifiers
print([x.classifier for x in dec.population])

In [38]:
#Check number of unique classifiers
unique = set([str(x.classifier.get_params()) for x in dec.population])
len(unique)

41

In [40]:
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(n_estimators=1000)
rf.fit(X_train, y_train)
print(accuracy_score(y_train, rf.predict(X_train)))

1.0


In [None]:
np.empty()