# Parte 1: Replicando experimentos de ....

__Proposta:__ For additional improvement of the prediction of a classifier, we suggest combing bagging and boosting methodology with sum rule voting (Vote B&B)

__Metodologia__:  In order to calculate the classifiers’ accuracy, the whole training set was divided into ten mutually exclusive and equal-sized subsets and for each subset the classifier was trained on the union of all of the other subsets. Then, cross validation was run 10 times for each algorithm and the median value of the 10-cross validations was calculated.

__Bases de dados__: we used 36 well-known datasets mainly from many domains from the UCI repository.  we 
have used data sets from the domains of: pattern recognition (anneal, iris, mushroom, zoo), image recognition (ionosphere, sonar), medical diagnosis (breast-cancer, breast-w, colic, diabetes, heart-c, heart-h, heart-statlog, hepatitis, lymphotherapy, primary-tumor) commodity trading (autos, credit-g) music composition (waveform), computer games (kr-vs-kp, monk1, monk2), various control applications (balance), language morphological analysis (dimin) [9] and prediction of student dropout (student) [10].

__sum rule voting:__ When the sum rule is used each sub-ensemble has to give a confidence value for each 
candidate. In our algorithm, voters express the degree of their preference using as confidence score the probabilities of sub-ensemble prediction. Next all confidence values are added for each candidate and the candidate with the highest sum wins the election.

__sub-ensembles:__

__sub-classifiers:__ We used 10 sub-classifiers for each sub-ensemble for the proposed algorithm. 

In [37]:
import pandas as pd
import numpy as np
import sys
np.set_printoptions(threshold=sys.maxsize)

from sklearn.model_selection import KFold
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.metrics import accuracy_score

## Dataset: waveform

In [2]:
dataset = pd.read_csv("../data/waveform.data",header=None)
#dataset.columns[-1] = 'TARGET'
dataset = dataset.rename(columns={dataset.columns[-1]: 'TARGET'})

print(dataset.shape)
print(dataset['TARGET'].value_counts())

dataset.head()

(5000, 22)
2    1696
0    1657
1    1647
Name: TARGET, dtype: int64


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,12,13,14,15,16,17,18,19,20,TARGET
0,-1.23,-1.56,-1.75,-0.28,0.6,2.22,0.85,0.21,-0.2,0.89,...,2.89,7.75,4.59,3.15,5.12,3.32,1.2,0.24,-0.56,2
1,-0.69,2.43,0.61,2.08,2.3,3.25,5.52,4.55,2.97,2.22,...,1.24,1.89,1.88,-1.34,0.83,1.41,1.78,0.6,2.42,1
2,-0.12,-0.94,1.29,2.59,2.42,3.55,4.94,3.25,1.9,2.07,...,2.5,0.12,1.41,2.78,0.64,0.62,-0.01,-0.79,-0.12,0
3,0.86,0.29,2.19,-0.02,1.13,2.51,2.37,5.45,5.45,4.84,...,2.58,1.4,1.24,1.41,1.07,-1.43,2.84,-1.18,1.12,1
4,1.16,0.37,0.4,-0.59,2.66,1.0,2.69,4.06,5.34,3.53,...,4.3,1.84,1.73,0.21,-0.18,0.13,-0.21,-0.8,-0.68,1


## Modelos

In [3]:
def evaluate(dataset, model, model_name, params={}):
    results = dict()

    scores = list()
    splits = KFold(n_splits=10).split(dataset)

    for i, (train_idx, test_idx) in enumerate(splits):
        print(f"Fold {i+1}/10")
        X_train = dataset.iloc[train_idx,~dataset.columns.isin(["TARGET"])]
        X_test = dataset.iloc[test_idx,~dataset.columns.isin(["TARGET"])]

        y_train = np.ravel(dataset.iloc[train_idx].loc[:,"TARGET"])
        y_test = np.ravel(dataset.iloc[test_idx].loc[:,"TARGET"])

        clf = model(**params)
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        scores.append(accuracy_score(y_test, y_pred))
    results[model_name] = scores
    return results

In [62]:
res_decisiontree = evaluate(dataset, 
                            DecisionTreeClassifier, 
                            'DecisionTree', 
                            {'max_depth' : np.sqrt(X_train.shape[1])})

Fold 1/10
Fold 2/10
Fold 3/10
Fold 4/10
Fold 5/10
Fold 6/10
Fold 7/10
Fold 8/10
Fold 9/10
Fold 10/10


In [53]:
res_randomforest = evaluate(dataset, 
                            RandomForestClassifier, 
                            'RandomForest', 
                            {'n_estimators' : 25, 'max_depth' : np.sqrt(X_train.shape[1])})

Fold 1/10
Fold 2/10
Fold 3/10
Fold 4/10
Fold 5/10
Fold 6/10
Fold 7/10
Fold 8/10
Fold 9/10
Fold 10/10


In [57]:
res_adaboost = evaluate(dataset, 
                        AdaBoostClassifier, 
                        'Adaboost', 
                        {
                            'n_estimators' : 25, 
                            'base_estimator' : DecisionTreeClassifier(max_depth=np.sqrt(X_train.shape[1]))
                        })

Fold 1/10
Fold 2/10
Fold 3/10
Fold 4/10
Fold 5/10
Fold 6/10
Fold 7/10
Fold 8/10
Fold 9/10
Fold 10/10


In [59]:
def evaluate_bb(dataset):
    results = dict()

    scores = list()
    splits = KFold(n_splits=10).split(dataset)

    for i, (train_idx, test_idx) in enumerate(splits):
        print(f"Fold {i+1}/10")
        
        probs_1 = list()
        probs_2 = list()
    
        X_train = dataset.iloc[train_idx,~dataset.columns.isin(["TARGET"])]
        X_test = dataset.iloc[test_idx,~dataset.columns.isin(["TARGET"])]

        y_train = np.ravel(dataset.iloc[train_idx].loc[:,"TARGET"])
        y_test = np.ravel(dataset.iloc[test_idx].loc[:,"TARGET"])

        # Modelo Bagging : Random forest
        modelo_rf = RandomForestClassifier(n_estimators=10, 
                                           max_depth=np.sqrt(X_train.shape[1]))
        modelo_rf.fit(X_train, y_train)

        for i in range(modelo_rf.n_estimators):
            prob = modelo_rf.estimators_[i].predict_proba(X_test)
            probs_1.append(prob)

        # Modelo boosting : Ada boost
        modelo_ada = AdaBoostClassifier(n_estimators=10, 
                                        base_estimator=DecisionTreeClassifier(max_depth=np.sqrt(X_train.shape[1])))
        modelo_ada.fit(X_train, y_train)

        for i in range(modelo_ada.n_estimators):
            prob = modelo_ada.estimators_[i].predict_proba(X_test)
            probs_2.append(prob)

        # Sum rule vote
        total_probs = probs_1 + probs_2
        baselearner_matrix = np.sum(total_probs, axis=0)
        y_pred = np.argmax(baselearner_matrix,axis=1)

        # Metrics
        scores.append(accuracy_score(y_test, y_pred))
    results['vote_B&B'] = scores
    return results

In [60]:
res_voteBB = evaluate_bb(dataset)

Fold 1/10
Fold 2/10
Fold 3/10
Fold 4/10
Fold 5/10
Fold 6/10
Fold 7/10
Fold 8/10
Fold 9/10
Fold 10/10


__Comparando resultados: base de dados Waveform__

In [65]:
print(f"{'DecisionTree'} : {np.mean(*res_decisiontree.values())}")
print(f"{'RandomForest'} : {np.mean(*res_randomforest.values())}")
print(f"{'Adaboot'} : {np.mean(*res_adaboost.values())}")
print(f"{'vote B&B'} : {np.mean(*res_voteBB.values())}")

DecisionTree : 0.7376
RandomForest : 0.8222000000000002
Adaboot : 0.8246
vote B&B : 0.8301999999999999


# Parte 2: Sugerindo alteração

__Opção 1:__ sugerir uma estratégia diferente para a combinação dos ensembles. (Trocar o `Sum rule voting`)

__Opção 2:__ utilizar algum algoritmo de redes neurais com base learner para a combinação dos ensembles.

In [6]:
from sklearn.metrics import accuracy_score

__Ideia__

Bagging -> 10 sub-classifiers  
Boosting -> 10 sub-classifiers

Condição de parada do MLP: 100 iterações

In [50]:
dataset = pd.read_csv("../data/waveform.data",header=None)
#dataset.columns[-1] = 'TARGET'
dataset = dataset.rename(columns={dataset.columns[-1]: 'TARGET'})

print(dataset.shape)
print(dataset['TARGET'].value_counts())

dataset.head()

(5000, 22)
2    1696
0    1657
1    1647
Name: TARGET, dtype: int64


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,12,13,14,15,16,17,18,19,20,TARGET
0,-1.23,-1.56,-1.75,-0.28,0.6,2.22,0.85,0.21,-0.2,0.89,...,2.89,7.75,4.59,3.15,5.12,3.32,1.2,0.24,-0.56,2
1,-0.69,2.43,0.61,2.08,2.3,3.25,5.52,4.55,2.97,2.22,...,1.24,1.89,1.88,-1.34,0.83,1.41,1.78,0.6,2.42,1
2,-0.12,-0.94,1.29,2.59,2.42,3.55,4.94,3.25,1.9,2.07,...,2.5,0.12,1.41,2.78,0.64,0.62,-0.01,-0.79,-0.12,0
3,0.86,0.29,2.19,-0.02,1.13,2.51,2.37,5.45,5.45,4.84,...,2.58,1.4,1.24,1.41,1.07,-1.43,2.84,-1.18,1.12,1
4,1.16,0.37,0.4,-0.59,2.66,1.0,2.69,4.06,5.34,3.53,...,4.3,1.84,1.73,0.21,-0.18,0.13,-0.21,-0.8,-0.68,1


In [51]:
for i_atributo in range(21):
    vmin = min(dataset[i_atributo])
    vmax = max(dataset[i_atributo])
    for i in range(len(dataset)):
        v = (dataset.iloc[i, i_atributo] - vmin) / (vmax - vmin)
        dataset.iloc[i, i_atributo] = v
    #for i in range(len(treino_entradas)):
    #    v = (treino_entradas[i][i_atributo] - vmin) / (vmax - vmin)
    #    treino_entradas[i][i_atributo] = v
    #for i in range(len(teste_entradas)):
    #    v = (teste_entradas[i][i_atributo] - vmin) / (vmax - vmin)
    #    teste_entradas[i][i_atributo] = v

dataset.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,12,13,14,15,16,17,18,19,20,TARGET
0,0.289835,0.237027,0.274664,0.37122,0.408818,0.479769,0.345199,0.328345,0.281915,0.284501,...,0.544016,1.0,0.633865,0.565899,0.842718,0.719844,0.535308,0.463504,0.420786,2
1,0.364011,0.796634,0.539238,0.61731,0.579158,0.578998,0.731788,0.710387,0.562943,0.42569,...,0.380811,0.445601,0.393617,0.152074,0.426214,0.534047,0.601367,0.507299,0.798479,1
2,0.442308,0.323983,0.615471,0.67049,0.591182,0.6079,0.683775,0.595951,0.468085,0.409766,...,0.50544,0.278146,0.35195,0.531797,0.407767,0.457198,0.397494,0.3382,0.476553,0
3,0.576923,0.496494,0.716368,0.398332,0.461924,0.507707,0.471026,0.789613,0.782801,0.703822,...,0.513353,0.399243,0.336879,0.40553,0.449515,0.257782,0.722096,0.290754,0.633714,1
4,0.618132,0.507714,0.515695,0.338895,0.61523,0.362235,0.497517,0.667254,0.77305,0.564756,...,0.683482,0.44087,0.380319,0.294931,0.328155,0.409533,0.374715,0.336983,0.405577,1


In [None]:
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import AdaBoostClassifier

class customMLPClassifer(MLPClassifier):
    def resample_with_replacement(self, X_train, y_train, sample_weight):

        # normalize sample_weights if not already
        sample_weight = sample_weight / sample_weight.sum(dtype=np.float64)

        X_train_resampled = np.zeros((len(X_train), len(X_train[0])), dtype=np.float32)
        y_train_resampled = np.zeros((len(y_train)), dtype=int)
        for i in range(len(X_train)):
            # draw a number from 0 to len(X_train)-1
            draw = np.random.choice(np.arange(len(X_train)), p=sample_weight)

            # place the X and y at the drawn number into the resampled X and y
            X_train_resampled[i] = X_train[draw]
            y_train_resampled[i] = y_train[draw]

        return X_train_resampled, y_train_resampled


    def fit(self, X, y, sample_weight=None):
        if sample_weight is not None:
            X, y = self.resample_with_replacement(X, y, sample_weight)

        return self._fit(X, y, incremental=(self.warm_start and
                                            hasattr(self, "classes_")))


adabooster = AdaBoostClassifier(base_estimator=customMLPClassifer())

adabooster.fit(treino_entradas,y_train)

In [58]:
def evaluate_new_bb(dataset):
    results = dict()

    scores = list()
    splits = KFold(n_splits=10).split(dataset)

    for i, (train_idx, test_idx) in enumerate(splits):
        print(f"Fold {i+1}/10")
        
        probs_1 = list()
        probs_2 = list()
    
        X_train = dataset.iloc[train_idx,~dataset.columns.isin(["TARGET"])]
        X_test = dataset.iloc[test_idx,~dataset.columns.isin(["TARGET"])]

        y_train = np.ravel(dataset.iloc[train_idx].loc[:,"TARGET"])
        y_test = np.ravel(dataset.iloc[test_idx].loc[:,"TARGET"])

        # Modelo Bagging : Random forest
        bag_clf = BaggingClassifier(base_estimator=MLPClassifier(),
                         n_estimators=10, random_state=0, verbose=1).fit(X_train, y_train)

        for i in range(bag_clf.n_estimators):
            prob = bag_clf.estimators_[i].predict_proba(X_test)
            probs_1.append(prob)

        # Modelo boosting : Ada boost
        modelo_ada = AdaBoostClassifier(n_estimators=10, 
                                        base_estimator=customMLPClassifer())
        modelo_ada.fit(X_train, y_train)

        for i in range(modelo_ada.n_estimators):
            prob = modelo_ada.estimators_[i].predict_proba(X_test)
            probs_2.append(prob)

        # Sum rule vote
        total_probs = probs_1 + probs_2
        baselearner_matrix = np.sum(total_probs, axis=0)
        y_pred = np.argmax(baselearner_matrix,axis=1)

        # Metrics
        scores.append(accuracy_score(y_test, y_pred))
    results['vote_new_B&B'] = scores
    return results

In [59]:
res_vote_new_BB = evaluate_new_bb(dataset)

Fold 1/10


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   51.7s finished
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecat

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Fold 2/10


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   59.5s finished
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in N

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Fold 3/10


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   58.9s finished
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in N

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Fold 4/10


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   57.4s finished
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in N

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Fold 5/10


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   59.2s finished
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in N

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Fold 6/10


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   58.2s finished
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in N

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Fold 7/10


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   59.3s finished
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in N

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Fold 8/10


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   58.9s finished
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in N

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Fold 9/10


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   58.1s finished
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in N

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Fold 10/10


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   57.3s finished
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in N

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train_resampled = np.zeros((len(y_train)), dtype=np.int)


In [60]:
res_vote_new_BB

{'vote_new_B&B': [0.874,
  0.864,
  0.88,
  0.84,
  0.848,
  0.864,
  0.882,
  0.864,
  0.892,
  0.824]}

In [62]:
print(f"{'new vote B&B'} : {np.mean(*res_vote_new_BB.values())}")

new vote B&B : 0.8632000000000002
