1.

In [2]:
from sklearn.metrics import (accuracy_score, f1_score)
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.utils import resample
import tensorflow as tf
import numpy as np
from random import sample 
from copy import deepcopy
from random import sample, randint, randrange
import imgaug.augmenters as iaa
from contextlib import contextmanager
from timeit import default_timer
from sklearn.model_selection import cross_validate

In [3]:
import warnings
warnings.filterwarnings('ignore')

In [4]:
(x_train_f, y_train_f), (x_test_f, y_test_f) = tf.keras.datasets.fashion_mnist.load_data()

In [5]:
x_train = x_train_f.reshape((x_train_f.shape[0],-1))
x_test = x_test_f.reshape((x_test_f.shape[0],-1))
y_train = y_train_f.reshape((y_train_f.shape[0],))
y_test = y_test_f

X_concat = np.concatenate([x_train, x_test])
Y = np.concatenate([y_train, y_test])

scaler = StandardScaler()

scaler.fit(X_concat)

X_transform = scaler.transform(X_concat)

pca = PCA(n_components=50)
pca.fit(X_transform)

X = pca.transform(X_transform)

classes_count = 10

2.

Pełne dane

In [3]:
clf = SVC(probability=True)

In [4]:
scores = cross_validate(clf, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))

In [5]:
scores

{'fit_time': array([569.94333029, 681.70209527, 523.28199863, 675.5077529 ,
        676.29380679]),
 'score_time': array([50.25808024, 89.57924485, 65.31366014, 89.0055511 , 87.02576327]),
 'test_accuracy': array([0.87992857, 0.87328571, 0.87907143, 0.87614286, 0.86821429]),
 'test_neg_log_loss': array([-0.33515649, -0.34396189, -0.33904138, -0.34529357, -0.35832827]),
 'test_neg_mean_squared_error': array([-1.62378571, -1.67857143, -1.603     , -1.64307143, -1.74128571]),
 'test_roc_auc_ovr': array([0.99038776, 0.99014709, 0.99003381, 0.98998907, 0.98917899]),
 'test_f1_weighted': array([0.87905915, 0.87265031, 0.87821835, 0.87516468, 0.86771275]),
 'test_precision_weighted': array([0.87914365, 0.87260454, 0.87835215, 0.87529052, 0.86773478]),
 'test_recall_weighted': array([0.87992857, 0.87328571, 0.87907143, 0.87614286, 0.86821429])}

Niepełne dane i cechy

In [6]:
def average_pred(predictions):
    m = len(predictions[0][0])
    all_results = [[0 for x in range(classes_count)] for y in range(m)] 
    results = [0] * m
    for (_, pred_proba) in predictions:
        for i in range(m):
            for j in range(classes_count):
                all_results[i][j] += pred_proba[i][j]
    for i in range(m):
        results[i] = all_results[i].index(max(all_results[i]))
    return results

def average_pred_proba(predictions, n_classifiers):
    m = len(predictions[0][0])
    results = [[0 for x in range(classes_count)] for y in range(m)]
    for (_, pred_proba) in predictions:
        for i in range(m):
            for j in range(classes_count):
                results[i][j] += pred_proba[i][j]

    for i in range(m):
        for j in range(classes_count):
            results[i][j] /= n_classifiers            
    return results

def majority_pred(predictions):
    m = len(predictions[0][0])
    results = [0] * m
    for i in range(m):
        all_results = [0 for x in range(classes_count)] 
        for (pred, _) in predictions:
            all_results[pred[i]] += 1
        results[i] = all_results.index(max(all_results))
        
    return results

def majority_pred_proba(predictions, n_classifiers):
    m = len(predictions[0][0])
    results = [[0 for x in range(classes_count)] for y in range(m)]
    classifiers_votes_count = [0] * m
    majority_results = majority_pred(predictions)
    for classifier in range(0, n_classifiers):
        for i in range(m):
            voted_class = majority_results[i] 
            (pred, pred_proba) = predictions[classifier]
            if(pred[i] == voted_class):
                classifiers_votes_count[i] += 1
                for j in range(classes_count):
                    results[i][j] += pred_proba[i][j]
    for i in range(m):
        for j in range(classes_count):
            results[i][j] /= classifiers_votes_count[i]
    return results      
    
def borda_pred(predictions):
    m = len(predictions[0][0])
    all_results = [[0 for x in range(classes_count)] for y in range(m)] 
    results = [0] * m
    
    def get_final_borda_points(predictions):
        return np.argsort(np.argsort(predictions)).tolist()

    for (_, pred_proba) in predictions:
        for i in range(m):
            pred_proba[i] = get_final_borda_points(pred_proba[i])
    for (_, pred_proba) in predictions:
        for i in range(m):
            for j in range(classes_count):
                all_results[i][j] += pred_proba[i][j]
    for i in range(m):
        results[i] = all_results[i].index(max(all_results[i]))
    return results

def borda_pred_proba(predictions, n_classifiers):
    m = len(predictions[0][0])
    results = [[0 for x in range(classes_count)] for y in range(m)] 
    
    def get_final_borda_points(predictions):
        return np.argsort(np.argsort(predictions)).tolist()

    def get_points():
        sum = 0
        for i in range(classes_count):
            sum += i
        return sum * n_classifiers
    
    for (_, pred_proba) in predictions:
        for i in range(m):
            pred_proba[i] = get_final_borda_points(pred_proba[i])
    for (_, pred_proba) in predictions:
        for i in range(m):
            for j in range(classes_count):
                results[i][j] += pred_proba[i][j]
                
    for i in range(m):
        for j in range(classes_count):
            results[i][j] /= get_points()
        
    return results

In [7]:
class MinorClassifiers:
    def __init__(self, samp, feat, voting, max_iter=-1):
        self.samp = samp
        self.feat = feat
        self.classifiers = []
        self.predictions = []
        self.cut_features = []
        self.voting = voting
        self.max_iter = max_iter
    
    def get_params(self, deep = False):
        return {
            'samp': self.samp,
            'feat': self.feat,
            'voting': self.voting,
            'max_iter': self.max_iter
        }
    
    def predict(self, X):
        for i in range(len(self.classifiers)):
            classifier = self.classifiers[i]
            f = self.cut_features[i]
            x_test = X[:,f]
            y_pred = classifier.predict(x_test)
            pred = classifier.predict_proba(x_test)
            self.predictions.append((y_pred, pred))
            
        if self.voting == 'average':
            return average_pred(self.predictions)
        
        if self.voting == 'majority':
            return majority_pred(self.predictions)
        
        if self.voting == 'borda':
            return borda_pred(self.predictions)
    
    def predict_proba(self, X):
        if self.voting == 'average':
            return average_pred_proba(self.predictions, len(self.classifiers))
        
        if self.voting == 'majority':
            return majority_pred_proba(self.predictions, len(self.classifiers))
        
        if self.voting == 'borda':
            return borda_pred_proba(self.predictions, len(self.classifiers))
        
    def fit(self, X, Y):
        feature_list = [n for n in range(50)]
        samples_all = X.shape[0]
        features_all = X.shape[1]
        
        for i in range(10):
            f = sample(feature_list, int(features_all * self.feat))
            self.cut_features.append(f)
            x_train_f = X[:,f]
                
            x_train_s, y_train_s = resample(x_train_f, Y, n_samples=int(self.samp * samples_all), replace=False, random_state=0)

            svm_clf = SVC(probability=True, max_iter=self.max_iter)
            svm_clf.fit(x_train_s, y_train_s)
                
            self.classifiers.append(svm_clf)

In [7]:
n_samples = [0.1, 0.35, 0.7]
n_features = [0.25, 0.5, 0.75]
minors = []

In [89]:
for n in n_samples:
    minors.append(MinorClassifiers(n, 1, 'average'))
    
for n in n_features:
    minors.append(MinorClassifiers(1, n, 'average'))

In [90]:
for minor in minors:
    scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
    print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
    print(scores)

Samples: 0.1, features: 1
{'fit_time': array([84.31182337, 41.57707858, 39.99521232, 40.09037495, 40.52187133]), 'score_time': array([96.89617443, 56.92793465, 57.37920856, 56.68667912, 57.90234208]), 'test_accuracy': array([0.84071429, 0.83478571, 0.84542857, 0.84107143, 0.83164286]), 'test_neg_log_loss': array([-0.43518617, -0.44896716, -0.43659481, -0.44667309, -0.45587556]), 'test_neg_mean_squared_error': array([-2.15207143, -2.07292857, -1.97335714, -2.02228571, -2.14457143]), 'test_roc_auc_ovr': array([0.98494211, 0.98453555, 0.98471949, 0.98439707, 0.98370706]), 'test_f1_weighted': array([0.8391561 , 0.83337703, 0.84363625, 0.83927396, 0.83025872]), 'test_precision_weighted': array([0.83913872, 0.83298658, 0.84356639, 0.83908444, 0.8299967 ]), 'test_recall_weighted': array([0.84071429, 0.83478571, 0.84542857, 0.84107143, 0.83164286])}
Samples: 0.35, features: 1
{'fit_time': array([360.26168919, 359.53345442, 370.93991184, 350.37274957,
       406.04506087]), 'score_time': array(

In [91]:
minors = []
for n in n_samples:
    minors.append(MinorClassifiers(n, 1, 'majority'))
    
for n in n_features:
    minors.append(MinorClassifiers(1, n, 'majority'))

In [92]:
for minor in minors:
    scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
    print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
    print(scores)

Samples: 0.1, features: 1
{'fit_time': array([56.82572389, 57.20369482, 56.38576436, 55.56382656, 56.58639836]), 'score_time': array([84.32776642, 84.74555922, 84.77660441, 83.66742492, 84.6532557 ]), 'test_accuracy': array([0.83978571, 0.83428571, 0.84357143, 0.83792857, 0.83142857]), 'test_neg_log_loss': array([-0.43522308, -0.44909969, -0.43698738, -0.44666182, -0.45577667]), 'test_neg_mean_squared_error': array([-2.12764286, -2.05285714, -1.9915   , -2.04007143, -2.14814286]), 'test_roc_auc_ovr': array([0.98495772, 0.98454974, 0.98470733, 0.9844151, 0.98372447]), 'test_f1_weighted': array([0.83755568, 0.83249143, 0.84130593, 0.83561584, 0.82991786]), 'test_precision_weighted': array([0.83841189, 0.83256783, 0.84176772, 0.83601731, 0.82983928]), 'test_recall_weighted': array([0.83978571, 0.83428571, 0.84357143, 0.83792857, 0.83142857])}
Samples: 0.35, features: 1
{'fit_time': array([513.24694347, 517.16748118, 521.75415373, 520.95773458,
       512.52031517]), 'score_time': array([2

In [93]:
minors = []
for n in n_samples:
    minors.append(MinorClassifiers(n, 1, 'borda'))
    
for n in n_features:
    minors.append(MinorClassifiers(1, n, 'borda'))

In [94]:
for minor in minors:
    scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
    print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
    print(scores)

Samples: 0.1, features: 1
{'fit_time': array([87.44013858, 88.77895427, 86.76357579, 86.24450612, 87.22120094]), 'score_time': array([126.74483824, 126.52158928, 128.14419055, 124.89729929,
       127.32271028]), 'test_accuracy': array([0.84042857, 0.8345    , 0.84542857, 0.84078571, 0.83185714]), 'test_neg_log_loss': array([-1.64024205, -1.64302593, -1.63951448, -1.63986571, -1.64830953]), 'test_neg_mean_squared_error': array([-2.1585    , -2.076     , -1.97057143, -2.02171429, -2.13864286]), 'test_roc_auc_ovr': array([0.96908014, 0.96860216, 0.96924101, 0.96847257, 0.96730666]), 'test_f1_weighted': array([0.83886394, 0.8331168 , 0.84362075, 0.83900183, 0.83048114]), 'test_precision_weighted': array([0.83885455, 0.83268507, 0.8435643 , 0.83884806, 0.83024321]), 'test_recall_weighted': array([0.84042857, 0.8345    , 0.84542857, 0.84078571, 0.83185714])}
Samples: 0.35, features: 1
{'fit_time': array([787.80688047, 785.43082952, 779.7077179 , 475.55766678,
       373.69895101]), 'score_t

3a.

In [8]:
n_samples = [0.1, 0.35, 0.7]
n_features = [0.25, 0.5, 0.75]
minors = []
for n in n_samples:
    minors.append(MinorClassifiers(n, 1, 'average'))
    
for n in n_features:
    minors.append(MinorClassifiers(1, n, 'average'))

In [9]:
parts = [0.1, 0.25, 0.5, 0.75] ## for 1.0 already calculated

In [10]:
for p in parts:
    new_X = X[:int(p*X.shape[0])]
    new_Y = Y[:int(p*Y.shape[0])]
    clf = SVC(probability=True)
    scores = cross_validate(clf, new_X, new_Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
    print("Full " + str(p) + "%")
    print(scores)

Full 0.1%
{'fit_time': array([5.46239209, 4.65058708, 4.63876724, 4.71758962, 4.71252751]), 'score_time': array([0.65225339, 0.65923309, 0.67614841, 0.67272663, 0.66023946]), 'test_accuracy': array([0.84      , 0.84857143, 0.84928571, 0.85071429, 0.84285714]), 'test_neg_log_loss': array([-0.42436276, -0.44769205, -0.42998709, -0.41914087, -0.44072538]), 'test_neg_mean_squared_error': array([-2.125     , -1.98285714, -1.91357143, -1.73071429, -1.88857143]), 'test_roc_auc_ovr': array([0.98558101, 0.98416104, 0.98490932, 0.98641686, 0.98560762]), 'test_f1_weighted': array([0.83827333, 0.84654821, 0.84728468, 0.84912954, 0.84234899]), 'test_precision_weighted': array([0.83952203, 0.84668943, 0.84702711, 0.85015398, 0.84335987]), 'test_recall_weighted': array([0.84      , 0.84857143, 0.84928571, 0.85071429, 0.84285714])}
Full 0.25%
{'fit_time': array([25.11102748, 24.30357695, 23.88813043, 23.96315742, 24.14174485]), 'score_time': array([3.64084435, 3.69768977, 3.97439456, 3.64971972, 3.680

In [10]:
for p in parts:
    for minor in minors:
        new_X = X[:int(p*X.shape[0])]
        new_Y = Y[:int(p*Y.shape[0])]
        scores = cross_validate(minor, new_X, new_Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
        print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
        print("Part: " + str(p))
        print(scores)

Samples: 0.1, features: 1
Part: 0.1
{'fit_time': array([2.29719567, 1.70368028, 2.15973496, 2.0188539 , 2.19210458]), 'score_time': array([2.05231571, 1.92805362, 1.89582109, 1.60407615, 2.13352513]), 'test_accuracy': array([0.75142857, 0.78571429, 0.77785714, 0.77      , 0.76142857]), 'test_neg_log_loss': array([-0.67599502, -0.63176933, -0.63846399, -0.65180112, -0.64980752]), 'test_neg_mean_squared_error': array([-2.92714286, -2.61214286, -2.92285714, -2.73285714, -3.07785714]), 'test_roc_auc_ovr': array([0.97005722, 0.973803  , 0.97332964, 0.97245143, 0.97235581]), 'test_f1_weighted': array([0.75158389, 0.78841656, 0.77606978, 0.7684865 , 0.76034427]), 'test_precision_weighted': array([0.75540541, 0.79454754, 0.77798412, 0.7690227 , 0.76119575]), 'test_recall_weighted': array([0.75142857, 0.78571429, 0.77785714, 0.77      , 0.76142857])}
Samples: 0.35, features: 1
Part: 0.1
{'fit_time': array([14.8534503 , 14.45188546, 13.14409876, 15.0696094 , 14.83215594]), 'score_time': array([4

Samples: 1, features: 0.5
Part: 0.25
{'fit_time': array([192.97073984, 205.45588923, 173.9368968 , 175.31263471,
       184.64827466]), 'score_time': array([28.03897595, 27.23800111, 25.3146553 , 25.29203749, 26.43889213]), 'test_accuracy': array([0.84942857, 0.85628571, 0.852     , 0.84914286, 0.84114286]), 'test_neg_log_loss': array([-0.42618665, -0.41256305, -0.42656799, -0.43070748, -0.43520494]), 'test_neg_mean_squared_error': array([-1.97885714, -1.79485714, -1.90428571, -1.92428571, -1.99314286]), 'test_roc_auc_ovr': array([0.98629641, 0.98770577, 0.98678364, 0.98585703, 0.98579907]), 'test_f1_weighted': array([0.84772714, 0.85543236, 0.85114852, 0.84761935, 0.84086479]), 'test_precision_weighted': array([0.84718769, 0.85526102, 0.85118413, 0.8477256 , 0.84095856]), 'test_recall_weighted': array([0.84942857, 0.85628571, 0.852     , 0.84914286, 0.84114286])}
Samples: 1, features: 0.75
Part: 0.25
{'fit_time': array([186.57287383, 198.81872892, 231.52305603, 205.95697236,
       20

Samples: 0.7, features: 1
Part: 0.75
{'fit_time': array([992.65783238, 972.19321179, 969.12032366, 992.69585729,
       982.43080115]), 'score_time': array([184.77053499, 185.0336144 , 184.29363942, 187.15090632,
       186.47876716]), 'test_accuracy': array([0.87361905, 0.8647619 , 0.87133333, 0.87133333, 0.87104762]), 'test_neg_log_loss': array([-0.35374136, -0.37110207, -0.36068511, -0.36008983, -0.36138627]), 'test_neg_mean_squared_error': array([-1.67028571, -1.766     , -1.70485714, -1.75219048, -1.71619048]), 'test_roc_auc_ovr': array([0.98954335, 0.9884767 , 0.98932537, 0.98889936, 0.98919016]), 'test_f1_weighted': array([0.87266227, 0.8633564 , 0.87105127, 0.87034925, 0.87038592]), 'test_precision_weighted': array([0.87253932, 0.86324064, 0.87101501, 0.86987684, 0.87032348]), 'test_recall_weighted': array([0.87361905, 0.8647619 , 0.87133333, 0.87133333, 0.87104762])}
Samples: 1, features: 0.25
Part: 0.75
{'fit_time': array([1525.12347174, 1419.57922053, 1441.5380671 , 1403.442

3b.

In [15]:
max_iter = 800
clf = SVC(probability=True, max_iter=max_iter)
scores = cross_validate(clf, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([268.49231243, 270.47176385, 274.89406872, 282.98455596,
       271.60984707]), 'score_time': array([32.86756682, 34.2289772 , 33.26482916, 36.63378453, 31.50107074]), 'test_accuracy': array([0.76364286, 0.76157143, 0.78378571, 0.73871429, 0.76592857]), 'test_neg_log_loss': array([-0.79471302, -0.836688  , -0.82049978, -0.89372988, -0.83243672]), 'test_neg_mean_squared_error': array([-3.38771429, -3.27492857, -3.165     , -3.61707143, -3.31528571]), 'test_roc_auc_ovr': array([0.96845503, 0.96680759, 0.96788232, 0.9684521 , 0.9693496 ]), 'test_f1_weighted': array([0.75952819, 0.76216814, 0.78354667, 0.72532567, 0.76561288]), 'test_precision_weighted': array([0.80279123, 0.79242146, 0.81372721, 0.79975356, 0.80461454]), 'test_recall_weighted': array([0.76364286, 0.76157143, 0.78378571, 0.73871429, 0.76592857])}

In [16]:
max_iter = 400
clf = SVC(probability=True, max_iter=max_iter)
scores = cross_validate(clf, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([169.78876853, 170.64779186, 170.36103034, 170.39319086,
       169.5521059 ]), 'score_time': array([22.95931482, 22.83487535, 22.81313467, 22.78349543, 22.98829341]), 'test_accuracy': array([0.677     , 0.68321429, 0.63707143, 0.63164286, 0.70964286]), 'test_neg_log_loss': array([-1.08670584, -1.02812242, -1.13246654, -1.16859031, -1.0744041 ]), 'test_neg_mean_squared_error': array([-3.90014286, -3.71557143, -4.53885714, -4.02442857, -3.10528571]), 'test_roc_auc_ovr': array([0.9394987 , 0.94849325, 0.93504347, 0.93088563, 0.93814107]), 'test_f1_weighted': array([0.67837373, 0.68697439, 0.64258247, 0.62774827, 0.70924032]), 'test_precision_weighted': array([0.69171156, 0.70522007, 0.67686811, 0.65040435, 0.71485179]), 'test_recall_weighted': array([0.677     , 0.68321429, 0.63707143, 0.63164286, 0.70964286])}

In [17]:
max_iter = 150
clf = SVC(probability=True, max_iter=max_iter)
scores = cross_validate(clf, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([87.91286635, 88.58666968, 88.71988821, 88.33225298,
       88.12497497]), 'score_time': array([12.3018477 , 12.46776652, 12.30523252, 12.27081656, 12.26265287]), 'test_accuracy': array([0.61464286, 0.59164286, 0.54007143, 0.57585714, 0.56771429]), 'test_neg_log_loss': array([-1.10343767, -1.27323033, -1.28397075, -1.1111919 , -1.13579455]), 'test_neg_mean_squared_error': array([-4.49442857, -4.36507143, -5.49335714, -4.46364286, -4.81364286]), 'test_roc_auc_ovr': array([0.93929826, 0.92563028, 0.92711858, 0.93997434, 0.9348088 ]), 'test_f1_weighted': array([0.60758766, 0.56649061, 0.53495911, 0.56497117, 0.54736532]), 'test_precision_weighted': array([0.61144946, 0.56724206, 0.54080434, 0.5945054 , 0.57911227]), 'test_recall_weighted': array([0.61464286, 0.59164286, 0.54007143, 0.57585714, 0.56771429])}

In [22]:
n_samples = [0.1, 0.35, 0.7]
n_features = [0.25, 0.5, 0.75]

In [30]:
minor = MinorClassifiers(0.35, 1, 'average', 400)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([296.42285895, 314.18454361, 292.06716371, 293.75242424,
        289.61418462]),
 'score_time': array([141.14618993, 137.48688149, 137.79954123, 136.86935616,
        136.98183751]),
 'test_accuracy': array([0.70564286, 0.70921429, 0.70228571, 0.70378571, 0.70064286]),
 'test_neg_log_loss': array([-0.65224121, -0.6607985 , -0.6896096 , -0.67687165, -0.68862129]),
 'test_neg_mean_squared_error': array([-4.68835714, -5.07421429, -4.93992857, -5.02314286, -4.90035714]),
 'test_roc_auc_ovr': array([0.97834201, 0.97702676, 0.97704669, 0.97672786, 0.97688595]),
 'test_f1_weighted': array([0.67871371, 0.69188892, 0.67905816, 0.68498253, 0.67920567]),
 'test_precision_weighted': array([0.83238957, 0.81504231, 0.82473694, 0.82529234, 0.82584915]),
 'test_recall_weighted': array([0.70564286, 0.70921429, 0.70228571, 0.70378571, 0.70064286])}

In [31]:
minor = MinorClassifiers(0.35, 1, 'average', 350)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([279.49761558, 280.31250954, 280.48036766, 281.46957493,
        279.62691021]),
 'score_time': array([132.83870506, 132.28659678, 131.50510097, 131.00428843,
        130.32982969]),
 'test_accuracy': array([0.70192857, 0.69421429, 0.70178571, 0.6895    , 0.6845    ]),
 'test_neg_log_loss': array([-0.68592075, -0.70732949, -0.70387432, -0.72512659, -0.75756024]),
 'test_neg_mean_squared_error': array([-4.57571429, -4.71678571, -4.69321429, -5.09792857, -4.83107143]),
 'test_roc_auc_ovr': array([0.97468954, 0.97576062, 0.97483064, 0.97357005, 0.9729724 ]),
 'test_f1_weighted': array([0.67487227, 0.67015095, 0.67754063, 0.66651744, 0.65850158]),
 'test_precision_weighted': array([0.82101709, 0.82014848, 0.81387572, 0.81427703, 0.81348623]),
 'test_recall_weighted': array([0.70192857, 0.69421429, 0.70178571, 0.6895    , 0.6845    ])}

In [38]:
minor = MinorClassifiers(0.35, 1, 'average', 325)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([272.20656681, 272.8075633 , 301.00371122, 273.41464877,
        270.1839304 ]),
 'score_time': array([130.04804945, 136.5688436 , 129.10406375, 130.21395874,
        126.81948853]),
 'test_accuracy': array([0.69492857, 0.71714286, 0.72057143, 0.70221429, 0.69764286]),
 'test_neg_log_loss': array([-0.73153558, -0.72855036, -0.69474643, -0.72889211, -0.69621021]),
 'test_neg_mean_squared_error': array([-4.66171429, -3.73721429, -3.87871429, -4.4805    , -4.69692857]),
 'test_roc_auc_ovr': array([0.97085696, 0.96877766, 0.97141086, 0.97214607, 0.97363734]),
 'test_f1_weighted': array([0.66684237, 0.69666593, 0.70379803, 0.68220469, 0.68098542]),
 'test_precision_weighted': array([0.80080046, 0.81082563, 0.80123557, 0.81184496, 0.80110919]),
 'test_recall_weighted': array([0.69492857, 0.71714286, 0.72057143, 0.70221429, 0.69764286])}

In [32]:
minor = MinorClassifiers(0.35, 1, 'average', 200)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([218.38733387, 218.69842768, 219.05990887, 217.95086741,
        217.42693925]),
 'score_time': array([104.77483416, 107.56812811, 104.91655803, 102.73763847,
        104.16188884]),
 'test_accuracy': array([0.66042857, 0.66192857, 0.6625    , 0.641     , 0.7145    ]),
 'test_neg_log_loss': array([-0.88054859, -0.87242599, -0.8779921 , -0.89199159, -0.82719791]),
 'test_neg_mean_squared_error': array([-4.39142857, -4.52264286, -4.144     , -4.44057143, -3.86471429]),
 'test_roc_auc_ovr': array([0.96081893, 0.96158988, 0.96194729, 0.9615229 , 0.964138  ]),
 'test_f1_weighted': array([0.65934874, 0.65605568, 0.66088375, 0.63967771, 0.71981044]),
 'test_precision_weighted': array([0.75427786, 0.76055296, 0.76237524, 0.73385164, 0.77709154]),
 'test_recall_weighted': array([0.66042857, 0.66192857, 0.6625    , 0.641     , 0.7145    ])}

In [33]:
minor = MinorClassifiers(0.35, 1, 'average', 150)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([185.09186244, 185.3493154 , 185.91444302, 184.81984925,
        184.33586097]),
 'score_time': array([90.63734865, 92.96006083, 91.17755127, 88.53236294, 90.94236302]),
 'test_accuracy': array([0.6345    , 0.63521429, 0.606     , 0.59842857, 0.61642857]),
 'test_neg_log_loss': array([-0.91330331, -0.95978   , -0.98914976, -1.04721473, -1.03362223]),
 'test_neg_mean_squared_error': array([-4.55307143, -4.14028571, -4.71614286, -5.03157143, -4.98171429]),
 'test_roc_auc_ovr': array([0.95974012, 0.95836996, 0.95459847, 0.94863881, 0.94953623]),
 'test_f1_weighted': array([0.63316902, 0.63351194, 0.60652329, 0.59973702, 0.61603834]),
 'test_precision_weighted': array([0.72450816, 0.71632629, 0.70322777, 0.73177081, 0.73710985]),
 'test_recall_weighted': array([0.6345    , 0.63521429, 0.606     , 0.59842857, 0.61642857])}

In [37]:
minor = MinorClassifiers(0.35, 1, 'average', 125)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([171.25957131, 167.23069859, 165.14174056, 163.58293629,
        163.4891398 ]),
 'score_time': array([82.69252539, 85.02434945, 81.92288828, 80.1411829 , 83.26068544]),
 'test_accuracy': array([0.63907143, 0.61507143, 0.61442857, 0.61364286, 0.61492857]),
 'test_neg_log_loss': array([-0.93658792, -1.0396887 , -0.98166522, -1.06591713, -1.05640836]),
 'test_neg_mean_squared_error': array([-4.01264286, -3.8065    , -4.59385714, -4.62592857, -4.68785714]),
 'test_roc_auc_ovr': array([0.95764981, 0.94423691, 0.95411729, 0.94365022, 0.94747456]),
 'test_f1_weighted': array([0.62501887, 0.61381167, 0.61504007, 0.60866136, 0.61454495]),
 'test_precision_weighted': array([0.70689379, 0.67438205, 0.69712075, 0.68619174, 0.71361828]),
 'test_recall_weighted': array([0.63907143, 0.61507143, 0.61442857, 0.61364286, 0.61492857])}

In [34]:
minor = MinorClassifiers(0.35, 1, 'average', 100)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([142.281394  , 142.45767403, 142.72049356, 142.17265463,
        141.7577405 ]),
 'score_time': array([72.24895477, 73.91322851, 71.99611974, 71.33585787, 73.52608013]),
 'test_accuracy': array([0.65585714, 0.60621429, 0.652     , 0.64578571, 0.62778571]),
 'test_neg_log_loss': array([-0.96976767, -1.0451028 , -0.99191688, -1.0305175 , -1.01883576]),
 'test_neg_mean_squared_error': array([-3.64735714, -3.98307143, -3.98542857, -4.14192857, -4.87278571]),
 'test_roc_auc_ovr': array([0.95230012, 0.9486736 , 0.9544467 , 0.94446413, 0.95226727]),
 'test_f1_weighted': array([0.65034748, 0.59878786, 0.64806086, 0.64563062, 0.62409013]),
 'test_precision_weighted': array([0.69082564, 0.66886807, 0.70894459, 0.68725273, 0.69423514]),
 'test_recall_weighted': array([0.65585714, 0.60621429, 0.652     , 0.64578571, 0.62778571])}

In [35]:
minor = MinorClassifiers(0.35, 1, 'average', 75)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([114.78482962, 114.96514177, 115.35730958, 115.20922613,
        115.17951584]),
 'score_time': array([60.35450459, 62.32924438, 59.55043364, 60.04241347, 61.50837064]),
 'test_accuracy': array([0.61714286, 0.6285    , 0.61621429, 0.6655    , 0.62764286]),
 'test_neg_log_loss': array([-1.00778558, -1.06327628, -1.08779895, -0.93664721, -1.03722654]),
 'test_neg_mean_squared_error': array([-4.622     , -4.44307143, -4.59857143, -3.73007143, -3.92771429]),
 'test_roc_auc_ovr': array([0.94494785, 0.94658784, 0.94405069, 0.95323101, 0.94647372]),
 'test_f1_weighted': array([0.61135051, 0.62326141, 0.60415674, 0.6641064 , 0.61210561]),
 'test_precision_weighted': array([0.64582973, 0.66041884, 0.65600692, 0.6944696 , 0.62501711]),
 'test_recall_weighted': array([0.61714286, 0.6285    , 0.61621429, 0.6655    , 0.62764286])}

In [36]:
minor = MinorClassifiers(0.35, 1, 'average', 50)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([83.15531611, 83.90358305, 84.11916018, 84.99377131, 83.63079143]),
 'score_time': array([46.45082307, 46.89669585, 45.71877241, 46.01510477, 46.73188281]),
 'test_accuracy': array([0.5745    , 0.57264286, 0.61435714, 0.65035714, 0.54092857]),
 'test_neg_log_loss': array([-1.12836647, -1.17872121, -1.09114444, -1.02428432, -1.17571353]),
 'test_neg_mean_squared_error': array([-5.804     , -5.24514286, -4.34107143, -4.34192857, -5.13764286]),
 'test_roc_auc_ovr': array([0.93659325, 0.93538302, 0.94217975, 0.95056589, 0.93448474]),
 'test_f1_weighted': array([0.553685  , 0.56224468, 0.59892286, 0.64254125, 0.51605662]),
 'test_precision_weighted': array([0.59309169, 0.5911449 , 0.61528258, 0.66604915, 0.56102059]),
 'test_recall_weighted': array([0.5745    , 0.57264286, 0.61435714, 0.65035714, 0.54092857])}

In [39]:
minor = MinorClassifiers(0.7, 1, 'average', 400)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([986.641325  , 991.45128489, 985.79248118, 985.58623242,
        986.20748425]),
 'score_time': array([194.97903705, 201.66469526, 197.82561541, 196.4134748 ,
        198.61544871]),
 'test_accuracy': array([0.6355    , 0.5995    , 0.62971429, 0.59885714, 0.68807143]),
 'test_neg_log_loss': array([-0.86531025, -1.02396551, -0.90704313, -1.00910107, -0.86015374]),
 'test_neg_mean_squared_error': array([-4.92171429, -5.82628571, -5.04435714, -5.83714286, -4.51135714]),
 'test_roc_auc_ovr': array([0.9628437 , 0.95237955, 0.96018144, 0.95429294, 0.95695315]),
 'test_f1_weighted': array([0.61873455, 0.57144158, 0.61114802, 0.58103993, 0.69238337]),
 'test_precision_weighted': array([0.74091902, 0.75153889, 0.74356474, 0.75336962, 0.7865239 ]),
 'test_recall_weighted': array([0.6355    , 0.5995    , 0.62971429, 0.59885714, 0.68807143])}

In [40]:
minor = MinorClassifiers(0.7, 1, 'average', 350)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([920.20909882, 948.12369418, 924.54665852, 920.57072186,
        922.41450477]),
 'score_time': array([183.11328387, 187.75370359, 185.01358628, 182.29643297,
        187.39569259]),
 'test_accuracy': array([0.675     , 0.5975    , 0.59857143, 0.64635714, 0.63842857]),
 'test_neg_log_loss': array([-0.85318975, -1.02876073, -1.00327972, -0.92549352, -0.98442991]),
 'test_neg_mean_squared_error': array([-4.226     , -5.56314286, -5.34778571, -4.59035714, -5.27607143]),
 'test_roc_auc_ovr': array([0.95828353, 0.94982933, 0.95197585, 0.95641992, 0.95343668]),
 'test_f1_weighted': array([0.67809087, 0.58263064, 0.59125805, 0.64164446, 0.63667307]),
 'test_precision_weighted': array([0.76657291, 0.73098318, 0.73628971, 0.75445496, 0.77088624]),
 'test_recall_weighted': array([0.675     , 0.5975    , 0.59857143, 0.64635714, 0.63842857])}

In [43]:
minor = MinorClassifiers(0.7, 1, 'average', 150)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([539.26484847, 555.10156274, 546.74318695, 542.00018144,
        542.47315764]),
 'score_time': array([116.23794198, 115.1909759 , 114.04470491, 114.34042358,
        114.78279185]),
 'test_accuracy': array([0.60121429, 0.62935714, 0.61628571, 0.56014286, 0.59014286]),
 'test_neg_log_loss': array([-1.01876255, -1.00185183, -1.03331889, -1.2219924 , -1.10831312]),
 'test_neg_mean_squared_error': array([-4.28935714, -4.79628571, -4.78921429, -5.15085714, -5.16157143]),
 'test_roc_auc_ovr': array([0.94677043, 0.95122565, 0.94927902, 0.92949911, 0.94404551]),
 'test_f1_weighted': array([0.59584986, 0.62223107, 0.60980938, 0.55539665, 0.57997708]),
 'test_precision_weighted': array([0.62594603, 0.68747142, 0.68537658, 0.64269245, 0.64864441]),
 'test_recall_weighted': array([0.60121429, 0.62935714, 0.61628571, 0.56014286, 0.59014286])}

In [44]:
minor = MinorClassifiers(0.7, 1, 'average', 100)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([399.08553672, 402.3629117 , 400.57509232, 407.8192277 ,
        412.66565299]),
 'score_time': array([87.45647311, 87.58016014, 86.6333468 , 87.13858438, 86.44907975]),
 'test_accuracy': array([0.52714286, 0.61628571, 0.55542857, 0.61778571, 0.55264286]),
 'test_neg_log_loss': array([-1.26660848, -1.11162926, -1.10135726, -1.07869131, -1.23508521]),
 'test_neg_mean_squared_error': array([-5.11592857, -4.89371429, -4.97114286, -4.20364286, -5.60457143]),
 'test_roc_auc_ovr': array([0.92376393, 0.93903109, 0.93591284, 0.93982449, 0.92910115]),
 'test_f1_weighted': array([0.52817091, 0.60873015, 0.54260554, 0.60433089, 0.5457999 ]),
 'test_precision_weighted': array([0.55096905, 0.63188001, 0.56448916, 0.61687993, 0.57711326]),
 'test_recall_weighted': array([0.52714286, 0.61628571, 0.55542857, 0.61778571, 0.55264286])}

In [46]:
minor = MinorClassifiers(0.7, 1, 'average', 75)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([319.28643894, 321.50856805, 320.2868917 , 322.14102435,
        321.1473887 ]),
 'score_time': array([71.10918617, 71.32742023, 71.13590097, 71.90008473, 71.54242134]),
 'test_accuracy': array([0.5795    , 0.59557143, 0.55892857, 0.59921429, 0.59071429]),
 'test_neg_log_loss': array([-1.1546669 , -1.14175081, -1.177203  , -1.11262423, -1.13883695]),
 'test_neg_mean_squared_error': array([-5.38507143, -5.25321429, -4.75278571, -4.81614286, -5.2885    ]),
 'test_roc_auc_ovr': array([0.93613401, 0.93689145, 0.93083939, 0.94045861, 0.93372145]),
 'test_f1_weighted': array([0.57249565, 0.5831948 , 0.55530921, 0.57084809, 0.56695303]),
 'test_precision_weighted': array([0.58527911, 0.60500898, 0.58946407, 0.59167332, 0.58411026]),
 'test_recall_weighted': array([0.5795    , 0.59557143, 0.55892857, 0.59921429, 0.59071429])}

In [50]:
minor = MinorClassifiers(0.7, 1, 'average', 65)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([282.52659225, 286.09214711, 284.34584928, 285.34522057,
        287.08991742]),
 'score_time': array([64.09338307, 64.16477585, 64.02074313, 64.72416663, 64.39476657]),
 'test_accuracy': array([0.57685714, 0.58085714, 0.55842857, 0.58164286, 0.62057143]),
 'test_neg_log_loss': array([-1.17383863, -1.21183463, -1.22119085, -1.16396554, -1.1187706 ]),
 'test_neg_mean_squared_error': array([-5.51021429, -5.78957143, -5.15614286, -5.76557143, -5.38792857]),
 'test_roc_auc_ovr': array([0.9313747 , 0.92761656, 0.92631667, 0.93259505, 0.93817542]),
 'test_f1_weighted': array([0.57044623, 0.57202595, 0.54815742, 0.55842003, 0.60693943]),
 'test_precision_weighted': array([0.58776344, 0.58864016, 0.58931006, 0.57901571, 0.6186146 ]),
 'test_recall_weighted': array([0.57685714, 0.58085714, 0.55842857, 0.58164286, 0.62057143])}

In [51]:
minor = MinorClassifiers(0.7, 1, 'average', 60)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([279.00218487, 274.27297997, 272.11184263, 269.39266396,
        268.51408267]),
 'score_time': array([60.85277581, 60.66021705, 59.99221563, 61.5861783 , 61.05079293]),
 'test_accuracy': array([0.55271429, 0.57828571, 0.53921429, 0.57228571, 0.5485    ]),
 'test_neg_log_loss': array([-1.24739813, -1.21778978, -1.27137555, -1.15264473, -1.23066381]),
 'test_neg_mean_squared_error': array([-5.75578571, -5.8255    , -5.83335714, -5.83028571, -6.7485    ]),
 'test_roc_auc_ovr': array([0.92090773, 0.92841288, 0.9198478 , 0.93292489, 0.92382675]),
 'test_f1_weighted': array([0.5501913 , 0.5687654 , 0.52360922, 0.5503146 , 0.52183911]),
 'test_precision_weighted': array([0.57353519, 0.58204052, 0.54820956, 0.57320188, 0.54034695]),
 'test_recall_weighted': array([0.55271429, 0.57828571, 0.53921429, 0.57228571, 0.5485    ])}

In [47]:
minor = MinorClassifiers(0.7, 1, 'average', 50)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([228.39742541, 229.75483036, 242.25878525, 241.69006896,
        245.85980535]),
 'score_time': array([52.81404734, 52.95384741, 52.91180277, 53.12450433, 53.40163898]),
 'test_accuracy': array([0.52278571, 0.58442857, 0.58771429, 0.5635    , 0.57035714]),
 'test_neg_log_loss': array([-1.2990195 , -1.22766467, -1.20601421, -1.221807  , -1.21056531]),
 'test_neg_mean_squared_error': array([-6.86064286, -5.78128571, -5.71057143, -6.49678571, -6.06192857]),
 'test_roc_auc_ovr': array([0.91603058, 0.92808567, 0.92875082, 0.92572411, 0.9278385 ]),
 'test_f1_weighted': array([0.50740534, 0.57909704, 0.56364542, 0.54343816, 0.54520297]),
 'test_precision_weighted': array([0.58042513, 0.58872959, 0.5894495 , 0.58532003, 0.55835676]),
 'test_recall_weighted': array([0.52278571, 0.58442857, 0.58771429, 0.5635    , 0.57035714])}

In [53]:
minor = MinorClassifiers(0.7, 1, 'average', 35)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([167.97897172, 167.95495629, 168.66336131, 168.4537673 ,
        168.52511621]),
 'score_time': array([40.83498216, 40.67140341, 41.19097114, 40.7477138 , 41.09799814]),
 'test_accuracy': array([0.525     , 0.47642857, 0.49128571, 0.52278571, 0.51385714]),
 'test_neg_log_loss': array([-1.33984928, -1.46743225, -1.37512639, -1.33583124, -1.35874479]),
 'test_neg_mean_squared_error': array([-6.37135714, -8.80242857, -7.79328571, -7.09857143, -7.123     ]),
 'test_roc_auc_ovr': array([0.92039049, 0.88552589, 0.9064088 , 0.91192555, 0.90119968]),
 'test_f1_weighted': array([0.50031763, 0.47059719, 0.48385736, 0.49695447, 0.4981503 ]),
 'test_precision_weighted': array([0.58274083, 0.53870349, 0.53314898, 0.52247604, 0.5365456 ]),
 'test_recall_weighted': array([0.525     , 0.47642857, 0.49128571, 0.52278571, 0.51385714])}

In [48]:
minor = MinorClassifiers(0.7, 1, 'average', 25)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([126.27484155, 123.82787967, 125.49757195, 124.18005061,
        124.83183122]),
 'score_time': array([31.92747474, 31.26302195, 32.69752789, 31.86574483, 32.6289022 ]),
 'test_accuracy': array([0.47557143, 0.40135714, 0.46928571, 0.49764286, 0.47214286]),
 'test_neg_log_loss': array([-1.39282119, -1.59682887, -1.40947916, -1.36147062, -1.42801129]),
 'test_neg_mean_squared_error': array([-9.06478571, -9.63942857, -8.18864286, -8.51792857, -8.64735714]),
 'test_roc_auc_ovr': array([0.91394932, 0.87057964, 0.9026254 , 0.90738758, 0.8914552 ]),
 'test_f1_weighted': array([0.45825646, 0.39471684, 0.45150222, 0.48241735, 0.45950732]),
 'test_precision_weighted': array([0.63869722, 0.52325566, 0.5077085 , 0.51758134, 0.51737294]),
 'test_recall_weighted': array([0.47557143, 0.40135714, 0.46928571, 0.49764286, 0.47214286])}

In [54]:
minor = MinorClassifiers(0.7, 1, 'average', 17)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([106.31739163,  91.54695654,  90.85926032,  90.57167554,
         90.56801534]),
 'score_time': array([24.02064371, 22.83422279, 23.80629206, 23.48782492, 23.90958548]),
 'test_accuracy': array([0.39078571, 0.32857143, 0.4065    , 0.42992857, 0.48721429]),
 'test_neg_log_loss': array([-1.57028592, -1.67840578, -1.54537708, -1.52856506, -1.41873629]),
 'test_neg_mean_squared_error': array([ -9.95814286, -10.76878571, -10.01185714,  -9.81835714,
         -8.24521429]),
 'test_roc_auc_ovr': array([0.88994333, 0.85954538, 0.88479089, 0.87395492, 0.91607156]),
 'test_f1_weighted': array([0.34291846, 0.30042982, 0.38472652, 0.41152021, 0.49463488]),
 'test_precision_weighted': array([0.52240807, 0.4538486 , 0.47327846, 0.46788032, 0.56659668]),
 'test_recall_weighted': array([0.39078571, 0.32857143, 0.4065    , 0.42992857, 0.48721429])}

In [52]:
minor = MinorClassifiers(0.7, 1, 'average', 15)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([80.41246128, 79.24889326, 79.92349625, 79.97187304, 79.83688998]),
 'score_time': array([21.56283808, 20.69777656, 21.44565177, 21.18762422, 21.67076564]),
 'test_accuracy': array([0.37642857, 0.34371429, 0.40857143, 0.42364286, 0.44592857]),
 'test_neg_log_loss': array([-1.60958337, -1.69969677, -1.59288056, -1.55483473, -1.47080487]),
 'test_neg_mean_squared_error': array([-10.82721429, -11.08342857,  -9.40728571, -11.01492857,
         -8.81935714]),
 'test_roc_auc_ovr': array([0.88238917, 0.84631275, 0.87818853, 0.87154743, 0.91117233]),
 'test_f1_weighted': array([0.33454128, 0.31082798, 0.38547232, 0.40443118, 0.4458218 ]),
 'test_precision_weighted': array([0.46939045, 0.40462287, 0.5041977 , 0.44760012, 0.54795977]),
 'test_recall_weighted': array([0.37642857, 0.34371429, 0.40857143, 0.42364286, 0.44592857])}

In [49]:
minor = MinorClassifiers(0.7, 1, 'average', 10)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([56.3200326 , 55.75302386, 56.43872952, 56.21218753, 56.26777244]),
 'score_time': array([15.97350717, 15.11374903, 15.7972703 , 15.33176255, 15.74773359]),
 'test_accuracy': array([0.39028571, 0.37992857, 0.40621429, 0.3925    , 0.44414286]),
 'test_neg_log_loss': array([-1.66596632, -1.74725657, -1.63922296, -1.66297541, -1.57671205]),
 'test_neg_mean_squared_error': array([-10.93307143, -13.21614286, -11.80507143, -12.642     ,
        -12.02757143]),
 'test_roc_auc_ovr': array([0.88841149, 0.83603315, 0.87340285, 0.85249926, 0.89777504]),
 'test_f1_weighted': array([0.33333076, 0.35510236, 0.35868573, 0.36680717, 0.42512291]),
 'test_precision_weighted': array([0.47783895, 0.45469359, 0.51067583, 0.53222786, 0.57916256]),
 'test_recall_weighted': array([0.39028571, 0.37992857, 0.40621429, 0.3925    , 0.44414286])}

In [55]:
minor = MinorClassifiers(1, 0.25, 'average', 200)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([406.58318591, 407.81274128, 418.6465621 , 419.49554396,
        414.05800676]),
 'score_time': array([91.42204952, 92.39797235, 93.13732553, 96.36500454, 94.19888878]),
 'test_accuracy': array([0.28835714, 0.28214286, 0.293     , 0.27407143, 0.259     ]),
 'test_neg_log_loss': array([-1.84253628, -1.92583275, -1.81558522, -1.93051637, -1.93760063]),
 'test_neg_mean_squared_error': array([-18.465     , -17.40792857, -18.76214286, -20.59328571,
        -21.04685714]),
 'test_roc_auc_ovr': array([0.8687839 , 0.85780973, 0.89145804, 0.8338061 , 0.84896467]),
 'test_f1_weighted': array([0.22283508, 0.22330065, 0.23118764, 0.20790178, 0.18970064]),
 'test_precision_weighted': array([0.53298158, 0.51983743, 0.58080211, 0.53731076, 0.44624252]),
 'test_recall_weighted': array([0.28835714, 0.28214286, 0.293     , 0.27407143, 0.259     ])}

In [56]:
minor = MinorClassifiers(1, 0.25, 'average', 150)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([313.89002514, 314.23753071, 320.79831624, 307.32875514,
        321.73126006]),
 'score_time': array([76.31072545, 75.43037033, 75.58613157, 75.61714244, 77.14964318]),
 'test_accuracy': array([0.2525    , 0.25628571, 0.28835714, 0.26464286, 0.20507143]),
 'test_neg_log_loss': array([-1.94288152, -1.95327251, -1.9103359 , -1.90185319, -2.10620843]),
 'test_neg_mean_squared_error': array([-20.97328571, -22.11421429, -19.22785714, -20.02442857,
        -26.04757143]),
 'test_roc_auc_ovr': array([0.85181339, 0.85707765, 0.84113318, 0.83612134, 0.78961074]),
 'test_f1_weighted': array([0.18007703, 0.18877256, 0.23583106, 0.20330655, 0.13746501]),
 'test_precision_weighted': array([0.46046439, 0.56670018, 0.46478286, 0.44818184, 0.56609089]),
 'test_recall_weighted': array([0.2525    , 0.25628571, 0.28835714, 0.26464286, 0.20507143])}

In [60]:
minor = MinorClassifiers(1, 0.25, 'average', 125)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([265.96350861, 266.65783262, 264.39328361, 261.26265049,
        255.28132033]),
 'score_time': array([64.82917142, 68.22424054, 65.33852911, 65.99755692, 63.65140676]),
 'test_accuracy': array([0.31207143, 0.21092857, 0.26292857, 0.35364286, 0.29957143]),
 'test_neg_log_loss': array([-1.94160416, -2.06878118, -2.00303763, -1.89074224, -1.93251037]),
 'test_neg_mean_squared_error': array([-20.60542857, -22.31664286, -20.72635714, -16.45971429,
        -16.2455    ]),
 'test_roc_auc_ovr': array([0.84506382, 0.76765372, 0.83343897, 0.84421467, 0.83795812]),
 'test_f1_weighted': array([0.25817451, 0.15522835, 0.21411381, 0.3203743 , 0.25664232]),
 'test_precision_weighted': array([0.57190506, 0.49181366, 0.44279824, 0.47493635, 0.42628197]),
 'test_recall_weighted': array([0.31207143, 0.21092857, 0.26292857, 0.35364286, 0.29957143])}

In [57]:
minor = MinorClassifiers(1, 0.25, 'average', 100)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([216.14537501, 217.23633051, 215.22439551, 216.34083056,
        213.13596272]),
 'score_time': array([56.41407466, 57.01150227, 57.00154614, 56.29222035, 55.27818394]),
 'test_accuracy': array([0.2705    , 0.23385714, 0.25471429, 0.28742857, 0.28      ]),
 'test_neg_log_loss': array([-2.05486073, -2.07735042, -2.00941624, -2.07912148, -1.95808968]),
 'test_neg_mean_squared_error': array([-20.49214286, -22.28235714, -20.89614286, -18.27235714,
        -18.66185714]),
 'test_roc_auc_ovr': array([0.778278  , 0.76308872, 0.82231271, 0.77847462, 0.8188336 ]),
 'test_f1_weighted': array([0.22925418, 0.19803983, 0.21479341, 0.24634565, 0.24522165]),
 'test_precision_weighted': array([0.43890188, 0.41873981, 0.48615203, 0.39721805, 0.44961912]),
 'test_recall_weighted': array([0.2705    , 0.23385714, 0.25471429, 0.28742857, 0.28      ])}

In [58]:
minor = MinorClassifiers(1, 0.25, 'average', 75)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([165.6325357 , 164.51508284, 162.56578517, 162.96758914,
        161.52698874]),
 'score_time': array([46.35500693, 45.67637897, 45.67880726, 47.13040233, 44.58344316]),
 'test_accuracy': array([0.2275    , 0.28564286, 0.30971429, 0.2315    , 0.3035    ]),
 'test_neg_log_loss': array([-2.14310534, -2.06058168, -1.96417382, -2.15067018, -2.04924856]),
 'test_neg_mean_squared_error': array([-23.62321429, -19.69271429, -18.2245    , -24.20471429,
        -17.22      ]),
 'test_roc_auc_ovr': array([0.71841049, 0.8001375 , 0.81916642, 0.71612803, 0.80549721]),
 'test_f1_weighted': array([0.18803853, 0.2568765 , 0.29148854, 0.20523895, 0.27213389]),
 'test_precision_weighted': array([0.42837694, 0.44018942, 0.46379113, 0.44356723, 0.43278922]),
 'test_recall_weighted': array([0.2275    , 0.28564286, 0.30971429, 0.2315    , 0.3035    ])}

In [59]:
minor = MinorClassifiers(1, 0.25, 'average', 50)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([110.88628078, 110.74154639, 110.56458545, 110.77741647,
        110.29150581]),
 'score_time': array([32.72580218, 33.66157341, 34.51647329, 32.83388805, 33.52570605]),
 'test_accuracy': array([0.31428571, 0.29514286, 0.199     , 0.3595    , 0.24935714]),
 'test_neg_log_loss': array([-2.03130074, -2.10940904, -2.16050899, -2.00981217, -2.12895473]),
 'test_neg_mean_squared_error': array([-15.36657143, -18.05792857, -19.71042857, -17.43514286,
        -17.87185714]),
 'test_roc_auc_ovr': array([0.76961168, 0.76847942, 0.70991837, 0.81835635, 0.70340269]),
 'test_f1_weighted': array([0.30165984, 0.2996204 , 0.18353594, 0.35472614, 0.25360801]),
 'test_precision_weighted': array([0.41895125, 0.44414874, 0.32818147, 0.50918037, 0.35223711]),
 'test_recall_weighted': array([0.31428571, 0.29514286, 0.199     , 0.3595    , 0.24935714])}

In [62]:
minor = MinorClassifiers(1, 0.25, 'average', 35)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([88.28466296, 81.65510488, 80.80373263, 80.65896988, 80.52924061]),
 'score_time': array([25.35922766, 25.53961873, 25.12945032, 25.54995346, 25.32177567]),
 'test_accuracy': array([0.18771429, 0.26342857, 0.24678571, 0.24757143, 0.21042857]),
 'test_neg_log_loss': array([-2.19027528, -2.14533155, -2.13428563, -2.19354463, -2.16679224]),
 'test_neg_mean_squared_error': array([-16.72914286, -22.79985714, -17.81671429, -22.34085714,
        -18.22685714]),
 'test_roc_auc_ovr': array([0.6388361 , 0.72616958, 0.69476046, 0.7155396 , 0.71689516]),
 'test_f1_weighted': array([0.18197434, 0.24892668, 0.24679065, 0.24254143, 0.20638936]),
 'test_precision_weighted': array([0.31022523, 0.37675377, 0.33870295, 0.43676287, 0.33988329]),
 'test_recall_weighted': array([0.18771429, 0.26342857, 0.24678571, 0.24757143, 0.21042857])}

In [61]:
minor = MinorClassifiers(1, 0.25, 'average', 20)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([48.22742319, 48.14788699, 48.33357239, 48.04281139, 50.91272211]),
 'score_time': array([16.16131687, 16.02982521, 15.76152611, 15.90311503, 16.26365662]),
 'test_accuracy': array([0.33742857, 0.32857143, 0.35442857, 0.38314286, 0.21357143]),
 'test_neg_log_loss': array([-2.13912946, -2.14420601, -2.11507086, -2.07620991, -2.26429729]),
 'test_neg_mean_squared_error': array([-16.17757143, -15.91421429, -16.0145    , -16.14592857,
        -19.76271429]),
 'test_roc_auc_ovr': array([0.76623991, 0.71862607, 0.76944871, 0.80314656, 0.60274795]),
 'test_f1_weighted': array([0.33256767, 0.32109021, 0.34678852, 0.3851458 , 0.21754201]),
 'test_precision_weighted': array([0.4535035 , 0.37849033, 0.38645567, 0.45500229, 0.3297921 ]),
 'test_recall_weighted': array([0.33742857, 0.32857143, 0.35442857, 0.38314286, 0.21357143])}

In [63]:
minor = MinorClassifiers(1, 0.5, 'average', 150)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([ 956.98348141, 1190.58367205,  763.96592999,  603.93747234,
         528.9153955 ]),
 'score_time': array([196.78776312, 193.63710928, 109.94623065, 102.85365725,
         92.14072633]),
 'test_accuracy': array([0.55764286, 0.53642857, 0.59764286, 0.52      , 0.40985714]),
 'test_neg_log_loss': array([-1.28851054, -1.34845585, -1.21221832, -1.3583285 , -1.48131617]),
 'test_neg_mean_squared_error': array([ -7.03264286,  -8.53414286,  -6.36321429,  -8.10207143,
        -11.94485714]),
 'test_roc_auc_ovr': array([0.9425194 , 0.93778287, 0.94679934, 0.93656423, 0.92210219]),
 'test_f1_weighted': array([0.54391289, 0.5231569 , 0.58746871, 0.50872923, 0.38234899]),
 'test_precision_weighted': array([0.61128599, 0.62227528, 0.67502655, 0.6035732 , 0.51082416]),
 'test_recall_weighted': array([0.55764286, 0.53642857, 0.59764286, 0.52      , 0.40985714])}

In [64]:
minor = MinorClassifiers(1, 0.5, 'average', 100)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([403.39759231, 395.57485795, 394.52115512, 555.34990501,
        437.63361073]),
 'score_time': array([71.91559267, 70.27218342, 74.70916152, 84.64124703, 67.6369369 ]),
 'test_accuracy': array([0.51907143, 0.51642857, 0.43007143, 0.5245    , 0.50557143]),
 'test_neg_log_loss': array([-1.44829103, -1.41424552, -1.52002325, -1.40176898, -1.42139254]),
 'test_neg_mean_squared_error': array([ -9.025     ,  -8.94992857, -11.04692857,  -9.12614286,
         -9.4335    ]),
 'test_roc_auc_ovr': array([0.91720806, 0.92475079, 0.90521959, 0.92681743, 0.92635863]),
 'test_f1_weighted': array([0.50718234, 0.51160013, 0.41678298, 0.52104975, 0.49187699]),
 'test_precision_weighted': array([0.57071834, 0.59142985, 0.51374254, 0.59875997, 0.59378284]),
 'test_recall_weighted': array([0.51907143, 0.51642857, 0.43007143, 0.5245    , 0.50557143])}

In [65]:
minor = MinorClassifiers(1, 0.5, 'average', 75)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([277.88545871, 272.85462189, 271.8572371 , 269.76975107,
        265.66976762]),
 'score_time': array([54.46540213, 55.07202053, 80.74313474, 59.2318666 , 55.4976337 ]),
 'test_accuracy': array([0.49492857, 0.50385714, 0.52314286, 0.51192857, 0.46842857]),
 'test_neg_log_loss': array([-1.53348639, -1.53948784, -1.47043889, -1.50447709, -1.52873079]),
 'test_neg_mean_squared_error': array([-10.33471429, -11.40121429,  -8.95792857, -10.02414286,
        -10.54478571]),
 'test_roc_auc_ovr': array([0.91828632, 0.9090065 , 0.91733758, 0.91413551, 0.9077846 ]),
 'test_f1_weighted': array([0.48075827, 0.49734934, 0.51062444, 0.50629545, 0.4508824 ]),
 'test_precision_weighted': array([0.55795908, 0.60774645, 0.55956679, 0.57680614, 0.52480659]),
 'test_recall_weighted': array([0.49492857, 0.50385714, 0.52314286, 0.51192857, 0.46842857])}

In [70]:
minor = MinorClassifiers(1, 0.5, 'average', 65)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([335.22147393, 314.85721636, 257.92502165, 259.9761982 ,
        255.01151466]),
 'score_time': array([58.9679215 , 50.57985663, 48.55298281, 49.24956203, 49.76311827]),
 'test_accuracy': array([0.48692857, 0.43935714, 0.49385714, 0.49021429, 0.47607143]),
 'test_neg_log_loss': array([-1.56654299, -1.6655439 , -1.55248562, -1.57097901, -1.62433908]),
 'test_neg_mean_squared_error': array([-12.03707143, -12.65042857, -10.18742857, -11.00528571,
        -11.09685714]),
 'test_roc_auc_ovr': array([0.89620201, 0.88133541, 0.90865062, 0.90612745, 0.89067766]),
 'test_f1_weighted': array([0.4814952 , 0.44282382, 0.4885999 , 0.48636423, 0.47474305]),
 'test_precision_weighted': array([0.58223476, 0.55076319, 0.54867503, 0.55831745, 0.52700219]),
 'test_recall_weighted': array([0.48692857, 0.43935714, 0.49385714, 0.49021429, 0.47607143])}

In [86]:
minor = MinorClassifiers(1, 0.5, 'average', 55)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([462.09767962, 466.92024803, 251.44462633, 279.54543447,
        237.82723069]),
 'score_time': array([93.11277318, 64.20347404, 44.37069702, 43.83991694, 43.34510636]),
 'test_accuracy': array([0.4975    , 0.45421429, 0.46178571, 0.53507143, 0.49557143]),
 'test_neg_log_loss': array([-1.62552348, -1.64786075, -1.67013372, -1.59763612, -1.54045651]),
 'test_neg_mean_squared_error': array([ -9.22214286, -12.97878571, -10.51842857,  -9.3205    ,
        -10.04342857]),
 'test_roc_auc_ovr': array([0.90022805, 0.89168003, 0.89223255, 0.90752453, 0.89951583]),
 'test_f1_weighted': array([0.48099955, 0.44690962, 0.46113709, 0.5193939 , 0.47912726]),
 'test_precision_weighted': array([0.53791722, 0.56877366, 0.5247416 , 0.5912206 , 0.53667805]),
 'test_recall_weighted': array([0.4975    , 0.45421429, 0.46178571, 0.53507143, 0.49557143])}

In [66]:
minor = MinorClassifiers(1, 0.5, 'average', 50)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([208.16430426, 205.71958971, 216.15533972, 202.08698773,
        201.38510776]),
 'score_time': array([40.31113195, 40.72430444, 41.01101065, 39.50999928, 40.55594659]),
 'test_accuracy': array([0.48335714, 0.46557143, 0.47907143, 0.5115    , 0.4645    ]),
 'test_neg_log_loss': array([-1.66965847, -1.7063596 , -1.69806032, -1.61771908, -1.67204362]),
 'test_neg_mean_squared_error': array([-11.78707143, -12.25492857, -12.14028571,  -9.78964286,
        -12.02307143]),
 'test_roc_auc_ovr': array([0.88519778, 0.87157388, 0.89058486, 0.90817913, 0.88623533]),
 'test_f1_weighted': array([0.47488518, 0.45602154, 0.47079243, 0.50479185, 0.44707346]),
 'test_precision_weighted': array([0.55734835, 0.54443268, 0.56543286, 0.56932264, 0.55150044]),
 'test_recall_weighted': array([0.48335714, 0.46557143, 0.47907143, 0.5115    , 0.4645    ])}

In [87]:
minor = MinorClassifiers(1, 0.5, 'average', 45)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([197.82009172, 201.2009933 , 198.40350318, 200.3753171 ,
        197.73498225]),
 'score_time': array([36.71669769, 37.63194108, 37.27467513, 37.90374517, 36.99289012]),
 'test_accuracy': array([0.489     , 0.4315    , 0.45371429, 0.42628571, 0.43628571]),
 'test_neg_log_loss': array([-1.59537795, -1.71141775, -1.75327174, -1.73882495, -1.65923031]),
 'test_neg_mean_squared_error': array([-10.13485714, -13.52521429, -12.08957143, -13.97864286,
        -10.52992857]),
 'test_roc_auc_ovr': array([0.90452755, 0.87859356, 0.8676295 , 0.85810739, 0.88242406]),
 'test_f1_weighted': array([0.48163384, 0.42549441, 0.43951185, 0.41647823, 0.41677854]),
 'test_precision_weighted': array([0.53554265, 0.57163768, 0.52103478, 0.53733372, 0.49748016]),
 'test_recall_weighted': array([0.489     , 0.4315    , 0.45371429, 0.42628571, 0.43628571])}

In [71]:
minor = MinorClassifiers(1, 0.5, 'average', 40)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([161.27654696, 160.75344968, 161.34567857, 160.96548104,
        161.19671416]),
 'score_time': array([34.27233601, 32.95263433, 34.39874053, 33.71570063, 34.43368149]),
 'test_accuracy': array([0.42264286, 0.49378571, 0.45978571, 0.52464286, 0.41421429]),
 'test_neg_log_loss': array([-1.73169733, -1.64848449, -1.72606124, -1.66644639, -1.76431247]),
 'test_neg_mean_squared_error': array([-14.83328571,  -9.48028571, -12.67907143,  -9.31078571,
        -14.41407143]),
 'test_roc_auc_ovr': array([0.8680238 , 0.89165678, 0.87168552, 0.90624531, 0.85536688]),
 'test_f1_weighted': array([0.4197386 , 0.47609794, 0.45735867, 0.50415674, 0.40078745]),
 'test_precision_weighted': array([0.51460272, 0.57019784, 0.55577576, 0.58295985, 0.51722292]),
 'test_recall_weighted': array([0.42264286, 0.49378571, 0.45978571, 0.52464286, 0.41421429])}

In [67]:
minor = MinorClassifiers(1, 0.5, 'average', 35)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([144.05908561, 143.2655704 , 142.39475799, 143.68716979,
        143.19930387]),
 'score_time': array([30.35370255, 30.65963697, 30.80450654, 30.71820235, 30.73316312]),
 'test_accuracy': array([0.44985714, 0.42857143, 0.46107143, 0.45892857, 0.46035714]),
 'test_neg_log_loss': array([-1.75017351, -1.79793346, -1.83483606, -1.71117839, -1.79460806]),
 'test_neg_mean_squared_error': array([-12.66335714, -14.38435714, -12.14235714, -11.81128571,
        -11.52485714]),
 'test_roc_auc_ovr': array([0.86220961, 0.85151529, 0.86073412, 0.8739936 , 0.85434766]),
 'test_f1_weighted': array([0.44274589, 0.42066477, 0.46341548, 0.44087865, 0.45242449]),
 'test_precision_weighted': array([0.50377001, 0.52638676, 0.55231644, 0.50301792, 0.51443573]),
 'test_recall_weighted': array([0.44985714, 0.42857143, 0.46107143, 0.45892857, 0.46035714])}

In [68]:
minor = MinorClassifiers(1, 0.5, 'average', 25)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([104.38752222, 104.41383743, 104.77667832, 104.10911679,
        104.69900012]),
 'score_time': array([23.39634705, 23.19027209, 23.65782118, 23.4804852 , 23.80704474]),
 'test_accuracy': array([0.44421429, 0.41742857, 0.45021429, 0.42521429, 0.41485714]),
 'test_neg_log_loss': array([-1.85370232, -1.7862257 , -1.86164504, -1.8696393 , -1.93487088]),
 'test_neg_mean_squared_error': array([-13.63107143, -12.90035714, -12.00371429, -13.63492857,
        -14.39421429]),
 'test_roc_auc_ovr': array([0.84658139, 0.85338694, 0.84164394, 0.84541982, 0.82440534]),
 'test_f1_weighted': array([0.42301945, 0.39828718, 0.43789774, 0.40536443, 0.39371166]),
 'test_precision_weighted': array([0.50388771, 0.54696182, 0.52736243, 0.52252472, 0.447323  ]),
 'test_recall_weighted': array([0.44421429, 0.41742857, 0.45021429, 0.42521429, 0.41485714])}

In [73]:
minor = MinorClassifiers(1, 0.5, 'average', 20)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([84.78406477, 85.2114253 , 84.52296591, 85.14145398, 85.18974996]),
 'score_time': array([20.00648618, 19.81563163, 19.56334424, 19.91039801, 19.85698318]),
 'test_accuracy': array([0.41007143, 0.36985714, 0.46978571, 0.43271429, 0.40785714]),
 'test_neg_log_loss': array([-1.8360594 , -1.91415748, -1.72030452, -1.83050794, -1.85369325]),
 'test_neg_mean_squared_error': array([-14.05721429, -16.21614286, -12.49542857, -13.70535714,
        -13.9145    ]),
 'test_roc_auc_ovr': array([0.84619769, 0.81956502, 0.88025529, 0.85719179, 0.82722386]),
 'test_f1_weighted': array([0.40696028, 0.35560659, 0.45343374, 0.41044796, 0.38920592]),
 'test_precision_weighted': array([0.4743215 , 0.508099  , 0.56248155, 0.50343365, 0.45044322]),
 'test_recall_weighted': array([0.41007143, 0.36985714, 0.46978571, 0.43271429, 0.40785714])}

In [72]:
minor = MinorClassifiers(1, 0.5, 'average', 17)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([73.6317153 , 73.54530215, 75.66650176, 73.65973186, 74.56347346]),
 'score_time': array([17.57123446, 17.81765246, 17.50822401, 17.60376048, 17.43839502]),
 'test_accuracy': array([0.38285714, 0.40864286, 0.36057143, 0.43814286, 0.44285714]),
 'test_neg_log_loss': array([-1.92851899, -1.97153973, -1.93714085, -1.84028564, -1.84092561]),
 'test_neg_mean_squared_error': array([-14.78271429, -13.85828571, -14.97185714, -14.5565    ,
        -13.69271429]),
 'test_roc_auc_ovr': array([0.83024028, 0.80317113, 0.82346129, 0.85225571, 0.84654593]),
 'test_f1_weighted': array([0.37325563, 0.39339374, 0.33789081, 0.42952594, 0.43013741]),
 'test_precision_weighted': array([0.50338208, 0.51430441, 0.46689105, 0.49964996, 0.53503024]),
 'test_recall_weighted': array([0.38285714, 0.40864286, 0.36057143, 0.43814286, 0.44285714])}

In [69]:
minor = MinorClassifiers(1, 0.5, 'average', 10)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([46.8632257 , 46.82730269, 46.5325346 , 46.38738751, 47.57580209]),
 'score_time': array([11.85789418, 11.79204321, 11.76211739, 11.7751019 , 11.78913617]),
 'test_accuracy': array([0.38714286, 0.34985714, 0.3515    , 0.37885714, 0.36928571]),
 'test_neg_log_loss': array([-1.9869174 , -2.0670739 , -2.00191973, -1.99406435, -2.00891447]),
 'test_neg_mean_squared_error': array([-16.4625    , -14.63464286, -14.83257143, -17.64485714,
        -17.39364286]),
 'test_roc_auc_ovr': array([0.79185326, 0.74647559, 0.77878526, 0.79226176, 0.78617892]),
 'test_f1_weighted': array([0.37784755, 0.33681632, 0.32188222, 0.37166826, 0.3514858 ]),
 'test_precision_weighted': array([0.48671874, 0.43918286, 0.37750038, 0.5529473 , 0.49202013]),
 'test_recall_weighted': array([0.38714286, 0.34985714, 0.3515    , 0.37885714, 0.36928571])}

In [74]:
minor = MinorClassifiers(1, 0.75, 'average', 100)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([521.75239086, 539.56865168, 535.4280529 , 539.82829881,
        563.05885363]),
 'score_time': array([76.65035415, 79.74872398, 79.30715132, 79.55968881, 76.31492257]),
 'test_accuracy': array([0.58892857, 0.60778571, 0.61071429, 0.59314286, 0.55307143]),
 'test_neg_log_loss': array([-1.19471572, -1.20803477, -1.1677398 , -1.21199298, -1.24724678]),
 'test_neg_mean_squared_error': array([-6.21128571, -6.176     , -5.804     , -6.04507143, -6.50185714]),
 'test_roc_auc_ovr': array([0.94069406, 0.94424976, 0.94673237, 0.94051134, 0.93384379]),
 'test_f1_weighted': array([0.57856441, 0.5964571 , 0.60294627, 0.58178735, 0.53077565]),
 'test_precision_weighted': array([0.59778382, 0.63465305, 0.62820486, 0.60315246, 0.56391115]),
 'test_recall_weighted': array([0.58892857, 0.60778571, 0.61071429, 0.59314286, 0.55307143])}

In [75]:
minor = MinorClassifiers(1, 0.75, 'average', 75)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([411.33266187, 410.08182764, 414.80697131, 410.58777499,
        409.20496583]),
 'score_time': array([62.98871613, 63.82553554, 65.27762961, 65.00812817, 63.0729506 ]),
 'test_accuracy': array([0.55321429, 0.55942857, 0.544     , 0.54007143, 0.54164286]),
 'test_neg_log_loss': array([-1.31326995, -1.30792991, -1.31093335, -1.33941048, -1.32752467]),
 'test_neg_mean_squared_error': array([-7.845     , -7.56871429, -8.22885714, -8.26642857, -7.53928571]),
 'test_roc_auc_ovr': array([0.92690457, 0.93144437, 0.92377033, 0.92396627, 0.92679624]),
 'test_f1_weighted': array([0.54415156, 0.55348429, 0.53960492, 0.53610701, 0.52641107]),
 'test_precision_weighted': array([0.57064981, 0.58996099, 0.57710061, 0.57578191, 0.55728175]),
 'test_recall_weighted': array([0.55321429, 0.55942857, 0.544     , 0.54007143, 0.54164286])}

In [76]:
minor = MinorClassifiers(1, 0.75, 'average', 60)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([331.55507684, 334.44962168, 334.6291678 , 333.14210129,
        338.86247444]),
 'score_time': array([53.01084447, 53.71146154, 53.46694613, 53.35215688, 54.22893333]),
 'test_accuracy': array([0.5435    , 0.539     , 0.54292857, 0.57364286, 0.54985714]),
 'test_neg_log_loss': array([-1.33697498, -1.38037885, -1.35205054, -1.30760264, -1.36684406]),
 'test_neg_mean_squared_error': array([-7.86621429, -8.37364286, -7.72442857, -7.08592857, -8.12428571]),
 'test_roc_auc_ovr': array([0.92228369, 0.92044719, 0.92264284, 0.93180551, 0.92024976]),
 'test_f1_weighted': array([0.53416411, 0.53321212, 0.53757903, 0.56498253, 0.53921442]),
 'test_precision_weighted': array([0.5652627 , 0.56795373, 0.57134423, 0.58991723, 0.5773893 ]),
 'test_recall_weighted': array([0.5435    , 0.539     , 0.54292857, 0.57364286, 0.54985714])}

In [77]:
minor = MinorClassifiers(1, 0.75, 'average', 50)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([273.32174778, 271.16651583, 273.85580778, 273.92159462,
        270.96831465]),
 'score_time': array([45.28265023, 45.72998571, 46.74565887, 46.54114032, 46.86298275]),
 'test_accuracy': array([0.52378571, 0.49642857, 0.574     , 0.54971429, 0.55085714]),
 'test_neg_log_loss': array([-1.44575697, -1.4449028 , -1.33478778, -1.38130774, -1.41827536]),
 'test_neg_mean_squared_error': array([-8.4225    , -7.76142857, -7.38257143, -7.55757143, -8.09921429]),
 'test_roc_auc_ovr': array([0.91710446, 0.90759177, 0.93052242, 0.92412435, 0.91418887]),
 'test_f1_weighted': array([0.51690324, 0.48606984, 0.5660404 , 0.54168244, 0.54089977]),
 'test_precision_weighted': array([0.55839568, 0.55729187, 0.59377446, 0.57639462, 0.56293116]),
 'test_recall_weighted': array([0.52378571, 0.49642857, 0.574     , 0.54971429, 0.55085714])}

In [88]:
minor = MinorClassifiers(1, 0.75, 'average', 45)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([308.23570991, 296.08728218, 290.78469253, 291.57393551,
        290.11198568]),
 'score_time': array([43.06828666, 43.89277053, 43.16428804, 43.74510717, 43.26962614]),
 'test_accuracy': array([0.52221429, 0.53835714, 0.5375    , 0.49871429, 0.51128571]),
 'test_neg_log_loss': array([-1.41925373, -1.4456693 , -1.38488614, -1.4603455 , -1.43092889]),
 'test_neg_mean_squared_error': array([-8.73235714, -9.43257143, -7.77578571, -9.67107143, -8.14942857]),
 'test_roc_auc_ovr': array([0.91191632, 0.90968392, 0.92395407, 0.90534471, 0.91258611]),
 'test_f1_weighted': array([0.51978735, 0.53340364, 0.52025159, 0.49413272, 0.48221086]),
 'test_precision_weighted': array([0.56549918, 0.58193448, 0.57355592, 0.55075879, 0.53909149]),
 'test_recall_weighted': array([0.52221429, 0.53835714, 0.5375    , 0.49871429, 0.51128571])}

In [11]:
minor = MinorClassifiers(1, 0.75, 'average', 40)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([322.53996468, 404.56392717, 510.94665003, 507.57748365,
        503.49685931]),
 'score_time': array([39.72186565, 52.16873908, 70.43777895, 70.4358604 , 73.20933461]),
 'test_accuracy': array([0.53214286, 0.50478571, 0.49157143, 0.475     , 0.49114286]),
 'test_neg_log_loss': array([-1.40845501, -1.48995101, -1.46394908, -1.52131972, -1.52504276]),
 'test_neg_mean_squared_error': array([-8.372     , -9.13228571, -9.80228571, -9.19635714, -9.51178571]),
 'test_roc_auc_ovr': array([0.91417173, 0.90908273, 0.91073535, 0.90153003, 0.9040769 ]),
 'test_f1_weighted': array([0.52103612, 0.50228205, 0.48724505, 0.46606924, 0.47732279]),
 'test_precision_weighted': array([0.55419896, 0.56482545, 0.56057627, 0.54796737, 0.52587233]),
 'test_recall_weighted': array([0.53214286, 0.50478571, 0.49157143, 0.475     , 0.49114286])}

In [78]:
minor = MinorClassifiers(1, 0.75, 'average', 35)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([207.17076492, 212.90225983, 216.38435388, 204.55270171,
        204.8181076 ]),
 'score_time': array([37.20586205, 36.38500643, 37.74322581, 35.64859533, 35.26901865]),
 'test_accuracy': array([0.50892857, 0.47328571, 0.53171429, 0.52278571, 0.49542857]),
 'test_neg_log_loss': array([-1.50565308, -1.51269286, -1.5179592 , -1.44394178, -1.4891825 ]),
 'test_neg_mean_squared_error': array([-10.17385714,  -9.33164286,  -9.83585714,  -9.139     ,
         -8.53607143]),
 'test_roc_auc_ovr': array([0.90516216, 0.89873124, 0.9027058 , 0.91748011, 0.9017652 ]),
 'test_f1_weighted': array([0.50169549, 0.45826317, 0.52251136, 0.52159767, 0.4718188 ]),
 'test_precision_weighted': array([0.5660375 , 0.55196614, 0.57402917, 0.58671309, 0.505369  ]),
 'test_recall_weighted': array([0.50892857, 0.47328571, 0.53171429, 0.52278571, 0.49542857])}

In [82]:
minor = MinorClassifiers(1, 0.75, 'average', 45)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([520.84710932, 305.0349648 , 516.70040345, 517.7560606 ,
        511.90243506]),
 'score_time': array([49.83183026, 92.28943348, 94.36861825, 93.15736103, 93.56156468]),
 'test_accuracy': array([0.53964286, 0.49507143, 0.53107143, 0.55007143, 0.51335714]),
 'test_neg_log_loss': array([-1.42569812, -1.44658763, -1.40271424, -1.4171155 , -1.39542757]),
 'test_neg_mean_squared_error': array([-8.866     , -9.74407143, -8.018     , -8.11778571, -7.37092857]),
 'test_roc_auc_ovr': array([0.91519396, 0.90799346, 0.9183157 , 0.92042298, 0.91299078]),
 'test_f1_weighted': array([0.53658025, 0.49187083, 0.52454665, 0.54759776, 0.48872158]),
 'test_precision_weighted': array([0.56681527, 0.54656591, 0.56715886, 0.58105375, 0.5373847 ]),
 'test_recall_weighted': array([0.53964286, 0.49507143, 0.53107143, 0.55007143, 0.51335714])}

In [20]:
minor = MinorClassifiers(1, 0.75, 'average', 35)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([451.48101759, 448.33198857, 454.86333036, 457.95929885,
        460.51726484]),
 'score_time': array([59.18930221, 59.20171475, 58.58895755, 58.5455029 , 62.48744559]),
 'test_accuracy': array([0.48685714, 0.4675    , 0.478     , 0.50557143, 0.5245    ]),
 'test_neg_log_loss': array([-1.53776107, -1.52495898, -1.49507382, -1.53579522, -1.56354638]),
 'test_neg_mean_squared_error': array([-10.54907143,  -9.29978571,  -9.29578571,  -9.40428571,
        -10.44471429]),
 'test_roc_auc_ovr': array([0.90547578, 0.89674217, 0.90305764, 0.90090298, 0.89727179]),
 'test_f1_weighted': array([0.492386  , 0.4565555 , 0.46429339, 0.50259538, 0.51407951]),
 'test_precision_weighted': array([0.57578388, 0.54610833, 0.56110214, 0.55439255, 0.55659042]),
 'test_recall_weighted': array([0.48685714, 0.4675    , 0.478     , 0.50557143, 0.5245    ])}

In [15]:
minor = MinorClassifiers(1, 0.75, 'average', 30)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([394.01500368, 392.82663441, 387.47874117, 396.43253732,
        398.30700898]),
 'score_time': array([51.68606687, 53.23127651, 52.72960925, 55.77597332, 56.33389735]),
 'test_accuracy': array([0.51535714, 0.50078571, 0.41057143, 0.47507143, 0.48      ]),
 'test_neg_log_loss': array([-1.55502531, -1.49745348, -1.62580389, -1.58836749, -1.53697519]),
 'test_neg_mean_squared_error': array([-10.28671429,  -9.03578571, -10.90792857, -10.49828571,
         -9.50607143]),
 'test_roc_auc_ovr': array([0.89987024, 0.90100165, 0.87832647, 0.89514706, 0.89804591]),
 'test_f1_weighted': array([0.51668575, 0.48939733, 0.39821087, 0.47111152, 0.46233152]),
 'test_precision_weighted': array([0.56526261, 0.57548431, 0.52167031, 0.54563698, 0.51440972]),
 'test_recall_weighted': array([0.51535714, 0.50078571, 0.41057143, 0.47507143, 0.48      ])}

In [19]:
minor = MinorClassifiers(1, 0.75, 'average', 27)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([362.70391798, 356.90701079, 359.82514381, 357.44739413,
        356.23155975]),
 'score_time': array([49.90893888, 49.36405182, 47.27458763, 49.51216435, 51.53476858]),
 'test_accuracy': array([0.47835714, 0.48057143, 0.49207143, 0.43878571, 0.44642857]),
 'test_neg_log_loss': array([-1.59722091, -1.55371356, -1.56073236, -1.59394003, -1.64559104]),
 'test_neg_mean_squared_error': array([-10.4835    , -10.9055    ,  -9.574     , -10.10428571,
        -10.12342857]),
 'test_roc_auc_ovr': array([0.88847812, 0.89772522, 0.90140186, 0.87461633, 0.87313584]),
 'test_f1_weighted': array([0.46434671, 0.47136866, 0.48882973, 0.42824179, 0.42608542]),
 'test_precision_weighted': array([0.54229923, 0.58699393, 0.57116543, 0.52283942, 0.48071663]),
 'test_recall_weighted': array([0.47835714, 0.48057143, 0.49207143, 0.43878571, 0.44642857])}

In [79]:
minor = MinorClassifiers(1, 0.75, 'average', 25)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([150.28962469, 149.40990043, 149.88650036, 150.36196566,
        150.02560139]),
 'score_time': array([26.75279999, 27.04740477, 27.45977306, 27.18761873, 27.11735106]),
 'test_accuracy': array([0.46821429, 0.44778571, 0.48221429, 0.47042857, 0.48021429]),
 'test_neg_log_loss': array([-1.64767457, -1.55590165, -1.56769521, -1.6045631 , -1.54033575]),
 'test_neg_mean_squared_error': array([-10.37257143, -10.69778571, -10.85371429, -10.47714286,
         -9.26385714]),
 'test_roc_auc_ovr': array([0.87569865, 0.88938107, 0.89944863, 0.89230429, 0.89409635]),
 'test_f1_weighted': array([0.46401766, 0.43663339, 0.48236   , 0.46535133, 0.45749385]),
 'test_precision_weighted': array([0.53669609, 0.55712883, 0.56734936, 0.5815889 , 0.52591803]),
 'test_recall_weighted': array([0.46821429, 0.44778571, 0.48221429, 0.47042857, 0.48021429])}

In [92]:
minor = MinorClassifiers(1, 0.75, 'average', 15)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([106.81243253, 106.48246837, 106.74202013, 106.82598734,
        106.65281272]),
 'score_time': array([18.23206472, 18.64603996, 18.45040488, 18.3291223 , 18.36816287]),
 'test_accuracy': array([0.47135714, 0.44414286, 0.4005    , 0.42742857, 0.44628571]),
 'test_neg_log_loss': array([-1.73414841, -1.75384909, -1.71900526, -1.7154375 , -1.73896861]),
 'test_neg_mean_squared_error': array([-12.75592857, -12.46907143, -13.42571429, -12.55792857,
        -13.11135714]),
 'test_roc_auc_ovr': array([0.87365436, 0.86338177, 0.88533036, 0.87417493, 0.87147953]),
 'test_f1_weighted': array([0.45240156, 0.43122008, 0.38716733, 0.40733447, 0.42631088]),
 'test_precision_weighted': array([0.58079907, 0.55144477, 0.56547948, 0.57960526, 0.56964707]),
 'test_recall_weighted': array([0.47135714, 0.44414286, 0.4005    , 0.42742857, 0.44628571])}

In [14]:
minor = MinorClassifiers(1, 0.75, 'average', 11)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([119.58573318, 111.7580924 , 153.23480082, 158.49286532,
        159.41713357]),
 'score_time': array([16.73025179, 19.04306579, 24.42577791, 27.09413099, 26.15965366]),
 'test_accuracy': array([0.43621429, 0.38785714, 0.3935    , 0.41807143, 0.41628571]),
 'test_neg_log_loss': array([-1.84461299, -1.7881891 , -1.79770897, -1.81350886, -1.8121919 ]),
 'test_neg_mean_squared_error': array([-14.6075    , -14.45307143, -15.17328571, -14.68707143,
        -15.80585714]),
 'test_roc_auc_ovr': array([0.84551129, 0.85227616, 0.86218762, 0.87182911, 0.84419668]),
 'test_f1_weighted': array([0.40885767, 0.36113715, 0.38377145, 0.39704378, 0.38790379]),
 'test_precision_weighted': array([0.55941997, 0.57521608, 0.6051934 , 0.56944648, 0.54917677]),
 'test_recall_weighted': array([0.43621429, 0.38785714, 0.3935    , 0.41807143, 0.41628571])}

In [91]:
minor = MinorClassifiers(1, 0.75, 'average', 10)
scores = cross_validate(minor, X, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
scores

{'fit_time': array([85.36399961, 85.32352495, 85.47160029, 85.20026016, 85.55496907]),
 'score_time': array([13.97271538, 13.69775796, 13.81598949, 13.6288631 , 13.59755087]),
 'test_accuracy': array([0.35992857, 0.37478571, 0.36828571, 0.43171429, 0.43057143]),
 'test_neg_log_loss': array([-1.91410148, -1.8417944 , -1.89110759, -1.74065176, -1.76524238]),
 'test_neg_mean_squared_error': array([-16.38735714, -15.83721429, -16.65371429, -13.9195    ,
        -14.24221429]),
 'test_roc_auc_ovr': array([0.84746911, 0.84917054, 0.84929931, 0.88103235, 0.86712117]),
 'test_f1_weighted': array([0.34526788, 0.34949244, 0.34131278, 0.39863914, 0.38757032]),
 'test_precision_weighted': array([0.56909061, 0.57195762, 0.58783102, 0.58259735, 0.53038963]),
 'test_recall_weighted': array([0.35992857, 0.37478571, 0.36828571, 0.43171429, 0.43057143])}

4. 

In [11]:
def make_noise_data(data, percent):
    m = data.shape[0]
    n = data.shape[1]
    new_data = deepcopy(data)
    for i in range(m):
        for j in range(n):
            rand = randrange(-1, 2, 2) #random integer from {-1, 1}
            new_data[i][j] = (1 + rand * percent) * data[i][j]
    return new_data

In [12]:
def make_noise_label(labels, percent):
    labels_with_noise = deepcopy(labels)
    arr_size = labels.shape[0]
    indexes = [n for n in range(arr_size)]
    indexes_to_change = sample(indexes, int(arr_size * percent))
    
    for i in indexes_to_change:
        old_val = labels[i]
        new_val = randint(0, classes_count-1)
        while old_val == new_val:
            new_val = randint(0, classes_count-1)
        labels_with_noise[i] = new_val
        
    return labels_with_noise

In [18]:
n_samples = [0.1, 0.35, 0.7]
n_features = [0.25, 0.5, 0.75]
minors = []
for n in n_samples:
    minors.append(MinorClassifiers(n, 1, 'average'))
    
for n in n_features:
    minors.append(MinorClassifiers(1, n, 'average'))

In [14]:
noises = [0.01, 0.05, 0.1]

In [15]:
for n in noises:
    new_y_train = make_noise_label(Y, n)
    clf = SVC(probability=True)
    scores = cross_validate(clf, X, new_y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
    print("Full " + str(n) + "%")
    print(scores)

Full 0.01%
{'fit_time': array([456.81661558, 456.1075387 , 454.05069327, 454.65215397,
       443.14230084]), 'score_time': array([46.84140134, 47.64082694, 46.40989137, 46.6297276 , 46.05647755]), 'test_accuracy': array([0.87014286, 0.86435714, 0.86914286, 0.86728571, 0.85992857]), 'test_neg_log_loss': array([-0.40421965, -0.41409273, -0.40907839, -0.4109215 , -0.42168201]), 'test_neg_mean_squared_error': array([-1.80942857, -1.84857143, -1.78692857, -1.86557143, -1.91507143]), 'test_roc_auc_ovr': array([0.98470132, 0.98428659, 0.98451878, 0.98457667, 0.98398773]), 'test_f1_weighted': array([0.86932354, 0.86370591, 0.8683069 , 0.86642266, 0.85950861]), 'test_precision_weighted': array([0.86936324, 0.86376591, 0.8684633 , 0.8665921 , 0.8595742 ]), 'test_recall_weighted': array([0.87014286, 0.86435714, 0.86914286, 0.86728571, 0.85992857])}
Full 0.05%
{'fit_time': array([692.17911267, 690.1500957 , 687.48242617, 693.06894517,
       690.06399322]), 'score_time': array([54.93726802, 54.73

In [15]:
for n in noises:
    for minor in minors:
        new_y_train = make_noise_label(Y, n)
        scores = cross_validate(minor, X, new_y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
        print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
        print("Noise " + str(n))
        print(scores)

Samples: 0.1, features: 1
Noise 0.01
{'fit_time': array([44.15251517, 44.40823722, 44.79876828, 43.93469787, 44.49649692]), 'score_time': array([58.73130631, 58.48243427, 58.53770328, 57.89160728, 58.73069668]), 'test_accuracy': array([0.83142857, 0.82771429, 0.83778571, 0.83185714, 0.8245    ]), 'test_neg_log_loss': array([-0.50686052, -0.50839664, -0.49776846, -0.51577597, -0.51521768]), 'test_neg_mean_squared_error': array([-2.31857143, -2.2025    , -2.13964286, -2.20085714, -2.29007143]), 'test_roc_auc_ovr': array([0.97865446, 0.97919225, 0.97944031, 0.97802815, 0.97875112]), 'test_f1_weighted': array([0.82986614, 0.82632357, 0.8357828 , 0.83005499, 0.82315756]), 'test_precision_weighted': array([0.8298868 , 0.8260838 , 0.83581026, 0.82992718, 0.82284182]), 'test_recall_weighted': array([0.83142857, 0.82771429, 0.83778571, 0.83185714, 0.8245    ])}
Samples: 0.35, features: 1
Noise 0.01
{'fit_time': array([401.97660613, 411.0583744 , 404.74461555, 401.73142743,
       400.05051756])

Samples: 1, features: 0.5
Noise 0.05
{'fit_time': array([7456.3474493 , 4555.71863914, 4421.3907001 , 4470.64562774,
       4042.51135516]), 'score_time': array([391.29696631, 345.14422894, 362.64291573, 353.39773846,
       346.18799686]), 'test_accuracy': array([0.82971429, 0.82092857, 0.82928571, 0.82121429, 0.81585714]), 'test_neg_log_loss': array([-0.63374688, -0.64638837, -0.63908584, -0.6578005 , -0.66521278]), 'test_neg_mean_squared_error': array([-2.52857143, -2.537     , -2.49314286, -2.67028571, -2.74814286]), 'test_roc_auc_ovr': array([0.96294216, 0.96115132, 0.96161057, 0.96014319, 0.95944963]), 'test_f1_weighted': array([0.82829739, 0.82002003, 0.82827487, 0.8203129 , 0.81496664]), 'test_precision_weighted': array([0.82843742, 0.81996462, 0.82844782, 0.82031528, 0.81471603]), 'test_recall_weighted': array([0.82971429, 0.82092857, 0.82928571, 0.82121429, 0.81585714])}
Samples: 1, features: 0.75
Noise 0.05
{'fit_time': array([ 5302.86048484,  5247.91040945,  8157.29574442, 

In [19]:
noises = [0.1, 0.2, 0.3]

In [20]:
for n in noises:
    new_x_train = make_noise_data(X, n)
    clf = SVC(probability=True)
    scores = cross_validate(clf, new_x_train, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
    print("Full " + str(n) + "%")
    print(scores)

Full 0.1%
{'fit_time': array([391.33737779, 392.77413607, 392.44460082, 386.7590735 ,
       399.97994089]), 'score_time': array([44.76011014, 44.9320941 , 44.95547462, 43.93913484, 44.93002605]), 'test_accuracy': array([0.8775    , 0.87271429, 0.87607143, 0.8735    , 0.86671429]), 'test_neg_log_loss': array([-0.34456843, -0.35030322, -0.34749276, -0.35349998, -0.36641845]), 'test_neg_mean_squared_error': array([-1.65657143, -1.6615    , -1.64278571, -1.69457143, -1.76514286]), 'test_roc_auc_ovr': array([0.98989768, 0.98986078, 0.98968087, 0.98961975, 0.98871485]), 'test_f1_weighted': array([0.87646635, 0.87198031, 0.87513805, 0.87254236, 0.86606023]), 'test_precision_weighted': array([0.87652458, 0.87194339, 0.87531651, 0.8727319 , 0.86597874]), 'test_recall_weighted': array([0.8775    , 0.87271429, 0.87607143, 0.8735    , 0.86671429])}
Full 0.2%
{'fit_time': array([421.31390333, 422.70350432, 426.59197259, 422.88736916,
       404.47163606]), 'score_time': array([47.49341798, 47.4181

In [20]:
for n in noises:
    for minor in minors:
        new_x_train = make_noise_data(X, n)
        scores = cross_validate(minor, new_x_train, Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
        print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
        print("Noise " + str(n))
        print(scores)

Samples: 0.1, features: 1
Noise 0.1
{'fit_time': array([47.13355684, 41.3211596 , 40.28899074, 40.85308266, 40.49138474]), 'score_time': array([57.35100174, 56.90003872, 57.0849297 , 56.27276921, 57.60540533]), 'test_accuracy': array([0.83871429, 0.83428571, 0.84192857, 0.83907143, 0.82864286]), 'test_neg_log_loss': array([-0.44221281, -0.45527807, -0.44261212, -0.45291123, -0.46276772]), 'test_neg_mean_squared_error': array([-2.18692857, -2.05435714, -2.002     , -2.05428571, -2.17128571]), 'test_roc_auc_ovr': array([0.98450359, 0.98417258, 0.98444783, 0.98402373, 0.98332779]), 'test_f1_weighted': array([0.83725635, 0.83282733, 0.83973607, 0.83713447, 0.82726621]), 'test_precision_weighted': array([0.83727799, 0.83233749, 0.83977594, 0.83682986, 0.82700069]), 'test_recall_weighted': array([0.83871429, 0.83428571, 0.84192857, 0.83907143, 0.82864286])}
Samples: 0.35, features: 1
Noise 0.1
{'fit_time': array([377.78099251, 366.9104805 , 364.55141234, 360.51012778,
       362.62381554]), 

Samples: 1, features: 0.5
Noise 0.2
{'fit_time': array([2449.06383133, 2714.39082479, 2426.28336358, 2421.42372942,
       2653.66901302]), 'score_time': array([312.70195961, 327.19612384, 316.39450645, 317.94203401,
       318.79683685]), 'test_accuracy': array([0.864     , 0.85907143, 0.86414286, 0.86142857, 0.85607143]), 'test_neg_log_loss': array([-0.38483655, -0.40624795, -0.39526215, -0.40303768, -0.40678339]), 'test_neg_mean_squared_error': array([-1.82642857, -1.83692857, -1.84771429, -1.83692857, -1.94935714]), 'test_roc_auc_ovr': array([0.98880637, 0.9881393 , 0.98796058, 0.9877812 , 0.98740941]), 'test_f1_weighted': array([0.86261984, 0.85815696, 0.86285125, 0.86031367, 0.85522885]), 'test_precision_weighted': array([0.86247068, 0.85795957, 0.86257403, 0.86033349, 0.85482821]), 'test_recall_weighted': array([0.864     , 0.85907143, 0.86414286, 0.86142857, 0.85607143])}
Samples: 1, features: 0.75
Noise 0.2
{'fit_time': array([2957.80772328, 2938.78233671, 2903.90740061, 2997.