In [1]:
from sklearn.datasets import fetch_20newsgroups
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.utils import resample
from sklearn import preprocessing
import statistics
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import TruncatedSVD
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score, log_loss, average_precision_score
from random import sample, randint, randrange
from copy import deepcopy
from sklearn.model_selection import cross_validate

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
newsgroups_train = fetch_20newsgroups(subset='train')
newsgroups_test = fetch_20newsgroups(subset='test')

In [4]:
vectorizer = TfidfVectorizer()
x_train = vectorizer.fit_transform(newsgroups_train.data)
y_train = newsgroups_train.target
x_test = vectorizer.transform(newsgroups_test.data)
y_test = newsgroups_test.target
classes_count = 20

Change dimensions to 500

In [5]:
svd = TruncatedSVD(n_components=500)
svd.fit(x_train)
svd_x_train = svd.transform(x_train)
svd_x_test = svd.transform(x_test)

In [6]:
svd_x_train = np.concatenate((svd_x_train, svd_x_test))
y_train = np.concatenate((y_train, y_test))

SVM on full data and full features

In [38]:
clf = SVC(probability=True, max_iter=200)
#clf.fit(svd_x_train, y_train)
#clf.fit(x_train_res, y_train_res)

In [16]:
m = svd_x_train.shape[0]

In [45]:
m3 = int(m*0.1)
svd_x_train_10 = svd_x_train[:m3]
y_train_10 = y_train[:m3]

In [14]:
y_train_noise_40 = make_noise_label(y_train, 0.4)

In [39]:
np.unique(y_train)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19])

In [15]:
print(y_train_noise_40.shape)
print(svd_x_train.shape)

(18846,)
(18846, 500)


In [39]:
scores = cross_validate(clf, svd_x_train, y_train, cv=5, scoring=('accuracy', 
                                                    'neg_log_loss', 
                                                    'neg_mean_squared_error', 
                                                    'roc_auc_ovr', 
                                                    'f1_weighted', 
                                                    'precision_weighted', 
                                                    'recall_weighted'))



In [40]:
scores

{'fit_time': array([453.452003  , 452.28610563, 452.48716164, 447.85763693,
        450.32840657]),
 'score_time': array([46.16600728, 46.40932512, 46.56393313, 45.55130672, 45.94004512]),
 'test_accuracy': array([0.86949602, 0.88007429, 0.88166622, 0.83443884, 0.83762271]),
 'test_neg_log_loss': array([-0.47028495, -0.45748055, -0.45810768, -0.56722823, -0.54911391]),
 'test_neg_mean_squared_error': array([-6.30371353, -5.90501459, -5.07084107, -9.62111966, -9.44229239]),
 'test_roc_auc_ovr': array([0.99272923, 0.99265828, 0.99277632, 0.98831115, 0.98951535]),
 'test_f1_weighted': array([0.87051746, 0.88002696, 0.88256631, 0.83389696, 0.83774838]),
 'test_precision_weighted': array([0.8735847 , 0.88092778, 0.8848363 , 0.83623437, 0.84251736]),
 'test_recall_weighted': array([0.86949602, 0.88007429, 0.88166622, 0.83443884, 0.83762271])}

In [16]:
scores = cross_validate(clf, svd_x_train, y_train_noise_40, cv=5, scoring=('accuracy', 
                                                    'neg_log_loss', 
                                                    'neg_mean_squared_error', 
                                                    'roc_auc_ovr', 
                                                    'f1_weighted', 
                                                    'precision_weighted', 
                                                    'recall_weighted'))

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [17]:
scores

{'fit_time': array([1244.50412655, 1570.05026317, 1476.82827353, 1522.17273259,
        1421.41617584]),
 'score_time': array([72.63192344, 82.25924134, 82.61139894, 88.18289876, 73.61998105]),
 'test_accuracy': array([0.5       , 0.50464314, 0.4887238 , 0.49376492, 0.47545768]),
 'test_neg_log_loss': array([-2.27421874, -2.24522513, -2.30234006, -2.23431492, -2.29031023]),
 'test_neg_mean_squared_error': array([-34.75251989, -34.11488458, -34.23985142, -35.36004245,
        -34.06924914]),
 'test_roc_auc_ovr': array([0.77234373, 0.77954066, 0.75769786, 0.7747673 , 0.76451044]),
 'test_f1_weighted': array([0.49584589, 0.49774046, 0.48334969, 0.48867876, 0.46880907]),
 'test_precision_weighted': array([0.49512082, 0.49420359, 0.48109027, 0.48748494, 0.46745597]),
 'test_recall_weighted': array([0.5       , 0.50464314, 0.4887238 , 0.49376492, 0.47545768])}

In [9]:
y_pred = clf.predict(svd_x_test)
#y_pred = clf.predict(x_test_res)

In [10]:
accuracy = accuracy_score(y_test, y_pred)
#accuracy = accuracy_score(y_test_res, y_pred)

In [11]:
accuracy #0.78956

0.7873074880509825

In [12]:
f_score = f1_score(y_test, y_pred, average='weighted')
#f_score = f1_score(y_test_res, y_pred, average='weighted')
f_score

0.7888587724079459

10 sub-classifiers

In [10]:
n_samples = [0.1, 0.35, 0.7]
n_features = [0.25, 0.5, 0.75]

samples_all = svd_x_train.shape[0]
features_all = svd_x_train.shape[1]

In [11]:
class MinorClassifiers:
    def __init__(self, samp, feat, voting, max_iter=-1):
        self.samp = samp
        self.feat = feat
        self.classifiers = []
        self.predictions = []
        self.cut_features = []
        self.voting = voting
        self.max_iter = max_iter
    
    def get_params(self, deep = False):
        return {
            'samp': self.samp,
            'feat': self.feat,
            'voting': self.voting,
            'max_iter': self.max_iter
        }
    
    def predict(self, X):
        for i in range(len(self.classifiers)):
            classifier = self.classifiers[i]
            f = self.cut_features[i]
            x_test = X[:,f]
            y_pred = classifier.predict(x_test)
            pred = classifier.predict_proba(x_test)
            self.predictions.append((y_pred, pred))
            
        if self.voting == 'average':
            return average_pred(self.predictions)
        
        if self.voting == 'majority':
            return majority_pred(self.predictions)
        
        if self.voting == 'borda':
            return borda_pred(self.predictions)
    
    def predict_proba(self, X):
        if self.voting == 'average':
            return average_pred_proba(self.predictions, len(self.classifiers))
        
        if self.voting == 'majority':
            return majority_pred_proba(self.predictions, len(self.classifiers))
        
        if self.voting == 'borda':
            return borda_pred_proba(self.predictions, len(self.classifiers))
        
    def fit(self, X, Y):
        feature_list = [n for n in range(500)]
        samples_all = X.shape[0]
        features_all = X.shape[1]
        
        for i in range(10):
            f = sample(feature_list, int(features_all * self.feat))
            self.cut_features.append(f)
            x_train_f = X[:,f]
                
            x_train_s, y_train_s = resample(x_train_f, Y, n_samples=int(self.samp * samples_all), replace=False, random_state=0)

            svm_clf = SVC(probability=True, max_iter=self.max_iter)
            svm_clf.fit(x_train_s, y_train_s)
                
            self.classifiers.append(svm_clf)

In [12]:
def average_pred(predictions):
    #predictions = list of tuples 
    m = len(predictions[0][0])
    all_results = [[0 for x in range(classes_count)] for y in range(m)] 
    results = [0] * m
    for (_, pred_proba) in predictions:
        for i in range(m):
            for j in range(classes_count):
                all_results[i][j] += pred_proba[i][j]
    for i in range(m):
        results[i] = all_results[i].index(max(all_results[i]))
    return results

def average_pred_proba(predictions, n_classifiers):
    m = len(predictions[0][0])
    results = [[0 for x in range(classes_count)] for y in range(m)]
    for (_, pred_proba) in predictions:
        for i in range(m):
            for j in range(classes_count):
                results[i][j] += pred_proba[i][j]
    for i in range(m):
        for j in range(classes_count):
            results[i][j] /= n_classifiers            
    return results

def majority_pred(predictions):
    m = len(predictions[0][0])
    results = [0] * m
    for i in range(m):
        all_results = [0 for x in range(classes_count)] 
        for (pred, _) in predictions:
            all_results[pred[i]] += 1
        results[i] = all_results.index(max(all_results))        
    return results

def majority_pred_proba(predictions, n_classifiers):
    m = len(predictions[0][0])
    results = [[0 for x in range(classes_count)] for y in range(m)]
    classifiers_votes_count = [0] * m
    majority_results = majority_pred(predictions)
    for classifier in range(0, n_classifiers):
        for i in range(m):
            voted_class = majority_results[i] 
            (pred, pred_proba) = predictions[classifier]
            if(pred[i] == voted_class):
                classifiers_votes_count[i] += 1
                for j in range(classes_count):
                    results[i][j] += pred_proba[i][j]
    for i in range(m):
        for j in range(classes_count):
            results[i][j] /= classifiers_votes_count[i]
    return results      
    

def borda_pred(predictions):
    m = len(predictions[0][0])
    all_results = [[0 for x in range(classes_count)] for y in range(m)] 
    results = [0] * m
    
    def get_final_borda_points(predictions):
        return np.argsort(np.argsort(predictions)).tolist()

    for (_, pred_proba) in predictions:
        for i in range(m):
            pred_proba[i] = get_final_borda_points(pred_proba[i])
    for (_, pred_proba) in predictions:
        for i in range(m):
            for j in range(classes_count):
                all_results[i][j] += pred_proba[i][j]
    for i in range(m):
        results[i] = all_results[i].index(max(all_results[i]))
    return results

def borda_pred_proba(predictions, n_classifiers):
    m = len(predictions[0][0])
    results = [[0 for x in range(classes_count)] for y in range(m)] 
    
    def get_final_borda_points(predictions):
        return np.argsort(np.argsort(predictions)).tolist()

    def get_points():
        sum = 0
        for i in range(classes_count):
            sum += i
        return sum * n_classifiers
    
    for (_, pred_proba) in predictions:
        for i in range(m):
            pred_proba[i] = get_final_borda_points(pred_proba[i])
    for (_, pred_proba) in predictions:
        for i in range(m):
            for j in range(classes_count):
                results[i][j] += pred_proba[i][j]
    for i in range(m):
        for j in range(classes_count):
            results[i][j] /= get_points()
        
    return results

In [13]:
n_samples = [0.1, 0.35, 0.7]
n_features = [0.25, 0.5, 0.75]
minors = []

In [12]:
for n in n_samples:
    minors.append(MinorClassifiers(n, 1, 'average'))
    
for n in n_features:
    minors.append(MinorClassifiers(1, n, 'average'))

In [13]:
for minor in minors:
    scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
    print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
    print(scores)

Samples: 0.1, features: 1
{'fit_time': array([148.91131902, 146.96860027, 145.08941054, 151.25387406,
       145.18135452]), 'score_time': array([79.84562659, 85.42474294, 78.85297894, 84.54803824, 79.84669614]), 'test_accuracy': array([0.74217507, 0.7460865 , 0.73839215, 0.70443088, 0.70018573]), 'test_neg_log_loss': array([-1.03252602, -1.00095979, -1.02576483, -1.0182766 , -0.98272496]), 'test_neg_mean_squared_error': array([-13.57824934, -12.46590608, -13.52241974, -16.59007694,
       -15.47704962]), 'test_roc_auc_ovr': array([0.96891407, 0.9726225 , 0.97246307, 0.96648893, 0.9706784 ]), 'test_f1_weighted': array([0.74581488, 0.74695513, 0.73995017, 0.70787399, 0.7053624 ]), 'test_precision_weighted': array([0.76351305, 0.7598769 , 0.75137709, 0.72947445, 0.73186469]), 'test_recall_weighted': array([0.74217507, 0.7460865 , 0.73839215, 0.70443088, 0.70018573])}
Samples: 0.35, features: 1
{'fit_time': array([1250.17741156, 1215.59635448, 1221.13495445, 1253.77801323,
       1255.189

Samples: 1, features: 0.5
{'fit_time': array([3869.10345006, 3843.47206473, 3612.73425889, 3794.17781258,
       3604.67475128]), 'score_time': array([314.03146791, 316.82461762, 308.79971576, 313.6022191 ,
       311.20412183]), 'test_accuracy': array([0.8734748 , 0.88087026, 0.88299284, 0.83072433, 0.838684  ]), 'test_neg_log_loss': array([-2.33324503, -2.32945829, -2.33060984, -2.34056075, -2.33796621]), 'test_neg_mean_squared_error': array([-6.32095491, -6.00106129, -5.92332184, -9.60785354, -9.45449721]), 'test_roc_auc_ovr': array([0.98608608, 0.98770734, 0.98684618, 0.98106492, 0.98216681]), 'test_f1_weighted': array([0.87408639, 0.88037591, 0.88350422, 0.83098397, 0.8388703 ]), 'test_precision_weighted': array([0.87629491, 0.88109681, 0.88530988, 0.83342773, 0.84099954]), 'test_recall_weighted': array([0.8734748 , 0.88087026, 0.88299284, 0.83072433, 0.838684  ])}
Samples: 1, features: 0.75
{'fit_time': array([5152.01870775, 5203.89438987, 5332.218431  , 5689.703866  ,
       530

3a

In [13]:
parts = [0.1, 0.25, 0.5, 0.75] ## for 1.0 already calculated

In [22]:
n_samples = [0.1, 0.35, 0.7]
n_features = [0.25, 0.5, 0.75]
minors = []

In [23]:
for n in n_samples:
    minors.append(MinorClassifiers(n, 1, 'average'))
    
for n in n_features:
    minors.append(MinorClassifiers(1, n, 'average'))

In [24]:
for p in parts:
    new_X = svd_x_train[:int(p*svd_x_train.shape[0])]
    new_Y = y_train[:int(p*y_train.shape[0])]
    #clf = SVC(probability=True)
    #scores = cross_validate(clf, new_X, new_Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
    #print("Full " + str(p) + "%")
    #print(scores)
    print("Data " + str(p))
    for minor in minors:
        scores = cross_validate(minor, new_X, new_Y, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
        print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
        print(scores)

Data 0.1
Samples: 0.7, features: 1
{'fit_time': array([76.41327071, 75.94379377, 76.88744974, 73.38574314, 73.52283382]), 'score_time': array([5.86673832, 5.99670053, 5.93612862, 5.71173048, 6.10567951]), 'test_accuracy': array([0.74535809, 0.73474801, 0.72413793, 0.75596817, 0.7287234 ]), 'test_neg_log_loss': array([-1.08154999, -1.05632682, -1.05962249, -1.10169275, -1.11996161]), 'test_neg_mean_squared_error': array([-13.16710875, -11.50928382, -14.54376658, -15.23607427,
       -13.1462766 ]), 'test_roc_auc_ovr': array([0.95871098, 0.96679956, 0.96866598, 0.96282195, 0.95810908]), 'test_f1_weighted': array([0.74606982, 0.73582449, 0.72886921, 0.75608787, 0.72404682]), 'test_precision_weighted': array([0.77087507, 0.75137784, 0.75825003, 0.77764468, 0.73074313]), 'test_recall_weighted': array([0.74535809, 0.73474801, 0.72413793, 0.75596817, 0.7287234 ])}
Data 0.25
Samples: 0.7, features: 1
{'fit_time': array([422.98991966, 442.01739025, 432.92568946, 456.66984463,
       456.4957156

In [8]:
def make_noise_label(labels, percent):
    labels_with_noise = deepcopy(labels)
    arr_size = labels.shape[0]
    indexes = [n for n in range(arr_size)]
    indexes_to_change = sample(indexes, int(arr_size * percent))
    
    for i in indexes_to_change:
        old_val = labels[i]
        new_val = randint(0, classes_count-1)
        while old_val == new_val:
            new_val = randint(0, classes_count-1)
        labels_with_noise[i] = new_val
        
    return labels_with_noise

In [9]:
def make_noise_data(data, percent):
    m = data.shape[0]
    n = data.shape[1]
    new_data = deepcopy(data)
    for i in range(m):
        for j in range(n):
            rand = randrange(-1, 2, 2) #random integer from {-1, 1}
            new_data[i][j] = (1 + rand * percent) * data[i][j]
    return new_data

3b

In [10]:
n_samples = [0.1, 0.35, 0.7]
n_features = [0.25, 0.5, 0.75]
minors = []

In [11]:
for n in n_samples:
    minors.append(MinorClassifiers(n, 1, 'average'))
    
for n in n_features:
    minors.append(MinorClassifiers(1, n, 'average'))

In [19]:
#0.1 samples, 1 features:
minor = MinorClassifiers(0.1, 1, 'average', 37)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)





Samples: 0.1, features: 1
{'fit_time': array([88.18258214, 88.79833651, 88.36883354, 88.65271568, 88.86816239]), 'score_time': array([70.55228567, 71.71939254, 71.96069217, 71.08581972, 71.06397247]), 'test_accuracy': array([0.71087533, 0.71212523, 0.71477846, 0.66887769, 0.66569382]), 'test_neg_log_loss': array([-1.21828004, -1.21973044, -1.21845791, -1.20584033, -1.20061204]), 'test_neg_mean_squared_error': array([-15.57453581, -16.97187583, -17.00530645, -20.36932873,
       -19.63544707]), 'test_roc_auc_ovr': array([0.96612881, 0.9690826 , 0.96927898, 0.96246453, 0.96610453]), 'test_f1_weighted': array([0.71100002, 0.71377589, 0.71618044, 0.6763783 , 0.67076775]), 'test_precision_weighted': array([0.74159652, 0.73126927, 0.73426947, 0.70696561, 0.71098357]), 'test_recall_weighted': array([0.71087533, 0.71212523, 0.71477846, 0.66887769, 0.66569382])}


In [25]:
#0.35 samples, 1 features:
minor = MinorClassifiers(0.35, 1, 'average', 67)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)





Samples: 0.35, features: 1
{'fit_time': array([679.35120106, 705.39891338, 708.47526312, 589.7783947 ,
       579.19102955]), 'score_time': array([227.29149961, 227.5492425 , 224.91980171, 183.9766016 ,
       184.35918283]), 'test_accuracy': array([0.78992042, 0.80047758, 0.79145662, 0.7402494 , 0.74422924]), 'test_neg_log_loss': array([-0.88485678, -0.85586388, -0.85271501, -0.90502966, -0.89169428]), 'test_neg_mean_squared_error': array([-12.55490716, -11.15494826, -10.81215176, -18.89944282,
       -15.38604404]), 'test_roc_auc_ovr': array([0.98385408, 0.98441024, 0.98445036, 0.97971468, 0.97960131]), 'test_f1_weighted': array([0.79238135, 0.80023554, 0.79171325, 0.74816903, 0.74822482]), 'test_precision_weighted': array([0.81270595, 0.81514821, 0.80767963, 0.78599126, 0.77601008]), 'test_recall_weighted': array([0.78992042, 0.80047758, 0.79145662, 0.7402494 , 0.74422924])}


In [26]:
#0.35 samples, 1 features:
minor = MinorClassifiers(0.35, 1, 'average', 60)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)





Samples: 0.35, features: 1
{'fit_time': array([518.06512356, 518.87955689, 518.64861751, 518.96508956,
       519.59036708]), 'score_time': array([178.6300385 , 180.76850486, 180.87402821, 177.56428266,
       177.83952188]), 'test_accuracy': array([0.77002653, 0.77155744, 0.77447599, 0.70628814, 0.72777925]), 'test_neg_log_loss': array([-0.95988191, -0.94984152, -0.94273586, -1.00934698, -0.96272312]), 'test_neg_mean_squared_error': array([-14.00159151, -13.59405678, -13.70098169, -21.5693818 ,
       -16.76306713]), 'test_roc_auc_ovr': array([0.98235683, 0.98274584, 0.98266208, 0.97624718, 0.97846987]), 'test_f1_weighted': array([0.77289082, 0.77181284, 0.77632399, 0.7149581 , 0.73529219]), 'test_precision_weighted': array([0.80222289, 0.79565381, 0.80073138, 0.7751644 , 0.77971673]), 'test_recall_weighted': array([0.77002653, 0.77155744, 0.77447599, 0.70628814, 0.72777925])}


In [27]:
#0.35 samples, 1 features:
minor = MinorClassifiers(0.35, 1, 'average', 75)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)





Samples: 0.35, features: 1
{'fit_time': array([632.91830993, 634.29189324, 634.90638423, 635.62602735,
       635.1425612 ]), 'score_time': array([190.37768078, 192.55618072, 193.05149484, 189.80399752,
       190.75811768]), 'test_accuracy': array([0.7994695 , 0.79013001, 0.80976386, 0.74820907, 0.75669939]), 'test_neg_log_loss': array([-0.80001471, -0.82590568, -0.78862186, -0.86273699, -0.82765372]), 'test_neg_mean_squared_error': array([-11.13554377, -11.35977713,  -8.74688246, -17.93871053,
       -13.99204033]), 'test_roc_auc_ovr': array([0.98500296, 0.98462691, 0.98533116, 0.97954997, 0.98117357]), 'test_f1_weighted': array([0.80195404, 0.78858954, 0.80851991, 0.7544222 , 0.75882522]), 'test_precision_weighted': array([0.81667017, 0.81013221, 0.81824026, 0.78636961, 0.78616635]), 'test_recall_weighted': array([0.7994695 , 0.79013001, 0.80976386, 0.74820907, 0.75669939])}


In [28]:
#0.35 samples, 1 features:
minor = MinorClassifiers(0.35, 1, 'average', 85)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)





Samples: 0.35, features: 1
{'fit_time': array([700.83008718, 702.66129637, 701.95059466, 703.01163602,
       703.37542272]), 'score_time': array([197.17552614, 198.69432044, 200.1085608 , 196.42561603,
       197.03189516]), 'test_accuracy': array([0.8005305 , 0.81321305, 0.81347838, 0.75829132, 0.76916954]), 'test_neg_log_loss': array([-0.7595405 , -0.73932792, -0.74086625, -0.80546789, -0.77781198]), 'test_neg_mean_squared_error': array([-10.93395225,  -9.61289467,  -8.56725922, -16.16290793,
       -12.51101088]), 'test_roc_auc_ovr': array([0.98561887, 0.98635704, 0.98650887, 0.98109126, 0.98246566]), 'test_f1_weighted': array([0.80153787, 0.8141087 , 0.81189537, 0.76319254, 0.76979594]), 'test_precision_weighted': array([0.81595977, 0.82398607, 0.82251979, 0.78686616, 0.7916651 ]), 'test_recall_weighted': array([0.8005305 , 0.81321305, 0.81347838, 0.75829132, 0.76916954])}


In [29]:
#0.35 samples, 1 features:
minor = MinorClassifiers(0.35, 1, 'average', 90)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)





Samples: 0.35, features: 1
{'fit_time': array([730.98269486, 733.2683804 , 732.77811146, 735.22843242,
       733.40605879]), 'score_time': array([199.80270076, 201.70700336, 202.86931109, 199.58590865,
       200.41521978]), 'test_accuracy': array([0.80848806, 0.81666224, 0.81347838, 0.76439374, 0.77951711]), 'test_neg_log_loss': array([-0.73191719, -0.72006375, -0.72047549, -0.77839905, -0.75115513]), 'test_neg_mean_squared_error': array([ -9.86233422, -10.20827806,  -8.25603608, -16.63916158,
       -12.01963386]), 'test_roc_auc_ovr': array([0.98622476, 0.98642358, 0.98656918, 0.9816965 , 0.98251651]), 'test_f1_weighted': array([0.80891516, 0.81744713, 0.81183254, 0.76912524, 0.77980218]), 'test_precision_weighted': array([0.82119132, 0.82729571, 0.8236648 , 0.79060025, 0.79121644]), 'test_recall_weighted': array([0.80848806, 0.81666224, 0.81347838, 0.76439374, 0.77951711])}


In [None]:
#1 samples, 0.25 features:

In [52]:
minor = MinorClassifiers(1, 0.25, 'average', 60)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.25
{'fit_time': array([654.58637667, 698.84309006, 700.00438643, 731.05498075,
       711.61136723]), 'score_time': array([112.4843576 , 112.6858356 , 111.73476195, 111.47778559,
       109.4632833 ]), 'test_accuracy': array([0.66047745, 0.67444946, 0.63279384, 0.57468825, 0.62722207]), 'test_neg_log_loss': array([-1.94342222, -1.89011763, -1.95764361, -1.85298489, -1.86447538]), 'test_neg_mean_squared_error': array([-35.75994695, -30.90368798, -38.58848501, -52.90846378,
       -42.2194216 ]), 'test_roc_auc_ovr': array([0.94682403, 0.95399201, 0.94238097, 0.9583144 , 0.95595992]), 'test_f1_weighted': array([0.67707226, 0.68641156, 0.64683677, 0.61929707, 0.65191537]), 'test_precision_weighted': array([0.73981103, 0.73814242, 0.7169479 , 0.76390258, 0.74220858]), 'test_recall_weighted': array([0.66047745, 0.67444946, 0.63279384, 0.57468825, 0.62722207])}


In [53]:
minor = MinorClassifiers(1, 0.25, 'average', 55)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.25
{'fit_time': array([642.57872462, 644.89121032, 635.11523223, 640.97578073,
       639.1190567 ]), 'score_time': array([106.27422523, 105.53670454, 106.3424921 , 103.87617683,
       101.62964511]), 'test_accuracy': array([0.64801061, 0.65269302, 0.64712125, 0.59166888, 0.57813744]), 'test_neg_log_loss': array([-1.94026266, -1.86607968, -1.9500675 , -1.90346203, -1.90262572]), 'test_neg_mean_squared_error': array([-36.533687  , -35.42425046, -36.88272751, -47.01830724,
       -49.16821438]), 'test_roc_auc_ovr': array([0.93832816, 0.94292947, 0.93874092, 0.95833737, 0.95443233]), 'test_f1_weighted': array([0.66748531, 0.67124546, 0.66203296, 0.62901143, 0.61986535]), 'test_precision_weighted': array([0.72585274, 0.72944149, 0.7220945 , 0.75451119, 0.75872466]), 'test_recall_weighted': array([0.64801061, 0.65269302, 0.64712125, 0.59166888, 0.57813744])}


In [54]:
minor = MinorClassifiers(1, 0.25, 'average', 50)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.25
{'fit_time': array([582.30747223, 580.84629846, 583.02487826, 584.32365823,
       583.20498991]), 'score_time': array([ 98.58690286, 100.34554267,  99.14629984,  96.76612782,
        98.49477625]), 'test_accuracy': array([0.60026525, 0.55054391, 0.59007694, 0.58503582, 0.6192624 ]), 'test_neg_log_loss': array([-2.07043357, -2.17885812, -2.13491448, -1.9662209 , -1.86354094]), 'test_neg_mean_squared_error': array([-45.36710875, -49.47731494, -45.78959936, -49.66038737,
       -41.85248076]), 'test_roc_auc_ovr': array([0.92124689, 0.92165401, 0.92310427, 0.95218121, 0.95136383]), 'test_f1_weighted': array([0.62847176, 0.58267646, 0.61421211, 0.6215638 , 0.64589991]), 'test_precision_weighted': array([0.71086743, 0.70827138, 0.705795  , 0.74311935, 0.72838391]), 'test_recall_weighted': array([0.60026525, 0.55054391, 0.59007694, 0.58503582, 0.6192624 ])}


In [69]:
minor = MinorClassifiers(1, 0.25, 'average', 48)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.25
{'fit_time': array([555.75384307, 559.56569028, 559.6182642 , 558.76342058,
       559.1712389 ]), 'score_time': array([97.22147369, 96.8145082 , 96.0690465 , 94.39497876, 92.1265347 ]), 'test_accuracy': array([0.58302387, 0.60440435, 0.57946405, 0.49907137, 0.54656408]), 'test_neg_log_loss': array([-2.09836408, -2.02466279, -2.1420835 , -2.20278194, -2.09449683]), 'test_neg_mean_squared_error': array([-46.16604775, -44.03210401, -49.33271425, -63.2894667 ,
       -54.99363226]), 'test_roc_auc_ovr': array([0.92454674, 0.93497974, 0.91770921, 0.93153972, 0.93208022]), 'test_f1_weighted': array([0.61046096, 0.63091561, 0.60634022, 0.5351385 , 0.58115379]), 'test_precision_weighted': array([0.70852733, 0.72774097, 0.70339212, 0.7148566 , 0.72475351]), 'test_recall_weighted': array([0.58302387, 0.60440435, 0.57946405, 0.49907137, 0.54656408])}


In [55]:
minor = MinorClassifiers(1, 0.25, 'average', 47)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.25
{'fit_time': array([550.9587326 , 547.16493917, 550.95220542, 547.30124092,
       545.87052321]), 'score_time': array([96.85340595, 95.28815508, 94.7938807 , 92.33134651, 92.91000605]), 'test_accuracy': array([0.60397878, 0.48421332, 0.6394269 , 0.56487132, 0.56274874]), 'test_neg_log_loss': array([-2.06884812, -2.23397127, -1.91563667, -2.00879462, -2.02722053]), 'test_neg_mean_squared_error': array([-42.50185676, -62.38630937, -38.40275935, -50.70522685,
       -52.59219952]), 'test_roc_auc_ovr': array([0.93296272, 0.91450992, 0.93820658, 0.95035271, 0.94210977]), 'test_f1_weighted': array([0.63066433, 0.52358982, 0.65748222, 0.60111785, 0.59853042]), 'test_precision_weighted': array([0.71709477, 0.72975527, 0.71503173, 0.73404158, 0.73852151]), 'test_recall_weighted': array([0.60397878, 0.48421332, 0.6394269 , 0.56487132, 0.56274874])}


In [56]:
minor = MinorClassifiers(1, 0.25, 'average', 45)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.25
{'fit_time': array([526.69562817, 527.19686818, 524.12829518, 523.70032048,
       529.32794762]), 'score_time': array([92.87156844, 92.69515038, 91.84836292, 90.16693616, 89.5096252 ]), 'test_accuracy': array([0.52360743, 0.56911648, 0.59485275, 0.50358185, 0.5266649 ]), 'test_neg_log_loss': array([-2.26275732, -2.1444612 , -2.06042571, -2.15137214, -2.11434423]), 'test_neg_mean_squared_error': array([-55.98381963, -49.6120987 , -48.44812948, -64.60042452,
       -56.8089679 ]), 'test_roc_auc_ovr': array([0.899797  , 0.91961648, 0.91856502, 0.94143668, 0.93307747]), 'test_f1_weighted': array([0.55058211, 0.60658039, 0.62027175, 0.54913919, 0.57110084]), 'test_precision_weighted': array([0.70934968, 0.71635015, 0.7053856 , 0.73587352, 0.72878509]), 'test_recall_weighted': array([0.52360743, 0.56911648, 0.59485275, 0.50358185, 0.5266649 ])}


In [57]:
minor = MinorClassifiers(1, 0.25, 'average', 43)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.25
{'fit_time': array([499.78054619, 503.19740653, 502.67313623, 506.04485321,
       506.23576498]), 'score_time': array([89.88369131, 88.80898714, 91.07708454, 86.74712849, 87.2606461 ]), 'test_accuracy': array([0.53262599, 0.5669939 , 0.54736004, 0.49880605, 0.60042452]), 'test_neg_log_loss': array([-2.25075629, -2.10284426, -2.220732  , -2.25508411, -2.01733956]), 'test_neg_mean_squared_error': array([-51.67161804, -50.83178562, -51.98248872, -63.26001592,
       -41.04006368]), 'test_roc_auc_ovr': array([0.90182782, 0.92384392, 0.90464234, 0.92132083, 0.94286294]), 'test_f1_weighted': array([0.56847601, 0.60774299, 0.57875177, 0.53514647, 0.62071452]), 'test_precision_weighted': array([0.69341573, 0.72549898, 0.68806057, 0.71719928, 0.70636989]), 'test_recall_weighted': array([0.53262599, 0.5669939 , 0.54736004, 0.49880605, 0.60042452])}


In [58]:
minor = MinorClassifiers(1, 0.25, 'average', 40)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.25
{'fit_time': array([467.63118362, 470.62720966, 471.17309952, 469.85717249,
       470.55045462]), 'score_time': array([84.82701302, 86.05775023, 85.59438467, 83.19988012, 82.33481598]), 'test_accuracy': array([0.48169761, 0.46564075, 0.56778986, 0.56646325, 0.54231892]), 'test_neg_log_loss': array([-2.36131213, -2.31934225, -2.18454197, -2.07250712, -2.1825836 ]), 'test_neg_mean_squared_error': array([-61.26074271, -63.59750597, -50.85566463, -50.76731228,
       -51.13239586]), 'test_roc_auc_ovr': array([0.86997185, 0.89414572, 0.90831853, 0.93920295, 0.92192539]), 'test_f1_weighted': array([0.52791254, 0.50586987, 0.59828362, 0.60329109, 0.57134658]), 'test_precision_weighted': array([0.6859626 , 0.70630083, 0.69906918, 0.72331383, 0.68582545]), 'test_recall_weighted': array([0.48169761, 0.46564075, 0.56778986, 0.56646325, 0.54231892])}


In [59]:
minor = MinorClassifiers(1, 0.25, 'average', 30)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.25
{'fit_time': array([357.1370914 , 352.34810019, 353.69484925, 356.97817588,
       356.283499  ]), 'score_time': array([68.62639594, 68.281394  , 69.30107832, 68.46598053, 66.4661963 ]), 'test_accuracy': array([0.45331565, 0.41973998, 0.44468029, 0.52693022, 0.45635447]), 'test_neg_log_loss': array([-2.44723106, -2.47852431, -2.46612485, -2.25295809, -2.39570416]), 'test_neg_mean_squared_error': array([-66.97559682, -69.13717166, -66.98885646, -51.78429292,
       -62.65853011]), 'test_roc_auc_ovr': array([0.85048003, 0.849924  , 0.8528582 , 0.91572659, 0.89471948]), 'test_f1_weighted': array([0.50434621, 0.47665296, 0.49328694, 0.55839729, 0.48772439]), 'test_precision_weighted': array([0.66786294, 0.65727294, 0.64977726, 0.67991793, 0.66225973]), 'test_recall_weighted': array([0.45331565, 0.41973998, 0.44468029, 0.52693022, 0.45635447])}


In [60]:
minor = MinorClassifiers(1, 0.25, 'average', 29)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.25
{'fit_time': array([345.14209461, 343.29029322, 340.17843246, 342.49989343,
       347.44546533]), 'score_time': array([66.95394945, 69.25588274, 68.50326657, 71.49008751, 66.68162203]), 'test_accuracy': array([0.48647215, 0.42425046, 0.4531706 , 0.50835765, 0.48819315]), 'test_neg_log_loss': array([-2.34398554, -2.46811871, -2.43864547, -2.31379033, -2.36789936]), 'test_neg_mean_squared_error': array([-60.16286472, -68.72433006, -64.89254444, -59.65375431,
       -57.27248607]), 'test_roc_auc_ovr': array([0.87060262, 0.86120753, 0.84822974, 0.9020852 , 0.89800368]), 'test_f1_weighted': array([0.53330292, 0.47520037, 0.49856031, 0.53818752, 0.51766989]), 'test_precision_weighted': array([0.67750834, 0.66913916, 0.64593037, 0.67946644, 0.66338337]), 'test_recall_weighted': array([0.48647215, 0.42425046, 0.4531706 , 0.50835765, 0.48819315])}


In [61]:
minor = MinorClassifiers(1, 0.25, 'average', 28)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.25
{'fit_time': array([335.63943529, 332.738415  , 339.48606753, 333.25363588,
       333.44349122]), 'score_time': array([66.02936602, 65.90219331, 66.77987671, 65.8019774 , 64.49197388]), 'test_accuracy': array([0.44854111, 0.37171664, 0.41602547, 0.46404882, 0.43114885]), 'test_neg_log_loss': array([-2.450547  , -2.50481542, -2.48972129, -2.42505484, -2.41978305]), 'test_neg_mean_squared_error': array([-63.50503979, -76.17829663, -70.23268772, -63.31122314,
       -69.75457681]), 'test_roc_auc_ovr': array([0.86284376, 0.84860148, 0.84037468, 0.89849713, 0.88779606]), 'test_f1_weighted': array([0.48742804, 0.41699899, 0.46267296, 0.49553046, 0.4760672 ]), 'test_precision_weighted': array([0.65158164, 0.66446863, 0.66066056, 0.68075653, 0.6807329 ]), 'test_recall_weighted': array([0.44854111, 0.37171664, 0.41602547, 0.46404882, 0.43114885])}


In [62]:
minor = MinorClassifiers(1, 0.25, 'average', 25)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.25
{'fit_time': array([297.49617386, 301.848176  , 301.38595152, 299.97268772,
       304.39932299]), 'score_time': array([62.00866652, 60.79987717, 59.43598175, 58.71092343, 59.69434333]), 'test_accuracy': array([0.44350133, 0.42531175, 0.42769966, 0.51578668, 0.37171664]), 'test_neg_log_loss': array([-2.4813801 , -2.50691908, -2.50924677, -2.39655483, -2.54613854]), 'test_neg_mean_squared_error': array([-64.86790451, -65.09180154, -68.11912974, -52.01008225,
       -68.30326347]), 'test_roc_auc_ovr': array([0.84878775, 0.84942283, 0.84115966, 0.90100161, 0.87016459]), 'test_f1_weighted': array([0.49106448, 0.47843171, 0.47476377, 0.54008773, 0.40421861]), 'test_precision_weighted': array([0.6495497 , 0.63673896, 0.64147212, 0.6590484 , 0.64369437]), 'test_recall_weighted': array([0.44350133, 0.42531175, 0.42769966, 0.51578668, 0.37171664])}


In [63]:
minor = MinorClassifiers(1, 0.25, 'average', 20)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.25
{'fit_time': array([243.47384119, 243.0263443 , 243.94656134, 244.46072268,
       244.72990632]), 'score_time': array([51.26190662, 52.10260653, 51.88672137, 49.54844069, 49.72197413]), 'test_accuracy': array([0.38992042, 0.33244893, 0.38524808, 0.36216503, 0.43512868]), 'test_neg_log_loss': array([-2.5943461 , -2.6592485 , -2.60081309, -2.63472731, -2.55735579]), 'test_neg_mean_squared_error': array([-70.61167109, -78.92119926, -74.45874237, -76.241974  ,
       -59.20350226]), 'test_roc_auc_ovr': array([0.81933477, 0.80970277, 0.81069138, 0.84146263, 0.84987894]), 'test_f1_weighted': array([0.43628498, 0.38432592, 0.43790579, 0.38698576, 0.46600625]), 'test_precision_weighted': array([0.60445463, 0.61266317, 0.6362021 , 0.61546603, 0.62534279]), 'test_recall_weighted': array([0.38992042, 0.33244893, 0.38524808, 0.36216503, 0.43512868])}


In [64]:
minor = MinorClassifiers(1, 0.25, 'average', 15)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.25
{'fit_time': array([185.07555819, 187.45811868, 185.26408243, 185.14032316,
       186.42736697]), 'score_time': array([43.36550331, 41.6615746 , 43.59050202, 40.94208789, 38.69251704]), 'test_accuracy': array([0.31591512, 0.31069249, 0.314407  , 0.40010613, 0.43751658]), 'test_neg_log_loss': array([-2.74733816, -2.73776724, -2.72266074, -2.6163938 , -2.56880796]), 'test_neg_mean_squared_error': array([-76.70106101, -81.03024675, -83.51154152, -64.2515256 ,
       -57.21544176]), 'test_roc_auc_ovr': array([0.77556153, 0.7770913 , 0.76726935, 0.84009568, 0.84048139]), 'test_f1_weighted': array([0.35736007, 0.36524782, 0.36613795, 0.41922828, 0.4656994 ]), 'test_precision_weighted': array([0.58173501, 0.58936945, 0.60335859, 0.58135693, 0.59914015]), 'test_recall_weighted': array([0.31591512, 0.31069249, 0.314407  , 0.40010613, 0.43751658])}


In [65]:
minor = MinorClassifiers(1, 0.25, 'average', 13)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.25
{'fit_time': array([164.39329934, 162.27841687, 167.68766785, 165.13241696,
       167.21511054]), 'score_time': array([38.00784135, 37.08639002, 38.05272174, 36.68836689, 34.82594943]), 'test_accuracy': array([0.33129973, 0.29211993, 0.29105864, 0.30405943, 0.37543115]), 'test_neg_log_loss': array([-2.73221991, -2.76460477, -2.81760962, -2.77883778, -2.68310712]), 'test_neg_mean_squared_error': array([-78.37453581, -81.47068188, -82.17723534, -72.67100027,
       -65.25364818]), 'test_roc_auc_ovr': array([0.77903982, 0.7545565 , 0.73509122, 0.78493461, 0.81122771]), 'test_f1_weighted': array([0.37550804, 0.33968268, 0.3307604 , 0.332058  , 0.40148497]), 'test_precision_weighted': array([0.57764553, 0.58462264, 0.57491473, 0.52942048, 0.59195944]), 'test_recall_weighted': array([0.33129973, 0.29211993, 0.29105864, 0.30405943, 0.37543115])}


In [66]:
minor = MinorClassifiers(1, 0.25, 'average', 12)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.25
{'fit_time': array([150.35279417, 151.83482432, 149.34844661, 149.7166121 ,
       152.18495393]), 'score_time': array([35.51947069, 34.93705416, 34.0464077 , 35.14549899, 33.21118879]), 'test_accuracy': array([0.30344828, 0.29822234, 0.30087556, 0.36747148, 0.3605731 ]), 'test_neg_log_loss': array([-2.78598069, -2.78960198, -2.75993999, -2.69237641, -2.72511052]), 'test_neg_mean_squared_error': array([-78.03687003, -81.98328469, -83.24542319, -67.67365349,
       -67.0047758 ]), 'test_roc_auc_ovr': array([0.75477398, 0.7509361 , 0.75598866, 0.81358275, 0.79356513]), 'test_f1_weighted': array([0.34681615, 0.35110219, 0.34856223, 0.40456793, 0.38128736]), 'test_precision_weighted': array([0.58502696, 0.5769988 , 0.62048477, 0.59624052, 0.56102763]), 'test_recall_weighted': array([0.30344828, 0.29822234, 0.30087556, 0.36747148, 0.3605731 ])}


In [67]:
minor = MinorClassifiers(1, 0.25, 'average', 11)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.25
{'fit_time': array([140.42635131, 138.60924411, 137.73853636, 138.64474726,
       139.11177826]), 'score_time': array([33.4910109 , 32.54544926, 34.12728167, 33.00320411, 30.95770407]), 'test_accuracy': array([0.30238727, 0.28760945, 0.29397718, 0.30936588, 0.34730698]), 'test_neg_log_loss': array([-2.79692628, -2.7691721 , -2.80372125, -2.77421529, -2.73233275]), 'test_neg_mean_squared_error': array([-79.86870027, -84.89227912, -85.22817724, -73.61740515,
       -69.54789069]), 'test_roc_auc_ovr': array([0.74643216, 0.77471652, 0.74252966, 0.77551786, 0.79049409]), 'test_f1_weighted': array([0.34448603, 0.33967723, 0.34292818, 0.34572867, 0.36935052]), 'test_precision_weighted': array([0.56801713, 0.56785464, 0.6147552 , 0.55133303, 0.5666707 ]), 'test_recall_weighted': array([0.30238727, 0.28760945, 0.29397718, 0.30936588, 0.34730698])}


In [68]:
minor = MinorClassifiers(1, 0.25, 'average', 10)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.25
{'fit_time': array([127.57225633, 129.75733089, 128.02307773, 130.41009378,
       129.57045221]), 'score_time': array([32.1061151 , 31.16666079, 32.06870055, 30.89287639, 27.64254808]), 'test_accuracy': array([0.30981432, 0.29716105, 0.274078  , 0.37861502, 0.33934731]), 'test_neg_log_loss': array([-2.76557566, -2.78490221, -2.82739577, -2.71475573, -2.7499931 ]), 'test_neg_mean_squared_error': array([-82.233687  , -81.71345184, -86.59007694, -64.43672062,
       -63.74475988]), 'test_roc_auc_ovr': array([0.75499207, 0.75647938, 0.72460849, 0.80028519, 0.79218731]), 'test_f1_weighted': array([0.34722277, 0.34607803, 0.31359362, 0.40641672, 0.3662373 ]), 'test_precision_weighted': array([0.55684378, 0.55579467, 0.54624522, 0.56233988, 0.56222084]), 'test_recall_weighted': array([0.30981432, 0.29716105, 0.274078  , 0.37861502, 0.33934731])}


In [31]:
minor = MinorClassifiers(1, 0.25, 'average', 7)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.25
{'fit_time': array([93.4171679 , 94.19812822, 94.21342778, 94.64838552, 94.93701982]), 'score_time': array([23.02239776, 22.53022766, 22.81746578, 22.51076388, 21.06616497]), 'test_accuracy': array([0.25437666, 0.27248607, 0.22340143, 0.35526665, 0.31732555]), 'test_neg_log_loss': array([-2.87377157, -2.85869649, -2.92257668, -2.80895912, -2.80733294]), 'test_neg_mean_squared_error': array([-82.2535809 , -86.04643141, -85.83682674, -59.73175909,
       -64.70522685]), 'test_roc_auc_ovr': array([0.71269201, 0.72177258, 0.67445088, 0.75987369, 0.76134147]), 'test_f1_weighted': array([0.27197697, 0.30821392, 0.23568709, 0.37066567, 0.3327849 ]), 'test_precision_weighted': array([0.46035447, 0.51500363, 0.49715844, 0.53948314, 0.48334562]), 'test_recall_weighted': array([0.25437666, 0.27248607, 0.22340143, 0.35526665, 0.31732555])}


In [None]:
#1 samples, 0.5 features:

In [14]:
minor = MinorClassifiers(1, 0.5, 'average', 60)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.5
{'fit_time': array([1144.51913548, 1269.81832838, 1263.22022533, 1256.05737472,
       1240.44988084]), 'score_time': array([211.19423342, 209.85910654, 207.89694619, 200.09684467,
       199.42312169]), 'test_accuracy': array([0.75092838, 0.74529053, 0.77394534, 0.73520828, 0.67391881]), 'test_neg_log_loss': array([-1.38366357, -1.439146  , -1.34668502, -1.21918287, -1.37010345]), 'test_neg_mean_squared_error': array([-21.66551724, -22.20535951, -20.03183868, -22.8920138 ,
       -31.20986999]), 'test_roc_auc_ovr': array([0.9749813 , 0.97200358, 0.97707467, 0.9804277 , 0.97542686]), 'test_f1_weighted': array([0.76009923, 0.7503496 , 0.77723532, 0.74870659, 0.70294983]), 'test_precision_weighted': array([0.79059651, 0.78035193, 0.8017245 , 0.80192005, 0.79723029]), 'test_recall_weighted': array([0.75092838, 0.74529053, 0.77394534, 0.73520828, 0.67391881])}


In [15]:
minor = MinorClassifiers(1, 0.5, 'average', 50)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.5
{'fit_time': array([1044.2687211 , 1040.26515889, 1041.57415724, 1042.31623793,
       1041.89934921]), 'score_time': array([183.3406446 , 181.64133978, 183.2573452 , 179.08049512,
       178.42865086]), 'test_accuracy': array([0.73421751, 0.72459538, 0.73175909, 0.65826479, 0.62616079]), 'test_neg_log_loss': array([-1.48145783, -1.55807806, -1.41340533, -1.50090212, -1.55875674]), 'test_neg_mean_squared_error': array([-22.6928382 , -25.35155214, -20.62032369, -32.97903953,
       -38.37145131]), 'test_roc_auc_ovr': array([0.97040658, 0.96526174, 0.97271853, 0.97107566, 0.96790718]), 'test_f1_weighted': array([0.74109874, 0.73185341, 0.73192907, 0.68531159, 0.65948155]), 'test_precision_weighted': array([0.77103594, 0.76543908, 0.7556795 , 0.77537577, 0.77205569]), 'test_recall_weighted': array([0.73421751, 0.72459538, 0.73175909, 0.65826479, 0.62616079])}


In [16]:
minor = MinorClassifiers(1, 0.5, 'average', 40)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.5
{'fit_time': array([838.02691913, 837.94887495, 837.80009604, 838.25180888,
       850.23423314]), 'score_time': array([155.27617145, 155.37984633, 154.62994385, 153.26897717,
       152.96733308]), 'test_accuracy': array([0.64748011, 0.65083577, 0.66038737, 0.63995755, 0.62324224]), 'test_neg_log_loss': array([-1.77503673, -1.72117414, -1.72888205, -1.62944843, -1.64356928]), 'test_neg_mean_squared_error': array([-38.52572944, -35.80976386, -33.83205094, -38.2265853 ,
       -38.92438313]), 'test_roc_auc_ovr': array([0.94415328, 0.9509358 , 0.95207146, 0.96380261, 0.96234787]), 'test_f1_weighted': array([0.66870964, 0.6734906 , 0.67287001, 0.66943923, 0.65945725]), 'test_precision_weighted': array([0.73466962, 0.74101705, 0.72250256, 0.75152081, 0.75586274]), 'test_recall_weighted': array([0.64748011, 0.65083577, 0.66038737, 0.63995755, 0.62324224])}


In [17]:
minor = MinorClassifiers(1, 0.5, 'average', 30)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.5
{'fit_time': array([633.9863584 , 634.42473125, 638.08588886, 634.94767046,
       637.3531487 ]), 'score_time': array([125.26790667, 125.49726915, 125.53473306, 122.38419485,
       123.00062919]), 'test_accuracy': array([0.59655172, 0.59246484, 0.58848501, 0.5492173 , 0.59379146]), 'test_neg_log_loss': array([-1.90122488, -1.91148248, -1.95839295, -1.93394028, -1.8043954 ]), 'test_neg_mean_squared_error': array([-44.48938992, -43.90846378, -48.4056779 , -52.99177501,
       -40.51764394]), 'test_roc_auc_ovr': array([0.92505652, 0.93132333, 0.91308393, 0.94424849, 0.94622469]), 'test_f1_weighted': array([0.62327918, 0.6298884 , 0.62105363, 0.59637197, 0.62669502]), 'test_precision_weighted': array([0.69840662, 0.72228934, 0.70891509, 0.73124812, 0.72065215]), 'test_recall_weighted': array([0.59655172, 0.59246484, 0.58848501, 0.5492173 , 0.59379146])}


In [24]:
minor = MinorClassifiers(1, 0.5, 'average', 26)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.5
{'fit_time': array([568.19140482, 586.37390924, 543.64614224, 547.91092396,
       555.43582177]), 'score_time': array([119.62154579, 111.0768137 , 108.85527968, 109.19729185,
       106.73456955]), 'test_accuracy': array([0.56233422, 0.53091006, 0.53011409, 0.57203502, 0.46670204]), 'test_neg_log_loss': array([-2.01439551, -2.10124608, -2.11982117, -1.94219811, -2.13217459]), 'test_neg_mean_squared_error': array([-47.75278515, -53.95622181, -54.98142743, -45.92730167,
       -63.83788803]), 'test_roc_auc_ovr': array([0.91404782, 0.9061534 , 0.89225259, 0.94186771, 0.91659064]), 'test_f1_weighted': array([0.59710848, 0.57174781, 0.55837735, 0.60626789, 0.52083042]), 'test_precision_weighted': array([0.68701621, 0.69149985, 0.6638014 , 0.71241671, 0.70157132]), 'test_recall_weighted': array([0.56233422, 0.53091006, 0.53011409, 0.57203502, 0.46670204])}


In [23]:
minor = MinorClassifiers(1, 0.5, 'average', 25)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.5
{'fit_time': array([523.83767247, 530.06394958, 527.52759123, 526.5650537 ,
       526.38066554]), 'score_time': array([107.04924083, 108.21573639, 107.67451191, 103.65020537,
       103.35913873]), 'test_accuracy': array([0.51167109, 0.51552136, 0.52878748, 0.50596975, 0.4863359 ]), 'test_neg_log_loss': array([-2.17556276, -2.10661232, -2.0813807 , -2.08934367, -2.14936271]), 'test_neg_mean_squared_error': array([-55.26339523, -55.70469621, -55.44945609, -59.34624569,
       -62.9501194 ]), 'test_roc_auc_ovr': array([0.88709836, 0.90270909, 0.90200267, 0.92932802, 0.91428256]), 'test_f1_weighted': array([0.54981647, 0.56483924, 0.5709089 , 0.55487018, 0.53365735]), 'test_precision_weighted': array([0.66403531, 0.6871574 , 0.68150203, 0.7082915 , 0.69579219]), 'test_recall_weighted': array([0.51167109, 0.51552136, 0.52878748, 0.50596975, 0.4863359 ])}


In [18]:
minor = MinorClassifiers(1, 0.5, 'average', 20)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.5
{'fit_time': array([430.71590805, 432.09831929, 427.86892366, 433.28405523,
       427.37574673]), 'score_time': array([91.99243164, 91.34861422, 90.26054716, 88.93709803, 87.05512595]), 'test_accuracy': array([0.45437666, 0.45290528, 0.46882462, 0.42398514, 0.47625365]), 'test_neg_log_loss': array([-2.36626467, -2.31275286, -2.28573567, -2.30626271, -2.22556458]), 'test_neg_mean_squared_error': array([-63.24217507, -65.47545768, -64.63597771, -65.15521358,
       -59.68187848]), 'test_roc_auc_ovr': array([0.85233154, 0.87049967, 0.86608022, 0.89674724, 0.90152849]), 'test_f1_weighted': array([0.49946235, 0.51105574, 0.51777339, 0.4645238 , 0.52226306]), 'test_precision_weighted': array([0.63726175, 0.67427574, 0.65727094, 0.65099412, 0.673387  ]), 'test_recall_weighted': array([0.45437666, 0.45290528, 0.46882462, 0.42398514, 0.47625365])}


In [34]:
minor = MinorClassifiers(1, 0.5, 'average', 16)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.5
{'fit_time': array([344.37322378, 341.1944437 , 342.74176288, 337.52514148,
       342.78057027]), 'score_time': array([73.14746165, 73.53495121, 73.35715365, 70.51329708, 70.69205379]), 'test_accuracy': array([0.41458886, 0.38604404, 0.40116742, 0.45529318, 0.45768108]), 'test_neg_log_loss': array([-2.46042492, -2.47081169, -2.48364511, -2.32623214, -2.32940396]), 'test_neg_mean_squared_error': array([-69.21564987, -76.18280711, -73.80657999, -62.93844521,
       -58.99124436]), 'test_roc_auc_ovr': array([0.83219034, 0.837032  , 0.82542127, 0.89066812, 0.8854798 ]), 'test_f1_weighted': array([0.46773261, 0.44584477, 0.45399347, 0.49362314, 0.49256735]), 'test_precision_weighted': array([0.63433521, 0.63957447, 0.62738081, 0.65653321, 0.63158795]), 'test_recall_weighted': array([0.41458886, 0.38604404, 0.40116742, 0.45529318, 0.45768108])}


In [19]:
minor = MinorClassifiers(1, 0.5, 'average', 15)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.5
{'fit_time': array([326.70945597, 328.47471452, 326.34596229, 326.71032429,
       327.04601884]), 'score_time': array([70.79618835, 71.21680522, 71.14517403, 69.11581302, 65.8418386 ]), 'test_accuracy': array([0.3867374 , 0.38710533, 0.40196339, 0.49217299, 0.47784558]), 'test_neg_log_loss': array([-2.53810166, -2.47862545, -2.45818872, -2.34341286, -2.30864039]), 'test_neg_mean_squared_error': array([-75.49363395, -75.53409392, -72.07031043, -55.28601751,
       -54.17458212]), 'test_roc_auc_ovr': array([0.81146765, 0.83137794, 0.83139789, 0.88372071, 0.88146139]), 'test_f1_weighted': array([0.43869523, 0.44499755, 0.45288991, 0.525556  , 0.51345627]), 'test_precision_weighted': array([0.62449705, 0.62911972, 0.62526292, 0.65123637, 0.62964143]), 'test_recall_weighted': array([0.3867374 , 0.38710533, 0.40196339, 0.49217299, 0.47784558])}


In [20]:
minor = MinorClassifiers(1, 0.5, 'average', 10)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.5
{'fit_time': array([226.11112976, 225.8259151 , 224.49206901, 224.9707346 ,
       225.79276156]), 'score_time': array([51.68074703, 52.54196978, 53.29293346, 50.91287112, 48.63521433]), 'test_accuracy': array([0.3066313 , 0.30167153, 0.32926506, 0.40275935, 0.37410454]), 'test_neg_log_loss': array([-2.73903816, -2.7107215 , -2.67694941, -2.55218217, -2.59221587]), 'test_neg_mean_squared_error': array([-81.8198939 , -84.2846909 , -83.62669143, -68.62695675,
       -71.05969753]), 'test_roc_auc_ovr': array([0.74987175, 0.76915836, 0.76872375, 0.83451344, 0.8181311 ]), 'test_f1_weighted': array([0.35381774, 0.3581975 , 0.37944302, 0.44759478, 0.40318974]), 'test_precision_weighted': array([0.56969301, 0.57485565, 0.58398694, 0.61060675, 0.58599073]), 'test_recall_weighted': array([0.3066313 , 0.30167153, 0.32926506, 0.40275935, 0.37410454])}


In [35]:
minor = MinorClassifiers(1, 0.5, 'average', 8)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.5
{'fit_time': array([179.82750559, 179.64545727, 183.09334469, 181.48761129,
       179.73437428]), 'score_time': array([42.30403495, 43.29808927, 42.61553288, 40.9690814 , 39.6031239 ]), 'test_accuracy': array([0.30450928, 0.29291589, 0.26956752, 0.40063677, 0.38869727]), 'test_neg_log_loss': array([-2.75712741, -2.75287657, -2.78510721, -2.59472639, -2.62628442]), 'test_neg_mean_squared_error': array([-80.79920424, -84.22101353, -86.82196869, -60.27858849,
       -64.47598833]), 'test_roc_auc_ovr': array([0.74813251, 0.75229819, 0.74027378, 0.8242897 , 0.81024702]), 'test_f1_weighted': array([0.33371906, 0.33715083, 0.31239475, 0.43189899, 0.41319022]), 'test_precision_weighted': array([0.53747322, 0.54262175, 0.55384802, 0.57525002, 0.56775877]), 'test_recall_weighted': array([0.30450928, 0.29291589, 0.26956752, 0.40063677, 0.38869727])}


In [22]:
minor = MinorClassifiers(1, 0.5, 'average', 7)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.5
{'fit_time': array([144.9206686 , 166.44730639, 166.02456474, 165.11131692,
       163.69526696]), 'score_time': array([39.73367906, 39.67960811, 41.04520416, 38.35888076, 37.21409965]), 'test_accuracy': array([0.29469496, 0.26956752, 0.26956752, 0.35791987, 0.36481825]), 'test_neg_log_loss': array([-2.77523933, -2.77462647, -2.84233997, -2.69538992, -2.66544632]), 'test_neg_mean_squared_error': array([-83.50026525, -87.75935261, -86.83841868, -64.85062351,
       -68.06155479]), 'test_roc_auc_ovr': array([0.7398455 , 0.74526962, 0.70472902, 0.79057637, 0.79033756]), 'test_f1_weighted': array([0.32864982, 0.31694594, 0.29976518, 0.38750127, 0.38391606]), 'test_precision_weighted': array([0.54722527, 0.52857479, 0.52732321, 0.55805479, 0.52695154]), 'test_recall_weighted': array([0.29469496, 0.26956752, 0.26956752, 0.35791987, 0.36481825])}


In [21]:
minor = MinorClassifiers(1, 0.5, 'average', 5)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.5
{'fit_time': array([124.74233699, 125.53978062, 125.0589819 , 124.47112346,
       125.220541  ]), 'score_time': array([31.10384727, 30.41570377, 30.91230941, 30.88739133, 28.72564602]), 'test_accuracy': array([0.24217507, 0.24038206, 0.23374901, 0.31732555, 0.29822234]), 'test_neg_log_loss': array([-2.84723597, -2.84477822, -2.88289271, -2.75462895, -2.80419528]), 'test_neg_mean_squared_error': array([-78.58275862, -89.99548952, -90.70496153, -68.02042982,
       -73.32050942]), 'test_roc_auc_ovr': array([0.71107852, 0.71907754, 0.69418005, 0.75087103, 0.7304167 ]), 'test_f1_weighted': array([0.27783853, 0.27036311, 0.25608045, 0.34873363, 0.3141416 ]), 'test_precision_weighted': array([0.50027735, 0.50000302, 0.49370853, 0.53404573, 0.48422591]), 'test_recall_weighted': array([0.24217507, 0.24038206, 0.23374901, 0.31732555, 0.29822234])}


In [12]:
#1 samples, 0.75 features:

In [13]:
minor = MinorClassifiers(1, 0.75, 'average', 90)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)





Samples: 1, features: 0.75
{'fit_time': array([2485.14245701, 2758.41878104, 2711.00652432, 2683.37046051,
       2689.85736632]), 'score_time': array([394.5682354 , 382.81458092, 375.632231  , 372.23697329,
       368.3742907 ]), 'test_accuracy': array([0.83209549, 0.82647917, 0.8161316 , 0.77500663, 0.77155744]), 'test_neg_log_loss': array([-0.86158139, -0.86407565, -0.85664187, -0.8522871 , -0.90533991]), 'test_neg_mean_squared_error': array([-10.41432361, -10.94560891, -10.87980897, -17.62058902,
       -16.99310162]), 'test_roc_auc_ovr': array([0.98883854, 0.9885634 , 0.98815734, 0.98549664, 0.98582243]), 'test_f1_weighted': array([0.83422692, 0.82658692, 0.81608649, 0.78283122, 0.78042286]), 'test_precision_weighted': array([0.84998284, 0.84351768, 0.83656139, 0.82085289, 0.82533265]), 'test_recall_weighted': array([0.83209549, 0.82647917, 0.8161316 , 0.77500663, 0.77155744])}


In [14]:
minor = MinorClassifiers(1, 0.75, 'average', 60)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)





Samples: 1, features: 0.75
{'fit_time': array([1798.53538442, 1795.19978571, 1792.9759295 , 1775.52126813,
       1770.79345608]), 'score_time': array([304.51403308, 302.0528996 , 303.0304215 , 292.75853252,
       292.95642781]), 'test_accuracy': array([0.77559682, 0.77925179, 0.7959671 , 0.71849297, 0.72433006]), 'test_neg_log_loss': array([-1.17758077, -1.14233697, -1.1032391 , -1.10648095, -1.11728439]), 'test_neg_mean_squared_error': array([-17.29124668, -16.16954099, -13.27009817, -23.20960467,
       -22.81958079]), 'test_roc_auc_ovr': array([0.98157503, 0.98105266, 0.9824414 , 0.98008731, 0.9801664 ]), 'test_f1_weighted': array([0.78280657, 0.78250286, 0.79707945, 0.73499423, 0.74104467]), 'test_precision_weighted': array([0.81143356, 0.803716  , 0.81320486, 0.79746926, 0.80725358]), 'test_recall_weighted': array([0.77559682, 0.77925179, 0.7959671 , 0.71849297, 0.72433006])}


In [20]:
minor = MinorClassifiers(1, 0.75, 'average', 50)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.75
{'fit_time': array([1486.54743075, 1495.73224521, 1462.87799549, 1450.93219233,
       1448.83346605]), 'score_time': array([270.70446253, 265.74816632, 259.24521017, 253.44904351,
       251.96427417]), 'test_accuracy': array([0.76366048, 0.75908729, 0.76253648, 0.70416556, 0.69010348]), 'test_neg_log_loss': array([-1.25750628, -1.27983697, -1.27009179, -1.24468553, -1.27804606]), 'test_neg_mean_squared_error': array([-17.31803714, -18.13982489, -17.96258955, -25.83788803,
       -28.10188379]), 'test_roc_auc_ovr': array([0.97631771, 0.97516494, 0.97572528, 0.97713241, 0.97558291]), 'test_f1_weighted': array([0.76706233, 0.76204539, 0.76572782, 0.72114488, 0.71397396]), 'test_precision_weighted': array([0.78430217, 0.78166589, 0.78380924, 0.78609726, 0.79884003]), 'test_recall_weighted': array([0.76366048, 0.75908729, 0.76253648, 0.70416556, 0.69010348])}


In [21]:
minor = MinorClassifiers(1, 0.75, 'average', 30)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.75
{'fit_time': array([885.26541567, 889.15791965, 884.92542577, 889.24983311,
       880.40921855]), 'score_time': array([178.69811726, 177.98836684, 178.55145884, 171.64732289,
       173.1729157 ]), 'test_accuracy': array([0.6403183 , 0.61581321, 0.62695675, 0.59538339, 0.62191563]), 'test_neg_log_loss': array([-1.69700025, -1.74256022, -1.72107563, -1.69499423, -1.61304476]), 'test_neg_mean_squared_error': array([-36.80901857, -40.05757495, -39.10002653, -41.71106394,
       -36.23985142]), 'test_roc_auc_ovr': array([0.93922891, 0.93801613, 0.93805882, 0.95460766, 0.95775476]), 'test_f1_weighted': array([0.65798986, 0.64189717, 0.64471948, 0.63211537, 0.65030909]), 'test_precision_weighted': array([0.71028783, 0.71324241, 0.70364226, 0.73442613, 0.73210802]), 'test_recall_weighted': array([0.6403183 , 0.61581321, 0.62695675, 0.59538339, 0.62191563])}


In [22]:
minor = MinorClassifiers(1, 0.75, 'average', 20)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.75
{'fit_time': array([605.34411168, 601.07349539, 603.72115779, 606.40535784,
       601.88770676]), 'score_time': array([129.9835999 , 128.09197903, 128.76151109, 123.91972136,
       121.82814431]), 'test_accuracy': array([0.5071618 , 0.47121252, 0.49907137, 0.51339878, 0.50782701]), 'test_neg_log_loss': array([-2.10747964, -2.1732108 , -2.1535978 , -2.03975202, -2.01506029]), 'test_neg_mean_squared_error': array([-55.82281167, -61.1992571 , -58.80790661, -52.22791191,
       -52.93844521]), 'test_roc_auc_ovr': array([0.88685735, 0.87970082, 0.87715391, 0.92408112, 0.92029032]), 'test_f1_weighted': array([0.54406733, 0.51699539, 0.53567016, 0.55393252, 0.54884971]), 'test_precision_weighted': array([0.65019913, 0.65703101, 0.6596353 , 0.68796526, 0.67665781]), 'test_recall_weighted': array([0.5071618 , 0.47121252, 0.49907137, 0.51339878, 0.50782701])}


In [30]:
minor = MinorClassifiers(1, 0.75, 'average', 18)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.75
{'fit_time': array([543.2848134 , 542.75682473, 545.0330205 , 543.90491152,
       544.73181605]), 'score_time': array([116.60086894, 114.68820333, 116.33433199, 115.2073648 ,
       109.53170824]), 'test_accuracy': array([0.47427056, 0.4460069 , 0.46166092, 0.49986734, 0.46908994]), 'test_neg_log_loss': array([-2.1958687 , -2.24638702, -2.24491859, -2.09166133, -2.16360361]), 'test_neg_mean_squared_error': array([-60.82281167, -66.05253383, -62.43565933, -55.53356328,
       -60.0628814 ]), 'test_roc_auc_ovr': array([0.87283345, 0.86794023, 0.86200725, 0.91351472, 0.90002562]), 'test_f1_weighted': array([0.52055165, 0.49776334, 0.50178686, 0.54129777, 0.51213135]), 'test_precision_weighted': array([0.65198465, 0.64723404, 0.64109106, 0.67440219, 0.65932564]), 'test_recall_weighted': array([0.47427056, 0.4460069 , 0.46166092, 0.49986734, 0.46908994])}


In [27]:
minor = MinorClassifiers(1, 0.75, 'average', 17)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.75
{'fit_time': array([520.56329083, 516.32255888, 512.81572843, 515.9530642 ,
       512.67361331]), 'score_time': array([111.91073942, 110.92907143, 111.13916683, 106.63560247,
       105.16092849]), 'test_accuracy': array([0.45437666, 0.42531175, 0.45104802, 0.47253914, 0.45608915]), 'test_neg_log_loss': array([-2.2771376 , -2.31048711, -2.29229084, -2.18134571, -2.20043312]), 'test_neg_mean_squared_error': array([-63.84403183, -68.31122314, -65.51737862, -59.57123906,
       -61.12841603]), 'test_roc_auc_ovr': array([0.85833698, 0.85531957, 0.85682158, 0.90094179, 0.89186044]), 'test_f1_weighted': array([0.49636403, 0.4826579 , 0.49590818, 0.51610359, 0.50086413]), 'test_precision_weighted': array([0.62881118, 0.65241621, 0.63746425, 0.66810244, 0.65034033]), 'test_recall_weighted': array([0.45437666, 0.42531175, 0.45104802, 0.47253914, 0.45608915])}


In [25]:
minor = MinorClassifiers(1, 0.75, 'average', 15)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.75
{'fit_time': array([458.18233275, 458.94922185, 455.71453381, 458.29120445,
       456.0075171 ]), 'score_time': array([101.73886085,  99.98978758,  99.31031156,  97.96914768,
        94.51711583]), 'test_accuracy': array([0.42546419, 0.40169806, 0.41390289, 0.45900769, 0.43141417]), 'test_neg_log_loss': array([-2.33640001, -2.38923984, -2.38524626, -2.26303343, -2.30677453]), 'test_neg_mean_squared_error': array([-68.5204244 , -70.16715309, -70.62589546, -61.31042717,
       -63.58105598]), 'test_roc_auc_ovr': array([0.84773667, 0.84433887, 0.83743299, 0.88821421, 0.87751997]), 'test_f1_weighted': array([0.47264245, 0.45858651, 0.46193671, 0.5006781 , 0.4730937 ]), 'test_precision_weighted': array([0.62460327, 0.63162272, 0.62337007, 0.65809789, 0.63443998]), 'test_recall_weighted': array([0.42546419, 0.40169806, 0.41390289, 0.45900769, 0.43141417])}


In [29]:
minor = MinorClassifiers(1, 0.75, 'average', 11)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.75
{'fit_time': array([340.96674538, 343.14382362, 342.93684602, 344.92226934,
       346.89476562]), 'score_time': array([77.52025342, 76.50937796, 81.18926787, 75.18470097, 70.77974772]), 'test_accuracy': array([0.3535809 , 0.3451844 , 0.33749005, 0.42478111, 0.4258424 ]), 'test_neg_log_loss': array([-2.53953884, -2.54133517, -2.59508552, -2.42667829, -2.40501375]), 'test_neg_mean_squared_error': array([-76.03554377, -79.36720616, -79.54629875, -64.91430088,
       -61.50941894]), 'test_roc_auc_ovr': array([0.80265122, 0.80967978, 0.78887726, 0.85483641, 0.85509092]), 'test_f1_weighted': array([0.40063736, 0.40347097, 0.3874721 , 0.46607858, 0.46576514]), 'test_precision_weighted': array([0.58141617, 0.58810097, 0.5817773 , 0.62356358, 0.61107113]), 'test_recall_weighted': array([0.3535809 , 0.3451844 , 0.33749005, 0.42478111, 0.4258424 ])}


In [23]:
minor = MinorClassifiers(1, 0.75, 'average', 10)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.75
{'fit_time': array([313.81512237, 315.01926184, 315.6699419 , 316.18299389,
       313.71451807]), 'score_time': array([71.72010326, 71.46488166, 72.00461149, 68.82487416, 65.875911  ]), 'test_accuracy': array([0.33156499, 0.33351021, 0.32475458, 0.4234545 , 0.39877952]), 'test_neg_log_loss': array([-2.60271595, -2.58509251, -2.64581455, -2.43916448, -2.45705621]), 'test_neg_mean_squared_error': array([-80.46525199, -81.33297957, -83.05226851, -63.88511542,
       -65.80366145]), 'test_roc_auc_ovr': array([0.78987905, 0.79998341, 0.77094091, 0.85266154, 0.83826409]), 'test_f1_weighted': array([0.3775018 , 0.39111919, 0.37290265, 0.46704879, 0.43357864]), 'test_precision_weighted': array([0.56420576, 0.58946058, 0.56980663, 0.61955335, 0.58746608]), 'test_recall_weighted': array([0.33156499, 0.33351021, 0.32475458, 0.4234545 , 0.39877952])}


In [26]:
minor = MinorClassifiers(1, 0.75, 'average', 7)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.75
{'fit_time': array([231.5862422 , 229.10810566, 230.60734105, 230.28611374,
       233.58469415]), 'score_time': array([52.39711094, 54.05704069, 54.2075491 , 53.4918735 , 52.97313595]), 'test_accuracy': array([0.28885942, 0.26983285, 0.27620058, 0.37357389, 0.36004245]), 'test_neg_log_loss': array([-2.72753288, -2.75358123, -2.7535398 , -2.6408754 , -2.61915646]), 'test_neg_mean_squared_error': array([-83.47108753, -86.71106394, -89.14141682, -69.43486336,
       -68.28548687]), 'test_roc_auc_ovr': array([0.75522442, 0.75151091, 0.7441868 , 0.79336908, 0.79429558]), 'test_f1_weighted': array([0.32590375, 0.31780174, 0.31691427, 0.41084908, 0.38053513]), 'test_precision_weighted': array([0.53483045, 0.53308999, 0.53195986, 0.56378041, 0.54676665]), 'test_recall_weighted': array([0.28885942, 0.26983285, 0.27620058, 0.37357389, 0.36004245])}


In [24]:
minor = MinorClassifiers(1, 0.75, 'average', 5)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.75
{'fit_time': array([174.13259482, 172.68857598, 173.33416319, 172.88527989,
       173.12973428]), 'score_time': array([43.16913557, 42.38694286, 41.9971118 , 41.39183974, 37.69438601]), 'test_accuracy': array([0.26763926, 0.23003449, 0.23852481, 0.32077474, 0.33589812]), 'test_neg_log_loss': array([-2.81462134, -2.83578053, -2.85630931, -2.74322816, -2.720186  ]), 'test_neg_mean_squared_error': array([-81.5933687 , -91.75510746, -91.68108252, -70.47068188,
       -67.91005572]), 'test_roc_auc_ovr': array([0.72636927, 0.72006857, 0.70560783, 0.74997932, 0.76318908]), 'test_f1_weighted': array([0.28466559, 0.2621218 , 0.26443505, 0.34843197, 0.34829801]), 'test_precision_weighted': array([0.49493684, 0.49677137, 0.49324667, 0.51578974, 0.48598147]), 'test_recall_weighted': array([0.26763926, 0.23003449, 0.23852481, 0.32077474, 0.33589812])}


In [28]:
minor = MinorClassifiers(1, 0.75, 'average', 4)
scores = cross_validate(minor, svd_x_train, y_train, cv=5, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
print(scores)

Samples: 1, features: 0.75
{'fit_time': array([144.6208396 , 145.08518982, 145.80395412, 144.94858265,
       144.84199882]), 'score_time': array([35.57569957, 36.61703134, 34.7765727 , 34.84597135, 31.93229795]), 'test_accuracy': array([0.20557029, 0.21172725, 0.2194216 , 0.2799151 , 0.29981427]), 'test_neg_log_loss': array([-2.87825814, -2.88095561, -2.91086788, -2.78753094, -2.79013936]), 'test_neg_mean_squared_error': array([-81.30344828, -91.65348899, -91.26850624, -68.51499071,
       -71.08224993]), 'test_roc_auc_ovr': array([0.69667086, 0.69754482, 0.67800833, 0.72023747, 0.73043098]), 'test_f1_weighted': array([0.24324792, 0.23637009, 0.2317789 , 0.3001158 , 0.30691546]), 'test_precision_weighted': array([0.48582467, 0.46314854, 0.4409292 , 0.48916202, 0.46735043]), 'test_recall_weighted': array([0.20557029, 0.21172725, 0.2194216 , 0.2799151 , 0.29981427])}


4

In [10]:
noises = [0.05, 0.1, 0.2, 0.4]

In [11]:
n_samples = [0.1, 0.35, 0.7]
n_features = [0.25, 0.5, 0.75]
minors = []

In [12]:
for n in n_samples:
    minors.append(MinorClassifiers(n, 1, 'average'))
    
for n in n_features:
    minors.append(MinorClassifiers(1, n, 'average'))

In [13]:
for n in noises:
    new_x_train = make_noise_data(svd_x_train, n)
    print("Noise on data: " + str(n))
    for minor in minors:
        scores = cross_validate(minor, new_x_train, y_train, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
        print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
        print(scores)

Noise on data: 0.05
Samples: 0.1, features: 1
{'fit_time': array([154.17217922, 150.05866861, 153.23058915, 151.83258295,
       156.07881832]), 'score_time': array([84.33436298, 87.54580879, 84.91181469, 84.54791069, 82.25219893]), 'test_accuracy': array([0.74535809, 0.74767843, 0.74422924, 0.7105333 , 0.71477846]), 'test_neg_log_loss': array([-1.03093654, -1.00874163, -1.02946624, -1.02003515, -0.98535182]), 'test_neg_mean_squared_error': array([-13.14615385, -13.65640754, -15.24860706, -17.15388697,
       -15.84505174]), 'test_roc_auc_ovr': array([0.96907094, 0.97211317, 0.97235117, 0.9662398 , 0.97053474]), 'test_f1_weighted': array([0.74770109, 0.74781188, 0.74619186, 0.71335642, 0.7177339 ]), 'test_precision_weighted': array([0.75464193, 0.75303739, 0.75324298, 0.72433503, 0.72822271]), 'test_recall_weighted': array([0.74535809, 0.74767843, 0.74422924, 0.7105333 , 0.71477846])}
Samples: 0.35, features: 1
{'fit_time': array([1257.66017866, 1234.03320789, 1240.05512547, 1274.67126

Samples: 1, features: 0.5
{'fit_time': array([3887.04473448, 3979.57309031, 3839.72717118, 3762.76277852,
       3931.58019376]), 'score_time': array([315.352211  , 317.44698024, 317.20530248, 313.24389148,
       318.80638123]), 'test_accuracy': array([0.87427056, 0.88458477, 0.88458477, 0.8315203 , 0.84186787]), 'test_neg_log_loss': array([-0.49783675, -0.49412846, -0.48165129, -0.61265261, -0.57796264]), 'test_neg_mean_squared_error': array([-7.04270557, -5.42504643, -5.10400637, -9.7224728 , -8.83921465]), 'test_roc_auc_ovr': array([0.9925213 , 0.99250592, 0.99320857, 0.98667697, 0.98915912]), 'test_f1_weighted': array([0.8749937 , 0.8840639 , 0.88483512, 0.83140918, 0.84108109]), 'test_precision_weighted': array([0.87692341, 0.88463625, 0.88603959, 0.8327402 , 0.84182772]), 'test_recall_weighted': array([0.87427056, 0.88458477, 0.88458477, 0.8315203 , 0.84186787])}
Samples: 1, features: 0.75
{'fit_time': array([5225.5091362 , 5437.13613319, 5360.36702347, 5503.38156438,
       523

Samples: 0.7, features: 1
{'fit_time': array([4754.13093948, 4343.07299876, 4221.38828945, 4277.95838237,
       4303.87766171]), 'score_time': array([501.75028634, 456.31798673, 454.77140641, 456.92040133,
       459.5886271 ]), 'test_accuracy': array([0.84137931, 0.85195012, 0.86017511, 0.79676307, 0.80711064]), 'test_neg_log_loss': array([-0.55641008, -0.54709967, -0.5256149 , -0.67119989, -0.65484963]), 'test_neg_mean_squared_error': array([ -9.15888594,  -8.13823295,  -6.79623242, -12.48686654,
       -11.6418148 ]), 'test_roc_auc_ovr': array([0.98904029, 0.98917666, 0.99016616, 0.98296026, 0.98423189]), 'test_f1_weighted': array([0.84208157, 0.85108148, 0.86040717, 0.79731334, 0.80726987]), 'test_precision_weighted': array([0.84379865, 0.85102208, 0.86160535, 0.79980474, 0.80919648]), 'test_recall_weighted': array([0.84137931, 0.85195012, 0.86017511, 0.79676307, 0.80711064])}
Samples: 1, features: 0.25
{'fit_time': array([2600.77160215, 2531.09497333, 2682.15796685, 2506.54630566

In [14]:
for n in noises:
    new_y_train = make_noise_label(y_train, n)
    print("Noise on labels: " + str(n))
    for minor in minors:
        scores = cross_validate(minor, svd_x_train, new_y_train, scoring=('accuracy', 'neg_log_loss', 'neg_mean_squared_error', 'roc_auc_ovr', 'f1_weighted', 'precision_weighted', 'recall_weighted'))
        print("Samples: " + str(minor.samp) + ", features: " + str(minor.feat))
        print(scores)

Noise on labels: 0.05
Samples: 0.1, features: 1
{'fit_time': array([147.3920486 , 145.00769901, 143.99762869, 149.63122702,
       154.37247801]), 'score_time': array([80.12688184, 79.6095295 , 82.08589077, 80.63044739, 94.53331995]), 'test_accuracy': array([0.69708223, 0.71371717, 0.71000265, 0.66569382, 0.6821438 ]), 'test_neg_log_loss': array([-1.31655301, -1.25115119, -1.30059008, -1.27777832, -1.22312223]), 'test_neg_mean_squared_error': array([-16.8403183 , -15.20854338, -16.46059963, -19.75643407,
       -18.91085168]), 'test_roc_auc_ovr': array([0.93925082, 0.9462846 , 0.94184798, 0.93913961, 0.94616271]), 'test_f1_weighted': array([0.69827101, 0.71298166, 0.71034135, 0.66821722, 0.68428299]), 'test_precision_weighted': array([0.70417301, 0.71538463, 0.71374635, 0.67812542, 0.69424198]), 'test_recall_weighted': array([0.69708223, 0.71371717, 0.71000265, 0.66569382, 0.6821438 ])}
Samples: 0.35, features: 1
{'fit_time': array([1306.8132813 , 1265.42759252, 1278.12685347, 1294.755

Samples: 1, features: 0.5
{'fit_time': array([4653.22140098, 4476.5926919 , 4587.05533099, 4499.70081139,
       4637.06842899]), 'score_time': array([335.92143774, 332.81360936, 335.58371782, 332.94712138,
       337.47496176]), 'test_accuracy': array([0.78037135, 0.78906872, 0.78615017, 0.73202441, 0.74794375]), 'test_neg_log_loss': array([-1.11186426, -1.10133025, -1.10412853, -1.23772095, -1.17725273]), 'test_neg_mean_squared_error': array([-13.34509284, -12.54842133, -12.282303  , -16.49907137,
       -15.77739453]), 'test_roc_auc_ovr': array([0.94062509, 0.9406932 , 0.94178923, 0.92906682, 0.93596105]), 'test_f1_weighted': array([0.78048486, 0.7884564 , 0.78634281, 0.73085363, 0.74749657]), 'test_precision_weighted': array([0.78330802, 0.78966377, 0.78795441, 0.73342932, 0.75044887]), 'test_recall_weighted': array([0.78037135, 0.78906872, 0.78615017, 0.73202441, 0.74794375])}
Samples: 1, features: 0.75
{'fit_time': array([6366.471174  , 6344.73523259, 6602.1999054 , 6388.5762136 

Samples: 0.7, features: 1
{'fit_time': array([6813.23192263, 6201.19724369, 6200.78530812, 6187.20400405,
       6221.60724497]), 'score_time': array([518.99287033, 516.62730479, 518.26143956, 520.71870971,
       518.77536559]), 'test_accuracy': array([0.47612732, 0.47413107, 0.4887238 , 0.46829398, 0.48182542]), 'test_neg_log_loss': array([-2.26653771, -2.26814716, -2.22658226, -2.26174909, -2.22657176]), 'test_neg_mean_squared_error': array([-33.59071618, -33.92677103, -33.54178827, -35.02281772,
       -34.78827275]), 'test_roc_auc_ovr': array([0.77576315, 0.77680069, 0.78608765, 0.7764357 , 0.78123108]), 'test_f1_weighted': array([0.47571956, 0.47390109, 0.48693187, 0.46741745, 0.48093589]), 'test_precision_weighted': array([0.47769613, 0.47683201, 0.48906016, 0.47025034, 0.48615523]), 'test_recall_weighted': array([0.47612732, 0.47413107, 0.4887238 , 0.46829398, 0.48182542])}
Samples: 1, features: 0.25
{'fit_time': array([3601.09884953, 3549.66617465, 3626.84223938, 3633.32772803

Wyniki jakości klasyfikatorów:

In [15]:
max_iters = 121
clfs = MinorClassifiers(n_samples, n_features)

TypeError: __init__() missing 1 required positional argument: 'voting'

In [None]:
m = svd_x_train.shape[0]
m2 = int(m*0.1)
svd_x_train_10 = svd_x_train[:m2]
y_train_10 = y_train[:m2]

In [None]:
svd_x_train_10.shape

In [None]:
y_train_40 = make_noise_label(y_train, 0.4)

In [None]:
print(y_train_40.shape)
print(svd_x_train.shape)

In [None]:
#UWAGA NIE PUSZCZAĆ BEZ POWODU - linijka niezakomentowana żeby uniknąć restartowania kernela
#cross-validation -> wybór metody fuzji decyzji?
scores = cross_validate(clfs, svd_x_train, y_train, cv=5, scoring=('accuracy', 
                                                    'neg_log_loss', 
                                                    'neg_mean_squared_error', 
                                                    'roc_auc_ovr', 
                                                    'f1_weighted', 
                                                    'precision_weighted', 
                                                    'recall_weighted'))
#print(scores['test_accuracy'])
#print(scores['test_neg_log_loss'])
#print(scores['test_precision_weighted'])
#print(scores['test_recall_weighted'])
#print(scores['test_f1_weighted'])
#print(scores['test_roc_auc_ovr'])
#print(scores['test_average_precision'])


In [None]:
scores

In [None]:
y_pred = clfs.predict(svd_x_test)
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy score: {0:0.2f}'.format(accuracy))

In [None]:
y_pred_proba = clfs.predict_proba(svd_x_test)
loss = log_loss(y_test, y_pred_proba)
print('Loss score: {0:0.2f}'.format(loss))

In [None]:
average_precision = average_precision_score(y_test, y_pred, pos_label=1)
print('Precision-recall score: {0:0.2f}'.format(average_precision))

In [None]:
x_train

In [None]:
a = x_train.toarray()

In [6]:
b = np.array([[2.0,3.0], [2.0,4.], [4.,5.]])

In [17]:
c = make_noise_data(svd_x_train, 0.1)
print(c)

[[ 0.22475435 -0.06248808 -0.01441603 ... -0.0168442   0.02101432
  -0.0191214 ]
 [ 0.12599262 -0.08438557 -0.0357827  ... -0.01513339  0.00517915
   0.02194321]
 [ 0.3346583  -0.04556982 -0.07380376 ...  0.00602186  0.01392154
  -0.02985655]
 ...
 [ 0.1662913  -0.00550221 -0.0884303  ...  0.01776318 -0.00348805
   0.01692393]
 [ 0.20675388 -0.07267247  0.03741476 ...  0.03050179  0.02278233
  -0.02552414]
 [ 0.09054867 -0.09988643  0.00410151 ...  0.00048173 -0.02367268
   0.00355212]]
