# Classifiers evaluation and comparison

### Imports

In [1]:
import pandas as pd
import numpy as np
from sklearn import datasets, dummy, metrics, tree, svm, linear_model, ensemble, naive_bayes
from sklearn import model_selection as ms
import warnings
warnings.filterwarnings('ignore')

## Functions

### Baseline

In [2]:
def get_baseline(data, target):
    baseline = dummy.DummyClassifier(strategy='most_frequent')
    baseline.fit(data, target)
    base_predictions = baseline.predict(data)
    base_accuracy = metrics.accuracy_score(target, base_predictions)

    print ("Baseline accuracy = {:.3f}".format(base_accuracy))
    print(metrics.classification_report(target, base_predictions))

### Cross-validation predictions

In [3]:
def get_cross(model, test_data, test_labels):
    predictions = ms.cross_val_predict(model, test_data, test_labels)

    f1 = metrics.f1_score(test_labels, predictions, average='weighted')
    print('Cross-validation predictions:')
    print ("F1 = {:.3f}".format(f1))

    print(metrics.classification_report(test_labels, predictions))

### Classifier settings

In [4]:
def load_classifier(classifier, params, train_data, test_data, train_labels, folds, f1_scorer): 
    grid = ms.GridSearchCV(classifier,
                           params,
                           refit=True,
                           scoring=f1_scorer,
                           iid = True,
                           cv=ms.StratifiedKFold(n_splits=folds))
    
    grid_best = grid.fit(train_data, train_labels)
    print("Best hyper-parameters: {}".format(grid_best.best_params_))
    predictions = grid_best.predict(test_data)
    f1 = metrics.f1_score(test_labels, predictions, average='weighted')
    print ("F1 = {:.3f}".format(f1))

## Load datasets

### Digits

In [5]:
digits = datasets.load_digits()

digits_frame = pd.DataFrame(digits.data)
digits_frame['target'] = digits.target

print ("Class distibution")
print (digits_frame['target'].value_counts())

Class distibution
3    183
5    182
1    182
6    181
4    181
9    180
7    179
0    178
2    177
8    174
Name: target, dtype: int64


In [6]:
target_d = digits_frame['target'] 
data_d = digits_frame.drop(columns = ['target'])

### Breast_cancer

In [7]:
breast_cancer = datasets.load_breast_cancer()

breast_cancer_frame = pd.DataFrame(breast_cancer.data)
breast_cancer_frame['target'] = breast_cancer.target

print ("Class distibution")
print (breast_cancer_frame['target'].value_counts())

Class distibution
1    357
0    212
Name: target, dtype: int64


In [8]:
target_b = breast_cancer_frame['target']
data_b = breast_cancer_frame.drop(columns = ['target'])

### Wine

In [9]:
wine = datasets.load_wine()

wine_frame = pd.DataFrame(wine.data)
wine_frame['target'] = wine.target

print ("Class distibution")
print (wine_frame['target'].value_counts())

Class distibution
1    71
0    59
2    48
Name: target, dtype: int64


In [10]:
target_w = wine_frame['target']
data_w = wine_frame.drop(columns = ['target'])

## Digit dataset

### Baseline

In [11]:
get_baseline(data_d, target_d)

Baseline accuracy = 0.102
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       178
           1       0.00      0.00      0.00       182
           2       0.00      0.00      0.00       177
           3       0.10      1.00      0.18       183
           4       0.00      0.00      0.00       181
           5       0.00      0.00      0.00       182
           6       0.00      0.00      0.00       181
           7       0.00      0.00      0.00       179
           8       0.00      0.00      0.00       174
           9       0.00      0.00      0.00       180

    accuracy                           0.10      1797
   macro avg       0.01      0.10      0.02      1797
weighted avg       0.01      0.10      0.02      1797



### Find best classifier

In [12]:
train_data, test_data, train_labels, test_labels = ms.train_test_split(data_d, target_d, test_size = 0.3, random_state=42)
folds = 5
f1_scorer  = metrics.make_scorer(metrics.f1_score, average='weighted')

### LogisticRegression

In [13]:
logistic_param = {'penalty': ['l1', 'l2'],
                  'C': [1, 10, 100],
                  'max_iter': [100, 300],
                  'tol': [1e-3, 1e-4]}

logistic_classifier = linear_model.LogisticRegression(random_state=1, solver='liblinear')
load_classifier(logistic_classifier, logistic_param,
                train_data, test_data, train_labels,
                folds, f1_scorer)

get_cross(logistic_classifier, test_data, test_labels)

Best hyper-parameters: {'C': 1, 'max_iter': 100, 'penalty': 'l1', 'tol': 0.0001}
F1 = 0.963
Cross-validation predictions:
F1 = 0.959
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        53
           1       0.94      0.96      0.95        50
           2       1.00      0.98      0.99        47
           3       0.96      0.96      0.96        54
           4       0.95      0.98      0.97        60
           5       0.95      0.95      0.95        66
           6       0.96      0.98      0.97        53
           7       0.98      0.95      0.96        55
           8       0.95      0.91      0.93        43
           9       0.90      0.92      0.91        59

    accuracy                           0.96       540
   macro avg       0.96      0.96      0.96       540
weighted avg       0.96      0.96      0.96       540



### SVM

In [14]:
svc_param = {'kernel': ['linear','poly','rbf'],
            'gamma': ['auto', 'scale'],
            'C': [1, 10, 100]}

svc_classifier = svm.SVC(probability=True)
load_classifier(svc_classifier, svc_param,
                train_data, test_data, train_labels,
                folds, f1_scorer)

get_cross(svc_classifier, test_data, test_labels)

Best hyper-parameters: {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}
F1 = 0.989
Cross-validation predictions:
F1 = 0.972
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        53
           1       0.93      1.00      0.96        50
           2       1.00      0.96      0.98        47
           3       1.00      0.96      0.98        54
           4       0.98      1.00      0.99        60
           5       0.97      0.97      0.97        66
           6       0.98      1.00      0.99        53
           7       0.98      0.98      0.98        55
           8       0.90      0.88      0.89        43
           9       0.97      0.95      0.96        59

    accuracy                           0.97       540
   macro avg       0.97      0.97      0.97       540
weighted avg       0.97      0.97      0.97       540



### DecisionTreeClassifier

In [15]:
decision_param = {'presort':[True, False],
                  'max_depth' : [ 2, 3, 5, 7, 10],
                  'max_features' : ['auto', 1, 2, 3, None],
                  'min_samples_leaf' : [1, 2, 3, 4, 5]}

decision_classifier = tree.DecisionTreeClassifier(random_state=1)
load_classifier(decision_classifier, decision_param,
                train_data, test_data, train_labels,
                folds, f1_scorer)

get_cross(decision_classifier, test_data, test_labels)

Best hyper-parameters: {'max_depth': 10, 'max_features': None, 'min_samples_leaf': 1, 'presort': True}
F1 = 0.847
Cross-validation predictions:
F1 = 0.806
              precision    recall  f1-score   support

           0       1.00      0.89      0.94        53
           1       0.70      0.70      0.70        50
           2       0.85      0.70      0.77        47
           3       0.84      0.87      0.85        54
           4       0.87      0.75      0.80        60
           5       0.79      0.85      0.82        66
           6       0.85      0.94      0.89        53
           7       0.77      0.84      0.80        55
           8       0.72      0.79      0.76        43
           9       0.71      0.71      0.71        59

    accuracy                           0.81       540
   macro avg       0.81      0.80      0.80       540
weighted avg       0.81      0.81      0.81       540



### NaiveBayes

In [16]:
naive_param = {'var_smoothing' : [ 1e-3, 1e-5, 1e-7, 1e-9]}

naive_classifier = naive_bayes.GaussianNB()
load_classifier(naive_classifier, naive_param,
                train_data, test_data, train_labels,
                folds, f1_scorer)

get_cross(naive_classifier, test_data, test_labels)

Best hyper-parameters: {'var_smoothing': 0.001}
F1 = 0.896
Cross-validation predictions:
F1 = 0.871
              precision    recall  f1-score   support

           0       1.00      0.87      0.93        53
           1       0.85      0.80      0.82        50
           2       0.87      0.85      0.86        47
           3       0.87      0.85      0.86        54
           4       0.90      0.92      0.91        60
           5       0.91      0.94      0.93        66
           6       0.96      0.92      0.94        53
           7       0.78      0.98      0.87        55
           8       0.69      0.86      0.76        43
           9       0.91      0.69      0.79        59

    accuracy                           0.87       540
   macro avg       0.87      0.87      0.87       540
weighted avg       0.88      0.87      0.87       540



### RandomForestClassifier

In [17]:
forest_param = {'max_depth' : [None, 5, 10, 25],
                'max_features' : ['auto', 'log2', None],
                'n_estimators' : [50, 100, 250]}

forest_classifier = ensemble.RandomForestClassifier(random_state=1)
load_classifier(forest_classifier, forest_param,
                train_data, test_data, train_labels,
                folds, f1_scorer)

get_cross(forest_classifier, test_data, test_labels)

Best hyper-parameters: {'max_depth': None, 'max_features': 'log2', 'n_estimators': 250}
F1 = 0.970
Cross-validation predictions:
F1 = 0.968
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        53
           1       0.94      1.00      0.97        50
           2       1.00      0.94      0.97        47
           3       0.96      0.96      0.96        54
           4       0.97      1.00      0.98        60
           5       0.97      0.97      0.97        66
           6       0.98      0.98      0.98        53
           7       0.96      0.98      0.97        55
           8       0.93      0.93      0.93        43
           9       0.96      0.92      0.94        59

    accuracy                           0.97       540
   macro avg       0.97      0.97      0.97       540
weighted avg       0.97      0.97      0.97       540



### AdaBoostClassifier

In [18]:
boost_param = {'n_estimators' : [20, 50, 100],
               'learning_rate' : [1, 1e-2],
               'algorithm': ['SAMME', 'SAMME.R']}

boost_classifier = ensemble.AdaBoostClassifier()
load_classifier(boost_classifier, boost_param,                
                train_data, test_data, train_labels,
                folds, f1_scorer)

get_cross(boost_classifier, test_data, test_labels)

Best hyper-parameters: {'algorithm': 'SAMME', 'learning_rate': 1, 'n_estimators': 100}
F1 = 0.798
Cross-validation predictions:
F1 = 0.200
              precision    recall  f1-score   support

           0       0.55      0.74      0.63        53
           1       0.00      0.00      0.00        50
           2       0.00      0.00      0.00        47
           3       0.00      0.00      0.00        54
           4       0.18      0.22      0.20        60
           5       0.40      0.41      0.41        66
           6       0.67      0.04      0.07        53
           7       0.19      0.38      0.25        55
           8       0.00      0.00      0.00        43
           9       0.20      0.73      0.31        59

    accuracy                           0.27       540
   macro avg       0.22      0.25      0.19       540
weighted avg       0.23      0.27      0.20       540



### VotingClassifier

In [19]:
voting_param = {'voting' : ['hard', 'soft'],
                'flatten_transform' :[True, False, None]}

voting_classifier = ensemble.VotingClassifier(estimators=[('lr', logistic_classifier), 
                                            ('rf', ensemble.RandomForestClassifier(n_estimators=50, random_state=1)), 
                                            ('gnb', naive_classifier)])
load_classifier(voting_classifier, voting_param,
                train_data, test_data, train_labels,
                folds, f1_scorer)

get_cross(voting_classifier, test_data, test_labels)

Best hyper-parameters: {'flatten_transform': True, 'voting': 'hard'}
F1 = 0.961
Cross-validation predictions:
F1 = 0.966
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        53
           1       0.92      0.98      0.95        50
           2       1.00      0.96      0.98        47
           3       0.96      0.94      0.95        54
           4       0.97      1.00      0.98        60
           5       0.97      0.98      0.98        66
           6       0.98      0.98      0.98        53
           7       0.95      1.00      0.97        55
           8       0.93      0.93      0.93        43
           9       0.98      0.88      0.93        59

    accuracy                           0.97       540
   macro avg       0.97      0.97      0.97       540
weighted avg       0.97      0.97      0.97       540



## Breast_canсer dataset

### Baseline

In [20]:
get_baseline(data_b, target_b)

Baseline accuracy = 0.627
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       212
           1       0.63      1.00      0.77       357

    accuracy                           0.63       569
   macro avg       0.31      0.50      0.39       569
weighted avg       0.39      0.63      0.48       569



### Find best classifier

In [21]:
train_data, test_data, train_labels, test_labels = ms.train_test_split(data_b, target_b, test_size = 0.3, random_state=42)
folds = 5
f1_scorer  = metrics.make_scorer(metrics.f1_score, average='weighted')

### LogisticRegression

In [22]:
logistic_param = {'penalty': ['l1', 'l2'],
                   'C': [1, 10, 100],
                   'max_iter': [200, 500],
                   'tol': [1e-3, 1e-4]}

load_classifier(logistic_classifier, logistic_param,
                train_data, test_data, train_labels,
                folds, f1_scorer)

get_cross(logistic_classifier, test_data, test_labels)

Best hyper-parameters: {'C': 100, 'max_iter': 200, 'penalty': 'l1', 'tol': 0.001}
F1 = 0.977
Cross-validation predictions:
F1 = 0.953
              precision    recall  f1-score   support

           0       0.95      0.92      0.94        63
           1       0.95      0.97      0.96       108

    accuracy                           0.95       171
   macro avg       0.95      0.95      0.95       171
weighted avg       0.95      0.95      0.95       171



### SVM

In [23]:
svc_param = {'kernel': ['linear','rbf'],
            'gamma': ['auto', 'scale'],
            'C': [1, 10]}

svc_classifier = svm.SVC()
load_classifier(svc_classifier, svc_param,
                train_data, test_data, train_labels,
                folds, f1_scorer)

get_cross(svc_classifier, test_data, test_labels)

Best hyper-parameters: {'C': 10, 'gamma': 'auto', 'kernel': 'linear'}
F1 = 0.971
Cross-validation predictions:
F1 = 0.928
              precision    recall  f1-score   support

           0       0.98      0.83      0.90        63
           1       0.91      0.99      0.95       108

    accuracy                           0.93       171
   macro avg       0.94      0.91      0.92       171
weighted avg       0.93      0.93      0.93       171



### DecisionTreeClassifier

In [24]:
decision_param = {'presort':[True, False],
                  'max_depth' : [ 2, 3, 5, 7, 10],
                  'max_features' : ['auto', 1, 2, 3, None],
                  'min_samples_leaf' : [1, 2, 3, 4, 5]}

decision_classifier = tree.DecisionTreeClassifier(random_state=1)
load_classifier(decision_classifier, decision_param,
                train_data, test_data, train_labels,
                folds, f1_scorer)

get_cross(decision_classifier, test_data, test_labels)

Best hyper-parameters: {'max_depth': 7, 'max_features': 1, 'min_samples_leaf': 2, 'presort': True}
F1 = 0.953
Cross-validation predictions:
F1 = 0.930
              precision    recall  f1-score   support

           0       0.90      0.90      0.90        63
           1       0.94      0.94      0.94       108

    accuracy                           0.93       171
   macro avg       0.92      0.92      0.92       171
weighted avg       0.93      0.93      0.93       171



### NaiveBayes

In [25]:
naive_param = {'var_smoothing' : [ 1e-3, 1e-5, 1e-7, 1e-9]}

naive_classifier = naive_bayes.GaussianNB()
load_classifier(naive_classifier, naive_param,
                train_data, test_data, train_labels,
                folds, f1_scorer)

get_cross(naive_classifier, test_data, test_labels)

Best hyper-parameters: {'var_smoothing': 1e-09}
F1 = 0.941
Cross-validation predictions:
F1 = 0.941
              precision    recall  f1-score   support

           0       0.95      0.89      0.92        63
           1       0.94      0.97      0.95       108

    accuracy                           0.94       171
   macro avg       0.94      0.93      0.94       171
weighted avg       0.94      0.94      0.94       171



### RandomForestClassifier

In [26]:
forest_param = {'max_depth' : [None, 5, 10, 25],
                'max_features' : ['auto', 'log2', None],
                'n_estimators' : [50, 100, 250]}

forest_classifier = ensemble.RandomForestClassifier(random_state=1)
load_classifier(forest_classifier, forest_param,
                train_data, test_data, train_labels,
                folds, f1_scorer)

get_cross(forest_classifier, test_data, test_labels)

Best hyper-parameters: {'max_depth': None, 'max_features': None, 'n_estimators': 50}
F1 = 0.959
Cross-validation predictions:
F1 = 0.947
              precision    recall  f1-score   support

           0       0.94      0.92      0.93        63
           1       0.95      0.96      0.96       108

    accuracy                           0.95       171
   macro avg       0.94      0.94      0.94       171
weighted avg       0.95      0.95      0.95       171



### AdaBoostClassifier

In [27]:
boost_param = {'n_estimators' : [20, 50, 100],
               'learning_rate' : [1, 1e-2],
               'algorithm': ['SAMME', 'SAMME.R']}

boost_classifier = ensemble.AdaBoostClassifier()
load_classifier(boost_classifier, boost_param,                
                train_data, test_data, train_labels,
                folds, f1_scorer)

get_cross(boost_classifier, test_data, test_labels)

Best hyper-parameters: {'algorithm': 'SAMME', 'learning_rate': 1, 'n_estimators': 50}
F1 = 0.971
Cross-validation predictions:
F1 = 0.953
              precision    recall  f1-score   support

           0       0.94      0.94      0.94        63
           1       0.96      0.96      0.96       108

    accuracy                           0.95       171
   macro avg       0.95      0.95      0.95       171
weighted avg       0.95      0.95      0.95       171



### VotingClassifier

In [28]:
voting_param = {'voting' : ['hard', 'soft'],
                'flatten_transform' :[True, False, None]}

voting_classifier = ensemble.VotingClassifier(estimators=[('lr', logistic_classifier), 
                                            ('rf', ensemble.RandomForestClassifier(n_estimators=50, random_state=1)), 
                                            ('gnb', naive_classifier)])
load_classifier(voting_classifier, voting_param,
                train_data, test_data, train_labels,
                folds, f1_scorer)

get_cross(voting_classifier, test_data, test_labels)

Best hyper-parameters: {'flatten_transform': True, 'voting': 'hard'}
F1 = 0.971
Cross-validation predictions:
F1 = 0.959
              precision    recall  f1-score   support

           0       0.97      0.92      0.94        63
           1       0.95      0.98      0.97       108

    accuracy                           0.96       171
   macro avg       0.96      0.95      0.96       171
weighted avg       0.96      0.96      0.96       171



## Wine dataset

### Baseline

In [29]:
get_baseline(data_w, target_w)

Baseline accuracy = 0.399
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        59
           1       0.40      1.00      0.57        71
           2       0.00      0.00      0.00        48

    accuracy                           0.40       178
   macro avg       0.13      0.33      0.19       178
weighted avg       0.16      0.40      0.23       178



### Find best classifier

In [30]:
train_data, test_data, train_labels, test_labels = ms.train_test_split(data_w, target_w, test_size = 0.3, random_state=42)
folds = 5
f1_scorer  = metrics.make_scorer(metrics.f1_score, average='weighted')

### LogisticRegression

In [31]:
logistic_param = {'penalty': ['l1', 'l2'],
                   'C': [1, 10, 100],
                   'max_iter': [200, 500],
                   'tol': [1e-3, 1e-4]}

load_classifier(logistic_classifier, logistic_param,
                train_data, test_data, train_labels,
                folds, f1_scorer)

get_cross(logistic_classifier, test_data, test_labels)

Best hyper-parameters: {'C': 100, 'max_iter': 200, 'penalty': 'l1', 'tol': 0.001}
F1 = 1.000
Cross-validation predictions:
F1 = 0.944
              precision    recall  f1-score   support

           0       0.94      0.89      0.92        19
           1       0.91      1.00      0.95        21
           2       1.00      0.93      0.96        14

    accuracy                           0.94        54
   macro avg       0.95      0.94      0.95        54
weighted avg       0.95      0.94      0.94        54



### SVM

In [32]:
svc_param = {'kernel': ['linear','rbf'],
            'gamma': ['auto', 'scale'],
            'C': [1, 10]}

svc_classifier = svm.SVC()
load_classifier(svc_classifier, svc_param,
                train_data, test_data, train_labels,
                folds, f1_scorer)

get_cross(svc_classifier, test_data, test_labels)

Best hyper-parameters: {'C': 1, 'gamma': 'auto', 'kernel': 'linear'}
F1 = 0.982
Cross-validation predictions:
F1 = 0.625
              precision    recall  f1-score   support

           0       0.95      0.95      0.95        19
           1       0.60      1.00      0.75        21
           2       0.00      0.00      0.00        14

    accuracy                           0.72        54
   macro avg       0.52      0.65      0.57        54
weighted avg       0.57      0.72      0.62        54



### DecisionTreeClassifier

In [33]:
decision_param = {'presort':[True, False],
                  'max_depth' : [ 2, 3, 5, 7, 10],
                  'max_features' : ['auto', 1, 2, 3, None],
                  'min_samples_leaf' : [1, 2, 3, 4, 5]}

decision_classifier = tree.DecisionTreeClassifier(random_state=1)
load_classifier(decision_classifier, decision_param,
                train_data, test_data, train_labels,
                folds, f1_scorer)

get_cross(decision_classifier, test_data, test_labels)

Best hyper-parameters: {'max_depth': 3, 'max_features': None, 'min_samples_leaf': 2, 'presort': True}
F1 = 0.963
Cross-validation predictions:
F1 = 0.925
              precision    recall  f1-score   support

           0       0.95      1.00      0.97        19
           1       0.95      0.90      0.93        21
           2       0.86      0.86      0.86        14

    accuracy                           0.93        54
   macro avg       0.92      0.92      0.92        54
weighted avg       0.93      0.93      0.93        54



### NaiveBayes

In [34]:
naive_param = {'var_smoothing' : [ 1e-3, 1e-5, 1e-7, 1e-9]}

naive_classifier = naive_bayes.GaussianNB()
load_classifier(naive_classifier, naive_param,
                train_data, test_data, train_labels,
                folds, f1_scorer)

get_cross(naive_classifier, test_data, test_labels)

Best hyper-parameters: {'var_smoothing': 1e-09}
F1 = 1.000
Cross-validation predictions:
F1 = 1.000
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      1.00      1.00        21
           2       1.00      1.00      1.00        14

    accuracy                           1.00        54
   macro avg       1.00      1.00      1.00        54
weighted avg       1.00      1.00      1.00        54



### RandomForestClassifier

In [35]:
forest_param = {'max_depth' : [None, 5, 10, 25],
                'max_features' : ['auto', 'log2', None],
                'n_estimators' : [50, 100, 250]}

forest_classifier = ensemble.RandomForestClassifier(random_state=1)
load_classifier(forest_classifier, forest_param,
                train_data, test_data, train_labels,
                folds, f1_scorer)

get_cross(forest_classifier, test_data, test_labels)

Best hyper-parameters: {'max_depth': None, 'max_features': 'auto', 'n_estimators': 100}
F1 = 0.982
Cross-validation predictions:
F1 = 0.982
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      0.95      0.98        21
           2       0.93      1.00      0.97        14

    accuracy                           0.98        54
   macro avg       0.98      0.98      0.98        54
weighted avg       0.98      0.98      0.98        54



### AdaBoostClassifier

In [36]:
boost_param = {'n_estimators' : [20, 50, 100],
               'learning_rate' : [1, 1e-2],
               'algorithm': ['SAMME', 'SAMME.R']}

boost_classifier = ensemble.AdaBoostClassifier()
load_classifier(boost_classifier, boost_param,                
                train_data, test_data, train_labels,
                folds, f1_scorer)

get_cross(boost_classifier, test_data, test_labels)

Best hyper-parameters: {'algorithm': 'SAMME', 'learning_rate': 1, 'n_estimators': 20}
F1 = 0.944
Cross-validation predictions:
F1 = 0.944
              precision    recall  f1-score   support

           0       0.95      1.00      0.97        19
           1       0.95      0.95      0.95        21
           2       0.92      0.86      0.89        14

    accuracy                           0.94        54
   macro avg       0.94      0.94      0.94        54
weighted avg       0.94      0.94      0.94        54



### VotingClassifier

In [37]:
voting_param = {'voting' : ['hard', 'soft'],
                'flatten_transform' :[True, False, None]}

voting_classifier = ensemble.VotingClassifier(estimators=[('lr', logistic_classifier), 
                                            ('rf', ensemble.RandomForestClassifier(n_estimators=50, random_state=1)), 
                                            ('gnb', naive_classifier)])
load_classifier(voting_classifier, voting_param,
                train_data, test_data, train_labels,
                folds, f1_scorer)

get_cross(voting_classifier, test_data, test_labels)

Best hyper-parameters: {'flatten_transform': True, 'voting': 'hard'}
F1 = 1.000
Cross-validation predictions:
F1 = 1.000
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      1.00      1.00        21
           2       1.00      1.00      1.00        14

    accuracy                           1.00        54
   macro avg       1.00      1.00      1.00        54
weighted avg       1.00      1.00      1.00        54



## Results

### Digits

<table>
    <tr><td>Classifier</td><td>F1</td></tr>
    <tr><td>LogisticRegression</td><td>0.963</td></tr>
    <tr><td>SVM</td><td>0.989</td></tr>
    <tr><td>DecisionTreeClassifier</td><td>0.847</td></tr>
    <tr><td>NaiveBayes</td><td>0.896</td></tr>
    <tr><td>RandomForestClassifier</td><td>0.970</td></tr>
    <tr><td>AdaBoostClassifier</td><td>0.798</td></tr>
    <tr><td>VotingClassifier</td><td>0.961</td></tr>
</table>

### Breast_cancer

<table>
    <tr><td>Classifier</td><td>F1</td></tr>
    <tr><td>LogisticRegression</td><td>0.977</td></tr>
    <tr><td>SVM</td><td>0.971</td></tr>
    <tr><td>DecisionTreeClassifier</td><td>0.953</td></tr>
    <tr><td>NaiveBayes</td><td>0.941</td></tr>
    <tr><td>RandomForestClassifier</td><td>0.959</td></tr>
    <tr><td>AdaBoostClassifier</td><td>0.971</td></tr>
    <tr><td>VotingClassifier</td><td>0.971</td></tr>
</table>

### Wine

<table>
    <tr><td>Classifier</td><td>F1</td></tr>
    <tr><td>LogisticRegression</td><td>1.000</td></tr>
    <tr><td>SVM</td><td>0.982</td></tr>
    <tr><td>DecisionTreeClassifier</td><td>0.963</td></tr>
    <tr><td>NaiveBayes</td><td>1.000</td></tr>
    <tr><td>RandomForestClassifier</td><td>0.982</td></tr>
    <tr><td>AdaBoostClassifier</td><td>0.944</td></tr>
    <tr><td>VotingClassifier</td><td>1.000</td></tr>
</table>