In [None]:
import itertools as it
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.decomposition import KernelPCA
from sklearn.manifold import TSNE
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.model_selection import ParameterGrid
from sklearn.model_selection import train_test_split

from possibilearn import flatten

rs = 20190105

In [None]:
import sys
import logging

logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

# Create STDERR handler
handler_video = logging.StreamHandler(sys.stderr)
# Create file handler
handler_file = logging.FileHandler('axiom-classification.log')

# Create formatter and add it to the handler
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
handler_video.setFormatter(formatter)
handler_file.setFormatter(formatter)

# Set STDERR handler as the only handler 
logger.handlers = [handler_video, handler_file]

In [None]:
mu = np.load('mu.npy')

In [None]:
gram_jaccard = np.load('jaccard_similarity.npy')
dist_jaccard = 1 - gram_jaccard

In [None]:
dist_length = np.load('length_distance.npy')
gram_length = 1 - dist_length

In [None]:
dist_leven = np.load('levenshtein_distance.npy')
gram_leven = 1 - dist_leven

In [None]:
dist_hamming = np.load('hamming_distance.npy')
gram_hamming = 1 - dist_hamming

In [None]:
def alpha_cut(alpha):
    return lambda x: 1 if x >= alpha else 0


def cut_labels(mu, alpha):
    return np.array(list(map(alpha_cut(alpha), mu)))

In [None]:
reduced_data_dir = 'data/reduction/'

def learn_experiment(data_matrix,
                     reduction_procedure, reduced_data_filename,
                     mu,
                     learning_algorithm_factory,
                     d=2,
                     alpha_levels=(.2, .4, .5, .75, .9),
                     num_holdouts=5,
                     percentages=(.8, 0, .2),
                     hyperparams_set = {},
                     validation_metric = metrics.accuracy_score,
                     test_metric = metrics.accuracy_score,
                     verbose=False):
    
    # this is to check that any time we have no parameters the validation
    # percentage is 0 and vice versa
    
    assert((hyperparams_set!={} or percentages[1]==0) and \
           (percentages[1]!=0 or hyperparams_set=={}))
    
    hyperparams_iterator = ParameterGrid(hyperparams_set)
    
    
    reduced_data_fullname = reduced_data_dir + reduced_data_filename + '.npy'
    if os.path.isfile(reduced_data_fullname):
        logger.info('Found cached reduced data. Retrieving {}'.format(reduced_data_filename))
        X = np.load(reduced_data_fullname)
    else:
        logger.info('Performing reduction procedure...')
        X = reduction_procedure(n_components=d,
                                random_state=rs).fit_transform(data_matrix)
    
        logger.info('Done!')
        np.save(reduced_data_fullname, X)
        
    label_set = [cut_labels(mu, alpha) for alpha in alpha_levels]
    
    n = len(X)
    assert(n==len(mu))

    ##paired_X = [X[i:i+2] for i in range(0, n, 2)]
    
    best_test_median = float('inf')
    best_test_metric = []
    
    for alpha, y in zip(alpha_levels, label_set):
        
        logger.info('Checking alpha={:.2f}'.format(alpha))
    
        ##paired_y = [y[i:i+2] for i in range(0, n, 2)]

        metric_train = []
        metric_test = []

        for h in range(num_holdouts):
            (X_train,
             X_validate_test,
             y_train,
             y_validate_test) = train_test_split(X, y,
                                                 train_size=percentages[0],
                                                 test_size=1-percentages[0],
                                                 stratify=y)
            
            if percentages[1]==0: # no validation set
                X_validate = []
                X_test = X_validate_test
                y_validate = []
                y_test = y_validate_test
            else:
                val_perc_rel = percentages[1]/(percentages[1]+percentages[2])
                (X_validate,
                 X_test,
                 y_validate,
                 y_test) = train_test_split(X_validate_test, y_validate_test,
                                            train_size=val_perc_rel,
                                            test_size=1-val_perc_rel,
                                            stratify=y_validate_test)

            logger.info('holdout {} of {}'.format(h+1, num_holdouts))
            
            best_hyperparams = {}
            best_err = np.inf
            
            for hyperparams in hyperparams_iterator:
                if not hyperparams:
                    break
                    
                #logger.info('Checking {}'.format(hyperparams))
                
                learning_algorithm = learning_algorithm_factory(**hyperparams)
                learning_algorithm.fit(X_train, y_train)
                y_pred = learning_algorithm.predict(X_validate)
                error = validation_metric(y_validate, y_pred)
                if error < best_err:
                    best_err = error
                    best_hyperparams = hyperparams
            
            logger.info('Learning with best '
                        'hyperparams: {}'.format(best_hyperparams))
            learning_algorithm = learning_algorithm_factory(**best_hyperparams)
            if percentages[1] == 0: # no validation set
                X_train_val = X_train
                y_train_val = y_train
            else:
                X_train_val = np.vstack([X_train, X_validate])
                y_train_val = np.hstack([y_train, y_validate])
            assert(len(X_train_val) == len(X_train) + len(X_validate))
            assert(len(y_train_val) == len(y_train) + len(y_validate))
            learning_algorithm.fit(X_train_val, y_train_val)
            pred_train = learning_algorithm.predict(X_train_val)
            pred_test = learning_algorithm.predict(X_test)
            
            metric_train.append(test_metric(y_train_val, pred_train))
            metric_test.append(test_metric(y_test, pred_test))
        
        test_median = np.median(metric_test)
        if test_median < best_test_median:
            best_test_median = test_median
            best_test_metric = [alpha,
                                np.mean(metric_train),
                                np.median(metric_train),
                                np.std(metric_train),
                                np.mean(metric_test),
                                test_median,
                                np.std(metric_test)]
    names = ['alpha',
             'train_mean', 'train_median', 'train_std',
             'test_mean','test_median', 'test_std']
    return dict(zip(names, best_test_metric))

In [None]:
def SVC_custom(*args, **kwargs):
    return SVC(*args, **kwargs, max_iter=5000)

from sklearn.preprocessing import MinMaxScaler
        
class ScaledSVC:
    def __init__(self, *args, **kwargs):
        self.svc = SVC(*args, **kwargs, max_iter=5000)
        self.scaler = MinMaxScaler()
    
    def fit(self, X, y):
        self.scaler.fit(X)
        return self.svc.fit(self.scaler.transform(X), y)
    
    def predict(self, X):
        self.scaler.fit(X)
        return self.svc.predict(self.scaler.transform(X))

def RandomForestClassifier_custom(*args, **kwargs):
    return RandomForestClassifier(*args, **kwargs, n_estimators=100)

def MLPClassifier_custom(*args, **kwargs):
    return MLPClassifier(*args, **kwargs, max_iter=5000)

In [None]:
import json
import os

result_dir = './data/classification/'

In [None]:
# considered learning algorithms
names = ['Decision tree', 'Random forest', 'Naive Bayes', 'LDA', 'MLP',
         'SVC (linear)', 'SVC (gaussian)']
algorithms = [DecisionTreeClassifier, RandomForestClassifier,
              GaussianNB, LinearDiscriminantAnalysis, MLPClassifier_custom,
              ScaledSVC, ScaledSVC]
hyperp_sets = [{'criterion': ['gini', 'entropy'], 'max_leaf_nodes': [None, 2, 5, 10, 50, 100],
                'max_features': [None, 'sqrt', 'log2'], 'max_depth': [None, 2, 5, 10]},
               {'n_estimators': [5, 10, 50, 100, 200], 'criterion': ['gini', 'entropy'], 'max_leaf_nodes': [None, 2, 5, 10, 50, 100],
                'max_features': [None, 'sqrt', 'log2'], 'max_depth': [None, 2, 5, 10]},
               {}, {}, {'hidden_layer_sizes': [[2], [4], [6], [10], [20]]},
               {'C': [.001, .01, .1, 1, 10], 'kernel': ['linear']},
               {'C': [.001, .01, .1, 1, 10], 'kernel': ['rbf'], 'gamma': [.001, .01, .1, 1, 10, 30]}]
percentages = [(.8, .1, .1), (.8, .1, .2), (.8, 0, .2), (.8, 0, .2),
               (.8, .1, .1),(.8, .1, .1), (.8, .1, .1)]

# considered data matrices
data_names = ['hamming', 'jaccard', 'length', 'levenshtein']
data_matrices = [gram_hamming, gram_jaccard, gram_length, gram_leven]

# considered number of extracted components
components = [2, 3, 5, 10, 30]

In [15]:
num_experiments = len(algorithms) *  len(data_matrices) *  len(components)

learning_procedures = zip(names, algorithms, hyperp_sets, percentages)

data = zip(data_names, data_matrices)

experiments = it.product(learning_procedures, data, components)

performed_experiments = 0

re_generate_files = False
append = True

num_groups = 10
for g in range(num_groups):
    logger.info('Started experiment group {} of {}'.format(g+1, num_groups))
    for experiment in experiments:
        learning_procedure = experiment[0]
        data_item = experiment[1]
        comp = experiment[2]
        out_file = result_dir + 'class-' + \
                   learning_procedure[0].replace(' ', '_') + '-' + \
                   'pca-' + \
                   data_item[0] + '-' + \
                   str(comp) + '.json'
        logger.info('Considering {}'.format(out_file))
        if os.path.isfile(out_file) and not re_generate_files and not append:
            logger.info('already exists, skipping')
        else:
            access_flag = 'w' if not append else 'a'
            if append:
                logger.info('appending to existing file')
            elif re_generate_files:
                logger.info('re-executing experiment')
            else:
                logger.info('does not exist, starting experiment')
            # perform experiment
            res = learn_experiment(data_item[1],
                                   KernelPCA, 'PCA-' + experiment[1][0],
                                   mu,
                                   learning_procedure[1],
                                   d=comp,
                                   alpha_levels=(.2, .4, .5, .6, .8, .9),
                                   percentages=learning_procedure[3],
                                   hyperparams_set = learning_procedure[2])
            
            if access_flag == 'a':
                logger.info('opening result file in append')
            else:
                logger.info('creating result file')
            with open(out_file, access_flag) as f:
                f.write('\n')
                json.dump(res, f)

            logger.info('experiment finished')
        performed_experiments += 1
        perc_complete = 100 * performed_experiments / num_experiments
        logger.info('completed {:.2f}%'.format(perc_complete))
    logger.info('Ended experiment group {} of {}'.format(g+1, num_groups))

2020-01-24 16:49:27,286 - INFO - Started experiment group 1 of 10
2020-01-24 16:49:27,287 - INFO - Considering ./data/classification/class-Decision_tree-pca-hamming-2.json
2020-01-24 16:49:27,288 - INFO - appending to existing file
2020-01-24 16:49:27,288 - INFO - Found cached reduced data. Retrieving PCA-hamming
2020-01-24 16:49:27,297 - INFO - Checking alpha=0.20
2020-01-24 16:49:27,303 - INFO - holdout 1 of 5
2020-01-24 16:49:27,483 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'log2', 'max_leaf_nodes': 2}
2020-01-24 16:49:27,487 - INFO - holdout 2 of 5
2020-01-24 16:49:27,653 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'log2', 'max_leaf_nodes': 2}
2020-01-24 16:49:27,657 - INFO - holdout 3 of 5
2020-01-24 16:49:27,847 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': 2}
2020-01-24 16:49:27,851 - INFO - ho

2020-01-24 16:49:34,158 - INFO - holdout 2 of 5
2020-01-24 16:49:34,337 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': 2, 'max_features': None, 'max_leaf_nodes': None}
2020-01-24 16:49:34,342 - INFO - holdout 3 of 5
2020-01-24 16:49:34,521 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': 2, 'max_features': 'sqrt', 'max_leaf_nodes': 2}
2020-01-24 16:49:34,526 - INFO - holdout 4 of 5
2020-01-24 16:49:34,732 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': 2}
2020-01-24 16:49:34,736 - INFO - holdout 5 of 5
2020-01-24 16:49:34,913 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': 5}
2020-01-24 16:49:34,916 - INFO - Checking alpha=0.50
2020-01-24 16:49:34,919 - INFO - holdout 1 of 5
2020-01-24 16:49:35,105 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'm

2020-01-24 16:49:41,802 - INFO - holdout 5 of 5
2020-01-24 16:49:41,982 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': 2, 'max_features': 'sqrt', 'max_leaf_nodes': 2}
2020-01-24 16:49:41,984 - INFO - Checking alpha=0.60
2020-01-24 16:49:41,988 - INFO - holdout 1 of 5
2020-01-24 16:49:42,181 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': 2, 'max_features': 'sqrt', 'max_leaf_nodes': 10}
2020-01-24 16:49:42,187 - INFO - holdout 2 of 5
2020-01-24 16:49:42,360 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': 2, 'max_features': 'log2', 'max_leaf_nodes': 10}
2020-01-24 16:49:42,364 - INFO - holdout 3 of 5
2020-01-24 16:49:42,550 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'log2', 'max_leaf_nodes': 2}
2020-01-24 16:49:42,554 - INFO - holdout 4 of 5
2020-01-24 16:49:42,734 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': 2

2020-01-24 16:49:48,593 - INFO - holdout 2 of 5
2020-01-24 16:49:48,772 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': 2}
2020-01-24 16:49:48,776 - INFO - holdout 3 of 5
2020-01-24 16:49:48,959 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': 2, 'max_features': 'sqrt', 'max_leaf_nodes': 2}
2020-01-24 16:49:48,963 - INFO - holdout 4 of 5
2020-01-24 16:49:49,146 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'log2', 'max_leaf_nodes': 2}
2020-01-24 16:49:49,150 - INFO - holdout 5 of 5
2020-01-24 16:49:49,335 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'log2', 'max_leaf_nodes': 2}
2020-01-24 16:49:49,337 - INFO - Checking alpha=0.90
2020-01-24 16:49:49,340 - INFO - holdout 1 of 5
2020-01-24 16:49:49,534 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': Non

2020-01-24 16:49:56,363 - INFO - holdout 5 of 5
2020-01-24 16:49:56,589 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': 2}
2020-01-24 16:49:56,591 - INFO - opening result file in append
2020-01-24 16:49:56,592 - INFO - experiment finished
2020-01-24 16:49:56,592 - INFO - completed 3.57%
2020-01-24 16:49:56,593 - INFO - Considering ./data/classification/class-Decision_tree-pca-jaccard-2.json
2020-01-24 16:49:56,593 - INFO - appending to existing file
2020-01-24 16:49:56,594 - INFO - Found cached reduced data. Retrieving PCA-jaccard
2020-01-24 16:49:56,598 - INFO - Checking alpha=0.20
2020-01-24 16:49:56,602 - INFO - holdout 1 of 5
2020-01-24 16:49:56,722 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': 2, 'max_features': 'sqrt', 'max_leaf_nodes': None}
2020-01-24 16:49:56,726 - INFO - holdout 2 of 5
2020-01-24 16:49:56,847 - INFO - Learning with best hyperparams: {'criterion': 'gini

2020-01-24 16:50:01,971 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': 2}
2020-01-24 16:50:01,976 - INFO - Checking alpha=0.40
2020-01-24 16:50:01,980 - INFO - holdout 1 of 5
2020-01-24 16:50:02,144 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': 2}
2020-01-24 16:50:02,149 - INFO - holdout 2 of 5
2020-01-24 16:50:02,303 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'log2', 'max_leaf_nodes': 2}
2020-01-24 16:50:02,308 - INFO - holdout 3 of 5
2020-01-24 16:50:02,461 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': 5, 'max_features': 'log2', 'max_leaf_nodes': 2}
2020-01-24 16:50:02,466 - INFO - holdout 4 of 5
2020-01-24 16:50:02,617 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': 2}
202

2020-01-24 16:50:08,129 - INFO - holdout 3 of 5
2020-01-24 16:50:08,284 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'log2', 'max_leaf_nodes': 2}
2020-01-24 16:50:08,288 - INFO - holdout 4 of 5
2020-01-24 16:50:08,442 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': 2}
2020-01-24 16:50:08,447 - INFO - holdout 5 of 5
2020-01-24 16:50:08,599 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': 2}
2020-01-24 16:50:08,601 - INFO - Checking alpha=0.60
2020-01-24 16:50:08,604 - INFO - holdout 1 of 5
2020-01-24 16:50:08,753 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'log2', 'max_leaf_nodes': 2}
2020-01-24 16:50:08,758 - INFO - holdout 2 of 5
2020-01-24 16:50:08,909 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': Non

2020-01-24 16:50:14,452 - INFO - Checking alpha=0.80
2020-01-24 16:50:14,456 - INFO - holdout 1 of 5
2020-01-24 16:50:14,622 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': 2, 'max_features': 'sqrt', 'max_leaf_nodes': 2}
2020-01-24 16:50:14,627 - INFO - holdout 2 of 5
2020-01-24 16:50:14,849 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'log2', 'max_leaf_nodes': 2}
2020-01-24 16:50:14,857 - INFO - holdout 3 of 5
2020-01-24 16:50:15,029 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': 2}
2020-01-24 16:50:15,034 - INFO - holdout 4 of 5
2020-01-24 16:50:15,194 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': 2}
2020-01-24 16:50:15,199 - INFO - holdout 5 of 5
2020-01-24 16:50:15,363 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 

2020-01-24 16:50:20,512 - INFO - holdout 3 of 5
2020-01-24 16:50:20,690 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': 2}
2020-01-24 16:50:20,695 - INFO - holdout 4 of 5
2020-01-24 16:50:20,858 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': 2}
2020-01-24 16:50:20,862 - INFO - holdout 5 of 5
2020-01-24 16:50:21,004 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': 2}
2020-01-24 16:50:21,006 - INFO - opening result file in append
2020-01-24 16:50:21,007 - INFO - experiment finished
2020-01-24 16:50:21,008 - INFO - completed 7.14%
2020-01-24 16:50:21,009 - INFO - Considering ./data/classification/class-Decision_tree-pca-length-2.json
2020-01-24 16:50:21,009 - INFO - appending to existing file
2020-01-24 16:50:21,010 - INFO - Found cached reduced data. Retr

2020-01-24 16:50:25,325 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': 5, 'max_features': 'sqrt', 'max_leaf_nodes': None}
2020-01-24 16:50:25,329 - INFO - holdout 4 of 5
2020-01-24 16:50:25,445 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': None}
2020-01-24 16:50:25,450 - INFO - holdout 5 of 5
2020-01-24 16:50:25,563 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': None}
2020-01-24 16:50:25,566 - INFO - Checking alpha=0.40
2020-01-24 16:50:25,569 - INFO - holdout 1 of 5
2020-01-24 16:50:25,686 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': 10}
2020-01-24 16:50:25,690 - INFO - holdout 2 of 5
2020-01-24 16:50:25,805 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': 5, 'max_features': None, 'max_leaf_nodes': 

2020-01-24 16:50:29,663 - INFO - Checking alpha=0.50
2020-01-24 16:50:29,667 - INFO - holdout 1 of 5
2020-01-24 16:50:29,784 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': 5, 'max_features': 'log2', 'max_leaf_nodes': 10}
2020-01-24 16:50:29,789 - INFO - holdout 2 of 5
2020-01-24 16:50:29,900 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': 5, 'max_features': None, 'max_leaf_nodes': None}
2020-01-24 16:50:29,905 - INFO - holdout 3 of 5
2020-01-24 16:50:30,014 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': 2}
2020-01-24 16:50:30,019 - INFO - holdout 4 of 5
2020-01-24 16:50:30,128 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': 10}
2020-01-24 16:50:30,133 - INFO - holdout 5 of 5
2020-01-24 16:50:30,248 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': No

2020-01-24 16:50:33,913 - INFO - holdout 3 of 5
2020-01-24 16:50:34,023 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': 10, 'max_features': 'log2', 'max_leaf_nodes': 10}
2020-01-24 16:50:34,027 - INFO - holdout 4 of 5
2020-01-24 16:50:34,141 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': 10}
2020-01-24 16:50:34,146 - INFO - holdout 5 of 5
2020-01-24 16:50:34,253 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': 2}
2020-01-24 16:50:34,255 - INFO - Checking alpha=0.80
2020-01-24 16:50:34,258 - INFO - holdout 1 of 5
2020-01-24 16:50:34,372 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': 2}
2020-01-24 16:50:34,376 - INFO - holdout 2 of 5
2020-01-24 16:50:34,483 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, '

2020-01-24 16:50:38,481 - INFO - Checking alpha=0.90
2020-01-24 16:50:38,485 - INFO - holdout 1 of 5
2020-01-24 16:50:38,627 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': 5, 'max_features': 'log2', 'max_leaf_nodes': 100}
2020-01-24 16:50:38,633 - INFO - holdout 2 of 5
2020-01-24 16:50:38,766 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': None}
2020-01-24 16:50:38,772 - INFO - holdout 3 of 5
2020-01-24 16:50:38,889 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': 5, 'max_features': 'log2', 'max_leaf_nodes': None}
2020-01-24 16:50:38,894 - INFO - holdout 4 of 5
2020-01-24 16:50:39,011 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': 2}
2020-01-24 16:50:39,016 - INFO - holdout 5 of 5
2020-01-24 16:50:39,134 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': 5,

2020-01-24 16:50:47,045 - INFO - holdout 1 of 5
2020-01-24 16:50:47,297 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': 2}
2020-01-24 16:50:47,302 - INFO - holdout 2 of 5
2020-01-24 16:50:47,572 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': 2}
2020-01-24 16:50:47,577 - INFO - holdout 3 of 5
2020-01-24 16:50:47,833 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': None, 'max_features': 'log2', 'max_leaf_nodes': None}
2020-01-24 16:50:47,841 - INFO - holdout 4 of 5
2020-01-24 16:50:48,073 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': 2}
2020-01-24 16:50:48,078 - INFO - holdout 5 of 5
2020-01-24 16:50:48,315 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': 5, 'max_features': 'sqrt', 'max_leaf_nodes': 5}
20

2020-01-24 16:50:56,677 - INFO - holdout 3 of 5
2020-01-24 16:50:56,960 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': 5, 'max_features': 'sqrt', 'max_leaf_nodes': None}
2020-01-24 16:50:56,969 - INFO - holdout 4 of 5
2020-01-24 16:50:57,242 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': 2, 'max_features': 'sqrt', 'max_leaf_nodes': 2}
2020-01-24 16:50:57,251 - INFO - holdout 5 of 5
2020-01-24 16:50:57,535 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': 2}
2020-01-24 16:50:57,538 - INFO - Checking alpha=0.50
2020-01-24 16:50:57,543 - INFO - holdout 1 of 5
2020-01-24 16:50:57,823 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': 10, 'max_features': 'sqrt', 'max_leaf_nodes': 5}
2020-01-24 16:50:57,830 - INFO - holdout 2 of 5
2020-01-24 16:50:58,101 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': 2,

2020-01-24 16:51:07,235 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'log2', 'max_leaf_nodes': 2}
2020-01-24 16:51:07,238 - INFO - Checking alpha=0.60
2020-01-24 16:51:07,245 - INFO - holdout 1 of 5
2020-01-24 16:51:07,529 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': 2, 'max_features': 'sqrt', 'max_leaf_nodes': 100}
2020-01-24 16:51:07,538 - INFO - holdout 2 of 5
2020-01-24 16:51:07,806 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': 5, 'max_features': 'sqrt', 'max_leaf_nodes': 5}
2020-01-24 16:51:07,813 - INFO - holdout 3 of 5
2020-01-24 16:51:08,058 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': None, 'max_features': 'log2', 'max_leaf_nodes': None}
2020-01-24 16:51:08,067 - INFO - holdout 4 of 5
2020-01-24 16:51:08,335 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes'

2020-01-24 16:51:17,394 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': 5, 'max_features': 'sqrt', 'max_leaf_nodes': None}
2020-01-24 16:51:17,399 - INFO - holdout 3 of 5
2020-01-24 16:51:17,626 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': 2}
2020-01-24 16:51:17,631 - INFO - holdout 4 of 5
2020-01-24 16:51:17,866 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': None}
2020-01-24 16:51:17,877 - INFO - holdout 5 of 5
2020-01-24 16:51:18,109 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': 5, 'max_features': 'sqrt', 'max_leaf_nodes': 2}
2020-01-24 16:51:18,112 - INFO - Checking alpha=0.90
2020-01-24 16:51:18,120 - INFO - holdout 1 of 5
2020-01-24 16:51:18,359 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_node

KeyboardInterrupt: 

In [27]:
def TSNE_custom(*args, **kwargs):
    return TSNE(*args, **kwargs, method='exact')



algorithms = [DecisionTreeClassifier, RandomForestClassifier,
              GaussianNB, LinearDiscriminantAnalysis, MLPClassifier_custom,
              ScaledSVC, ScaledSVC]

hyperp_sets = [{'criterion': ['gini', 'entropy'], 'max_leaf_nodes': [None, 2, 5, 10, 50, 100],
                'max_features': [None, 'sqrt', 'log2'], 'max_depth': [None, 2, 5, 10]},
               {'n_estimators': [5, 10, 50, 100, 200], 'criterion': ['gini', 'entropy'], 'max_leaf_nodes': [None, 2, 5, 10, 50, 100],
                'max_features': [None, 'sqrt', 'log2'], 'max_depth': [None, 2, 5, 10]},
               {}, {}, {'hidden_layer_sizes': [[2], [4], [6], [10], [20]]},
               {'C': [.001, .01, .1, 1, 10], 'kernel': ['linear']},
               {'C': [.001, .01, .1, 1, 10], 'kernel': ['rbf'], 'gamma': [.001, .01, .1, 1, 10, 30]}]
percentages = [(.8, .1, .1), (.8, .1, .2), (.8, 0, .2), (.8, 0, .2),
               (.8, .1, .1),(.8, .1, .1), (.8, .1, .1)]


learning_procedures = zip(names, algorithms, hyperp_sets, percentages)

data_matrices = [dist_hamming, dist_jaccard, dist_length, dist_leven]
data = zip(data_names, data_matrices)

experiments = it.product(learning_procedures, data, components)

num_experiments = len(algorithms) *  len(data_matrices) *  len(components)



performed_experiments = 0

re_generate_files = False

append = True

num_groups = 10
for g in range(num_groups):
    logger.info('Started experiment group {} of {}'.format(g+1, num_groups))
    for experiment in experiments:
        learning_procedure = experiment[0]
        data_item = experiment[1]
        comp = experiment[2]
        out_file = result_dir + 'class-' + \
                   learning_procedure[0].replace(' ', '_') + '-' + \
                   'tsne-' + \
                   data_item[0] + '-' + \
                   str(comp) + '.json'
        logger.info('Considering {}'.format(out_file))
        if os.path.isfile(out_file) and not re_generate_files and not append:
            logger.info('already exists, skipping')
        else:
            access_flag = 'w' if not append else 'a'
            if append:
                logger.info('appending to existing file')
            elif re_generate_files:
                logger.info('re-executing experiment')
            else:
                logger.info('does not exist, starting experiment')
            # perform experiment
            res = learn_experiment(data_item[1],
                                   TSNE_custom,  'tsne-' + experiment[1][0],
                                   mu,
                                   learning_procedure[1],
                                   d=comp,
                                   alpha_levels=(.2, .4, .5, .6, .8, .9),
                                   percentages=learning_procedure[3],
                                   hyperparams_set = learning_procedure[2])

            if access_flag == 'a':
                logger.info('opening result file in append')
            else:
                logger.info('creating result file')
            with open(out_file, access_flag) as f:
                f.write('\n')
                json.dump(res, f)

            logger.info('experiment finished')
        performed_experiments += 1
        perc_complete = 100 * performed_experiments / num_experiments
        logger.info('completed {:.2f}%'.format(perc_complete))
    logger.info('Ended experiment group {} of {}'.format(g+1, num_groups))

2019-06-28 15:25:04,065 - INFO - Considering ./data/classification/class-Decision_tree-tsne-hamming-2.json
2019-06-28 15:25:04,066 - INFO - already exists, skipping
2019-06-28 15:25:04,067 - INFO - completed 0.71%
2019-06-28 15:25:04,068 - INFO - Considering ./data/classification/class-Decision_tree-tsne-hamming-3.json
2019-06-28 15:25:04,069 - INFO - already exists, skipping
2019-06-28 15:25:04,070 - INFO - completed 1.43%
2019-06-28 15:25:04,071 - INFO - Considering ./data/classification/class-Decision_tree-tsne-hamming-5.json
2019-06-28 15:25:04,071 - INFO - already exists, skipping
2019-06-28 15:25:04,072 - INFO - completed 2.14%
2019-06-28 15:25:04,073 - INFO - Considering ./data/classification/class-Decision_tree-tsne-hamming-10.json
2019-06-28 15:25:04,073 - INFO - already exists, skipping
2019-06-28 15:25:04,074 - INFO - completed 2.86%
2019-06-28 15:25:04,075 - INFO - Considering ./data/classification/class-Decision_tree-tsne-hamming-30.json
2019-06-28 15:25:04,075 - INFO - al

2019-06-28 15:25:09,225 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'log2', 'max_leaf_nodes': 50}
2019-06-28 15:25:09,229 - INFO - holdout 4 of 5
2019-06-28 15:25:09,404 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': None}
2019-06-28 15:25:09,410 - INFO - holdout 5 of 5
2019-06-28 15:25:09,587 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': 50}
2019-06-28 15:25:09,591 - INFO - experiment finished
2019-06-28 15:25:09,592 - INFO - completed 7.86%
2019-06-28 15:25:09,592 - INFO - Considering ./data/classification/class-Decision_tree-tsne-length-3.json
2019-06-28 15:25:09,592 - INFO - does not exist, starting experiment
2019-06-28 15:25:09,593 - INFO - Found cached reduced data. Retrieving tsne-length
2019-06-28 15:25:09,596 - INFO - Checking alpha=0.20
2019-06-28 15:25:09,599 - INFO -

2019-06-28 15:25:15,564 - INFO - holdout 4 of 5
2019-06-28 15:25:15,708 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': None, 'max_features': 'log2', 'max_leaf_nodes': 100}
2019-06-28 15:25:15,713 - INFO - holdout 5 of 5
2019-06-28 15:25:15,886 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': 10, 'max_features': 'sqrt', 'max_leaf_nodes': 50}
2019-06-28 15:25:15,889 - INFO - Checking alpha=0.40
2019-06-28 15:25:15,891 - INFO - holdout 1 of 5
2019-06-28 15:25:16,057 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': None}
2019-06-28 15:25:16,062 - INFO - holdout 2 of 5
2019-06-28 15:25:16,239 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': None, 'max_features': 'log2', 'max_leaf_nodes': 50}
2019-06-28 15:25:16,246 - INFO - holdout 3 of 5
2019-06-28 15:25:16,390 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'm

2019-06-28 15:25:21,856 - INFO - Checking alpha=0.50
2019-06-28 15:25:21,859 - INFO - holdout 1 of 5
2019-06-28 15:25:22,023 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': None}
2019-06-28 15:25:22,029 - INFO - holdout 2 of 5
2019-06-28 15:25:22,213 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': 100}
2019-06-28 15:25:22,218 - INFO - holdout 3 of 5
2019-06-28 15:25:22,383 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': None}
2019-06-28 15:25:22,391 - INFO - holdout 4 of 5
2019-06-28 15:25:22,556 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': None}
2019-06-28 15:25:22,561 - INFO - holdout 5 of 5
2019-06-28 15:25:22,729 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_dep

2019-06-28 15:25:28,441 - INFO - holdout 3 of 5
2019-06-28 15:25:28,616 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': None}
2019-06-28 15:25:28,622 - INFO - holdout 4 of 5
2019-06-28 15:25:28,804 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': None, 'max_features': 'log2', 'max_leaf_nodes': None}
2019-06-28 15:25:28,810 - INFO - holdout 5 of 5
2019-06-28 15:25:28,979 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': None, 'max_features': 'log2', 'max_leaf_nodes': 100}
2019-06-28 15:25:28,983 - INFO - Checking alpha=0.80
2019-06-28 15:25:28,987 - INFO - holdout 1 of 5
2019-06-28 15:25:29,168 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': 100}
2019-06-28 15:25:29,174 - INFO - holdout 2 of 5
2019-06-28 15:25:29,356 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'm

2019-06-28 15:42:57,167 - INFO - holdout 4 of 5
2019-06-28 15:43:50,497 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': 2, 'n_estimators': 5}
2019-06-28 15:43:50,507 - INFO - holdout 5 of 5
2019-06-28 15:44:49,073 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'log2', 'max_leaf_nodes': 2, 'n_estimators': 5}
2019-06-28 15:44:49,081 - INFO - Checking alpha=0.80
2019-06-28 15:44:49,084 - INFO - holdout 1 of 5
2019-06-28 15:45:46,341 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': 2, 'n_estimators': 5}
2019-06-28 15:45:46,350 - INFO - holdout 2 of 5
2019-06-28 15:46:44,463 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': 2, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'n_estimators': 5}
2019-06-28 15:46:44,472 - INFO - holdout 3 of 5
2019-06-28 15:47:39,666

2019-06-28 16:15:05,237 - INFO - holdout 4 of 5
2019-06-28 16:16:01,939 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': 10, 'max_features': 'log2', 'max_leaf_nodes': 2, 'n_estimators': 5}
2019-06-28 16:16:01,949 - INFO - holdout 5 of 5
2019-06-28 16:16:55,712 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': 2, 'max_features': None, 'max_leaf_nodes': 10, 'n_estimators': 10}
2019-06-28 16:16:55,725 - INFO - Checking alpha=0.90
2019-06-28 16:16:55,728 - INFO - holdout 1 of 5
2019-06-28 16:17:49,347 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': 2, 'n_estimators': 5}
2019-06-28 16:17:49,355 - INFO - holdout 2 of 5
2019-06-28 16:18:42,701 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': 2, 'n_estimators': 200}
2019-06-28 16:18:42,884 - INFO - holdout 3 of 5
2019-06-28 16:19:41,691 - INFO -

2019-06-28 16:47:29,102 - INFO - holdout 4 of 5
2019-06-28 16:48:26,931 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': 2, 'n_estimators': 5}
2019-06-28 16:48:26,940 - INFO - holdout 5 of 5
2019-06-28 16:49:26,449 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': 2, 'n_estimators': 50}
2019-06-28 16:49:26,502 - INFO - experiment finished
2019-06-28 16:49:26,502 - INFO - completed 16.43%
2019-06-28 16:49:26,503 - INFO - Considering ./data/classification/class-Random_forest-tsne-hamming-10.json
2019-06-28 16:49:26,503 - INFO - does not exist, starting experiment
2019-06-28 16:49:26,504 - INFO - Found cached reduced data. Retrieving tsne-hamming
2019-06-28 16:49:26,508 - INFO - Checking alpha=0.20
2019-06-28 16:49:26,511 - INFO - holdout 1 of 5
2019-06-28 16:50:27,584 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': 

2019-06-28 17:18:05,328 - INFO - holdout 2 of 5
2019-06-28 17:18:57,764 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': 2, 'n_estimators': 5}
2019-06-28 17:18:57,773 - INFO - holdout 3 of 5
2019-06-28 17:19:55,548 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': 2, 'n_estimators': 5}
2019-06-28 17:19:55,557 - INFO - holdout 4 of 5
2019-06-28 17:20:49,491 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': 2, 'n_estimators': 5}
2019-06-28 17:20:49,501 - INFO - holdout 5 of 5
2019-06-28 17:21:45,866 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': 2, 'n_estimators': 5}
2019-06-28 17:21:45,876 - INFO - Checking alpha=0.40
2019-06-28 17:21:45,879 - INFO - holdout 1 of 5
2019-06-28 17:22:51,566 - INFO - Le

2019-06-28 17:51:14,403 - INFO - holdout 2 of 5
2019-06-28 17:52:06,631 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'log2', 'max_leaf_nodes': 2, 'n_estimators': 5}
2019-06-28 17:52:06,638 - INFO - holdout 3 of 5
2019-06-28 17:52:58,313 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': 2, 'n_estimators': 10}
2019-06-28 17:52:58,325 - INFO - holdout 4 of 5
2019-06-28 17:53:50,677 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': 5, 'max_features': 'log2', 'max_leaf_nodes': 2, 'n_estimators': 5}
2019-06-28 17:53:50,685 - INFO - holdout 5 of 5
2019-06-28 17:54:40,951 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': 2, 'n_estimators': 50}
2019-06-28 17:54:41,007 - INFO - Checking alpha=0.50
2019-06-28 17:54:41,010 - INFO - holdout 1 of 5
2019-06-28 17:55:36,237 - I

2019-06-28 18:21:15,328 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': 2, 'n_estimators': 10}
2019-06-28 18:21:15,342 - INFO - holdout 2 of 5
2019-06-28 18:22:05,572 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': 2, 'n_estimators': 50}
2019-06-28 18:22:05,627 - INFO - holdout 3 of 5
2019-06-28 18:22:57,050 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': 10, 'max_features': 'log2', 'max_leaf_nodes': 2, 'n_estimators': 50}
2019-06-28 18:22:57,099 - INFO - holdout 4 of 5
2019-06-28 18:23:47,495 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': 2, 'max_features': 'sqrt', 'max_leaf_nodes': 2, 'n_estimators': 5}
2019-06-28 18:23:47,503 - INFO - holdout 5 of 5
2019-06-28 18:24:37,986 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': None, 'max_features': 

2019-06-28 18:51:28,416 - INFO - Checking alpha=0.60
2019-06-28 18:51:28,419 - INFO - holdout 1 of 5
2019-06-28 18:52:21,523 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': 2, 'n_estimators': 10}
2019-06-28 18:52:21,536 - INFO - holdout 2 of 5
2019-06-28 18:53:11,992 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': 2, 'n_estimators': 10}
2019-06-28 18:53:12,007 - INFO - holdout 3 of 5
2019-06-28 18:54:02,090 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': 5, 'max_features': None, 'max_leaf_nodes': 2, 'n_estimators': 5}
2019-06-28 18:54:02,099 - INFO - holdout 4 of 5
2019-06-28 18:54:52,051 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': 2, 'n_estimators': 10}
2019-06-28 18:54:52,066 - INFO - holdout 5 of 5
2019-06-28 18:55:41,643 - I

2019-06-28 19:22:16,057 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'log2', 'max_leaf_nodes': 2, 'n_estimators': 5}
2019-06-28 19:22:16,064 - INFO - Checking alpha=0.80
2019-06-28 19:22:16,067 - INFO - holdout 1 of 5
2019-06-28 19:23:08,592 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': 5, 'max_features': None, 'max_leaf_nodes': 2, 'n_estimators': 5}
2019-06-28 19:23:08,601 - INFO - holdout 2 of 5
2019-06-28 19:24:00,077 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': 10, 'max_features': 'sqrt', 'max_leaf_nodes': 2, 'n_estimators': 10}
2019-06-28 19:24:00,091 - INFO - holdout 3 of 5
2019-06-28 19:24:51,524 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': 2, 'n_estimators': 5}
2019-06-28 19:24:51,533 - INFO - holdout 4 of 5
2019-06-28 19:25:41,444 - INFO - Learning with best hyperparams: {'criteri

2019-06-28 19:50:56,765 - INFO - holdout 5 of 5
2019-06-28 19:51:48,015 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': 2, 'max_features': 'log2', 'max_leaf_nodes': 2, 'n_estimators': 10}
2019-06-28 19:51:48,026 - INFO - Checking alpha=0.90
2019-06-28 19:51:48,028 - INFO - holdout 1 of 5
2019-06-28 19:52:39,675 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': 10, 'max_features': 'log2', 'max_leaf_nodes': 2, 'n_estimators': 5}
2019-06-28 19:52:39,683 - INFO - holdout 2 of 5
2019-06-28 19:53:31,135 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': 2, 'n_estimators': 5}
2019-06-28 19:53:31,145 - INFO - holdout 3 of 5
2019-06-28 19:54:22,247 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': 2, 'max_features': 'sqrt', 'max_leaf_nodes': 2, 'n_estimators': 10}
2019-06-28 19:54:22,260 - INFO - holdout 4 of 5
2019-06-28 19:55:14,298 - INFO -

2019-06-28 20:20:57,265 - INFO - holdout 4 of 5
2019-06-28 20:21:52,805 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': 50, 'n_estimators': 5}
2019-06-28 20:21:52,818 - INFO - holdout 5 of 5
2019-06-28 20:22:47,772 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'log2', 'max_leaf_nodes': 50, 'n_estimators': 5}
2019-06-28 20:22:47,782 - INFO - experiment finished
2019-06-28 20:22:47,783 - INFO - completed 22.14%
2019-06-28 20:22:47,784 - INFO - Considering ./data/classification/class-Random_forest-tsne-length-3.json
2019-06-28 20:22:47,784 - INFO - does not exist, starting experiment
2019-06-28 20:22:47,786 - INFO - Found cached reduced data. Retrieving tsne-length
2019-06-28 20:22:47,789 - INFO - Checking alpha=0.20
2019-06-28 20:22:47,791 - INFO - holdout 1 of 5
2019-06-28 20:23:42,530 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth

2019-06-28 20:50:53,854 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': 10, 'max_features': 'sqrt', 'max_leaf_nodes': 50, 'n_estimators': 5}
2019-06-28 20:50:53,865 - INFO - holdout 2 of 5
2019-06-28 20:51:46,448 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': 10, 'max_features': None, 'max_leaf_nodes': None, 'n_estimators': 5}
2019-06-28 20:51:46,462 - INFO - holdout 3 of 5
2019-06-28 20:52:40,424 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': None, 'max_features': 'log2', 'max_leaf_nodes': 100, 'n_estimators': 10}
2019-06-28 20:52:40,448 - INFO - holdout 4 of 5
2019-06-28 20:53:33,794 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': 10, 'max_features': 'log2', 'max_leaf_nodes': 50, 'n_estimators': 5}
2019-06-28 20:53:33,804 - INFO - holdout 5 of 5
2019-06-28 20:54:27,241 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': Non

2019-06-28 21:21:29,691 - INFO - Checking alpha=0.40
2019-06-28 21:21:29,694 - INFO - holdout 1 of 5
2019-06-28 21:22:23,921 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': 50, 'n_estimators': 5}
2019-06-28 21:22:23,934 - INFO - holdout 2 of 5
2019-06-28 21:23:17,473 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'log2', 'max_leaf_nodes': None, 'n_estimators': 5}
2019-06-28 21:23:17,485 - INFO - holdout 3 of 5
2019-06-28 21:24:11,600 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'log2', 'max_leaf_nodes': 50, 'n_estimators': 5}
2019-06-28 21:24:11,611 - INFO - holdout 4 of 5
2019-06-28 21:25:06,029 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': None, 'n_estimators': 5}
2019-06-28 21:25:06,050 - INFO - holdout 5 of 5
2019-06-28 21:26:0

2019-06-28 21:52:12,619 - INFO - holdout 5 of 5
2019-06-28 21:53:06,790 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'n_estimators': 5}
2019-06-28 21:53:06,806 - INFO - Checking alpha=0.50
2019-06-28 21:53:06,808 - INFO - holdout 1 of 5
2019-06-28 21:54:02,741 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': 10, 'max_features': 'log2', 'max_leaf_nodes': 100, 'n_estimators': 5}
2019-06-28 21:54:02,752 - INFO - holdout 2 of 5
2019-06-28 21:54:58,836 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': None, 'n_estimators': 10}
2019-06-28 21:54:58,877 - INFO - holdout 3 of 5
2019-06-28 21:55:53,852 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': 10, 'max_features': None, 'max_leaf_nodes': 100, 'n_estimators': 5}
2019-06-28 21:55:53,867 - INFO - holdout 4 of 5
2019-06-28 21:5

2019-06-28 22:23:06,613 - INFO - holdout 4 of 5
2019-06-28 22:24:00,759 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': 2, 'n_estimators': 10}
2019-06-28 22:24:00,772 - INFO - holdout 5 of 5
2019-06-28 22:24:55,853 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'log2', 'max_leaf_nodes': 2, 'n_estimators': 5}
2019-06-28 22:24:55,859 - INFO - Checking alpha=0.60
2019-06-28 22:24:55,863 - INFO - holdout 1 of 5
2019-06-28 22:25:51,244 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': 10, 'max_features': 'sqrt', 'max_leaf_nodes': 2, 'n_estimators': 10}
2019-06-28 22:25:51,257 - INFO - holdout 2 of 5
2019-06-28 22:26:45,278 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': 2, 'max_features': 'sqrt', 'max_leaf_nodes': 50, 'n_estimators': 5}
2019-06-28 22:26:45,287 - INFO - holdout 3 of 5
2019-06-28 22:27:39,021 - INF

2019-06-28 22:55:12,798 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': 5, 'n_estimators': 5}
2019-06-28 22:55:12,808 - INFO - holdout 4 of 5
2019-06-28 22:56:09,785 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': None, 'max_features': 'log2', 'max_leaf_nodes': 2, 'n_estimators': 5}
2019-06-28 22:56:09,793 - INFO - holdout 5 of 5
2019-06-28 22:57:04,985 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': None, 'max_features': 'log2', 'max_leaf_nodes': 5, 'n_estimators': 5}
2019-06-28 22:57:04,993 - INFO - Checking alpha=0.80
2019-06-28 22:57:04,997 - INFO - holdout 1 of 5
2019-06-28 22:57:58,340 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': None, 'max_features': 'log2', 'max_leaf_nodes': 2, 'n_estimators': 5}
2019-06-28 22:57:58,348 - INFO - holdout 2 of 5
2019-06-28 22:58:52,363 - INFO - Learning with best hyperparams

2019-06-28 23:26:14,131 - INFO - holdout 3 of 5
2019-06-28 23:27:09,107 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': 10, 'max_features': 'log2', 'max_leaf_nodes': 2, 'n_estimators': 5}
2019-06-28 23:27:09,115 - INFO - holdout 4 of 5
2019-06-28 23:28:04,845 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'log2', 'max_leaf_nodes': 2, 'n_estimators': 10}
2019-06-28 23:28:04,860 - INFO - holdout 5 of 5
2019-06-28 23:29:00,236 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': 2, 'n_estimators': 5}
2019-06-28 23:29:00,243 - INFO - Checking alpha=0.90
2019-06-28 23:29:00,246 - INFO - holdout 1 of 5
2019-06-28 23:29:55,054 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': 2, 'max_features': 'log2', 'max_leaf_nodes': 2, 'n_estimators': 5}
2019-06-28 23:29:55,062 - INFO - holdout 2 of 5
2019-06-28 23:30:51,161 -

2019-06-28 23:58:12,787 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': None, 'max_features': 'log2', 'max_leaf_nodes': 2, 'n_estimators': 5}
2019-06-28 23:58:12,797 - INFO - holdout 3 of 5
2019-06-28 23:59:08,614 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': 2, 'max_features': None, 'max_leaf_nodes': 2, 'n_estimators': 5}
2019-06-28 23:59:08,624 - INFO - holdout 4 of 5
2019-06-29 00:00:05,737 - INFO - Learning with best hyperparams: {'criterion': 'gini', 'max_depth': None, 'max_features': 'log2', 'max_leaf_nodes': 2, 'n_estimators': 5}
2019-06-29 00:00:05,746 - INFO - holdout 5 of 5
2019-06-29 00:00:59,317 - INFO - Learning with best hyperparams: {'criterion': 'entropy', 'max_depth': 5, 'max_features': 'log2', 'max_leaf_nodes': 2, 'n_estimators': 10}
2019-06-29 00:00:59,329 - INFO - experiment finished
2019-06-29 00:00:59,330 - INFO - completed 27.86%
2019-06-29 00:00:59,330 - INFO - Considering ./data/classification/clas

2019-06-29 00:28:28,093 - INFO - already exists, skipping
2019-06-29 00:28:28,093 - INFO - completed 29.29%
2019-06-29 00:28:28,094 - INFO - Considering ./data/classification/class-Naive_Bayes-tsne-hamming-3.json
2019-06-29 00:28:28,094 - INFO - already exists, skipping
2019-06-29 00:28:28,095 - INFO - completed 30.00%
2019-06-29 00:28:28,095 - INFO - Considering ./data/classification/class-Naive_Bayes-tsne-hamming-5.json
2019-06-29 00:28:28,095 - INFO - already exists, skipping
2019-06-29 00:28:28,096 - INFO - completed 30.71%
2019-06-29 00:28:28,096 - INFO - Considering ./data/classification/class-Naive_Bayes-tsne-hamming-10.json
2019-06-29 00:28:28,096 - INFO - already exists, skipping
2019-06-29 00:28:28,097 - INFO - completed 31.43%
2019-06-29 00:28:28,097 - INFO - Considering ./data/classification/class-Naive_Bayes-tsne-hamming-30.json
2019-06-29 00:28:28,097 - INFO - already exists, skipping
2019-06-29 00:28:28,098 - INFO - completed 32.14%
2019-06-29 00:28:28,098 - INFO - Consi

2019-06-29 00:28:28,359 - INFO - holdout 5 of 5
2019-06-29 00:28:28,360 - INFO - Learning with best hyperparams: {}
2019-06-29 00:28:28,363 - INFO - Checking alpha=0.60
2019-06-29 00:28:28,366 - INFO - holdout 1 of 5
2019-06-29 00:28:28,367 - INFO - Learning with best hyperparams: {}
2019-06-29 00:28:28,372 - INFO - holdout 2 of 5
2019-06-29 00:28:28,372 - INFO - Learning with best hyperparams: {}
2019-06-29 00:28:28,377 - INFO - holdout 3 of 5
2019-06-29 00:28:28,377 - INFO - Learning with best hyperparams: {}
2019-06-29 00:28:28,382 - INFO - holdout 4 of 5
2019-06-29 00:28:28,383 - INFO - Learning with best hyperparams: {}
2019-06-29 00:28:28,388 - INFO - holdout 5 of 5
2019-06-29 00:28:28,388 - INFO - Learning with best hyperparams: {}
2019-06-29 00:28:28,391 - INFO - Checking alpha=0.80
2019-06-29 00:28:28,394 - INFO - holdout 1 of 5
2019-06-29 00:28:28,394 - INFO - Learning with best hyperparams: {}
2019-06-29 00:28:28,398 - INFO - holdout 2 of 5
2019-06-29 00:28:28,399 - INFO - L

2019-06-29 00:28:28,695 - INFO - holdout 4 of 5
2019-06-29 00:28:28,696 - INFO - Learning with best hyperparams: {}
2019-06-29 00:28:28,700 - INFO - holdout 5 of 5
2019-06-29 00:28:28,701 - INFO - Learning with best hyperparams: {}
2019-06-29 00:28:28,705 - INFO - Checking alpha=0.60
2019-06-29 00:28:28,707 - INFO - holdout 1 of 5
2019-06-29 00:28:28,708 - INFO - Learning with best hyperparams: {}
2019-06-29 00:28:28,713 - INFO - holdout 2 of 5
2019-06-29 00:28:28,714 - INFO - Learning with best hyperparams: {}
2019-06-29 00:28:28,718 - INFO - holdout 3 of 5
2019-06-29 00:28:28,719 - INFO - Learning with best hyperparams: {}
2019-06-29 00:28:28,724 - INFO - holdout 4 of 5
2019-06-29 00:28:28,725 - INFO - Learning with best hyperparams: {}
2019-06-29 00:28:28,730 - INFO - holdout 5 of 5
2019-06-29 00:28:28,730 - INFO - Learning with best hyperparams: {}
2019-06-29 00:28:28,734 - INFO - Checking alpha=0.80
2019-06-29 00:28:28,737 - INFO - holdout 1 of 5
2019-06-29 00:28:28,738 - INFO - L

2019-06-29 00:28:28,997 - INFO - completed 45.71%
2019-06-29 00:28:28,998 - INFO - Considering ./data/classification/class-LDA-tsne-hamming-30.json
2019-06-29 00:28:28,999 - INFO - already exists, skipping
2019-06-29 00:28:28,999 - INFO - completed 46.43%
2019-06-29 00:28:29,000 - INFO - Considering ./data/classification/class-LDA-tsne-jaccard-2.json
2019-06-29 00:28:29,001 - INFO - already exists, skipping
2019-06-29 00:28:29,002 - INFO - completed 47.14%
2019-06-29 00:28:29,002 - INFO - Considering ./data/classification/class-LDA-tsne-jaccard-3.json
2019-06-29 00:28:29,003 - INFO - already exists, skipping
2019-06-29 00:28:29,003 - INFO - completed 47.86%
2019-06-29 00:28:29,004 - INFO - Considering ./data/classification/class-LDA-tsne-jaccard-5.json
2019-06-29 00:28:29,005 - INFO - already exists, skipping
2019-06-29 00:28:29,005 - INFO - completed 48.57%
2019-06-29 00:28:29,006 - INFO - Considering ./data/classification/class-LDA-tsne-jaccard-10.json
2019-06-29 00:28:29,006 - INFO 

2019-06-29 00:28:29,344 - INFO - Checking alpha=0.80
2019-06-29 00:28:29,347 - INFO - holdout 1 of 5
2019-06-29 00:28:29,348 - INFO - Learning with best hyperparams: {}
2019-06-29 00:28:29,352 - INFO - holdout 2 of 5
2019-06-29 00:28:29,353 - INFO - Learning with best hyperparams: {}
2019-06-29 00:28:29,357 - INFO - holdout 3 of 5
2019-06-29 00:28:29,358 - INFO - Learning with best hyperparams: {}
2019-06-29 00:28:29,362 - INFO - holdout 4 of 5
2019-06-29 00:28:29,363 - INFO - Learning with best hyperparams: {}
2019-06-29 00:28:29,368 - INFO - holdout 5 of 5
2019-06-29 00:28:29,369 - INFO - Learning with best hyperparams: {}
2019-06-29 00:28:29,372 - INFO - Checking alpha=0.90
2019-06-29 00:28:29,375 - INFO - holdout 1 of 5
2019-06-29 00:28:29,375 - INFO - Learning with best hyperparams: {}
2019-06-29 00:28:29,381 - INFO - holdout 2 of 5
2019-06-29 00:28:29,381 - INFO - Learning with best hyperparams: {}
2019-06-29 00:28:29,386 - INFO - holdout 3 of 5
2019-06-29 00:28:29,387 - INFO - L

2019-06-29 00:28:29,702 - INFO - Learning with best hyperparams: {}
2019-06-29 00:28:29,706 - INFO - Checking alpha=0.80
2019-06-29 00:28:29,709 - INFO - holdout 1 of 5
2019-06-29 00:28:29,710 - INFO - Learning with best hyperparams: {}
2019-06-29 00:28:29,715 - INFO - holdout 2 of 5
2019-06-29 00:28:29,716 - INFO - Learning with best hyperparams: {}
2019-06-29 00:28:29,722 - INFO - holdout 3 of 5
2019-06-29 00:28:29,723 - INFO - Learning with best hyperparams: {}
2019-06-29 00:28:29,728 - INFO - holdout 4 of 5
2019-06-29 00:28:29,728 - INFO - Learning with best hyperparams: {}
2019-06-29 00:28:29,733 - INFO - holdout 5 of 5
2019-06-29 00:28:29,736 - INFO - Learning with best hyperparams: {}
2019-06-29 00:28:29,740 - INFO - Checking alpha=0.90
2019-06-29 00:28:29,742 - INFO - holdout 1 of 5
2019-06-29 00:28:29,743 - INFO - Learning with best hyperparams: {}
2019-06-29 00:28:29,748 - INFO - holdout 2 of 5
2019-06-29 00:28:29,749 - INFO - Learning with best hyperparams: {}
2019-06-29 00:

2019-06-29 00:28:29,992 - INFO - completed 62.86%
2019-06-29 00:28:29,992 - INFO - Considering ./data/classification/class-MLP-tsne-jaccard-10.json
2019-06-29 00:28:29,993 - INFO - already exists, skipping
2019-06-29 00:28:29,993 - INFO - completed 63.57%
2019-06-29 00:28:29,994 - INFO - Considering ./data/classification/class-MLP-tsne-jaccard-30.json
2019-06-29 00:28:29,996 - INFO - already exists, skipping
2019-06-29 00:28:29,997 - INFO - completed 64.29%
2019-06-29 00:28:29,997 - INFO - Considering ./data/classification/class-MLP-tsne-length-2.json
2019-06-29 00:28:29,998 - INFO - does not exist, starting experiment
2019-06-29 00:28:29,998 - INFO - Found cached reduced data. Retrieving tsne-length
2019-06-29 00:28:30,003 - INFO - Checking alpha=0.20
2019-06-29 00:28:30,006 - INFO - holdout 1 of 5
2019-06-29 00:28:32,459 - INFO - Learning with best hyperparams: {'hidden_layer_sizes': [10]}
2019-06-29 00:28:33,065 - INFO - holdout 2 of 5
2019-06-29 00:28:34,735 - INFO - Learning with 

2019-06-29 00:30:29,829 - INFO - holdout 3 of 5
2019-06-29 00:30:31,449 - INFO - Learning with best hyperparams: {'hidden_layer_sizes': [6]}
2019-06-29 00:30:32,039 - INFO - holdout 4 of 5
2019-06-29 00:30:33,962 - INFO - Learning with best hyperparams: {'hidden_layer_sizes': [2]}
2019-06-29 00:30:34,220 - INFO - holdout 5 of 5
2019-06-29 00:30:36,361 - INFO - Learning with best hyperparams: {'hidden_layer_sizes': [10]}
2019-06-29 00:30:36,638 - INFO - Checking alpha=0.80
2019-06-29 00:30:36,641 - INFO - holdout 1 of 5
2019-06-29 00:30:38,677 - INFO - Learning with best hyperparams: {'hidden_layer_sizes': [6]}
2019-06-29 00:30:39,245 - INFO - holdout 2 of 5
2019-06-29 00:30:41,223 - INFO - Learning with best hyperparams: {'hidden_layer_sizes': [4]}
2019-06-29 00:30:41,811 - INFO - holdout 3 of 5
2019-06-29 00:30:43,548 - INFO - Learning with best hyperparams: {'hidden_layer_sizes': [2]}
2019-06-29 00:30:43,842 - INFO - holdout 4 of 5
2019-06-29 00:30:46,007 - INFO - Learning with best 

2019-06-29 00:32:33,772 - INFO - Learning with best hyperparams: {'hidden_layer_sizes': [2]}
2019-06-29 00:32:34,414 - INFO - holdout 3 of 5
2019-06-29 00:32:36,659 - INFO - Learning with best hyperparams: {'hidden_layer_sizes': [10]}
2019-06-29 00:32:36,948 - INFO - holdout 4 of 5
2019-06-29 00:32:38,905 - INFO - Learning with best hyperparams: {'hidden_layer_sizes': [20]}
2019-06-29 00:32:39,281 - INFO - holdout 5 of 5
2019-06-29 00:32:41,858 - INFO - Learning with best hyperparams: {'hidden_layer_sizes': [6]}
2019-06-29 00:32:42,235 - INFO - Checking alpha=0.50
2019-06-29 00:32:42,238 - INFO - holdout 1 of 5
2019-06-29 00:32:44,953 - INFO - Learning with best hyperparams: {'hidden_layer_sizes': [4]}
2019-06-29 00:32:45,521 - INFO - holdout 2 of 5
2019-06-29 00:32:47,341 - INFO - Learning with best hyperparams: {'hidden_layer_sizes': [4]}
2019-06-29 00:32:48,159 - INFO - holdout 3 of 5
2019-06-29 00:32:50,098 - INFO - Learning with best hyperparams: {'hidden_layer_sizes': [2]}
2019-0

2019-06-29 00:34:49,030 - INFO - Learning with best hyperparams: {'hidden_layer_sizes': [4]}
2019-06-29 00:34:49,652 - INFO - holdout 5 of 5
2019-06-29 00:34:52,729 - INFO - Learning with best hyperparams: {'hidden_layer_sizes': [4]}
2019-06-29 00:34:53,251 - INFO - experiment finished
2019-06-29 00:34:53,252 - INFO - completed 67.86%
2019-06-29 00:34:53,252 - INFO - Considering ./data/classification/class-MLP-tsne-levenshtein-2.json
2019-06-29 00:34:53,253 - INFO - already exists, skipping
2019-06-29 00:34:53,253 - INFO - completed 68.57%
2019-06-29 00:34:53,254 - INFO - Considering ./data/classification/class-MLP-tsne-levenshtein-3.json
2019-06-29 00:34:53,254 - INFO - already exists, skipping
2019-06-29 00:34:53,256 - INFO - completed 69.29%
2019-06-29 00:34:53,256 - INFO - Considering ./data/classification/class-MLP-tsne-levenshtein-5.json
2019-06-29 00:34:53,256 - INFO - already exists, skipping
2019-06-29 00:34:53,257 - INFO - completed 70.00%
2019-06-29 00:34:53,258 - INFO - Con

2019-06-29 00:34:56,018 - INFO - Learning with best hyperparams: {'C': 0.001, 'kernel': 'linear'}
2019-06-29 00:34:56,047 - INFO - holdout 5 of 5
2019-06-29 00:34:56,112 - INFO - Learning with best hyperparams: {'C': 0.001, 'kernel': 'linear'}
2019-06-29 00:34:56,138 - INFO - experiment finished
2019-06-29 00:34:56,139 - INFO - completed 79.29%
2019-06-29 00:34:56,140 - INFO - Considering ./data/classification/class-SVC_(linear)-tsne-length-3.json
2019-06-29 00:34:56,141 - INFO - does not exist, starting experiment
2019-06-29 00:34:56,142 - INFO - Found cached reduced data. Retrieving tsne-length
2019-06-29 00:34:56,145 - INFO - Checking alpha=0.20
2019-06-29 00:34:56,149 - INFO - holdout 1 of 5
2019-06-29 00:34:56,211 - INFO - Learning with best hyperparams: {'C': 0.001, 'kernel': 'linear'}
2019-06-29 00:34:56,240 - INFO - holdout 2 of 5
2019-06-29 00:34:56,305 - INFO - Learning with best hyperparams: {'C': 0.001, 'kernel': 'linear'}
2019-06-29 00:34:56,334 - INFO - holdout 3 of 5
201

2019-06-29 00:35:00,532 - INFO - holdout 2 of 5
2019-06-29 00:35:00,599 - INFO - Learning with best hyperparams: {'C': 0.001, 'kernel': 'linear'}
2019-06-29 00:35:00,629 - INFO - holdout 3 of 5
2019-06-29 00:35:00,695 - INFO - Learning with best hyperparams: {'C': 0.001, 'kernel': 'linear'}
2019-06-29 00:35:00,726 - INFO - holdout 4 of 5
2019-06-29 00:35:00,792 - INFO - Learning with best hyperparams: {'C': 0.001, 'kernel': 'linear'}
2019-06-29 00:35:00,821 - INFO - holdout 5 of 5
2019-06-29 00:35:00,887 - INFO - Learning with best hyperparams: {'C': 0.001, 'kernel': 'linear'}
2019-06-29 00:35:00,914 - INFO - Checking alpha=0.80
2019-06-29 00:35:00,917 - INFO - holdout 1 of 5
2019-06-29 00:35:00,984 - INFO - Learning with best hyperparams: {'C': 0.001, 'kernel': 'linear'}
2019-06-29 00:35:01,014 - INFO - holdout 2 of 5
2019-06-29 00:35:01,081 - INFO - Learning with best hyperparams: {'C': 0.001, 'kernel': 'linear'}
2019-06-29 00:35:01,113 - INFO - holdout 3 of 5
2019-06-29 00:35:01,184

2019-06-29 00:35:04,966 - INFO - holdout 5 of 5
2019-06-29 00:35:05,031 - INFO - Learning with best hyperparams: {'C': 0.001, 'kernel': 'linear'}
2019-06-29 00:35:05,056 - INFO - Checking alpha=0.40
2019-06-29 00:35:05,060 - INFO - holdout 1 of 5
2019-06-29 00:35:05,118 - INFO - Learning with best hyperparams: {'C': 0.001, 'kernel': 'linear'}
2019-06-29 00:35:05,143 - INFO - holdout 2 of 5
2019-06-29 00:35:05,202 - INFO - Learning with best hyperparams: {'C': 0.001, 'kernel': 'linear'}
2019-06-29 00:35:05,229 - INFO - holdout 3 of 5
2019-06-29 00:35:05,289 - INFO - Learning with best hyperparams: {'C': 0.001, 'kernel': 'linear'}
2019-06-29 00:35:05,317 - INFO - holdout 4 of 5
2019-06-29 00:35:05,378 - INFO - Learning with best hyperparams: {'C': 0.001, 'kernel': 'linear'}
2019-06-29 00:35:05,405 - INFO - holdout 5 of 5
2019-06-29 00:35:05,468 - INFO - Learning with best hyperparams: {'C': 0.001, 'kernel': 'linear'}
2019-06-29 00:35:05,494 - INFO - Checking alpha=0.50
2019-06-29 00:35:0

2019-06-29 00:35:09,030 - INFO - holdout 3 of 5
2019-06-29 00:35:09,736 - INFO - Learning with best hyperparams: {'C': 0.001, 'gamma': 0.001, 'kernel': 'rbf'}
2019-06-29 00:35:09,783 - INFO - holdout 4 of 5
2019-06-29 00:35:10,521 - INFO - Learning with best hyperparams: {'C': 0.001, 'gamma': 0.001, 'kernel': 'rbf'}
2019-06-29 00:35:10,571 - INFO - holdout 5 of 5
2019-06-29 00:35:11,298 - INFO - Learning with best hyperparams: {'C': 1, 'gamma': 30, 'kernel': 'rbf'}
2019-06-29 00:35:11,355 - INFO - Checking alpha=0.40
2019-06-29 00:35:11,357 - INFO - holdout 1 of 5
2019-06-29 00:35:12,108 - INFO - Learning with best hyperparams: {'C': 0.001, 'gamma': 0.001, 'kernel': 'rbf'}
2019-06-29 00:35:12,155 - INFO - holdout 2 of 5
2019-06-29 00:35:12,882 - INFO - Learning with best hyperparams: {'C': 10, 'gamma': 30, 'kernel': 'rbf'}
2019-06-29 00:35:12,958 - INFO - holdout 3 of 5
2019-06-29 00:35:13,681 - INFO - Learning with best hyperparams: {'C': 0.001, 'gamma': 0.001, 'kernel': 'rbf'}
2019-0

2019-06-29 00:35:22,357 - INFO - holdout 5 of 5
2019-06-29 00:35:23,129 - INFO - Learning with best hyperparams: {'C': 10, 'gamma': 30, 'kernel': 'rbf'}
2019-06-29 00:35:23,207 - INFO - Checking alpha=0.80
2019-06-29 00:35:23,209 - INFO - holdout 1 of 5
2019-06-29 00:35:23,988 - INFO - Learning with best hyperparams: {'C': 1, 'gamma': 30, 'kernel': 'rbf'}
2019-06-29 00:35:24,050 - INFO - holdout 2 of 5
2019-06-29 00:35:24,824 - INFO - Learning with best hyperparams: {'C': 1, 'gamma': 30, 'kernel': 'rbf'}
2019-06-29 00:35:24,887 - INFO - holdout 3 of 5
2019-06-29 00:35:25,629 - INFO - Learning with best hyperparams: {'C': 0.001, 'gamma': 0.001, 'kernel': 'rbf'}
2019-06-29 00:35:25,684 - INFO - holdout 4 of 5
2019-06-29 00:35:26,468 - INFO - Learning with best hyperparams: {'C': 10, 'gamma': 10, 'kernel': 'rbf'}
2019-06-29 00:35:26,550 - INFO - holdout 5 of 5
2019-06-29 00:35:27,277 - INFO - Learning with best hyperparams: {'C': 0.001, 'gamma': 0.001, 'kernel': 'rbf'}
2019-06-29 00:35:27

2019-06-29 00:35:38,267 - INFO - holdout 5 of 5
2019-06-29 00:35:38,954 - INFO - Learning with best hyperparams: {'C': 10, 'gamma': 30, 'kernel': 'rbf'}
2019-06-29 00:35:39,024 - INFO - Checking alpha=0.50
2019-06-29 00:35:39,027 - INFO - holdout 1 of 5
2019-06-29 00:35:39,733 - INFO - Learning with best hyperparams: {'C': 0.001, 'gamma': 0.001, 'kernel': 'rbf'}
2019-06-29 00:35:39,778 - INFO - holdout 2 of 5
2019-06-29 00:35:40,450 - INFO - Learning with best hyperparams: {'C': 0.001, 'gamma': 0.001, 'kernel': 'rbf'}
2019-06-29 00:35:40,495 - INFO - holdout 3 of 5
2019-06-29 00:35:41,171 - INFO - Learning with best hyperparams: {'C': 0.001, 'gamma': 0.001, 'kernel': 'rbf'}
2019-06-29 00:35:41,216 - INFO - holdout 4 of 5
2019-06-29 00:35:41,928 - INFO - Learning with best hyperparams: {'C': 1, 'gamma': 30, 'kernel': 'rbf'}
2019-06-29 00:35:41,992 - INFO - holdout 5 of 5
2019-06-29 00:35:42,663 - INFO - Learning with best hyperparams: {'C': 10, 'gamma': 30, 'kernel': 'rbf'}
2019-06-29 0

2019-06-29 00:35:53,665 - INFO - Learning with best hyperparams: {'C': 0.001, 'gamma': 0.001, 'kernel': 'rbf'}
2019-06-29 00:35:53,718 - INFO - holdout 5 of 5
2019-06-29 00:35:54,508 - INFO - Learning with best hyperparams: {'C': 10, 'gamma': 30, 'kernel': 'rbf'}
2019-06-29 00:35:54,584 - INFO - experiment finished
2019-06-29 00:35:54,584 - INFO - completed 94.29%
2019-06-29 00:35:54,585 - INFO - Considering ./data/classification/class-SVC_(gaussian)-tsne-length-5.json
2019-06-29 00:35:54,585 - INFO - does not exist, starting experiment
2019-06-29 00:35:54,585 - INFO - Found cached reduced data. Retrieving tsne-length
2019-06-29 00:35:54,589 - INFO - Checking alpha=0.20
2019-06-29 00:35:54,591 - INFO - holdout 1 of 5
2019-06-29 00:35:55,296 - INFO - Learning with best hyperparams: {'C': 1, 'gamma': 30, 'kernel': 'rbf'}
2019-06-29 00:35:55,356 - INFO - holdout 2 of 5
2019-06-29 00:35:56,044 - INFO - Learning with best hyperparams: {'C': 1, 'gamma': 30, 'kernel': 'rbf'}
2019-06-29 00:35:

2019-06-29 00:36:06,622 - INFO - Learning with best hyperparams: {'C': 1, 'gamma': 30, 'kernel': 'rbf'}
2019-06-29 00:36:06,681 - INFO - holdout 2 of 5
2019-06-29 00:36:07,393 - INFO - Learning with best hyperparams: {'C': 1, 'gamma': 30, 'kernel': 'rbf'}
2019-06-29 00:36:07,452 - INFO - holdout 3 of 5
2019-06-29 00:36:08,148 - INFO - Learning with best hyperparams: {'C': 0.001, 'gamma': 0.001, 'kernel': 'rbf'}
2019-06-29 00:36:08,196 - INFO - holdout 4 of 5
2019-06-29 00:36:08,919 - INFO - Learning with best hyperparams: {'C': 1, 'gamma': 30, 'kernel': 'rbf'}
2019-06-29 00:36:08,980 - INFO - holdout 5 of 5
2019-06-29 00:36:09,687 - INFO - Learning with best hyperparams: {'C': 1, 'gamma': 30, 'kernel': 'rbf'}
2019-06-29 00:36:09,747 - INFO - Checking alpha=0.80
2019-06-29 00:36:09,749 - INFO - holdout 1 of 5
2019-06-29 00:36:10,482 - INFO - Learning with best hyperparams: {'C': 1, 'gamma': 30, 'kernel': 'rbf'}
2019-06-29 00:36:10,543 - INFO - holdout 2 of 5
2019-06-29 00:36:11,259 - IN

2019-06-29 00:36:22,430 - INFO - holdout 2 of 5
2019-06-29 00:36:23,152 - INFO - Learning with best hyperparams: {'C': 0.001, 'gamma': 0.001, 'kernel': 'rbf'}
2019-06-29 00:36:23,201 - INFO - holdout 3 of 5
2019-06-29 00:36:23,922 - INFO - Learning with best hyperparams: {'C': 0.001, 'gamma': 0.001, 'kernel': 'rbf'}
2019-06-29 00:36:23,970 - INFO - holdout 4 of 5
2019-06-29 00:36:24,676 - INFO - Learning with best hyperparams: {'C': 0.001, 'gamma': 0.001, 'kernel': 'rbf'}
2019-06-29 00:36:24,724 - INFO - holdout 5 of 5
2019-06-29 00:36:25,465 - INFO - Learning with best hyperparams: {'C': 1, 'gamma': 30, 'kernel': 'rbf'}
2019-06-29 00:36:25,519 - INFO - Checking alpha=0.50
2019-06-29 00:36:25,521 - INFO - holdout 1 of 5
2019-06-29 00:36:26,270 - INFO - Learning with best hyperparams: {'C': 1, 'gamma': 30, 'kernel': 'rbf'}
2019-06-29 00:36:26,331 - INFO - holdout 2 of 5
2019-06-29 00:36:27,070 - INFO - Learning with best hyperparams: {'C': 10, 'gamma': 30, 'kernel': 'rbf'}
2019-06-29 00

2019-06-29 00:36:40,224 - INFO - holdout 4 of 5
2019-06-29 00:36:40,970 - INFO - Learning with best hyperparams: {'C': 0.001, 'gamma': 0.001, 'kernel': 'rbf'}
2019-06-29 00:36:41,023 - INFO - holdout 5 of 5
2019-06-29 00:36:41,789 - INFO - Learning with best hyperparams: {'C': 10, 'gamma': 30, 'kernel': 'rbf'}
2019-06-29 00:36:41,857 - INFO - experiment finished
2019-06-29 00:36:41,858 - INFO - completed 95.71%
2019-06-29 00:36:41,858 - INFO - Considering ./data/classification/class-SVC_(gaussian)-tsne-length-30.json
2019-06-29 00:36:41,859 - INFO - does not exist, starting experiment
2019-06-29 00:36:41,859 - INFO - Found cached reduced data. Retrieving tsne-length
2019-06-29 00:36:41,863 - INFO - Checking alpha=0.20
2019-06-29 00:36:41,866 - INFO - holdout 1 of 5
2019-06-29 00:36:42,585 - INFO - Learning with best hyperparams: {'C': 1, 'gamma': 30, 'kernel': 'rbf'}
2019-06-29 00:36:42,650 - INFO - holdout 2 of 5
2019-06-29 00:36:43,359 - INFO - Learning with best hyperparams: {'C': 1

2019-06-29 00:36:55,577 - INFO - holdout 4 of 5
2019-06-29 00:36:56,310 - INFO - Learning with best hyperparams: {'C': 0.001, 'gamma': 0.001, 'kernel': 'rbf'}
2019-06-29 00:36:56,357 - INFO - holdout 5 of 5
2019-06-29 00:36:57,077 - INFO - Learning with best hyperparams: {'C': 10, 'gamma': 30, 'kernel': 'rbf'}
2019-06-29 00:36:57,147 - INFO - Checking alpha=0.80
2019-06-29 00:36:57,150 - INFO - holdout 1 of 5
2019-06-29 00:36:57,900 - INFO - Learning with best hyperparams: {'C': 1, 'gamma': 30, 'kernel': 'rbf'}
2019-06-29 00:36:57,959 - INFO - holdout 2 of 5
2019-06-29 00:36:58,697 - INFO - Learning with best hyperparams: {'C': 1, 'gamma': 30, 'kernel': 'rbf'}
2019-06-29 00:36:58,758 - INFO - holdout 3 of 5
2019-06-29 00:36:59,498 - INFO - Learning with best hyperparams: {'C': 0.001, 'gamma': 0.001, 'kernel': 'rbf'}
2019-06-29 00:36:59,546 - INFO - holdout 4 of 5
2019-06-29 00:37:00,285 - INFO - Learning with best hyperparams: {'C': 0.001, 'gamma': 0.001, 'kernel': 'rbf'}
2019-06-29 00