### This notebook generates Paragraph Vector models with many several configurations and performs classification with randorm forest and 5 fold cross validation.

In [1]:
import os
import json
import statistics
import gensim
import datetime
import numpy as np
import pandas as pd
from collections import Counter
from collections import namedtuple
from collections import OrderedDict
from collections import defaultdict
from smart_open import smart_open
from gensim.models import doc2vec
import multiprocessing
from gensim.models.doc2vec import TaggedDocument
from gensim.test.test_doc2vec import ConcatenatedDoc2Vec

In [4]:
cores = multiprocessing.cpu_count()
assert gensim.models.doc2vec.FAST_VERSION > -1, "This will be painfully slow otherwise"
print('num of cores', cores)

num of cores 32


In [5]:
def load_label(partition=True, verbose=False):
    """load the labels (age, gender, YMRS)
    """
    # para partition: whether to partition labels into train/dev sets
    # para verbose: whether or not to output more statistical results
    # return: YMRS score and Mania level for train/dev set
    # return: YMRS score and Mania level for all dataset (if not partition)
    label = pd.read_csv("/home/ceccarelli/Work/Bipolar/labels_metadata.csv")
    id_list = label['SubjectID'].tolist()

    id_set = set()
    age_list = list()
    for id in id_list:
        id_set.add(id)
        age_list.extend(label[label.SubjectID == id]['Age'].tolist())

    gender_list = list()
    for sub in id_set:
        gender_list.append(sub[:1])
        if verbose:
            print("%s subject have %d instances" % (sub, id_list.count(sub)))
    
    classes_stats = Counter(label['ManiaLevel'].tolist())

    if verbose:
        print("All subjects", len(id_set))
        print("Male subjects ", gender_list.count('M'))
        print("Female subjects", gender_list.count('F'))
        print("Age range (%d, %d), Age median %d" % (min(age_list), max(age_list), statistics.median(age_list)))
        print("Class distribution stats", classes_stats)

    ymrs_score = pd.concat([label.iloc[:, 0], label.iloc[:, 4]], axis=1)
    mania_level = pd.concat([label.iloc[:, 0], label.iloc[:, 5]], axis=1)
    if partition:
        ymrs_dev = ymrs_score.iloc[:60, :]
        ymrs_train = ymrs_score.iloc[60:, :]
        level_dev = mania_level.iloc[:60, :]
        level_train = mania_level.iloc[60:, :]
        return ymrs_dev.values[:, 1], ymrs_train.values[:, 1], level_dev.values[:, 1], level_train.values[:, 1]
    else:
        return ymrs_score, mania_level, 0, 0

In [24]:
def infer_embedding(model, partition):
      
    save_dir = '../RESULTS/PV-DBOW' 
    name = str(model.vector_size)+'_'+str(model.window)+'_'+str(model.negative)+'_'+str(model.hs)
    save_dir = os.path.join(save_dir, name)
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)
        print("Directory " , save_dir ,  " Created ")
    else:    
        print("Directory " , save_dir ,  " already exists")
    
    infer_docs = []
    labels = dict()
    _, _, level_dev, level_train = load_label()
    labels['train'] = level_train
    labels['dev'] = level_dev
    in_path = '/home/ceccarelli/Work/Bipolar/transcripts_processed/'
    if (partition=='train'):
        doc = os.path.join(in_path, 'Train_transcripts.txt')
    else:
        doc = os.path.join(in_path, 'Dev_transcripts.txt')
        
    with smart_open(doc, 'rb', encoding='utf-8') as all_data:
        for line_no, line in enumerate(all_data):
            tokens = gensim.utils.to_unicode(line).split()
            words = tokens
            tags = [line_no]
            sentiment = [labels[partition][line_no]]
            infer_docs.append(interview_transcript(words, tags, sentiment))
        
    infer_vecs = [model.infer_vector(doc.words, alpha=.1) for doc in infer_docs]
    infer_labels = [doc.sentiment for doc in infer_docs]

    # save inferred vectors and labels
    print("\nsaving inferred vectors and labels to file")
    if os.path.isdir(save_dir):
            np.save(os.path.join(save_dir, 'vectors_%s' % partition), infer_vecs)
            np.save(os.path.join(save_dir, 'labels_%s' % partition), infer_labels)

In [51]:
def infer_embedding_combined(model, model1, partition, path):
    
      
    save_dir = '../RESULTS/'+path
    name = str(model1.vector_size)+'_'+str(model1.window)+'_'+str(model1.negative)+'_'+str(model1.hs)
    save_dir = os.path.join(save_dir, name)
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)
        print("Directory " , save_dir ,  " Created ")
    else:    
        print("Directory " , save_dir ,  " already exists")
    
    infer_docs = []
    labels = dict()
    _, _, level_dev, level_train = load_label()
    labels['train'] = level_train
    labels['dev'] = level_dev
    in_path = '/home/ceccarelli/Work/Bipolar/transcripts_processed/'
    if (partition=='train'):
        doc = os.path.join(in_path, 'Train_transcripts.txt')
    else:
        doc = os.path.join(in_path, 'Dev_transcripts.txt')
        
    with smart_open(doc, 'rb', encoding='utf-8') as all_data:
        for line_no, line in enumerate(all_data):
            tokens = gensim.utils.to_unicode(line).split()
            words = tokens
            tags = [line_no]
            sentiment = [labels[partition][line_no]]
            infer_docs.append(interview_transcript(words, tags, sentiment))
        
    infer_vecs = [model.infer_vector(doc.words, alpha=.1) for doc in infer_docs]
    infer_labels = [doc.sentiment for doc in infer_docs]

    # save inferred vectors and labels
    print("\nsaving inferred vectors and labels to file")
    if os.path.isdir(save_dir):
            np.save(os.path.join(save_dir, 'vectors_%s' % partition), infer_vecs)
            np.save(os.path.join(save_dir, 'labels_%s' % partition), infer_labels)

In [8]:
interview_transcript = namedtuple('Interview_Transcript', 'words tags sentiment')
labels = dict()
_, _, level_dev, level_train = load_label()
labels['train'] = level_train
labels['dev'] = level_dev

In [9]:
in_path = '/home/ceccarelli/Work/Bipolar/transcripts_processed/'
all_docs = []
for partition in ['train', 'dev']:
    if (partition=='train'):
        doc = os.path.join(in_path, 'Train_transcripts.txt')
    else:
        doc = os.path.join(in_path, 'Dev_transcripts.txt')
    with smart_open(doc, 'rb', encoding='utf-8') as all_data:
        for line_no, line in enumerate(all_data):
            tokens = gensim.utils.to_unicode(line).split()
            words = tokens
            tags = [line_no]
            sentiment = [labels[partition][line_no]] if partition != 'test' else [None]
            all_docs.append(interview_transcript(words, tags, sentiment))
            
corpus = False
if (corpus == True):
    corpus_path = '/storage/gluster/vol1/SHARED/HOMEFOLDERS/ceccarelli/Work/Bipolar/turkish_corpus.txt'
    with smart_open(corpus_path, 'rb', encoding='utf-8') as all_data:
        for line_no, line in enumerate(all_data):
            tokens = gensim.utils.to_unicode(line).split()
            words = tokens
            tags = [line_no]
            sentiment = [None]
            all_docs.append(interview_transcript(words, tags, sentiment))

  'See the migration notes for details: %s' % _MIGRATION_NOTES_URL


In [10]:
#vector_size [25,50,75,100]
#window [5,10]
#negative [5,10]
#hs [0,1]
len(all_docs)

164

In [11]:
#dm averaging
models = [
    doc2vec.Doc2Vec(dm=1, vector_size=25, window=5, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, vector_size=25, window=5, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, vector_size=25, window=5, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, vector_size=25, window=10, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, vector_size=25, window=10, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, vector_size=25, window=10, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0),
    
    doc2vec.Doc2Vec(dm=1, vector_size=50, window=5, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, vector_size=50, window=5, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, vector_size=50, window=5, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, vector_size=50, window=10, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, vector_size=50, window=10, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, vector_size=50, window=10, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0),
    
    doc2vec.Doc2Vec(dm=1, vector_size=75, window=5, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, vector_size=75, window=5, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, vector_size=75, window=5, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, vector_size=75, window=10, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, vector_size=75, window=10, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, vector_size=75, window=10, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0),
    
    doc2vec.Doc2Vec(dm=1, vector_size=100, window=5, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, vector_size=100, window=5, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, vector_size=100, window=5, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, vector_size=100, window=10, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, vector_size=100, window=10, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, vector_size=100, window=10, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0)
]

In [12]:
count = 0
for model in models:
    print("\nbuilding vocabulary for doc2vec model %i" %count)
    model.build_vocab(all_docs)
    print("\nvocabulary scanned & built.")
    count += 1


building vocabulary for doc2vec model 0

vocabulary scanned & built.

building vocabulary for doc2vec model 1

vocabulary scanned & built.

building vocabulary for doc2vec model 2

vocabulary scanned & built.

building vocabulary for doc2vec model 3

vocabulary scanned & built.

building vocabulary for doc2vec model 4

vocabulary scanned & built.

building vocabulary for doc2vec model 5

vocabulary scanned & built.

building vocabulary for doc2vec model 6

vocabulary scanned & built.

building vocabulary for doc2vec model 7

vocabulary scanned & built.

building vocabulary for doc2vec model 8

vocabulary scanned & built.

building vocabulary for doc2vec model 9

vocabulary scanned & built.

building vocabulary for doc2vec model 10

vocabulary scanned & built.

building vocabulary for doc2vec model 11

vocabulary scanned & built.

building vocabulary for doc2vec model 12

vocabulary scanned & built.

building vocabulary for doc2vec model 13

vocabulary scanned & built.

building vocabu

In [13]:
from gensim.models.callbacks import CallbackAny2Vec
class EpochLogger(CallbackAny2Vec):
    '''Callback to log information about training'''
    
    def __init__(self):
        self.epoch = 0
    
    def on_epoch_begin(self, model):
        print("Epoch #{} start".format(self.epoch))
    
    def on_epoch_end(self, model):
        print("Epoch #{} end".format(self.epoch))
        self.epoch += 1

In [14]:
epoch_logger = EpochLogger()
count = 0
for model in models:
    model.train(all_docs, total_examples=len(all_docs), epochs=model.epochs,callbacks=[epoch_logger])
    print("\nmodel %i trained" %count)
    count += 1
    

Epoch #0 start
Epoch #0 end
Epoch #1 start
Epoch #1 end
Epoch #2 start
Epoch #2 end
Epoch #3 start
Epoch #3 end
Epoch #4 start
Epoch #4 end
Epoch #5 start
Epoch #5 end
Epoch #6 start
Epoch #6 end
Epoch #7 start
Epoch #7 end
Epoch #8 start
Epoch #8 end
Epoch #9 start
Epoch #9 end
Epoch #10 start
Epoch #10 end
Epoch #11 start
Epoch #11 end
Epoch #12 start
Epoch #12 end
Epoch #13 start
Epoch #13 end
Epoch #14 start
Epoch #14 end
Epoch #15 start
Epoch #15 end
Epoch #16 start
Epoch #16 end
Epoch #17 start
Epoch #17 end
Epoch #18 start
Epoch #18 end
Epoch #19 start
Epoch #19 end
Epoch #20 start
Epoch #20 end
Epoch #21 start
Epoch #21 end
Epoch #22 start
Epoch #22 end
Epoch #23 start
Epoch #23 end
Epoch #24 start
Epoch #24 end
Epoch #25 start
Epoch #25 end
Epoch #26 start
Epoch #26 end
Epoch #27 start
Epoch #27 end
Epoch #28 start
Epoch #28 end
Epoch #29 start
Epoch #29 end
Epoch #30 start
Epoch #30 end
Epoch #31 start
Epoch #31 end
Epoch #32 start
Epoch #32 end
Epoch #33 start
Epoch #33 end


Epoch #265 end
Epoch #266 start
Epoch #266 end
Epoch #267 start
Epoch #267 end
Epoch #268 start
Epoch #268 end
Epoch #269 start
Epoch #269 end
Epoch #270 start
Epoch #270 end
Epoch #271 start
Epoch #271 end
Epoch #272 start
Epoch #272 end
Epoch #273 start
Epoch #273 end
Epoch #274 start
Epoch #274 end
Epoch #275 start
Epoch #275 end
Epoch #276 start
Epoch #276 end
Epoch #277 start
Epoch #277 end
Epoch #278 start
Epoch #278 end
Epoch #279 start
Epoch #279 end
Epoch #280 start
Epoch #280 end
Epoch #281 start
Epoch #281 end
Epoch #282 start
Epoch #282 end
Epoch #283 start
Epoch #283 end
Epoch #284 start
Epoch #284 end
Epoch #285 start
Epoch #285 end
Epoch #286 start
Epoch #286 end
Epoch #287 start
Epoch #287 end
Epoch #288 start
Epoch #288 end
Epoch #289 start
Epoch #289 end
Epoch #290 start
Epoch #290 end
Epoch #291 start
Epoch #291 end
Epoch #292 start
Epoch #292 end
Epoch #293 start
Epoch #293 end
Epoch #294 start
Epoch #294 end
Epoch #295 start
Epoch #295 end
Epoch #296 start
Epoch #2

Epoch #522 end
Epoch #523 start
Epoch #523 end
Epoch #524 start
Epoch #524 end
Epoch #525 start
Epoch #525 end
Epoch #526 start
Epoch #526 end
Epoch #527 start
Epoch #527 end
Epoch #528 start
Epoch #528 end
Epoch #529 start
Epoch #529 end
Epoch #530 start
Epoch #530 end
Epoch #531 start
Epoch #531 end
Epoch #532 start
Epoch #532 end
Epoch #533 start
Epoch #533 end
Epoch #534 start
Epoch #534 end
Epoch #535 start
Epoch #535 end
Epoch #536 start
Epoch #536 end
Epoch #537 start
Epoch #537 end
Epoch #538 start
Epoch #538 end
Epoch #539 start
Epoch #539 end
Epoch #540 start
Epoch #540 end
Epoch #541 start
Epoch #541 end
Epoch #542 start
Epoch #542 end
Epoch #543 start
Epoch #543 end
Epoch #544 start
Epoch #544 end
Epoch #545 start
Epoch #545 end
Epoch #546 start
Epoch #546 end
Epoch #547 start
Epoch #547 end
Epoch #548 start
Epoch #548 end
Epoch #549 start
Epoch #549 end
Epoch #550 start
Epoch #550 end
Epoch #551 start
Epoch #551 end
Epoch #552 start
Epoch #552 end
Epoch #553 start
Epoch #5

Epoch #777 end
Epoch #778 start
Epoch #778 end
Epoch #779 start
Epoch #779 end
Epoch #780 start
Epoch #780 end
Epoch #781 start
Epoch #781 end
Epoch #782 start
Epoch #782 end
Epoch #783 start
Epoch #783 end
Epoch #784 start
Epoch #784 end
Epoch #785 start
Epoch #785 end
Epoch #786 start
Epoch #786 end
Epoch #787 start
Epoch #787 end
Epoch #788 start
Epoch #788 end
Epoch #789 start
Epoch #789 end
Epoch #790 start
Epoch #790 end
Epoch #791 start
Epoch #791 end
Epoch #792 start
Epoch #792 end
Epoch #793 start
Epoch #793 end
Epoch #794 start
Epoch #794 end
Epoch #795 start
Epoch #795 end
Epoch #796 start
Epoch #796 end
Epoch #797 start
Epoch #797 end
Epoch #798 start
Epoch #798 end
Epoch #799 start
Epoch #799 end

model 7 trained
Epoch #800 start
Epoch #800 end
Epoch #801 start
Epoch #801 end
Epoch #802 start
Epoch #802 end
Epoch #803 start
Epoch #803 end
Epoch #804 start
Epoch #804 end
Epoch #805 start
Epoch #805 end
Epoch #806 start
Epoch #806 end
Epoch #807 start
Epoch #807 end
Epoch #8

Epoch #1031 end
Epoch #1032 start
Epoch #1032 end
Epoch #1033 start
Epoch #1033 end
Epoch #1034 start
Epoch #1034 end
Epoch #1035 start
Epoch #1035 end
Epoch #1036 start
Epoch #1036 end
Epoch #1037 start
Epoch #1037 end
Epoch #1038 start
Epoch #1038 end
Epoch #1039 start
Epoch #1039 end
Epoch #1040 start
Epoch #1040 end
Epoch #1041 start
Epoch #1041 end
Epoch #1042 start
Epoch #1042 end
Epoch #1043 start
Epoch #1043 end
Epoch #1044 start
Epoch #1044 end
Epoch #1045 start
Epoch #1045 end
Epoch #1046 start
Epoch #1046 end
Epoch #1047 start
Epoch #1047 end
Epoch #1048 start
Epoch #1048 end
Epoch #1049 start
Epoch #1049 end
Epoch #1050 start
Epoch #1050 end
Epoch #1051 start
Epoch #1051 end
Epoch #1052 start
Epoch #1052 end
Epoch #1053 start
Epoch #1053 end
Epoch #1054 start
Epoch #1054 end
Epoch #1055 start
Epoch #1055 end
Epoch #1056 start
Epoch #1056 end
Epoch #1057 start
Epoch #1057 end
Epoch #1058 start
Epoch #1058 end
Epoch #1059 start
Epoch #1059 end
Epoch #1060 start
Epoch #1060 en

Epoch #1271 end
Epoch #1272 start
Epoch #1272 end
Epoch #1273 start
Epoch #1273 end
Epoch #1274 start
Epoch #1274 end
Epoch #1275 start
Epoch #1275 end
Epoch #1276 start
Epoch #1276 end
Epoch #1277 start
Epoch #1277 end
Epoch #1278 start
Epoch #1278 end
Epoch #1279 start
Epoch #1279 end
Epoch #1280 start
Epoch #1280 end
Epoch #1281 start
Epoch #1281 end
Epoch #1282 start
Epoch #1282 end
Epoch #1283 start
Epoch #1283 end
Epoch #1284 start
Epoch #1284 end
Epoch #1285 start
Epoch #1285 end
Epoch #1286 start
Epoch #1286 end
Epoch #1287 start
Epoch #1287 end
Epoch #1288 start
Epoch #1288 end
Epoch #1289 start
Epoch #1289 end
Epoch #1290 start
Epoch #1290 end
Epoch #1291 start
Epoch #1291 end
Epoch #1292 start
Epoch #1292 end
Epoch #1293 start
Epoch #1293 end
Epoch #1294 start
Epoch #1294 end
Epoch #1295 start
Epoch #1295 end
Epoch #1296 start
Epoch #1296 end
Epoch #1297 start
Epoch #1297 end
Epoch #1298 start
Epoch #1298 end
Epoch #1299 start
Epoch #1299 end

model 12 trained
Epoch #1300 st

Epoch #1511 end
Epoch #1512 start
Epoch #1512 end
Epoch #1513 start
Epoch #1513 end
Epoch #1514 start
Epoch #1514 end
Epoch #1515 start
Epoch #1515 end
Epoch #1516 start
Epoch #1516 end
Epoch #1517 start
Epoch #1517 end
Epoch #1518 start
Epoch #1518 end
Epoch #1519 start
Epoch #1519 end
Epoch #1520 start
Epoch #1520 end
Epoch #1521 start
Epoch #1521 end
Epoch #1522 start
Epoch #1522 end
Epoch #1523 start
Epoch #1523 end
Epoch #1524 start
Epoch #1524 end
Epoch #1525 start
Epoch #1525 end
Epoch #1526 start
Epoch #1526 end
Epoch #1527 start
Epoch #1527 end
Epoch #1528 start
Epoch #1528 end
Epoch #1529 start
Epoch #1529 end
Epoch #1530 start
Epoch #1530 end
Epoch #1531 start
Epoch #1531 end
Epoch #1532 start
Epoch #1532 end
Epoch #1533 start
Epoch #1533 end
Epoch #1534 start
Epoch #1534 end
Epoch #1535 start
Epoch #1535 end
Epoch #1536 start
Epoch #1536 end
Epoch #1537 start
Epoch #1537 end
Epoch #1538 start
Epoch #1538 end
Epoch #1539 start
Epoch #1539 end
Epoch #1540 start
Epoch #1540 en

Epoch #1751 end
Epoch #1752 start
Epoch #1752 end
Epoch #1753 start
Epoch #1753 end
Epoch #1754 start
Epoch #1754 end
Epoch #1755 start
Epoch #1755 end
Epoch #1756 start
Epoch #1756 end
Epoch #1757 start
Epoch #1757 end
Epoch #1758 start
Epoch #1758 end
Epoch #1759 start
Epoch #1759 end
Epoch #1760 start
Epoch #1760 end
Epoch #1761 start
Epoch #1761 end
Epoch #1762 start
Epoch #1762 end
Epoch #1763 start
Epoch #1763 end
Epoch #1764 start
Epoch #1764 end
Epoch #1765 start
Epoch #1765 end
Epoch #1766 start
Epoch #1766 end
Epoch #1767 start
Epoch #1767 end
Epoch #1768 start
Epoch #1768 end
Epoch #1769 start
Epoch #1769 end
Epoch #1770 start
Epoch #1770 end
Epoch #1771 start
Epoch #1771 end
Epoch #1772 start
Epoch #1772 end
Epoch #1773 start
Epoch #1773 end
Epoch #1774 start
Epoch #1774 end
Epoch #1775 start
Epoch #1775 end
Epoch #1776 start
Epoch #1776 end
Epoch #1777 start
Epoch #1777 end
Epoch #1778 start
Epoch #1778 end
Epoch #1779 start
Epoch #1779 end
Epoch #1780 start
Epoch #1780 en

Epoch #1991 end
Epoch #1992 start
Epoch #1992 end
Epoch #1993 start
Epoch #1993 end
Epoch #1994 start
Epoch #1994 end
Epoch #1995 start
Epoch #1995 end
Epoch #1996 start
Epoch #1996 end
Epoch #1997 start
Epoch #1997 end
Epoch #1998 start
Epoch #1998 end
Epoch #1999 start
Epoch #1999 end

model 19 trained
Epoch #2000 start
Epoch #2000 end
Epoch #2001 start
Epoch #2001 end
Epoch #2002 start
Epoch #2002 end
Epoch #2003 start
Epoch #2003 end
Epoch #2004 start
Epoch #2004 end
Epoch #2005 start
Epoch #2005 end
Epoch #2006 start
Epoch #2006 end
Epoch #2007 start
Epoch #2007 end
Epoch #2008 start
Epoch #2008 end
Epoch #2009 start
Epoch #2009 end
Epoch #2010 start
Epoch #2010 end
Epoch #2011 start
Epoch #2011 end
Epoch #2012 start
Epoch #2012 end
Epoch #2013 start
Epoch #2013 end
Epoch #2014 start
Epoch #2014 end
Epoch #2015 start
Epoch #2015 end
Epoch #2016 start
Epoch #2016 end
Epoch #2017 start
Epoch #2017 end
Epoch #2018 start
Epoch #2018 end
Epoch #2019 start
Epoch #2019 end
Epoch #2020 st

Epoch #2233 end
Epoch #2234 start
Epoch #2234 end
Epoch #2235 start
Epoch #2235 end
Epoch #2236 start
Epoch #2236 end
Epoch #2237 start
Epoch #2237 end
Epoch #2238 start
Epoch #2238 end
Epoch #2239 start
Epoch #2239 end
Epoch #2240 start
Epoch #2240 end
Epoch #2241 start
Epoch #2241 end
Epoch #2242 start
Epoch #2242 end
Epoch #2243 start
Epoch #2243 end
Epoch #2244 start
Epoch #2244 end
Epoch #2245 start
Epoch #2245 end
Epoch #2246 start
Epoch #2246 end
Epoch #2247 start
Epoch #2247 end
Epoch #2248 start
Epoch #2248 end
Epoch #2249 start
Epoch #2249 end
Epoch #2250 start
Epoch #2250 end
Epoch #2251 start
Epoch #2251 end
Epoch #2252 start
Epoch #2252 end
Epoch #2253 start
Epoch #2253 end
Epoch #2254 start
Epoch #2254 end
Epoch #2255 start
Epoch #2255 end
Epoch #2256 start
Epoch #2256 end
Epoch #2257 start
Epoch #2257 end
Epoch #2258 start
Epoch #2258 end
Epoch #2259 start
Epoch #2259 end
Epoch #2260 start
Epoch #2260 end
Epoch #2261 start
Epoch #2261 end
Epoch #2262 start
Epoch #2262 en

In [18]:
count = 0
for model in models:
    infer_embedding(model, 'train')
    infer_embedding(model, 'dev')
    print("\nembedding inffered for model %i" %count)
    count += 1

Directory  ../RESULTS/PV-DM-AV/25_5_5_0  Created 

saving inferred vectors and labels to file
Directory  ../RESULTS/PV-DM-AV/25_5_5_0  already exists

saving inferred vectors and labels to file

embedding inffered for model 0
Directory  ../RESULTS/PV-DM-AV/25_5_10_0  Created 

saving inferred vectors and labels to file
Directory  ../RESULTS/PV-DM-AV/25_5_10_0  already exists

saving inferred vectors and labels to file

embedding inffered for model 1
Directory  ../RESULTS/PV-DM-AV/25_5_0_1  Created 

saving inferred vectors and labels to file
Directory  ../RESULTS/PV-DM-AV/25_5_0_1  already exists

saving inferred vectors and labels to file

embedding inffered for model 2
Directory  ../RESULTS/PV-DM-AV/25_10_5_0  Created 

saving inferred vectors and labels to file
Directory  ../RESULTS/PV-DM-AV/25_10_5_0  already exists

saving inferred vectors and labels to file

embedding inffered for model 3
Directory  ../RESULTS/PV-DM-AV/25_10_10_0  Created 

saving inferred vectors and labels to f

In [19]:
#dm conc
models = [
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=25, window=5, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=25, window=5, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=25, window=5, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=25, window=10, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=25, window=10, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=25, window=10, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0),
    
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=50, window=5, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=50, window=5, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=50, window=5, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=50, window=10, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=50, window=10, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=50, window=10, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0),
    
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=75, window=5, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=75, window=5, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=75, window=5, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=75, window=10, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=75, window=10, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=75, window=10, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0),
    
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=100, window=5, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=100, window=5, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=100, window=5, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=100, window=10, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=100, window=10, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=100, window=10, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0)
]

In [20]:
count = 0
for model in models:
    print("\nbuilding vocabulary for doc2vec model %i" %count)
    model.build_vocab(all_docs)
    print("\nvocabulary scanned & built.")
    count += 1


building vocabulary for doc2vec model 0

vocabulary scanned & built.

building vocabulary for doc2vec model 1

vocabulary scanned & built.

building vocabulary for doc2vec model 2

vocabulary scanned & built.

building vocabulary for doc2vec model 3

vocabulary scanned & built.

building vocabulary for doc2vec model 4

vocabulary scanned & built.

building vocabulary for doc2vec model 5

vocabulary scanned & built.

building vocabulary for doc2vec model 6

vocabulary scanned & built.

building vocabulary for doc2vec model 7

vocabulary scanned & built.

building vocabulary for doc2vec model 8

vocabulary scanned & built.

building vocabulary for doc2vec model 9

vocabulary scanned & built.

building vocabulary for doc2vec model 10

vocabulary scanned & built.

building vocabulary for doc2vec model 11

vocabulary scanned & built.

building vocabulary for doc2vec model 12

vocabulary scanned & built.

building vocabulary for doc2vec model 13

vocabulary scanned & built.

building vocabu

In [22]:
from gensim.models.callbacks import CallbackAny2Vec
class EpochLogger(CallbackAny2Vec):
    '''Callback to log information about training'''
    
    def __init__(self):
        self.epoch = 0
    
    def on_epoch_begin(self, model):
        print("Epoch #{} start".format(self.epoch))
    
    def on_epoch_end(self, model):
        print("Epoch #{} end".format(self.epoch))
        self.epoch += 1
epoch_logger = EpochLogger()
count = 0
for model in models:
    model.train(all_docs, total_examples=len(all_docs), epochs=model.epochs)
    print("\nmodel %i trained" %count)
    count += 1


model 0 trained

model 1 trained

model 2 trained

model 3 trained

model 4 trained

model 5 trained

model 6 trained

model 7 trained

model 8 trained

model 9 trained

model 10 trained

model 11 trained

model 12 trained

model 13 trained

model 14 trained

model 15 trained

model 16 trained

model 17 trained

model 18 trained

model 19 trained

model 20 trained

model 21 trained

model 22 trained

model 23 trained


In [23]:
count = 0
for model in models:
    infer_embedding(model, 'train')
    infer_embedding(model, 'dev')
    print("\nembedding inffered for model %i" %count)
    count += 1

Directory  ../RESULTS/PV-DM-CONC/25_5_5_0  Created 

saving inferred vectors and labels to file
Directory  ../RESULTS/PV-DM-CONC/25_5_5_0  already exists

saving inferred vectors and labels to file

embedding inffered for model 0
Directory  ../RESULTS/PV-DM-CONC/25_5_10_0  Created 

saving inferred vectors and labels to file
Directory  ../RESULTS/PV-DM-CONC/25_5_10_0  already exists

saving inferred vectors and labels to file

embedding inffered for model 1
Directory  ../RESULTS/PV-DM-CONC/25_5_0_1  Created 

saving inferred vectors and labels to file
Directory  ../RESULTS/PV-DM-CONC/25_5_0_1  already exists

saving inferred vectors and labels to file

embedding inffered for model 2
Directory  ../RESULTS/PV-DM-CONC/25_10_5_0  Created 

saving inferred vectors and labels to file
Directory  ../RESULTS/PV-DM-CONC/25_10_5_0  already exists

saving inferred vectors and labels to file

embedding inffered for model 3
Directory  ../RESULTS/PV-DM-CONC/25_10_10_0  Created 

saving inferred vecto

In [25]:
#dbow 
models = [
    doc2vec.Doc2Vec(dm=0, vector_size=25, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=0, vector_size=25, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=0, vector_size=25, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0),
    
    doc2vec.Doc2Vec(dm=0, vector_size=50, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=0, vector_size=50, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=0, vector_size=50, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0),
    
    doc2vec.Doc2Vec(dm=0, vector_size=75, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=0, vector_size=75, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=0, vector_size=75, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0),
    
    doc2vec.Doc2Vec(dm=0, vector_size=100, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=0, vector_size=100, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=0, vector_size=100, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0),
    
]

In [26]:
count = 0
for model in models:
    print("\nbuilding vocabulary for doc2vec model %i" %count)
    model.build_vocab(all_docs)
    print("\nvocabulary scanned & built.")
    count += 1


building vocabulary for doc2vec model 0

vocabulary scanned & built.

building vocabulary for doc2vec model 1

vocabulary scanned & built.

building vocabulary for doc2vec model 2

vocabulary scanned & built.

building vocabulary for doc2vec model 3

vocabulary scanned & built.

building vocabulary for doc2vec model 4

vocabulary scanned & built.

building vocabulary for doc2vec model 5

vocabulary scanned & built.

building vocabulary for doc2vec model 6

vocabulary scanned & built.

building vocabulary for doc2vec model 7

vocabulary scanned & built.

building vocabulary for doc2vec model 8

vocabulary scanned & built.

building vocabulary for doc2vec model 9

vocabulary scanned & built.

building vocabulary for doc2vec model 10

vocabulary scanned & built.

building vocabulary for doc2vec model 11

vocabulary scanned & built.


In [27]:
from gensim.models.callbacks import CallbackAny2Vec
class EpochLogger(CallbackAny2Vec):
    '''Callback to log information about training'''
    
    def __init__(self):
        self.epoch = 0
    
    def on_epoch_begin(self, model):
        print("Epoch #{} start".format(self.epoch))
    
    def on_epoch_end(self, model):
        print("Epoch #{} end".format(self.epoch))
        self.epoch += 1
epoch_logger = EpochLogger()
count = 0
for model in models:
    model.train(all_docs, total_examples=len(all_docs), epochs=model.epochs)
    print("\nmodel %i trained" %count)
    count += 1


model 0 trained

model 1 trained

model 2 trained

model 3 trained

model 4 trained

model 5 trained

model 6 trained

model 7 trained

model 8 trained

model 9 trained

model 10 trained

model 11 trained


In [28]:
count = 0
for model in models:
    infer_embedding(model, 'train')
    infer_embedding(model, 'dev')
    print("\nembedding inffered for model %i" %count)
    count += 1

Directory  ../RESULTS/PV-DBOW/25_5_5_0  Created 

saving inferred vectors and labels to file
Directory  ../RESULTS/PV-DBOW/25_5_5_0  already exists

saving inferred vectors and labels to file

embedding inffered for model 0
Directory  ../RESULTS/PV-DBOW/25_5_10_0  Created 

saving inferred vectors and labels to file
Directory  ../RESULTS/PV-DBOW/25_5_10_0  already exists

saving inferred vectors and labels to file

embedding inffered for model 1
Directory  ../RESULTS/PV-DBOW/25_5_0_1  Created 

saving inferred vectors and labels to file
Directory  ../RESULTS/PV-DBOW/25_5_0_1  already exists

saving inferred vectors and labels to file

embedding inffered for model 2
Directory  ../RESULTS/PV-DBOW/50_5_5_0  Created 

saving inferred vectors and labels to file
Directory  ../RESULTS/PV-DBOW/50_5_5_0  already exists

saving inferred vectors and labels to file

embedding inffered for model 3
Directory  ../RESULTS/PV-DBOW/50_5_10_0  Created 

saving inferred vectors and labels to file
Director

In [38]:
import itertools as it
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import precision_recall_fscore_support


class RandomForest():
   
    def __init__(self, feature_name, X_train, y_train, X_dev, y_dev, line):
        self.config = json.load(open('../model.json', 'r'))
        self.model_name = 'RF'
        self.feature_name = feature_name
        self.X_train = X_train
        self.X_dev = X_dev
        self.y_train = y_train
        self.y_dev = y_dev
        self.parameters = dict()
        self.parameters['n_estimators'] = None
        self.parameters['max_features'] = None
        self.parameters['max_depth'] = None
        self.parameters['criterion'] = None
        self.model = None
        self.line = line

    def run(self):
        """main function for the model"""
        filename = os.path.join(self.line, '%s_%s_params.json' % (self.model_name, self.feature_name))

        if os.path.isfile(filename):
            self.parameters = json.load(open(filename, 'r'))
        
        if not self.parameters['n_estimators'] or not self.parameters['max_features'] or not self.parameters['max_depth'] or not self.parameters['criterion']:
            print("\nhyperparameters are not tuned yet")
            self.tune_dev()
        
        # build RF model
        self.model = RandomForestClassifier(
            n_estimators=self.parameters['n_estimators'], 
            max_features=self.parameters['max_features'], 
            max_depth=self.parameters['max_depth'], 
            criterion=self.parameters['criterion'], 
            verbose=1, n_jobs=-1,
            class_weight="balanced")
        self.train()

    def train(self):
        """train the model
        """
        print("\ntraining a Random Forest Classifier ...")
        self.model.fit(self.X_train, self.y_train)

    def evaluate(self):
        """evaluate the model
        """
        print("\nevaluating the Random Forest Classifier ...")
        y_pred_train = self.model.predict(self.X_train)
        y_pred_dev = self.model.predict(self.X_dev)
        
        precision, recall, fscore, _ = precision_recall_fscore_support(self.y_dev, y_pred_dev, average='macro')
        
        filename = os.path.join(self.line, '%s_%s_score.json' % (self.model_name, self.feature_name))
        file = open(filename,"w") 
        file.write("\nprecision on dev set: %.4f" % precision) 
        file.write("\nrecall on dev set: %.4f" % recall) 
        file.write("\nfscore on dev set: %.4f" % fscore) 
        file.write("\naccuracy on dev set: %.4f" % metrics.accuracy_score(y_pred_dev, y_dev)) 
        file.close() 

        print("\naccuracy on training set: %.4f" % metrics.accuracy_score(y_pred_train, self.y_train))
        print("\naccuracy on development set: %.4f" % metrics.accuracy_score(y_pred_dev, self.y_dev))
        print("\nprecision on dev set: %.4f" % precision)
        print("\nrecall on dev set: %.4f" % recall)
        print("\nfscore on dev set: %.4f" % fscore)

        
        return y_pred_train, y_pred_dev

    def tune_dev(self):
        """fine tune hyperparameters for the model with given dev set
        """
        parameters = {
            "n_estimators": self.config['baseline']['random_forest']['n_estimators'],
            "max_features": self.config['baseline']['random_forest']['max_features'],
            "max_depth": self.config['baseline']['random_forest']['max_depth'],
            "criterion": ["entropy"]
        }
        print("\nrunning the validation on development set ...")
        allnames = sorted(parameters)
        parameters_set = list(it.product(*(parameters[name] for name in allnames)))
        results = np.zeros((len(parameters_set), 5))

        for i in range(len(parameters_set)):
            para = parameters_set[i]
            clf = RandomForestClassifier(
                    n_estimators=para[3],
                    max_features=para[2], 
                    max_depth=para[1], 
                    criterion=para[0], 
                    verbose=1, n_jobs=-1,
                    class_weight="balanced")
            
            for j in range(5):
                clf.fit(self.X_train, self.y_train)
                y_pred_dev = clf.predict(self.X_dev)
                recall = metrics.recall_score(self.y_dev, y_pred_dev, average='macro')
                print("\nrecall for this hyparameter setting is %.3f\n" % recall)
                results[i,j] = recall
        
        results_avg = [np.mean(res) for res in results]
        parameters_id = np.argmax(results_avg)

        self.parameters['n_estimators'] = parameters_set[parameters_id][3]
        self.parameters['max_features'] = parameters_set[parameters_id][2]
        self.parameters['max_depth'] = parameters_set[parameters_id][1]
        self.parameters['criterion'] = parameters_set[parameters_id][0]

        filename = os.path.join(self.line, '%s_%s_params.json' % (self.model_name, self.feature_name))
        
        # write to model json file
        with open(filename, 'w') as output:
            json.dump(self.parameters, output)
            output.write("\n")
        output.close()

    

In [6]:
import itertools as it
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import precision_recall_fscore_support

class RandomForestCV():
   
    def __init__(self, feature_name, X_train, y_train, X_dev, y_dev, line):
        self.config = json.load(open('../model.json', 'r'))
        self.model_name = 'RF_CV'
        self.feature_name = feature_name
        self.X_train = X_train
        self.X_dev = X_dev
        self.y_train = y_train
        self.y_dev = y_dev
        self.parameters = dict()
        self.parameters['n_estimators'] = None
        self.parameters['max_features'] = None
        self.parameters['max_depth'] = None
        self.parameters['criterion'] = None
        self.model = None
        self.line = line

    def run(self):
        """main function for the model"""
        filename = os.path.join(self.line, '%s_%s_params.json' % (self.model_name, self.feature_name))

        if os.path.isfile(filename):
            self.parameters = json.load(open(filename, 'r'))
        
        if not self.parameters['n_estimators'] or not self.parameters['max_features'] or not self.parameters['max_depth'] or not self.parameters['criterion']:
            print("\nhyperparameters are not tuned yet")
            self.crossvalidate()
        else:
            self.model = RandomForestClassifier(
                n_estimators=self.parameters['n_estimators'], 
                max_features=self.parameters['max_features'], 
                max_depth=self.parameters['max_depth'], 
                criterion=self.parameters['criterion'], 
                verbose=1, n_jobs=-1,
                class_weight="balanced")
            self.model.fit(self.X_train, self.y_train)
            y_pred_train = self.model.predict(self.X_train)
            y_pred_dev = self.model.predict(self.X_dev)
            precision, recall, fscore, _ = precision_recall_fscore_support(self.y_dev, y_pred_dev, average='macro')
            print("\naccuracy on training set: %.4f" % metrics.accuracy_score(y_pred_train, self.y_train))
            print("\naccuracy on development set: %.4f" % metrics.accuracy_score(y_pred_dev, self.y_dev))
            print("\nprecision on dev set: %.4f" % precision)
            print("\nrecall on dev set: %.4f" % recall)
            print("\nfscore on dev set: %.4f" % fscore)
        
            
    def crossvalidate(self):
        
        parameters = {
            "n_estimators": self.config['baseline']['random_forest']['n_estimators'],
            "max_features": self.config['baseline']['random_forest']['max_features'],
            "max_depth": self.config['baseline']['random_forest']['max_depth'],
            "criterion": ["entropy"]
        }
        
        print("\nrunning the Grid Search for Random Forest CV classifier ...")
        clf = GridSearchCV(RandomForestClassifier(), 
                        parameters, 
                        cv=5, 
                        n_jobs=-1, 
                        verbose=0, 
                        scoring='recall_macro')
        
        clf.fit(self.X_train, self.y_train)
        print("\nfinal score for the tuned model\n", clf.score(self.X_dev, self.y_dev))
        print("\nbest hyperparameters for the tuned model\n", clf.best_params_)
        print("\ncross validation results (MEAN)\n", clf.cv_results_['mean_test_score'])
        print("\ncross validation results (STD)\n", clf.cv_results_['std_test_score'])

        self.parameters['n_estimators'] = clf.best_params_['n_estimators']
        self.parameters['max_features'] = clf.best_params_['max_features']
        self.parameters['max_depth'] = clf.best_params_['max_depth']
        self.parameters['criterion'] = clf.best_params_['criterion']
        
        filename = os.path.join(self.line, '%s_%s_params.json' % (self.model_name, self.feature_name))
        
        # write to model json file
        with open(filename, 'w') as output:
            json.dump(self.parameters, output)
            output.write("\n")
        output.close()
        
        best_grid = clf.best_estimator_
        print("\nevaluating the Random Forest Classifier ...")
        y_pred_train = best_grid.predict(self.X_train)
        y_pred_dev = best_grid.predict(self.X_dev)
        
        precision, recall, fscore, _ = precision_recall_fscore_support(self.y_dev, y_pred_dev, average='macro')
        
        filename = os.path.join(self.line, '%s_%s_score.json' % (self.model_name, self.feature_name))
        file = open(filename,"w") 
        file.write("\nprecision on dev set: %.4f" % precision) 
        file.write("\nrecall on dev set: %.4f" % recall) 
        file.write("\nfscore on dev set: %.4f" % fscore) 
        file.write("\naccuracy on dev set: %.4f" % metrics.accuracy_score(y_pred_dev, y_dev)) 
        file.close() 

        print("\naccuracy on training set: %.4f" % metrics.accuracy_score(y_pred_train, self.y_train))
        print("\naccuracy on development set: %.4f" % metrics.accuracy_score(y_pred_dev, self.y_dev))
        print("\nprecision on dev set: %.4f" % precision)
        print("\nrecall on dev set: %.4f" % recall)
        print("\nfscore on dev set: %.4f" % fscore)
        
        

In [4]:
line = '../RESULTS/PV-DM-AV/' 
names = ['25_5_5_0', '25_5_10_0','25_5_0_1','25_10_5_0','25_10_10_0','25_10_0_1',
         '50_5_5_0', '50_5_10_0','50_5_0_1','50_10_5_0','50_10_10_0','50_10_0_1',
         '75_5_5_0', '75_5_10_0','75_5_0_1','75_10_5_0','75_10_10_0','75_10_0_1',
         '100_5_5_0', '100_5_10_0','100_5_0_1','100_10_5_0','100_10_10_0','100_10_0_1']

In [5]:
for i in names:
    path = os.path.join(line, i)
    X_train = np.load(os.path.join(path, 'vectors_train.npy'))
    X_dev = np.load(os.path.join(path, 'vectors_dev.npy'))
    y_train = np.load(os.path.join(path, 'labels_train.npy'))
    y_dev = np.load(os.path.join(path, 'labels_dev.npy'))
    y_train = np.ravel(y_train)
    y_dev = np.ravel(y_dev)
    
    feature_name = 'TEXT_'+str(i)
    save_dir = '../RESULTS/PV-DM-AV/' 
    random_forest = RandomForestCV(feature_name, X_train, y_train, X_dev, y_dev, save_dir)
    random_forest.run()
    print ("------"  * 10)
    


hyperparameters are not tuned yet

running the Grid Search for Random Forest CV classifier ...
Fitting 5 folds for each of 36 candidates, totalling 180 fits
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=100 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=100 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=100 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=100 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=100 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=200 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=200 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=200 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=200 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=200 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400 
[CV] criterion=entropy, max_depth=2, ma

[CV] criterion=entropy, max_depth=4, max_features=0.1, n_estimators=100 
[CV] criterion=entropy, max_depth=4, max_features=0.1, n_estimators=100 
[CV]  criterion=entropy, max_depth=2, max_features=0.4, n_estimators=200, score=0.375, total=   0.9s
[CV] criterion=entropy, max_depth=4, max_features=0.1, n_estimators=100 
[CV]  criterion=entropy, max_depth=2, max_features=0.4, n_estimators=200, score=0.5357142857142857, total=   0.9s
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=400, score=0.4166666666666667, total=   1.7s
[CV] criterion=entropy, max_depth=4, max_features=0.1, n_estimators=200 
[CV] criterion=entropy, max_depth=4, max_features=0.1, n_estimators=200 
[CV]  criterion=entropy, max_depth=2, max_features=0.4, n_estimators=200, score=0.4259259259259259, total=   0.9s
[CV] criterion=entropy, max_depth=4, max_features=0.1, n_estimators=200 
[CV]  criterion=entropy, max_depth=2, max_features=0.4, n_estimators=200, score=0.44047619047619047, total=   0.9s
[CV]

[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:    3.5s


[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.5773809523809524, total=   3.4s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.4583333333333333, total=   1.7s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.34722222222222215, total=   1.7s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=4, max_features=0.2, n_estimators=200, score=0.46759259259259256, total=   0.9s
[CV] criterion=entropy, max_depth=4, max_features=0.4, n_estimators=100 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.5, total=   1.7s
[CV] criterion=entropy, max_depth=4, max_features=0.4, n_estimators=100 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, scor

[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=400, score=0.4185185185185185, total=   1.9s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.3333333333333333, total=   0.8s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.4583333333333333, total=   0.9s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.38888888888888884, total=   0.9s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=200 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.49404761904761907, total=   0.9s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estima

[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.4166666666666667, total=   3.3s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.5773809523809524, total=   3.8s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.46759259259259256, total=   3.4s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.4583333333333333, total=   3.4s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.375, total=   4.0s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.5357142857142857, total=   3.4s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.4185185185185185, total=   1.6s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.3988095238095238, total=   3.2s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.4166666666666667, 

[Parallel(n_jobs=-1)]: Done 178 out of 180 | elapsed:   11.0s remaining:    0.1s


[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=800, score=0.5773809523809524, total=   2.7s


[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:   11.4s finished



final score for the tuned model
 0.31746031746031744

best hyperparameters for the tuned model
 {'criterion': 'entropy', 'max_depth': 2, 'max_features': 0.1, 'n_estimators': 400}

cross validation results (MEAN)
 [0.46350351 0.48770732 0.50453424 0.47929385 0.43901989 0.44627595
 0.44667659 0.48009514 0.41267933 0.45451135 0.45491199 0.44707723
 0.42382987 0.42179233 0.46270223 0.47151633 0.50419083 0.45234788
 0.45821505 0.42752595 0.44655194 0.4549654  0.4404215  0.44767755
 0.43316417 0.4058112  0.42223748 0.45531263 0.43714133 0.44024344
 0.41951313 0.42770401 0.45115868 0.46478047 0.4622965  0.46269714]

cross validation results (STD)
 [0.05056487 0.06526603 0.0669746  0.07161769 0.08438899 0.07116028
 0.0494738  0.05784031 0.04668499 0.05633106 0.03490266 0.04360259
 0.05742675 0.09306388 0.06483502 0.0631165  0.08100086 0.07350258
 0.08088842 0.06074446 0.05175474 0.0536295  0.05661688 0.08078448
 0.07225026 0.06025821 0.06479805 0.04682346 0.05278605 0.03392311
 0.047178   0.0

  'precision', 'predicted', average, warn_for)


[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=100 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=100 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=100 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=100 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=100 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=200 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=200 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=200 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=200 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=200 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400 
[CV] criterion=entropy, max_depth=2, max_features=0

[CV] criterion=entropy, max_depth=4, max_features=0.1, n_estimators=200 
[CV] criterion=entropy, max_depth=4, max_features=0.1, n_estimators=100 
[CV] criterion=entropy, max_depth=4, max_features=0.1, n_estimators=100 
[CV]  criterion=entropy, max_depth=2, max_features=0.4, n_estimators=200, score=0.35185185185185186, total=   0.9s
[CV]  criterion=entropy, max_depth=2, max_features=0.4, n_estimators=200, score=0.3333333333333333, total=   0.9s
[CV]  criterion=entropy, max_depth=2, max_features=0.4, n_estimators=200, score=0.3988095238095238, total=   0.9s
[CV] criterion=entropy, max_depth=4, max_features=0.1, n_estimators=200 
[CV] criterion=entropy, max_depth=4, max_features=0.1, n_estimators=200 
[CV]  criterion=entropy, max_depth=2, max_features=0.4, n_estimators=200, score=0.35714285714285715, total=   0.9s
[CV] criterion=entropy, max_depth=4, max_features=0.1, n_estimators=200 
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=400, score=0.375, total=   1.8s
[CV

[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:    3.8s


[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.35714285714285715, total=   1.6s
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.3333333333333333, total=   1.6s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.5357142857142857, total=   3.4s
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.44642857142857145, total=   1.6s
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.39351851851851855, total=   1.7s
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.375, total=   3.4s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV] criterion=entropy, max_depth=4, max_features=0.4, n_estimators=100 
[CV] criterion=entropy, max_depth=

[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=400, score=0.44166666666666665, total=   1.9s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.35833333333333334, total=   0.9s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=800, score=0.48809523809523814, total=   3.6s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.5297619047619048, total=   0.9s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=200 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.2916666666666667, total=   1.0s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estim

[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.44285714285714284, total=   3.9s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.48809523809523814, total=   4.0s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.2916666666666667, total=   4.0s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.2916666666666667, total=   3.2s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.38425925925925924, total=   3.5s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.44166666666666665, total=   3.5s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.38888888888888884, total=   1.8s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.39999999999999997, total=   1.8s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.

[Parallel(n_jobs=-1)]: Done 178 out of 180 | elapsed:   11.7s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:   11.8s finished



final score for the tuned model
 0.4259259259259259

best hyperparameters for the tuned model
 {'criterion': 'entropy', 'max_depth': 8, 'max_features': 0.4, 'n_estimators': 100}

cross validation results (MEAN)
 [0.42011345 0.40364265 0.37880291 0.3690667  0.42205942 0.44445589
 0.38761701 0.40346459 0.40971459 0.36788258 0.38356481 0.37793803
 0.43529838 0.39248703 0.37239265 0.39625305 0.38360933 0.3951834
 0.3997558  0.40187983 0.39197828 0.41954619 0.41171144 0.40095772
 0.42928241 0.40072243 0.38465227 0.40472884 0.41134768 0.43657535
 0.42712022 0.40937118 0.44505113 0.39769536 0.4221917  0.41778465]

cross validation results (STD)
 [0.0954908  0.089477   0.08546871 0.06634655 0.05598751 0.04061833
 0.05034963 0.06619248 0.0474782  0.02664199 0.05085594 0.04100086
 0.05224038 0.06022014 0.04253648 0.06459244 0.04961209 0.09535636
 0.06737702 0.0680604  0.08792461 0.06489598 0.06131853 0.06512038
 0.09285982 0.08530899 0.06756135 0.06596876 0.07168665 0.0423848
 0.07625755 0.0661

[CV]  criterion=entropy, max_depth=2, max_features=0.4, n_estimators=100, score=0.36309523809523814, total=   0.4s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.5, total=   1.6s
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.34259259259259256, total=   1.6s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.3511904761904762, total=   1.6s
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.2916666666666667, total=   1.6s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV] criterion=entropy, max_depth=4, max_features=0.1, n_estimators=100 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, scor

[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:    3.5s


[CV]  criterion=entropy, max_depth=4, max_features=0.2, n_estimators=200, score=0.3055555555555555, total=   0.9s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.3333333333333333, total=   1.7s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.3796296296296296, total=   3.4s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.36904761904761907, total=   1.7s
[CV] criterion=entropy, max_depth=4, max_features=0.4, n_estimators=100 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.17857142857142858, total=   1.7s
[CV] criterion=entropy, max_depth=4, max_features=0.4, n_estimators=100 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estim

[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.3333333333333333, total=   0.9s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=400, score=0.3796296296296296, total=   1.9s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.3333333333333333, total=   0.9s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.2261904761904762, total=   0.9s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=200 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.3821428571428571, total=   0.9s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimato

[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.48333333333333334, total=   3.3s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.38425925925925924, total=   3.4s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.23214285714285712, total=   4.0s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.375, total=   4.1s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.3333333333333333, total=   3.5s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.31547619047619047, total=   3.5s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.36309523809523814, total=   3.4s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.375, total=   1.8s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.38425925925925924, total=  

[Parallel(n_jobs=-1)]: Done 178 out of 180 | elapsed:   11.4s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:   11.5s finished



final score for the tuned model
 0.4814814814814814

best hyperparameters for the tuned model
 {'criterion': 'entropy', 'max_depth': 8, 'max_features': 0.1, 'n_estimators': 400}

cross validation results (MEAN)
 [0.37637999 0.37043396 0.36968992 0.36894587 0.32657967 0.34645274
 0.36899038 0.36115563 0.37031314 0.32813772 0.33689459 0.33615054
 0.33494862 0.28773657 0.32922517 0.35480896 0.32086895 0.36820818
 0.35292023 0.31263355 0.36778846 0.34415064 0.31914555 0.32144765
 0.32269409 0.33190883 0.38371235 0.37668396 0.31826542 0.36832774
 0.35985704 0.35093483 0.35649929 0.38207036 0.3423433  0.35109509]

cross validation results (STD)
 [0.03310401 0.03222905 0.0699109  0.06694403 0.09849505 0.08398695
 0.07912455 0.07577877 0.08615906 0.10495028 0.0748567  0.09823672
 0.0656777  0.06586124 0.07908731 0.04877988 0.08338711 0.04395007
 0.07767782 0.07526153 0.03325099 0.07680099 0.09203825 0.07634849
 0.05007546 0.05583129 0.06425569 0.0586879  0.05311678 0.07257153
 0.05572002 0.08

[CV]  criterion=entropy, max_depth=2, max_features=0.4, n_estimators=100, score=0.4761904761904762, total=   0.5s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.375, total=   1.6s
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.4583333333333333, total=   1.6s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.44047619047619047, total=   1.7s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.4, n_estimators=200, score=0.375, total=   0.9s
[CV] criterion=entropy, max_depth=4, max_features=0.1, n_estimators=100 
[CV]  criterion=entropy, max_depth=2, max_features=0.4, n_estimators=200, score=0.38425925

[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:    3.9s


[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.44629629629629625, total=   1.8s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.375, total=   1.7s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.48796296296296293, total=   3.6s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.38690476190476186, total=   1.7s
[CV] criterion=entropy, max_depth=4, max_features=0.4, n_estimators=100 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.36309523809523814, total=   1.8s
[CV] criterion=entropy, max_depth=4, max_features=0.4, n_estimators=100 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, 

[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.4166666666666667, total=   0.9s
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=800, score=0.375, total=   3.5s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.31547619047619047, total=   0.8s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.4595238095238095, total=   0.8s
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=800, score=0.4761904761904762, total=   3.5s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=200 
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=200 
[CV]

[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.31547619047619047, total=   4.1s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.48333333333333334, total=   3.4s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.47129629629629627, total=   3.5s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.4761904761904762, total=   4.1s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.43452380952380953, total=   3.3s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.40925925925925927, total=   1.8s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.5916666666666667, total=   3.6s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.35714285714285715, total=   3.6s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.

[Parallel(n_jobs=-1)]: Done 178 out of 180 | elapsed:   11.5s remaining:    0.1s


[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=800, score=0.375, total=   2.8s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=800, score=0.43452380952380953, total=   2.6s


[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:   11.7s finished



final score for the tuned model
 0.3862433862433862

best hyperparameters for the tuned model
 {'criterion': 'entropy', 'max_depth': 8, 'max_features': 0.1, 'n_estimators': 800}

cross validation results (MEAN)
 [0.3752353  0.39240563 0.40001781 0.42525819 0.40162037 0.43403795
 0.40650819 0.42373575 0.43599537 0.39853607 0.3897792  0.4454912
 0.40715558 0.35758547 0.42037037 0.46117852 0.41987307 0.45124262
 0.45124262 0.41428063 0.38965965 0.43000356 0.43279024 0.43343127
 0.43459376 0.43823896 0.41660434 0.46900819 0.43529202 0.43353429
 0.44184473 0.42541845 0.35589769 0.42431446 0.41173433 0.43841702]

cross validation results (STD)
 [0.05336162 0.05796813 0.04418484 0.04625481 0.02384373 0.03016281
 0.06697628 0.04969023 0.04245934 0.04980474 0.01786566 0.05613362
 0.04161714 0.05914586 0.06012148 0.0678653  0.0632288  0.05217957
 0.05217957 0.07520196 0.03792426 0.06997754 0.06788484 0.08059445
 0.04333426 0.08984172 0.0690024  0.07560221 0.07037702 0.07625676
 0.05282477 0.056

[CV]  criterion=entropy, max_depth=2, max_features=0.4, n_estimators=100, score=0.22023809523809523, total=   0.5s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.47222222222222215, total=   1.9s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.3511904761904762, total=   1.9s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.25, total=   1.9s
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.4583333333333333, total=   2.0s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.4, n_estimators=200, score=0.39351851851851855, total=   0.9s
[CV] criterion=entropy, max_depth=4

[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:    3.8s


[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.30952380952380953, total=   3.4s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.3511904761904762, total=   3.5s
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.48333333333333334, total=   1.8s
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.39351851851851855, total=   3.6s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=4, max_features=0.2, n_estimators=200, score=0.4601851851851852, total=   1.0s
[CV] criterion=entropy, max_depth=4, max_features=0.4, n_estimators=100 
[CV] criterion=entropy, max_depth=4, max_features=0.4, n_estimators=100 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_esti

[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.6333333333333333, total=   0.9s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=400, score=0.4601851851851852, total=   2.0s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=800, score=0.3333333333333333, total=   3.5s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=800, score=0.3988095238095238, total=   3.5s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.25, total=   0.9s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=200 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score

[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.27499999999999997, total=   3.9s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.35714285714285715, total=   3.3s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.48333333333333334, total=   3.6s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.4601851851851852, total=   3.7s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.41785714285714287, total=   4.1s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.35833333333333334, total=   3.6s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.2619047619047619, total=   4.2s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.3761904761904762, total=   3.3s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.3

[Parallel(n_jobs=-1)]: Done 178 out of 180 | elapsed:   11.6s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:   11.7s finished



final score for the tuned model
 0.3544973544973545

best hyperparameters for the tuned model
 {'criterion': 'entropy', 'max_depth': 4, 'max_features': 0.1, 'n_estimators': 400}

cross validation results (MEAN)
 [0.40044897 0.3826427  0.37913996 0.403401   0.42628968 0.39263584
 0.38031899 0.36679513 0.3558926  0.36773377 0.36268569 0.40197141
 0.41691595 0.39331756 0.44396368 0.42731481 0.37256563 0.38348469
 0.40391738 0.4286541  0.37614723 0.37638762 0.38920813 0.37278185
 0.42154558 0.41064815 0.38877315 0.40832443 0.35296856 0.37564993
 0.41766    0.39133216 0.38813212 0.40461564 0.39660282 0.38778872]

cross validation results (STD)
 [0.07232194 0.09225421 0.08191452 0.07702995 0.09323649 0.08364249
 0.07978016 0.04676203 0.07802775 0.05916029 0.05867847 0.0571569
 0.05599391 0.08338994 0.07161782 0.0547856  0.06180425 0.07327983
 0.06312837 0.06106731 0.05456102 0.09692485 0.08216638 0.08520396
 0.10105633 0.13353288 0.08941869 0.05396287 0.09431629 0.1021653
 0.06993158 0.0812

[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.4, n_estimators=100, score=0.39285714285714285, total=   0.5s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.3796296296296296, total=   1.6s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=400, score=0.26851851851851855, total=   1.6s
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.2916666666666667, total=   1.7s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.2976190476190476, total=   1.7s
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estim

[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:    3.6s


[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.26851851851851855, total=   1.7s
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.32857142857142857, total=   3.5s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.3452380952380952, total=   3.5s
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.2916666666666667, total=   1.8s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV] criterion=entropy, max_depth=4, max_features=0.4, n_estimators=100 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.28095238095238095, total=   1.8s
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.22685185185185186, total=   3.6s
[CV]  criterion=entr

[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=400, score=0.48055555555555557, total=   2.1s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.3333333333333333, total=   0.9s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=800, score=0.28095238095238095, total=   3.6s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.2392857142857143, total=   0.9s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=400, score=0.425, total=   2.1s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=200 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=800, sc

[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.3333333333333333, total=   4.0s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.3511904761904762, total=   4.0s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.3101851851851852, total=   3.5s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.2392857142857143, total=   4.1s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.375, total=   3.5s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.3452380952380952, total=   3.2s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.2392857142857143, total=   3.2s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.25, total=   3.3s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.39285714285714285, total=   1.5s


[Parallel(n_jobs=-1)]: Done 178 out of 180 | elapsed:   11.3s remaining:    0.1s


[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=800, score=0.39285714285714285, total=   2.7s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=800, score=0.28095238095238095, total=   2.7s


[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:   11.6s finished



final score for the tuned model
 0.44708994708994715

best hyperparameters for the tuned model
 {'criterion': 'entropy', 'max_depth': 4, 'max_features': 0.4, 'n_estimators': 100}

cross validation results (MEAN)
 [0.30547288 0.31332036 0.33013456 0.34868488 0.33706629 0.34698057
 0.37141967 0.32900895 0.31911884 0.36495218 0.35737434 0.37420126
 0.38348977 0.36930072 0.31183862 0.33804563 0.34953958 0.34234966
 0.38099435 0.34269307 0.39515415 0.38714133 0.35917659 0.3683341
 0.28581476 0.31883267 0.32179742 0.30422644 0.33768061 0.34511472
 0.34918346 0.34803877 0.33387388 0.34285333 0.33758267 0.34559549]

cross validation results (STD)
 [0.07398656 0.01911465 0.03265385 0.01916907 0.02822335 0.05282409
 0.05531246 0.06175473 0.05252479 0.03358651 0.04143176 0.03242608
 0.05562437 0.05801382 0.0410203  0.03095369 0.08307085 0.04872885
 0.04912205 0.04909386 0.06989345 0.08325369 0.08651647 0.08257665
 0.06258514 0.06970049 0.07761103 0.05241327 0.05461311 0.05923738
 0.06149146 0.06

[CV]  criterion=entropy, max_depth=2, max_features=0.4, n_estimators=100, score=0.18452380952380953, total=   0.6s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.5297619047619048, total=   1.7s
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.39285714285714285, total=   1.7s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=400, score=0.4583333333333333, total=   1.8s
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.4166666666666667, total=   1.8s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV] criterion=entropy, max_depth=4, max_features=0.1, n_estimators=100 
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estim

[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:    3.9s


[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.45092592592592595, total=   1.7s
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.4583333333333333, total=   3.7s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.5357142857142857, total=   1.8s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.2916666666666667, total=   3.8s
[CV] criterion=entropy, max_depth=4, max_features=0.4, n_estimators=100 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.2916666666666667, total=   1.9s
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.34722222222222215, total=   3.9s
[CV] criterion=entropy

[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.4166666666666667, total=   0.9s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.35714285714285715, total=   0.9s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.25, total=   1.0s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=800, score=0.43452380952380953, total=   3.8s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=200 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.3511904761904762, total=   0.9s
[CV]

[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.375, total=   3.6s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.31666666666666665, total=   3.6s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.44642857142857145, total=   3.7s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.34722222222222215, total=   3.7s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.35833333333333334, total=   4.4s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.44642857142857145, total=   4.5s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.34722222222222215, total=   2.0s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.41785714285714287, total=   4.7s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.351190476190

[Parallel(n_jobs=-1)]: Done 178 out of 180 | elapsed:   12.6s remaining:    0.1s


[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=800, score=0.49404761904761907, total=   3.3s


[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:   13.0s finished



final score for the tuned model
 0.3650793650793651

best hyperparameters for the tuned model
 {'criterion': 'entropy', 'max_depth': 4, 'max_features': 0.1, 'n_estimators': 100}

cross validation results (MEAN)
 [0.38383954 0.38498423 0.38509234 0.41662215 0.41856812 0.37713675
 0.39316239 0.41033272 0.31126628 0.34486289 0.3784595  0.38761701
 0.4335928  0.38471459 0.40857499 0.39465049 0.39527371 0.38457087
 0.37101267 0.37942613 0.3508954  0.38449201 0.4111747  0.39557133
 0.38559091 0.36465456 0.4210699  0.36650259 0.33176129 0.38884056
 0.37548204 0.39092898 0.3514156  0.34067842 0.34166285 0.35941188]

cross validation results (STD)
 [0.04797237 0.05644477 0.08566465 0.06647795 0.05601649 0.06242285
 0.06437118 0.08370592 0.07257681 0.05164001 0.07905643 0.09279069
 0.06885824 0.08613576 0.08076437 0.08263062 0.08467505 0.05391803
 0.04193336 0.06487404 0.05786613 0.05815986 0.05486766 0.0313915
 0.05567981 0.06754295 0.05179216 0.0432238  0.01352781 0.03541659
 0.04433422 0.063

[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=400 
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.44047619047619047, total=   1.7s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.44047619047619047, total=   1.7s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.4166666666666667, total=   1.8s
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.3333333333333333, total=   1.8s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV] criterion=entropy, max_depth=4, max_features=0.1, n_estimators=100 
[CV]  criterion=entropy, max_de

[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:    3.8s


[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.35714285714285715, total=   3.6s
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.44047619047619047, total=   3.6s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.375, total=   3.6s
[CV]  criterion=entropy, max_depth=4, max_features=0.2, n_estimators=200, score=0.3814814814814815, total=   1.0s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV] criterion=entropy, max_depth=4, max_features=0.4, n_estimators=100 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.27314814814814814, total=   1.9s
[CV] criterion=entropy, max_depth=4, max_features=0.4, n_estimators=100 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, s

[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=800, score=0.3333333333333333, total=   3.7s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.375, total=   0.9s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.32142857142857145, total=   1.0s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.4583333333333333, total=   1.0s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=200 
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=400, score=0.3814814814814815, total=   2.3s
[CV]

[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.3333333333333333, total=   3.5s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.4166666666666667, total=   3.6s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.4047619047619048, total=   3.6s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.34047619047619043, total=   4.5s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.25, total=   4.5s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.44047619047619047, total=   4.6s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.3988095238095238, total=   3.6s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.36944444444444446, total=   2.1s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.31666666666666665

[Parallel(n_jobs=-1)]: Done 178 out of 180 | elapsed:   12.4s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:   12.5s finished



final score for the tuned model
 0.40211640211640215

best hyperparameters for the tuned model
 {'criterion': 'entropy', 'max_depth': 8, 'max_features': 0.2, 'n_estimators': 100}

cross validation results (MEAN)
 [0.38024013 0.41229141 0.39528643 0.40369989 0.37279329 0.37257072
 0.39741046 0.37983949 0.3267323  0.35499975 0.3628345  0.35540039
 0.37613579 0.38801765 0.36221128 0.37943885 0.33759412 0.35416412
 0.37257072 0.35540039 0.37014398 0.36950295 0.35708308 0.34826898
 0.35968788 0.38939764 0.37920355 0.39450804 0.42689764 0.40923509
 0.38807489 0.35778643 0.39035409 0.34083486 0.37641178 0.37919846]

cross validation results (STD)
 [0.03127091 0.06172441 0.04529712 0.03590438 0.03346585 0.04809542
 0.05158537 0.04448773 0.06627779 0.04714454 0.06702733 0.05635324
 0.03515348 0.04543738 0.06868171 0.04879684 0.04888865 0.05228335
 0.04809542 0.07733159 0.03671582 0.06458656 0.06242257 0.06128657
 0.04635192 0.06870746 0.05999592 0.03163133 0.05780051 0.06455304
 0.05811276 0.0

[CV]  criterion=entropy, max_depth=2, max_features=0.4, n_estimators=100, score=0.4369047619047619, total=   0.5s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.30357142857142855, total=   1.6s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.3333333333333333, total=   1.6s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.3452380952380952, total=   1.6s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.3055555555555555, total=   1.7s
[CV] criterion=entropy, max_depth=4, max_features=0.1, n_estimators=100 
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estima

[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:    3.8s


[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.22685185185185186, total=   1.8s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.25595238095238093, total=   1.8s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.2916666666666667, total=   1.9s
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.375, total=   1.9s
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.30952380952380953, total=   1.9s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV] criterion=entropy, max_depth=4, max_features=0.4, n_estimators=100 
[CV] criterion=entropy, max_depth=4, max_features=0.4, n_estimators=100 
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, s

[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=800, score=0.3333333333333333, total=   3.7s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.375, total=   1.0s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.23611111111111108, total=   1.0s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.2619047619047619, total=   1.0s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=800, score=0.30952380952380953, total=   3.8s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=200 
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=400, sc

[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.39285714285714285, total=   3.7s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.2916666666666667, total=   3.8s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.26851851851851855, total=   3.8s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.20833333333333334, total=   4.5s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.25, total=   3.8s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.30357142857142855, total=   4.7s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.30952380952380953, total=   3.6s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.3761904761904762, total=   4.8s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.231481481481481

[Parallel(n_jobs=-1)]: Done 178 out of 180 | elapsed:   12.5s remaining:    0.1s


[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=800, score=0.3761904761904762, total=   3.4s


[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:   12.9s finished



final score for the tuned model
 0.47883597883597884

best hyperparameters for the tuned model
 {'criterion': 'entropy', 'max_depth': 2, 'max_features': 0.2, 'n_estimators': 200}

cross validation results (MEAN)
 [0.28179055 0.31773377 0.3240232  0.34079289 0.30537113 0.3589998
 0.30639499 0.30661757 0.34010607 0.33782306 0.27892883 0.25769485
 0.32001679 0.2617585  0.2913487  0.30891967 0.3274738  0.2979243
 0.3058926  0.28235653 0.25980617 0.28979701 0.32230617 0.29690044
 0.29347273 0.29365079 0.31789911 0.30125025 0.34892654 0.30434727
 0.2674883  0.29950524 0.31879324 0.3073807  0.31505011 0.30743793]

cross validation results (STD)
 [0.0723361  0.03571047 0.01655719 0.04954323 0.01633731 0.03956568
 0.06415791 0.05714724 0.10834965 0.04912742 0.05192458 0.04957044
 0.0748462  0.03641753 0.05096489 0.04931917 0.06134186 0.09140502
 0.11357551 0.01957444 0.04050378 0.05654657 0.04310928 0.05692026
 0.04027539 0.07478586 0.04843321 0.04901556 0.04487131 0.07145235
 0.06536534 0.032

[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.4, n_estimators=100, score=0.44047619047619047, total=   0.5s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.4166666666666667, total=   1.8s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.48214285714285715, total=   1.8s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.4212962962962963, total=   1.8s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.44047619047619047, total=   1.8s
[CV] criterion=entropy, max_depth=4, max_features=0.1, n_estim

[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:    4.0s


[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.5357142857142857, total=   1.9s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.4166666666666667, total=   1.9s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.4166666666666667, total=   3.8s
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.4583333333333333, total=   2.0s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV] criterion=entropy, max_depth=4, max_features=0.4, n_estimators=100 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.44047619047619047, total=   1.9s
[CV] criterion=entropy, max_depth=4, max_features=0.4, n_estimators=100 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estima

[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.5, total=   1.0s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=800, score=0.3988095238095238, total=   3.8s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.44166666666666665, total=   1.0s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.3988095238095238, total=   1.0s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=200 
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=400, score=0.45092592592592595, total=   2.3s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, scor

[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.5416666666666666, total=   3.8s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.48333333333333334, total=   4.5s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.35714285714285715, total=   3.6s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.3988095238095238, total=   4.6s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.44642857142857145, total=   3.8s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.4166666666666667, total=   3.8s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.3988095238095238, total=   4.7s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.5, total=   2.1s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.48796296296296293,

[Parallel(n_jobs=-1)]: Done 178 out of 180 | elapsed:   12.7s remaining:    0.1s


[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=800, score=0.44642857142857145, total=   3.2s


[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:   12.9s finished



final score for the tuned model
 0.30158730158730157

best hyperparameters for the tuned model
 {'criterion': 'entropy', 'max_depth': 4, 'max_features': 0.1, 'n_estimators': 400}

cross validation results (MEAN)
 [0.42658094 0.40191926 0.4432298  0.44460979 0.39298433 0.37673611
 0.42680352 0.46120141 0.42523657 0.42638126 0.42540191 0.44222884
 0.41698845 0.45006359 0.47644867 0.46901455 0.46645045 0.43920177
 0.43852004 0.43852004 0.45609102 0.46450448 0.46113909 0.45574761
 0.41275819 0.43107321 0.45574761 0.45224486 0.44511345 0.43090787
 0.43966473 0.43988731 0.4463777  0.4656657  0.43032916 0.44285206]

cross validation results (STD)
 [0.04844006 0.07272304 0.02414459 0.02928477 0.03664064 0.03921783
 0.01094596 0.04760691 0.02910922 0.04063084 0.04097449 0.04246069
 0.03747731 0.04184771 0.04806771 0.05165443 0.06940731 0.04573957
 0.02044053 0.02895666 0.04589421 0.05954477 0.05611776 0.05381534
 0.08426731 0.04604168 0.05969779 0.06293178 0.06424172 0.04813108
 0.02068325 0.0

[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.34722222222222215, total=   1.6s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.2916666666666667, total=   1.6s
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.25, total=   1.6s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.3511904761904762, total=   1.7s
[CV] criterion=entropy, max_depth=4, max_features=0.1, n_estimators=100 
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=400, score=0.2916666666666667, total=   1.7s
[CV] 

[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:    3.6s


[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.2916666666666667, total=   3.6s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.3333333333333333, total=   1.8s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.4305555555555555, total=   1.8s
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.26785714285714285, total=   3.6s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.30952380952380953, total=   1.8s
[CV] criterion=entropy, max_depth=4, max_features=0.4, n_estimators=100 
[CV] criterion=entropy, max_depth=4, max_features=0.4, n_estimators=100 
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estim

[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.5342592592592593, total=   0.9s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=800, score=0.30952380952380953, total=   3.7s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.25, total=   1.0s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.28690476190476194, total=   0.9s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=200 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.31547619047619047, total=   0.9s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=200 
[CV

[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.35833333333333334, total=   4.9s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.4238095238095238, total=   3.4s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.16666666666666666, total=   3.8s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.31547619047619047, total=   3.7s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.20833333333333334, total=   4.5s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.17857142857142858, total=   4.5s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.31547619047619047, total=   4.6s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.425, total=   2.0s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.1785714285714

[Parallel(n_jobs=-1)]: Done 178 out of 180 | elapsed:   12.5s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:   12.6s finished



final score for the tuned model
 0.41534391534391535

best hyperparameters for the tuned model
 {'criterion': 'entropy', 'max_depth': 8, 'max_features': 0.4, 'n_estimators': 200}

cross validation results (MEAN)
 [0.30814128 0.3207672  0.30988629 0.29305937 0.31693249 0.34273886
 0.3034188  0.31148886 0.29524064 0.30422009 0.3121248  0.2927732
 0.36104879 0.33261981 0.32791514 0.32791514 0.28428088 0.30772283
 0.29248067 0.32357041 0.33616326 0.32067562 0.29291692 0.29698565
 0.35587988 0.37357041 0.3392183  0.36880851 0.30283501 0.35914733
 0.29873067 0.35039047 0.33214413 0.40316061 0.3249504  0.35129477]

cross validation results (STD)
 [0.04167524 0.03969536 0.03760568 0.05609319 0.0426842  0.03233555
 0.02707064 0.04663875 0.05044576 0.06958572 0.07334428 0.06292784
 0.06353097 0.0710816  0.0599201  0.0599201  0.06702999 0.08623732
 0.06462357 0.10727937 0.10959546 0.09983148 0.07950901 0.08944352
 0.09939309 0.11130542 0.10133598 0.11599334 0.11817113 0.12199141
 0.09153535 0.14

[CV]  criterion=entropy, max_depth=2, max_features=0.4, n_estimators=100, score=0.48214285714285715, total=   0.5s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.22023809523809523, total=   2.0s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.44642857142857145, total=   2.0s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.4166666666666667, total=   2.0s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=400, score=0.3055555555555555, total=   2.1s
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_esti

[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:    4.0s


[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.48214285714285715, total=   3.6s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.1726190476190476, total=   1.8s
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.44047619047619047, total=   1.8s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.3055555555555555, total=   1.8s
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.21428571428571427, total=   3.7s
[CV] criterion=entropy, max_depth=4, max_features=0.4, n_estimators=100 
[CV] criterion=entropy, max_depth=4, max_features=0.4, n_estimators=100 
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_esti

[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.4259259259259259, total=   0.9s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=800, score=0.44047619047619047, total=   3.5s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.2916666666666667, total=   0.9s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.35833333333333334, total=   0.9s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.28690476190476194, total=   0.9s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=200 
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_esti

[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.4166666666666667, total=   3.6s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.5499999999999999, total=   4.8s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.33055555555555555, total=   3.6s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.28690476190476194, total=   3.6s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.39999999999999997, total=   3.8s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.35833333333333334, total=   4.4s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.3988095238095238, total=   3.5s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.08928571428571429, total=   4.5s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.4

[Parallel(n_jobs=-1)]: Done 178 out of 180 | elapsed:   12.6s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:   12.7s finished



final score for the tuned model
 0.5291005291005291

best hyperparameters for the tuned model
 {'criterion': 'entropy', 'max_depth': 8, 'max_features': 0.1, 'n_estimators': 100}

cross validation results (MEAN)
 [0.3892005  0.37637999 0.3511396  0.33392476 0.39057412 0.37895172
 0.37188263 0.35425443 0.37929513 0.33862943 0.35347604 0.34489723
 0.26821963 0.32815553 0.34744353 0.3686775  0.32146546 0.36194673
 0.38464082 0.40261243 0.38562017 0.38797187 0.37400666 0.37029787
 0.44634463 0.37405118 0.37318758 0.36669719 0.36317282 0.31815731
 0.34678088 0.38706756 0.37022283 0.3905525  0.36589082 0.36100809]

cross validation results (STD)
 [0.0604867  0.10684717 0.07751871 0.11805584 0.11388455 0.11310504
 0.09712453 0.11370969 0.09153418 0.10128305 0.1270452  0.12270707
 0.05699287 0.11410258 0.0971522  0.08120724 0.0602336  0.11557417
 0.10516774 0.0944167  0.10980239 0.13462535 0.12841665 0.15130576
 0.08375557 0.08280416 0.05039991 0.04926615 0.10361443 0.05901327
 0.05987154 0.08

[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.44047619047619047, total=   1.9s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.3511904761904762, total=   1.9s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.4583333333333333, total=   1.9s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.2916666666666667, total=   2.0s
[CV] criterion=entropy, max_depth=4, max_features=0.1, n_estimators=100 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.34722222222222215, tota

[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:    4.2s


[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.375, total=   1.9s
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.4583333333333333, total=   3.8s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.35185185185185186, total=   2.1s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.3148148148148148, total=   4.0s
[CV]  criterion=entropy, max_depth=4, max_features=0.2, n_estimators=200, score=0.3101851851851852, total=   1.1s
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.2916666666666667, total=   4.1s
[CV] criterion=entropy, max_depth=4, max_features=0.4, n_estimators=100 
[CV]  criterion=entropy, max_depth=4

[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=800, score=0.30952380952380953, total=   3.8s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.4185185185185185, total=   1.0s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.44166666666666665, total=   0.9s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.35833333333333334, total=   1.0s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.4047619047619048, total=   0.9s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.30952380952380953, total=   1.0s
[CV] criterion=entro

[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.3511904761904762, total=   3.7s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.48333333333333334, total=   3.9s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.4166666666666667, total=   4.0s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.31666666666666665, total=   5.4s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.37222222222222223, total=   5.4s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.3511904761904762, total=   4.0s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.525, total=   5.0s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.28690476190476194, total=   5.2s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.522222222222222

[Parallel(n_jobs=-1)]: Done 178 out of 180 | elapsed:   13.9s remaining:    0.2s


[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=800, score=0.4238095238095238, total=   3.7s


[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:   14.1s finished



final score for the tuned model
 0.43650793650793646

best hyperparameters for the tuned model
 {'criterion': 'entropy', 'max_depth': 8, 'max_features': 0.1, 'n_estimators': 100}

cross validation results (MEAN)
 [0.3838459  0.33606151 0.37713675 0.34423967 0.32711386 0.31945716
 0.34297416 0.38676486 0.35053419 0.41249618 0.35894765 0.38991784
 0.36811152 0.34529787 0.37228963 0.36371591 0.36160969 0.32582672
 0.30859916 0.34643747 0.32686075 0.3911541  0.36783679 0.38903007
 0.43582621 0.38743386 0.37328551 0.4141509  0.35306904 0.36116326
 0.35491326 0.35239494 0.34716372 0.38731303 0.39632173 0.36968483]

cross validation results (STD)
 [0.06123206 0.08539338 0.06242285 0.07808489 0.08070454 0.09253838
 0.06051682 0.08243002 0.04109401 0.07712168 0.08264282 0.08794031
 0.0495409  0.04927986 0.07592493 0.07092456 0.12577956 0.07681223
 0.07778104 0.08637111 0.13512678 0.12397763 0.09491298 0.08609943
 0.05426632 0.04684376 0.07434157 0.05422756 0.10224094 0.04689327
 0.02046246 0.0

[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.4, n_estimators=100, score=0.44642857142857145, total=   0.6s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.5238095238095238, total=   2.0s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.38425925925925924, total=   2.0s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.43452380952380953, total=   2.0s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.4583333333333333, total=   2.0s
[CV] criterion=entropy, max_depth=4, max_features=0.1, n_estim

[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:    4.3s


[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.43452380952380953, total=   1.8s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.3351851851851852, total=   2.0s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.5, total=   1.9s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.5238095238095238, total=   3.9s
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.4166666666666667, total=   3.9s
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.4583333333333333, total=   3.9s
[CV] criterion=entropy, max_depth=4, max_features=0.4, n_estimators=100 
[CV] criterion=entropy, max_depth=4, m

[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.4166666666666667, total=   1.0s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.5, total=   1.0s
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=800, score=0.48214285714285715, total=   3.8s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.43452380952380953, total=   1.0s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.44047619047619047, total=   1.0s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=200 
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=200 
[CV]

[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.43452380952380953, total=   3.8s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.44166666666666665, total=   5.3s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.4166666666666667, total=   4.1s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.2935185185185185, total=   5.4s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.4166666666666667, total=   5.1s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.5904761904761905, total=   5.1s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.4595238095238095, total=   5.1s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.2981481481481481, total=   2.3s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.4595

[Parallel(n_jobs=-1)]: Done 178 out of 180 | elapsed:   13.8s remaining:    0.2s
[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:   14.0s finished



final score for the tuned model
 0.44708994708994715

best hyperparameters for the tuned model
 {'criterion': 'entropy', 'max_depth': 8, 'max_features': 0.4, 'n_estimators': 200}

cross validation results (MEAN)
 [0.43162393 0.45165598 0.4590901  0.4590901  0.38394765 0.4349944
 0.40918803 0.41777956 0.42006894 0.41016738 0.42693707 0.42733771
 0.39357575 0.3940845  0.44069623 0.43342745 0.40489672 0.42346866
 0.44836564 0.41545584 0.42224384 0.46355566 0.41649751 0.43733084
 0.40678546 0.38568376 0.45103276 0.4150552  0.37190171 0.35701949
 0.40818707 0.41602183 0.43520808 0.46648097 0.4204416  0.39862892]

cross validation results (STD)
 [0.04818998 0.04369245 0.04943844 0.04943844 0.06465004 0.03918108
 0.05866368 0.08521196 0.08578904 0.06835846 0.08401664 0.07757448
 0.06673759 0.08257806 0.05886942 0.05251778 0.06845693 0.07861021
 0.05883255 0.05467979 0.05822295 0.09990119 0.08246997 0.09523686
 0.06347851 0.12386937 0.05567707 0.06664694 0.04742913 0.04814027
 0.05115248 0.04

[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.3333333333333333, total=   1.7s
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.3988095238095238, total=   1.7s
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.3333333333333333, total=   1.8s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.34259259259259256, total=   1.7s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.3511904761904762, total=   1.8s
[CV] criterion=entropy, max_depth=4, max_features=0.1, n_estimat

[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:    4.1s


[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.34722222222222215, total=   3.9s
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.4583333333333333, total=   1.9s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.4761904761904762, total=   3.9s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.3333333333333333, total=   3.9s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=4, max_features=0.2, n_estimators=200, score=0.38888888888888884, total=   1.1s
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.4583333333333333, total=   1.9s
[CV] criterion=entropy, max_depth=4, max_features=0.4, n_estimators=100 
[CV]  criterion=entrop

[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.34259259259259256, total=   0.9s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.35714285714285715, total=   0.9s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.4166666666666667, total=   1.0s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.3333333333333333, total=   1.0s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=200 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.31547619047619047, total=   0.9s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estim

[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.4583333333333333, total=   3.9s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.3333333333333333, total=   5.4s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.36309523809523814, total=   3.7s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.3055555555555555, total=   5.5s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.4166666666666667, total=   4.9s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.31547619047619047, total=   5.0s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.3988095238095238, total=   5.2s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.525, total=   2.2s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.4138888888888889,

[Parallel(n_jobs=-1)]: Done 178 out of 180 | elapsed:   13.8s remaining:    0.2s
[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:   13.9s finished



final score for the tuned model
 0.42857142857142855

best hyperparameters for the tuned model
 {'criterion': 'entropy', 'max_depth': 4, 'max_features': 0.1, 'n_estimators': 400}

cross validation results (MEAN)
 [0.35247507 0.37009056 0.35131766 0.40237714 0.41399445 0.40127696
 0.4002531  0.38474893 0.38651684 0.35916514 0.37524802 0.39470772
 0.41005291 0.39665369 0.42876221 0.4039797  0.34474206 0.39665369
 0.38806217 0.38743895 0.33741606 0.40943351 0.38453653 0.3534417
 0.31966702 0.35326363 0.3513749  0.39418625 0.36255342 0.37810338
 0.39190832 0.35740232 0.34155474 0.34394078 0.40550214 0.39288767]

cross validation results (STD)
 [0.05301201 0.04696929 0.02409296 0.02079037 0.09198222 0.048539
 0.0425339  0.04996897 0.0406486  0.03948553 0.05737873 0.07199878
 0.05997176 0.01420715 0.03158168 0.07992705 0.04644702 0.04005105
 0.04584171 0.03167468 0.04764593 0.05996551 0.03153935 0.04524259
 0.06068894 0.03454712 0.05585903 0.04000529 0.05226917 0.06726721
 0.06258712 0.0591

[CV]  criterion=entropy, max_depth=2, max_features=0.4, n_estimators=100, score=0.32142857142857145, total=   0.6s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.2619047619047619, total=   1.7s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.375, total=   1.7s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.3055555555555555, total=   1.7s
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.4583333333333333, total=   1.7s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV] criterion=entropy, max_depth=4, max_features=0.1, n_estimators=100 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, sco

[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:    3.9s


[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.3511904761904762, total=   3.7s
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.4259259259259259, total=   1.8s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.39999999999999997, total=   1.8s
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.3055555555555555, total=   3.8s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.3511904761904762, total=   1.8s
[CV] criterion=entropy, max_depth=4, max_features=0.4, n_estimators=100 
[CV] criterion=entropy, max_depth=4, max_features=0.4, n_estimators=100 
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estima

[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.3101851851851852, total=   1.0s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.39999999999999997, total=   0.9s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.3333333333333333, total=   1.0s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.2619047619047619, total=   1.0s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=200 
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.43452380952380953, total=   1.0s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estima

[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.34722222222222215, total=   3.8s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.34259259259259256, total=   5.3s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.39285714285714285, total=   3.7s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.3511904761904762, total=   3.8s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.2916666666666667, total=   5.2s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.3511904761904762, total=   5.0s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.22023809523809523, total=   5.2s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.34259259259259256, total=   2.4s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.4

[Parallel(n_jobs=-1)]: Done 178 out of 180 | elapsed:   13.6s remaining:    0.2s
[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:   13.7s finished



final score for the tuned model
 0.38359788359788355

best hyperparameters for the tuned model
 {'criterion': 'entropy', 'max_depth': 4, 'max_features': 0.1, 'n_estimators': 400}

cross validation results (MEAN)
 [0.37622736 0.35882173 0.35080891 0.35023021 0.36991249 0.35326363
 0.35938772 0.31715507 0.32808048 0.30153007 0.3332036  0.32711386
 0.34592109 0.3454416  0.39122151 0.36483644 0.36920279 0.37439967
 0.31028694 0.3431395  0.34959554 0.34225809 0.3499275  0.316977
 0.36000076 0.34762159 0.38985424 0.38985424 0.28313619 0.37622736
 0.38046144 0.36833918 0.36104879 0.32260379 0.338852   0.35858135]

cross validation results (STD)
 [0.02634517 0.05701612 0.06637712 0.06269102 0.07620185 0.03292693
 0.03553097 0.04055976 0.03910803 0.041927   0.06589959 0.05562913
 0.07341434 0.03861475 0.03053818 0.03121708 0.07605377 0.03198041
 0.05101123 0.04049256 0.04797947 0.03198911 0.05335099 0.0545231
 0.08435522 0.0612938  0.03689055 0.04982678 0.08352421 0.07788675
 0.04663596 0.0478

[CV]  criterion=entropy, max_depth=2, max_features=0.4, n_estimators=100, score=0.4583333333333333, total=   0.6s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.44047619047619047, total=   1.6s
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.3333333333333333, total=   1.6s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.3333333333333333, total=   1.7s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.38690476190476186, total=   1.7s
[CV] criterion=entropy, max_depth=4, max_features=0.1, n_estimators=100 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estim

[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:    3.9s


[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.5046296296296297, total=   2.0s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.3333333333333333, total=   2.0s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.2916666666666667, total=   3.9s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.3333333333333333, total=   2.0s
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.4583333333333333, total=   4.0s
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.375, total=   3.9s
[CV] criterion=entropy, max_depth=4, max_features=0.4, n_estimators=100 
[CV]  criterion=entropy, max_depth=2,

[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.3333333333333333, total=   1.0s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.39351851851851855, total=   1.0s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.4845238095238095, total=   1.0s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.3333333333333333, total=   1.0s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=200 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.39285714285714285, tota

[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.4305555555555555, total=   5.4s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.375, total=   3.8s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.30952380952380953, total=   3.9s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.3333333333333333, total=   4.0s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.3511904761904762, total=   3.8s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.35714285714285715, total=   5.1s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.3333333333333333, total=   5.2s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.2619047619047619, total=   5.2s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.2916666666666667,

[Parallel(n_jobs=-1)]: Done 178 out of 180 | elapsed:   13.7s remaining:    0.2s


[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=800, score=0.30952380952380953, total=   3.7s


[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:   14.0s finished



final score for the tuned model
 0.39947089947089953

best hyperparameters for the tuned model
 {'criterion': 'entropy', 'max_depth': 8, 'max_features': 0.4, 'n_estimators': 200}

cross validation results (MEAN)
 [0.3217211  0.36584249 0.39068223 0.3731685  0.36429716 0.39028159
 0.35628434 0.3643544  0.40219907 0.36836717 0.35057362 0.35800773
 0.36036706 0.36843712 0.39246286 0.35119684 0.34376272 0.36081222
 0.36570004 0.37678063 0.3003981  0.34125712 0.39334554 0.3582036
 0.3249682  0.38658679 0.33628409 0.37527091 0.38403922 0.37699939
 0.34033501 0.33530474 0.34940222 0.40310338 0.37207087 0.39410613]

cross validation results (STD)
 [0.06588237 0.07671737 0.0526869  0.05115823 0.01929463 0.03655121
 0.05858589 0.06137058 0.04161694 0.05889168 0.03812677 0.03985388
 0.08040715 0.06634118 0.06382377 0.09182119 0.07264234 0.08207307
 0.08956899 0.05852399 0.02569471 0.05809063 0.04346685 0.05803948
 0.03465696 0.05484118 0.04742965 0.06666313 0.09695503 0.04967824
 0.0326774  0.04

[CV]  criterion=entropy, max_depth=2, max_features=0.4, n_estimators=100, score=0.5547619047619048, total=   0.5s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.30357142857142855, total=   1.6s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.30092592592592593, total=   1.8s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.48214285714285715, total=   1.8s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.375, total=   1.9s
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.5, total=   1.9s
[CV] criterion=entropy, max_depth=4, max_features

[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:    4.0s


[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.30357142857142855, total=   1.8s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.26851851851851855, total=   2.0s
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.2916666666666667, total=   2.0s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.38425925925925924, total=   3.9s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV] criterion=entropy, max_depth=4, max_features=0.4, n_estimators=100 
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.44166666666666665, total=   3.9s
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.2916666666666667, total=   3.9s
[CV] criterion=entro

[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.5083333333333333, total=   1.0s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.34722222222222215, total=   1.0s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.16666666666666666, total=   1.0s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.30952380952380953, total=   1.0s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=200 
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=400, score=0.44166666666666665, to

[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.26851851851851855, total=   5.2s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.30357142857142855, total=   3.9s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.39285714285714285, total=   3.5s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.4166666666666667, total=   4.0s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.4666666666666666, total=   5.5s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.31666666666666665, total=   5.0s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.44047619047619047, total=   5.0s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.30357142857142855, total=   5.2s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.

[Parallel(n_jobs=-1)]: Done 178 out of 180 | elapsed:   13.6s remaining:    0.2s
[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:   13.7s finished



final score for the tuned model
 0.4867724867724868

best hyperparameters for the tuned model
 {'criterion': 'entropy', 'max_depth': 8, 'max_features': 0.4, 'n_estimators': 100}

cross validation results (MEAN)
 [0.40782585 0.36872329 0.391439   0.38376959 0.36603327 0.35858644
 0.38813085 0.37130393 0.40470085 0.3804042  0.37204798 0.38510887
 0.39837581 0.36248982 0.31618844 0.32123652 0.37156339 0.35067155
 0.35483822 0.34425621 0.38776582 0.3593254  0.35465507 0.35806115
 0.37056624 0.33795406 0.32025845 0.33416005 0.41175341 0.3590405
 0.37104192 0.33211742 0.41601674 0.40538258 0.37621083 0.36721866]

cross validation results (STD)
 [0.06606908 0.08748139 0.08505462 0.08379458 0.04000837 0.0425339
 0.08659753 0.06298157 0.08457008 0.10061115 0.06875058 0.07318489
 0.05692421 0.06320272 0.03864498 0.06864373 0.07665462 0.08065973
 0.08287628 0.0424178  0.08455949 0.04821799 0.06759022 0.07937876
 0.10599078 0.109683   0.05993204 0.06139101 0.11027138 0.12986841
 0.06515178 0.1064

[CV]  criterion=entropy, max_depth=2, max_features=0.4, n_estimators=100, score=0.5261904761904762, total=   0.7s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.34722222222222215, total=   1.8s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.5, total=   1.8s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.44047619047619047, total=   1.8s
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.375, total=   1.8s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV] criterion=entropy, max_depth=4, max_features=0.1, n_estimators=100 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.392857142

[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:    4.2s


[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.44166666666666665, total=   1.9s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.4583333333333333, total=   1.9s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.3351851851851852, total=   1.9s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.5297619047619048, total=   1.9s
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.5, total=   4.0s
[CV] criterion=entropy, max_depth=4, max_features=0.4, n_estimators=100 
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.375, total=   4.0s
[CV] criterion=entropy, max_depth=4, max_features=0

[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.375, total=   1.0s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.4388888888888889, total=   1.1s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=800, score=0.3452380952380952, total=   4.2s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.46547619047619043, total=   1.0s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=200 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.41785714285714287, total=   1.0s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=200 
[CV

[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.41785714285714287, total=   3.8s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.4166666666666667, total=   4.1s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.3972222222222222, total=   5.8s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.44166666666666665, total=   5.8s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.4845238095238095, total=   5.4s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.4583333333333333, total=   5.6s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.35714285714285715, total=   5.7s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.39999999999999997, total=   2.6s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.32

[Parallel(n_jobs=-1)]: Done 178 out of 180 | elapsed:   14.5s remaining:    0.2s


[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=800, score=0.3988095238095238, total=   3.9s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=800, score=0.4166666666666667, total=   4.2s


[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:   14.9s finished



final score for the tuned model
 0.4523809523809524

best hyperparameters for the tuned model
 {'criterion': 'entropy', 'max_depth': 8, 'max_features': 0.2, 'n_estimators': 400}

cross validation results (MEAN)
 [0.41245676 0.41852361 0.41038996 0.40157585 0.36970264 0.38572828
 0.38629426 0.38555021 0.42160282 0.40301816 0.38797695 0.40163945
 0.36542786 0.42849893 0.4508954  0.39334046 0.39462887 0.44570487
 0.39692333 0.44225809 0.41657764 0.45650438 0.42006385 0.42761752
 0.44231532 0.4107906  0.37140313 0.41844348 0.4136307  0.45455332
 0.47961564 0.4266509  0.44592745 0.46127137 0.4031339  0.43388533]

cross validation results (STD)
 [0.07001289 0.10329255 0.05432858 0.06526334 0.03780475 0.07049583
 0.05012853 0.06543644 0.05280589 0.07792284 0.07135703 0.11069208
 0.08507043 0.04542723 0.06740346 0.09077835 0.06056428 0.06405625
 0.03592329 0.06609594 0.04523689 0.04293772 0.09521409 0.04476251
 0.08163755 0.03957292 0.08294404 0.05008924 0.05395381 0.0954381
 0.05881381 0.040

[CV]  criterion=entropy, max_depth=2, max_features=0.4, n_estimators=100, score=0.21428571428571427, total=   0.7s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.4212962962962963, total=   2.2s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.25, total=   2.2s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.39285714285714285, total=   2.2s
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.3511904761904762, total=   2.2s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.2916666666666667, total=   2.2s
[CV] criterion=entropy, max_depth=4,

[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:    4.7s


[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.4166666666666667, total=   2.0s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.3511904761904762, total=   2.0s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.25, total=   2.1s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.38425925925925924, total=   4.2s
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.34722222222222215, total=   2.1s
[CV] criterion=entropy, max_depth=4, max_features=0.4, n_estimators=100 
[CV] criterion=entropy, max_depth=4, max_features=0.4, n_estimators=100 
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, sco

[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.4138888888888889, total=   1.0s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.375, total=   1.0s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.3452380952380952, total=   1.0s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.2916666666666667, total=   1.1s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=200 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.30952380952380953, total=   1.0s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=200 
[CV]

[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.3351851851851852, total=   5.6s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.375, total=   5.6s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.3988095238095238, total=   4.1s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.3511904761904762, total=   3.9s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.4047619047619048, total=   5.4s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.3988095238095238, total=   5.5s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.3351851851851852, total=   2.6s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.4166666666666667, total=   5.7s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.35833333333333334, 

[Parallel(n_jobs=-1)]: Done 178 out of 180 | elapsed:   15.0s remaining:    0.2s
[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:   15.0s finished



final score for the tuned model
 0.4682539682539682

best hyperparameters for the tuned model
 {'criterion': 'entropy', 'max_depth': 8, 'max_features': 0.2, 'n_estimators': 400}

cross validation results (MEAN)
 [0.33454798 0.36036706 0.34158145 0.37597934 0.34382631 0.36179792
 0.34457036 0.3861162  0.34245269 0.34101547 0.35097426 0.35418574
 0.36297187 0.36317791 0.36946734 0.34216015 0.3430988  0.35692664
 0.30980744 0.32473291 0.37294846 0.35829645 0.3645172  0.38529329
 0.29972782 0.34808455 0.36821454 0.36582087 0.34770808 0.36162113
 0.43707265 0.37366835 0.38036477 0.38390822 0.38633496 0.36842058]

cross validation results (STD)
 [0.04970195 0.01988165 0.06375134 0.04568402 0.05058601 0.06619248
 0.07920161 0.06000981 0.06898369 0.04125127 0.02621858 0.03594144
 0.06851522 0.06753414 0.07878425 0.05354769 0.08867903 0.05316354
 0.03641211 0.02934504 0.03986752 0.05960502 0.04090908 0.02931386
 0.03474341 0.04463755 0.04252431 0.05573194 0.04952763 0.04000849
 0.06145668 0.02

[CV]  criterion=entropy, max_depth=2, max_features=0.4, n_estimators=100, score=0.2261904761904762, total=   0.7s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.375, total=   2.1s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.25, total=   2.1s
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.4166666666666667, total=   2.1s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.21428571428571427, total=   2.1s
[CV] criterion=entropy, max_depth=4, max_features=0.1, n_estimators=100 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.482142857

[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:    4.7s


[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.22685185185185186, total=   2.0s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.30357142857142855, total=   2.0s
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.25, total=   2.1s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.26785714285714285, total=   2.0s
[CV] criterion=entropy, max_depth=4, max_features=0.4, n_estimators=100 
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.33796296296296297, total=   4.3s
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.17857142857142858, total=   4.2s
[CV] criterion=entropy, max_depth

[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.1898148148148148, total=   1.0s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.25, total=   1.0s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.2261904761904762, total=   1.0s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.375, total=   1.0s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=200 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.43452380952380953, total=   1.0s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=400, score=0.268518518

[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.17857142857142858, total=   4.2s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.375, total=   4.4s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.2916666666666667, total=   6.0s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.375, total=   5.7s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.26851851851851855, total=   6.4s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.17857142857142858, total=   5.6s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.2611111111111111, total=   2.7s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.2261904761904762, total=   2.3s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.48214285714285715, total=   5

[Parallel(n_jobs=-1)]: Done 178 out of 180 | elapsed:   15.4s remaining:    0.2s


[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=800, score=0.48214285714285715, total=   4.1s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=800, score=0.4166666666666667, total=   4.2s


[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:   15.7s finished



final score for the tuned model
 0.41269841269841273

best hyperparameters for the tuned model
 {'criterion': 'entropy', 'max_depth': 2, 'max_features': 0.1, 'n_estimators': 200}

cross validation results (MEAN)
 [0.35050366 0.36549908 0.34827152 0.32384514 0.31714235 0.33585801
 0.34118081 0.32475453 0.32094526 0.28367928 0.28310058 0.2999275
 0.28136828 0.31042048 0.29249339 0.29151404 0.32548967 0.30676129
 0.30601725 0.32671322 0.26593025 0.30112943 0.27532306 0.31847782
 0.34474715 0.2934155  0.33570538 0.34297416 0.35629452 0.29613986
 0.30627671 0.30553266 0.28770477 0.33516356 0.32646902 0.32532433]

cross validation results (STD)
 [0.07867468 0.08227811 0.09991314 0.08697988 0.0766418  0.07028328
 0.08580388 0.10270475 0.09409897 0.11300065 0.11741777 0.11743964
 0.06544622 0.09081118 0.06730425 0.07692845 0.05386235 0.05988972
 0.07934507 0.09319778 0.08238744 0.09234853 0.08650796 0.10116137
 0.09456586 0.09323786 0.10417154 0.10429034 0.16486015 0.10266943
 0.09995    0.08

  'precision', 'predicted', average, warn_for)


[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=100 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=100 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=100 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=100 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=100 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=200 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=200 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=200 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=200 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=200 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400 
[CV] criterion=entropy, max_depth=2, max_features=0

[CV]  criterion=entropy, max_depth=2, max_features=0.4, n_estimators=200, score=0.3988095238095238, total=   1.2s
[CV]  criterion=entropy, max_depth=2, max_features=0.4, n_estimators=200, score=0.2916666666666667, total=   1.3s
[CV] criterion=entropy, max_depth=4, max_features=0.1, n_estimators=100 
[CV] criterion=entropy, max_depth=4, max_features=0.1, n_estimators=200 
[CV]  criterion=entropy, max_depth=2, max_features=0.4, n_estimators=200, score=0.4047619047619048, total=   1.3s
[CV] criterion=entropy, max_depth=4, max_features=0.1, n_estimators=200 
[CV]  criterion=entropy, max_depth=2, max_features=0.4, n_estimators=200, score=0.3333333333333333, total=   1.4s
[CV] criterion=entropy, max_depth=4, max_features=0.1, n_estimators=200 
[CV]  criterion=entropy, max_depth=2, max_features=0.4, n_estimators=200, score=0.46759259259259256, total=   1.5s
[CV] criterion=entropy, max_depth=4, max_features=0.1, n_estimators=200 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estima

[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:    4.6s


[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.375, total=   2.0s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.30952380952380953, total=   2.1s
[CV] criterion=entropy, max_depth=4, max_features=0.4, n_estimators=100 
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.3333333333333333, total=   4.3s
[CV] criterion=entropy, max_depth=4, max_features=0.4, n_estimators=100 
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.3988095238095238, total=   4.3s
[CV] criterion=entropy, max_depth=4, max_features=0.4, n_estimators=100 
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.375, total=   4.3s
[CV] criterion=entropy, max_depth=4, max_features=0.4, n_estimators=100 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.30952380

[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.3333333333333333, total=   1.1s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.48809523809523814, total=   1.0s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=200 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.36309523809523814, total=   1.0s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=200 
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=400, score=0.3101851851851852, total=   2.9s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=200 
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=400, score=0.375, total=   3.0s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=200 
[CV

[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.44166666666666665, total=   6.1s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.30952380952380953, total=   5.7s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.4166666666666667, total=   5.7s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.35714285714285715, total=   5.8s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.4259259259259259, total=   3.0s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.3333333333333333, total=   2.9s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.4583333333333333, total=   2.6s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.31547619047619047, total=   2.7s
[CV]  criterion=entropy, max_depth=8, max_features=0.2, n_estimators=800, score=0.48

[Parallel(n_jobs=-1)]: Done 178 out of 180 | elapsed:   14.9s remaining:    0.2s


[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=800, score=0.31547619047619047, total=   3.9s


[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:   15.2s finished



final score for the tuned model
 0.31746031746031744

best hyperparameters for the tuned model
 {'criterion': 'entropy', 'max_depth': 2, 'max_features': 0.1, 'n_estimators': 200}

cross validation results (MEAN)
 [0.37816061 0.42766204 0.40242165 0.41083511 0.37105718 0.3805581
 0.39475224 0.4031657  0.37513863 0.37964871 0.37890466 0.37089184
 0.33672924 0.39458689 0.37781721 0.38755342 0.35083181 0.38362714
 0.36093305 0.36036706 0.3736569  0.36340049 0.3628345  0.37497329
 0.39036427 0.38996362 0.39414683 0.41017247 0.30950855 0.34236111
 0.40670406 0.40135836 0.36231303 0.40488909 0.37014779 0.36345772]

cross validation results (STD)
 [0.05556653 0.06445504 0.05496133 0.05328321 0.05578575 0.07571138
 0.06917216 0.05770571 0.04740655 0.06183974 0.05805979 0.05734306
 0.05469354 0.03219364 0.06168567 0.04485808 0.07942985 0.04208911
 0.04455697 0.05883375 0.07614641 0.03924492 0.03484066 0.04792694
 0.06280459 0.05182825 0.0820439  0.06044077 0.083569   0.0736786
 0.08281393 0.053

  'precision', 'predicted', average, warn_for)


[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=100 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=100 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=100 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=100 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=100 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=200 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=200 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=200 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=200 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=200 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400 
[CV] criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400 
[CV] criterion=entropy, max_depth=2, max_features=0

[CV] criterion=entropy, max_depth=4, max_features=0.1, n_estimators=100 
[CV]  criterion=entropy, max_depth=2, max_features=0.4, n_estimators=200, score=0.4666666666666666, total=   1.2s
[CV] criterion=entropy, max_depth=4, max_features=0.1, n_estimators=100 
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=400, score=0.25, total=   2.5s
[CV] criterion=entropy, max_depth=4, max_features=0.1, n_estimators=200 
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=400, score=0.43452380952380953, total=   2.2s
[CV]  criterion=entropy, max_depth=2, max_features=0.4, n_estimators=200, score=0.375, total=   1.4s
[CV]  criterion=entropy, max_depth=2, max_features=0.4, n_estimators=200, score=0.30952380952380953, total=   1.2s
[CV] criterion=entropy, max_depth=4, max_features=0.1, n_estimators=200 
[CV] criterion=entropy, max_depth=4, max_features=0.1, n_estimators=200 
[CV] criterion=entropy, max_depth=4, max_features=0.1, n_estimators=200 
[CV]  criterion=e

[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:    4.6s


[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.49722222222222223, total=   4.1s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.49722222222222223, total=   2.0s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.25, total=   2.0s
[CV] criterion=entropy, max_depth=4, max_features=0.4, n_estimators=100 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.41785714285714287, total=   2.0s
[CV] criterion=entropy, max_depth=4, max_features=0.4, n_estimators=100 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.3511904761904762, total=   2.0s
[CV] criterion=entropy, max_depth=4, max_features=0.4, n_estimators=100 
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, sc

[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.38690476190476186, total=   1.0s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.3511904761904762, total=   1.0s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=800, score=0.39285714285714285, total=   4.2s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=200 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=800, score=0.3511904761904762, total=   4.3s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=200 
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=400, score=0.6601851851851852, total=   2.9s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estima

[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.6972222222222223, total=   5.9s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.3511904761904762, total=   4.1s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.44047619047619047, total=   5.6s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.375, total=   5.7s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.5935185185185186, total=   2.6s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.30952380952380953, total=   5.7s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.3833333333333333, total=   2.6s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.44047619047619047, total=   2.5s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.3511904761904762

[Parallel(n_jobs=-1)]: Done 178 out of 180 | elapsed:   15.0s remaining:    0.2s


[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=800, score=0.39285714285714285, total=   4.1s


[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:   15.4s finished



final score for the tuned model
 0.4338624338624339

best hyperparameters for the tuned model
 {'criterion': 'entropy', 'max_depth': 8, 'max_features': 0.1, 'n_estimators': 100}

cross validation results (MEAN)
 [0.36503231 0.3644536  0.37524802 0.40203373 0.38365639 0.39554335
 0.32689128 0.37406899 0.38137846 0.41950168 0.37887668 0.38873245
 0.36087709 0.41984509 0.3843254  0.41044719 0.39244124 0.41019027
 0.39087429 0.40375712 0.40746083 0.43538487 0.42819623 0.45325855
 0.4634361  0.40733491 0.44172009 0.3977297  0.38717058 0.34788487
 0.41062525 0.42859687 0.4039797  0.40742521 0.41409366 0.41334961]

cross validation results (STD)
 [0.10211766 0.07610819 0.0549442  0.03260391 0.05455442 0.05271363
 0.06214502 0.08634316 0.05964429 0.06535762 0.09018601 0.063491
 0.07630603 0.12492312 0.08256235 0.08329516 0.12858422 0.05352955
 0.0557243  0.06888896 0.10137718 0.09658718 0.13772448 0.13413703
 0.09612834 0.0578975  0.0840053  0.09857543 0.09189876 0.07682738
 0.04865037 0.0619

[CV]  criterion=entropy, max_depth=2, max_features=0.4, n_estimators=100, score=0.3452380952380952, total=   0.7s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.3988095238095238, total=   1.9s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.3055555555555555, total=   1.9s
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.21428571428571427, total=   1.9s
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV] criterion=entropy, max_depth=2, max_features=0.4, n_estimators=800 
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.375, total=   1.9s
[CV]  criterion=entropy, max_depth=2, max_features=0.1, n_estimators=400, score=0.4583333333333333, total=   1.9s
[CV] criterion=entropy, max_depth=4,

[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:    4.4s


[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.39999999999999997, total=   1.9s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.34722222222222215, total=   2.0s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.44642857142857145, total=   2.0s
[CV] criterion=entropy, max_depth=4, max_features=0.2, n_estimators=800 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=400, score=0.4583333333333333, total=   2.0s
[CV] criterion=entropy, max_depth=4, max_features=0.4, n_estimators=100 
[CV]  criterion=entropy, max_depth=2, max_features=0.2, n_estimators=800, score=0.3333333333333333, total=   4.2s
[CV] criterion=entropy, max_depth=4, max_features=0.4, n_estimators=100 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_esti

[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.375, total=   1.0s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=4, max_features=0.1, n_estimators=800, score=0.2619047619047619, total=   4.1s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.1898148148148148, total=   1.0s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.30952380952380953, total=   0.9s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=100 
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=200, score=0.2916666666666667, total=   1.0s
[CV] criterion=entropy, max_depth=8, max_features=0.2, n_estimators=200 
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=400, sco

[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.3055555555555555, total=   4.0s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.375, total=   4.0s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.1898148148148148, total=   5.7s
[CV]  criterion=entropy, max_depth=8, max_features=0.1, n_estimators=800, score=0.30357142857142855, total=   3.9s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.39999999999999997, total=   6.0s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.23148148148148148, total=   2.4s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=400, score=0.5083333333333333, total=   2.4s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.27499999999999997, total=   5.4s
[CV]  criterion=entropy, max_depth=4, max_features=0.4, n_estimators=800, score=0.488095238095238

[Parallel(n_jobs=-1)]: Done 178 out of 180 | elapsed:   14.7s remaining:    0.2s


[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=800, score=0.35833333333333334, total=   4.2s
[CV]  criterion=entropy, max_depth=8, max_features=0.4, n_estimators=800, score=0.26785714285714285, total=   4.2s


[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:   15.1s finished



final score for the tuned model
 0.47883597883597884

best hyperparameters for the tuned model
 {'criterion': 'entropy', 'max_depth': 4, 'max_features': 0.1, 'n_estimators': 400}

cross validation results (MEAN)
 [0.30326618 0.37466931 0.35080891 0.3760493  0.29272868 0.30034086
 0.31865588 0.35207443 0.33221408 0.33255749 0.33999161 0.35557718
 0.35872507 0.37433226 0.40015644 0.35305377 0.37775616 0.34144027
 0.3326834  0.3152905  0.31500941 0.27065909 0.30391229 0.32068198
 0.33080103 0.28580332 0.33326211 0.35115232 0.37108771 0.34167557
 0.31551307 0.35265313 0.32867699 0.36028821 0.31817511 0.32955332]

cross validation results (STD)
 [0.07900357 0.0719319  0.08314647 0.06891585 0.08325736 0.09914046
 0.07559321 0.08671182 0.10221612 0.07138206 0.07947609 0.10147916
 0.08634278 0.06900162 0.04635734 0.10270239 0.11693653 0.11708431
 0.09455576 0.12810215 0.05261722 0.0981117  0.08739937 0.1066247
 0.09474248 0.06109552 0.07647399 0.03926345 0.09613741 0.10751922
 0.07143715 0.07

In [7]:
line = '../RESULTS/PV-DM-CONC/' 
names = ['25_5_5_0', '25_5_10_0','25_5_0_1','25_10_5_0','25_10_10_0','25_10_0_1',
         '50_5_5_0', '50_5_10_0','50_5_0_1','50_10_5_0','50_10_10_0','50_10_0_1',
         '75_5_5_0', '75_5_10_0','75_5_0_1','75_10_5_0','75_10_10_0','75_10_0_1',
         '100_5_5_0', '100_5_10_0','100_5_0_1','100_10_5_0','100_10_10_0','100_10_0_1']

In [10]:
for i in names:
    print (i)
    path = os.path.join(line, i)
    X_train = np.load(os.path.join(path, 'vectors_train.npy'))
    X_dev = np.load(os.path.join(path, 'vectors_dev.npy'))
    y_train = np.load(os.path.join(path, 'labels_train.npy'))
    y_dev = np.load(os.path.join(path, 'labels_dev.npy'))
    y_train = np.ravel(y_train)
    y_dev = np.ravel(y_dev)
    
    feature_name = 'TEXT_'+str(i)
    save_dir = '../RESULTS/PV-DM-CONC/' 
    random_forest = RandomForestCV(feature_name, X_train, y_train, X_dev, y_dev, save_dir)
    random_forest.run()
    print ("------"  * 10)
    

25_5_5_0


[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed:    1.3s
[Parallel(n_jobs=-1)]: Done 736 tasks      | elapsed:    2.4s
[Parallel(n_jobs=-1)]: Done 800 out of 800 | elapsed:    2.5s finished
[Parallel(n_jobs=32)]: Done 136 tasks      | elapsed:    0.1s
[Parallel(n_jobs=32)]: Done 386 tasks      | elapsed:    0.2s
[Parallel(n_jobs=32)]: Done 736 tasks      | elapsed:    0.4s
[Parallel(n_jobs=32)]: Done 800 out of 800 | elapsed:    0.4s finished
[Parallel(n_jobs=32)]: Done 136 tasks      | elapsed:    0.1s
[Parallel(n_jobs=32)]: Done 386 tasks      | elapsed:    0.2s
[Parallel(n_jobs=32)]: Done 736 tasks      | elapsed:    0.4s
[Parallel(n_jobs=32)]: Done 800 out of 800 | elapsed:    0.4s finished



accuracy on training set: 1.0000

accuracy on development set: 0.3500

precision on dev set: 0.3274

recall on dev set: 0.3386

fscore on dev set: 0.3207
------------------------------------------------------------
25_5_10_0


[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed:    1.2s
[Parallel(n_jobs=-1)]: Done 736 tasks      | elapsed:    2.3s
[Parallel(n_jobs=-1)]: Done 800 out of 800 | elapsed:    2.4s finished
[Parallel(n_jobs=32)]: Done 136 tasks      | elapsed:    0.1s
[Parallel(n_jobs=32)]: Done 386 tasks      | elapsed:    0.1s
[Parallel(n_jobs=32)]: Done 736 tasks      | elapsed:    0.3s
[Parallel(n_jobs=32)]: Done 800 out of 800 | elapsed:    0.3s finished
[Parallel(n_jobs=32)]: Done 136 tasks      | elapsed:    0.1s
[Parallel(n_jobs=32)]: Done 386 tasks      | elapsed:    0.2s
[Parallel(n_jobs=32)]: Done 736 tasks      | elapsed:    0.4s
[Parallel(n_jobs=32)]: Done 800 out of 800 | elapsed:    0.4s finished



accuracy on training set: 0.8846

accuracy on development set: 0.4167

precision on dev set: 0.4222

recall on dev set: 0.4233

fscore on dev set: 0.4144
------------------------------------------------------------
25_5_0_1


[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:    1.2s finished
[Parallel(n_jobs=32)]: Done 136 tasks      | elapsed:    0.1s
[Parallel(n_jobs=32)]: Done 400 out of 400 | elapsed:    0.2s finished
[Parallel(n_jobs=32)]: Done 136 tasks      | elapsed:    0.1s
[Parallel(n_jobs=32)]: Done 400 out of 400 | elapsed:    0.2s finished



accuracy on training set: 1.0000

accuracy on development set: 0.4667

precision on dev set: 0.4614

recall on dev set: 0.4550

fscore on dev set: 0.4423
------------------------------------------------------------
25_10_5_0


[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed:    1.3s
[Parallel(n_jobs=-1)]: Done 736 tasks      | elapsed:    2.4s
[Parallel(n_jobs=-1)]: Done 800 out of 800 | elapsed:    2.5s finished
[Parallel(n_jobs=32)]: Done 136 tasks      | elapsed:    0.1s
[Parallel(n_jobs=32)]: Done 386 tasks      | elapsed:    0.2s
[Parallel(n_jobs=32)]: Done 736 tasks      | elapsed:    0.4s
[Parallel(n_jobs=32)]: Done 800 out of 800 | elapsed:    0.4s finished
[Parallel(n_jobs=32)]: Done 136 tasks      | elapsed:    0.1s
[Parallel(n_jobs=32)]: Done 386 tasks      | elapsed:    0.2s



accuracy on training set: 1.0000

accuracy on development set: 0.3667

precision on dev set: 0.3479

recall on dev set: 0.3545

fscore on dev set: 0.3335
------------------------------------------------------------
25_10_10_0

hyperparameters are not tuned yet

running the Grid Search for Random Forest CV classifier ...


[Parallel(n_jobs=32)]: Done 736 tasks      | elapsed:    0.5s
[Parallel(n_jobs=32)]: Done 800 out of 800 | elapsed:    0.5s finished



final score for the tuned model
 0.35185185185185186

best hyperparameters for the tuned model
 {'criterion': 'entropy', 'max_depth': 8, 'max_features': 0.1, 'n_estimators': 200}

cross validation results (MEAN)
 [0.31333308 0.34600758 0.33690731 0.35233262 0.35277905 0.34715227
 0.37595772 0.35175392 0.34334046 0.33510506 0.35209732 0.3577241
 0.37076084 0.34466321 0.36990359 0.36464947 0.333285   0.3573807
 0.37140822 0.37942104 0.37180886 0.3488922  0.34674654 0.37180886
 0.32690909 0.38040039 0.34105744 0.34714718 0.32332112 0.32274242
 0.3639741  0.37100758 0.36328729 0.33418931 0.34428546 0.36259412]

cross validation results (STD)
 [0.06862583 0.07545148 0.04048758 0.05472057 0.06145626 0.07761264
 0.06492135 0.07116241 0.05087727 0.03249393 0.06940945 0.0665406
 0.06810931 0.05625784 0.04006701 0.04361779 0.09155963 0.05093304
 0.06459    0.05954144 0.10696793 0.06159113 0.05610828 0.06641293
 0.08221257 0.07940355 0.05764536 0.09513552 0.05504836 0.06436323
 0.06950187 0.0728

  'precision', 'predicted', average, warn_for)



final score for the tuned model
 0.4656084656084656

best hyperparameters for the tuned model
 {'criterion': 'entropy', 'max_depth': 4, 'max_features': 0.4, 'n_estimators': 100}

cross validation results (MEAN)
 [0.41690196 0.43751908 0.41880342 0.41994811 0.35859916 0.35058633
 0.36603327 0.36603327 0.34848138 0.37999847 0.36775666 0.35974384
 0.39123423 0.45716702 0.42527727 0.43260328 0.38653465 0.40673458
 0.43005698 0.44710648 0.47975936 0.45215456 0.44055377 0.4224448
 0.36849054 0.47669668 0.45185694 0.44200117 0.41182336 0.42278312
 0.44350071 0.45714031 0.35433074 0.43183761 0.47060185 0.45417557]

cross validation results (STD)
 [0.04805951 0.07325526 0.03002238 0.03222153 0.04038761 0.06192434
 0.04158663 0.04158663 0.08523519 0.07018378 0.04514934 0.04473549
 0.0898053  0.06297079 0.07508717 0.05056089 0.07073477 0.12577511
 0.1046634  0.05908336 0.11167291 0.09088762 0.1130559  0.07847799
 0.12871879 0.08978746 0.0907545  0.0952111  0.11483435 0.10683442
 0.07119932 0.097


hyperparameters are not tuned yet

running the Grid Search for Random Forest CV classifier ...

final score for the tuned model
 0.42328042328042326

best hyperparameters for the tuned model
 {'criterion': 'entropy', 'max_depth': 4, 'max_features': 0.4, 'n_estimators': 400}

cross validation results (MEAN)
 [0.33873245 0.35149573 0.38555021 0.36030983 0.37440731 0.35630342
 0.35790598 0.36271368 0.32421907 0.36369302 0.39197828 0.33610602
 0.37942104 0.39213853 0.39179513 0.4093661  0.41396902 0.33720111
 0.42597044 0.41755698 0.39335317 0.41715634 0.44132072 0.4099448
 0.41347044 0.39015822 0.41557667 0.41790039 0.38869429 0.3684562
 0.34287241 0.37572497 0.35109763 0.39255189 0.39393697 0.39022817]

cross validation results (STD)
 [0.06630864 0.09393697 0.06543644 0.06299499 0.08961641 0.05811787
 0.05591613 0.05364435 0.10348381 0.09698665 0.08840488 0.08615926
 0.06503031 0.09857732 0.08891508 0.09387844 0.13395343 0.09253081
 0.09833158 0.08707118 0.08731221 0.08718657 0.09981118


hyperparameters are not tuned yet

running the Grid Search for Random Forest CV classifier ...

final score for the tuned model
 0.35978835978835977

best hyperparameters for the tuned model
 {'criterion': 'entropy', 'max_depth': 4, 'max_features': 0.2, 'n_estimators': 100}

cross validation results (MEAN)
 [0.4108313  0.4440845  0.41884412 0.4222095  0.4048166  0.43080103
 0.44368386 0.42947828 0.42498728 0.43776709 0.45704238 0.44219577
 0.42510684 0.44456019 0.45337429 0.4625318  0.48515084 0.41739672
 0.44490359 0.41427172 0.45069063 0.47206197 0.44984356 0.45395299
 0.45842236 0.47398504 0.44902447 0.46505011 0.38650794 0.44936788
 0.45852539 0.47495167 0.40787927 0.43474512 0.43787012 0.46751755]

cross validation results (STD)
 [0.04310933 0.0802922  0.04399341 0.06063221 0.08572919 0.11657622
 0.08499374 0.08007326 0.0913731  0.0502633  0.07013784 0.05854132
 0.09916633 0.05704514 0.0716679  0.06955575 0.06909949 0.06463321
 0.07859954 0.06468142 0.11500297 0.09895301 0.061709

In [11]:
line = '../RESULTS/PV-DBOW/' 
names = ['25_5_5_0', '25_5_10_0','25_5_0_1',
         '50_5_5_0', '50_5_10_0','50_5_0_1',
         '75_5_5_0', '75_5_10_0','75_5_0_1',
         '100_5_5_0', '100_5_10_0','100_5_0_1']

for i in names:
    print (i)
    path = os.path.join(line, i)
    X_train = np.load(os.path.join(path, 'vectors_train.npy'))
    X_dev = np.load(os.path.join(path, 'vectors_dev.npy'))
    y_train = np.load(os.path.join(path, 'labels_train.npy'))
    y_dev = np.load(os.path.join(path, 'labels_dev.npy'))
    y_train = np.ravel(y_train)
    y_dev = np.ravel(y_dev)
    
    feature_name = 'TEXT_'+str(i)
    save_dir = '../RESULTS/PV-DBOW/' 
    random_forest = RandomForestCV(feature_name, X_train, y_train, X_dev, y_dev, save_dir)
    random_forest.run()
    print ("------"  * 10)

25_5_5_0

hyperparameters are not tuned yet

running the Grid Search for Random Forest CV classifier ...

final score for the tuned model
 0.48412698412698413

best hyperparameters for the tuned model
 {'criterion': 'entropy', 'max_depth': 4, 'max_features': 0.4, 'n_estimators': 100}

cross validation results (MEAN)
 [0.38046144 0.38400488 0.38440553 0.37541336 0.38500076 0.37342796
 0.39524573 0.38568758 0.39746642 0.37899115 0.40652091 0.38894994
 0.40582774 0.36575855 0.3712645  0.34864545 0.37979879 0.3968661
 0.36142654 0.39628739 0.46898148 0.40301307 0.40237714 0.44003739
 0.36278872 0.38814866 0.42430937 0.39419897 0.40856481 0.43216829
 0.42569444 0.39540598 0.38217974 0.39065553 0.35905703 0.40039174]

cross validation results (STD)
 [0.07400516 0.07609741 0.0833356  0.07042404 0.10400655 0.07198909
 0.09989018 0.07154257 0.07612881 0.10059399 0.1046911  0.08647301
 0.06745409 0.06773895 0.08097887 0.0988753  0.0923493  0.12106719
 0.10118666 0.12061672 0.05830537 0.08070596 


final score for the tuned model
 0.36772486772486773

best hyperparameters for the tuned model
 {'criterion': 'entropy', 'max_depth': 4, 'max_features': 0.2, 'n_estimators': 200}

cross validation results (MEAN)
 [0.40323947 0.40347477 0.40843127 0.41718814 0.37941595 0.38138863
 0.40818707 0.40957596 0.34692842 0.3911948  0.36519765 0.37343305
 0.33622685 0.34870905 0.38473621 0.38531492 0.34315222 0.44072421
 0.40142196 0.36179792 0.35391738 0.36668066 0.37735551 0.364535
 0.36289047 0.35479879 0.38166082 0.36824379 0.41025259 0.38019943
 0.31759259 0.366846   0.36589082 0.30024929 0.32525438 0.34580662]

cross validation results (STD)
 [0.08628875 0.07077564 0.05074729 0.07713514 0.05962311 0.06896129
 0.07128116 0.0524361  0.08640992 0.08699727 0.06205622 0.03342587
 0.06337578 0.06456923 0.06132135 0.04755793 0.07803374 0.11639397
 0.10235131 0.09034307 0.08892644 0.07011872 0.09930207 0.07747486
 0.06446427 0.04618191 0.11079221 0.08795312 0.08934053 0.1022245
 0.10701484 0.0766

In [47]:
#combined model avg
models1 = [
    doc2vec.Doc2Vec(dm=1, vector_size=25, window=5, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0,alpha=0.05),
    doc2vec.Doc2Vec(dm=1, vector_size=25, window=5, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0,alpha=0.05),
    doc2vec.Doc2Vec(dm=1, vector_size=25, window=5, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0,alpha=0.05),
    doc2vec.Doc2Vec(dm=1, vector_size=25, window=10, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0,alpha=0.05),
    doc2vec.Doc2Vec(dm=1, vector_size=25, window=10, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0,alpha=0.05),
    doc2vec.Doc2Vec(dm=1, vector_size=25, window=10, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0,alpha=0.05),
    
    doc2vec.Doc2Vec(dm=1, vector_size=50, window=5, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0,alpha=0.05),
    doc2vec.Doc2Vec(dm=1, vector_size=50, window=5, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0,alpha=0.05),
    doc2vec.Doc2Vec(dm=1, vector_size=50, window=5, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0,alpha=0.05),
    doc2vec.Doc2Vec(dm=1, vector_size=50, window=10, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0,alpha=0.05),
    doc2vec.Doc2Vec(dm=1, vector_size=50, window=10, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0,alpha=0.05),
    doc2vec.Doc2Vec(dm=1, vector_size=50, window=10, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0,alpha=0.05),
    
    doc2vec.Doc2Vec(dm=1, vector_size=75, window=5, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0,alpha=0.05),
    doc2vec.Doc2Vec(dm=1, vector_size=75, window=5, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0,alpha=0.05),
    doc2vec.Doc2Vec(dm=1, vector_size=75, window=5, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0,alpha=0.05),
    doc2vec.Doc2Vec(dm=1, vector_size=75, window=10, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0,alpha=0.05),
    doc2vec.Doc2Vec(dm=1, vector_size=75, window=10, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0,alpha=0.05),
    doc2vec.Doc2Vec(dm=1, vector_size=75, window=10, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0,alpha=0.05),
    
    doc2vec.Doc2Vec(dm=1, vector_size=100, window=5, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0,alpha=0.05),
    doc2vec.Doc2Vec(dm=1, vector_size=100, window=5, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0,alpha=0.05),
    doc2vec.Doc2Vec(dm=1, vector_size=100, window=5, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0,alpha=0.05),
    doc2vec.Doc2Vec(dm=1, vector_size=100, window=10, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0,alpha=0.05),
    doc2vec.Doc2Vec(dm=1, vector_size=100, window=10, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0,alpha=0.05),
    doc2vec.Doc2Vec(dm=1, vector_size=100, window=10, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0,alpha=0.05)
]

models2 = [
    doc2vec.Doc2Vec(dm=0, vector_size=25, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=0, vector_size=25, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=0, vector_size=25, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0),
    
    doc2vec.Doc2Vec(dm=0, vector_size=50, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=0, vector_size=50, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=0, vector_size=50, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0),
    
    doc2vec.Doc2Vec(dm=0, vector_size=75, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=0, vector_size=75, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=0, vector_size=75, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0),
    
    doc2vec.Doc2Vec(dm=0, vector_size=100, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=0, vector_size=100, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=0, vector_size=100, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0),
    
]

In [48]:
count = 0
for model in models1:
    print("\nbuilding vocabulary for doc2vec model %i" %count)
    model.build_vocab(all_docs)
    print("\nvocabulary scanned & built.")
    count += 1
    
count = 0
for model in models2:
    print("\nbuilding vocabulary for doc2vec model %i" %count)
    model.build_vocab(all_docs)
    print("\nvocabulary scanned & built.")
    count += 1


building vocabulary for doc2vec model 0

vocabulary scanned & built.

building vocabulary for doc2vec model 1

vocabulary scanned & built.

building vocabulary for doc2vec model 2

vocabulary scanned & built.

building vocabulary for doc2vec model 3

vocabulary scanned & built.

building vocabulary for doc2vec model 4

vocabulary scanned & built.

building vocabulary for doc2vec model 5

vocabulary scanned & built.

building vocabulary for doc2vec model 6

vocabulary scanned & built.

building vocabulary for doc2vec model 7

vocabulary scanned & built.

building vocabulary for doc2vec model 8

vocabulary scanned & built.

building vocabulary for doc2vec model 9

vocabulary scanned & built.

building vocabulary for doc2vec model 10

vocabulary scanned & built.

building vocabulary for doc2vec model 11

vocabulary scanned & built.

building vocabulary for doc2vec model 12

vocabulary scanned & built.

building vocabulary for doc2vec model 13

vocabulary scanned & built.

building vocabu

In [49]:
from gensim.test.test_doc2vec import ConcatenatedDoc2Vec

models = [
    ConcatenatedDoc2Vec([models2[0], models1[0]]),
    ConcatenatedDoc2Vec([models2[1], models1[1]]),
    ConcatenatedDoc2Vec([models2[2], models1[2]]),
    ConcatenatedDoc2Vec([models2[0], models1[3]]),
    ConcatenatedDoc2Vec([models2[1], models1[4]]),
    ConcatenatedDoc2Vec([models2[2], models1[5]]),
    
    ConcatenatedDoc2Vec([models2[3], models1[6]]),
    ConcatenatedDoc2Vec([models2[4], models1[7]]),
    ConcatenatedDoc2Vec([models2[5], models1[8]]),
    ConcatenatedDoc2Vec([models2[3], models1[9]]),
    ConcatenatedDoc2Vec([models2[4], models1[10]]),
    ConcatenatedDoc2Vec([models2[5], models1[11]]),
    
    ConcatenatedDoc2Vec([models2[6], models1[12]]),
    ConcatenatedDoc2Vec([models2[7], models1[13]]),
    ConcatenatedDoc2Vec([models2[8], models1[14]]),
    ConcatenatedDoc2Vec([models2[6], models1[15]]),
    ConcatenatedDoc2Vec([models2[7], models1[16]]),
    ConcatenatedDoc2Vec([models2[8], models1[17]]),
    
    ConcatenatedDoc2Vec([models2[9], models1[18]]),
    ConcatenatedDoc2Vec([models2[10], models1[19]]),
    ConcatenatedDoc2Vec([models2[11], models1[20]]),
    ConcatenatedDoc2Vec([models2[9], models1[21]]),
    ConcatenatedDoc2Vec([models2[10], models1[22]]),
    ConcatenatedDoc2Vec([models2[11], models1[23]])

]

In [50]:
from gensim.models.callbacks import CallbackAny2Vec
class EpochLogger(CallbackAny2Vec):
    '''Callback to log information about training'''
    
    def __init__(self):
        self.epoch = 0
    
    def on_epoch_begin(self, model):
        print("Epoch #{} start".format(self.epoch))
    
    def on_epoch_end(self, model):
        print("Epoch #{} end".format(self.epoch))
        self.epoch += 1
        
epoch_logger = EpochLogger()
count = 0
for model in models1:
    model.train(all_docs, total_examples=len(all_docs), epochs=100)
    print("\nmodel %i trained" %count)
    count += 1
    
count = 0
for model in models2:
    model.train(all_docs, total_examples=len(all_docs), epochs=100)
    print("\nmodel %i trained" %count)
    count += 1


model 0 trained

model 1 trained

model 2 trained

model 3 trained

model 4 trained

model 5 trained

model 6 trained

model 7 trained

model 8 trained

model 9 trained

model 10 trained

model 11 trained

model 12 trained

model 13 trained

model 14 trained

model 15 trained

model 16 trained

model 17 trained

model 18 trained

model 19 trained

model 20 trained

model 21 trained

model 22 trained

model 23 trained

model 0 trained

model 1 trained

model 2 trained

model 3 trained

model 4 trained

model 5 trained

model 6 trained

model 7 trained

model 8 trained

model 9 trained

model 10 trained

model 11 trained


In [52]:
count = 0
for model in models:
    infer_embedding_combined(model, models1[count], 'train', 'PV-COMBINED-AV')
    infer_embedding_combined(model, models1[count], 'dev', 'PV-COMBINED-AV')
    print("\nembedding inffered for model %i" %count)
    count += 1

saving in  ../RESULTS/PV-COMBINED-AV
Directory  ../RESULTS/PV-COMBINED-AV/25_5_5_0  Created 


  'See the migration notes for details: %s' % _MIGRATION_NOTES_URL



saving inferred vectors and labels to file
saving in  ../RESULTS/PV-COMBINED-AV
Directory  ../RESULTS/PV-COMBINED-AV/25_5_5_0  already exists

saving inferred vectors and labels to file

embedding inffered for model 0
saving in  ../RESULTS/PV-COMBINED-AV
Directory  ../RESULTS/PV-COMBINED-AV/25_5_10_0  Created 

saving inferred vectors and labels to file
saving in  ../RESULTS/PV-COMBINED-AV
Directory  ../RESULTS/PV-COMBINED-AV/25_5_10_0  already exists

saving inferred vectors and labels to file

embedding inffered for model 1
saving in  ../RESULTS/PV-COMBINED-AV
Directory  ../RESULTS/PV-COMBINED-AV/25_5_0_1  Created 

saving inferred vectors and labels to file
saving in  ../RESULTS/PV-COMBINED-AV
Directory  ../RESULTS/PV-COMBINED-AV/25_5_0_1  already exists

saving inferred vectors and labels to file

embedding inffered for model 2
saving in  ../RESULTS/PV-COMBINED-AV
Directory  ../RESULTS/PV-COMBINED-AV/25_10_5_0  Created 

saving inferred vectors and labels to file
saving in  ../RES

In [12]:
line = '../RESULTS/PV-COMBINED-AV/' 
names = ['25_5_5_0', '25_5_10_0','25_5_0_1','25_10_5_0','25_10_10_0','25_10_0_1',
         '50_5_5_0', '50_5_10_0','50_5_0_1','50_10_5_0','50_10_10_0','50_10_0_1',
         '75_5_5_0', '75_5_10_0','75_5_0_1','75_10_5_0','75_10_10_0','75_10_0_1',
         '100_5_5_0', '100_5_10_0','100_5_0_1','100_10_5_0','100_10_10_0','100_10_0_1']
for i in names:
    path = os.path.join(line, i)
    X_train = np.load(os.path.join(path, 'vectors_train.npy'))
    X_dev = np.load(os.path.join(path, 'vectors_dev.npy'))
    y_train = np.load(os.path.join(path, 'labels_train.npy'))
    y_dev = np.load(os.path.join(path, 'labels_dev.npy'))
    y_train = np.ravel(y_train)
    y_dev = np.ravel(y_dev)
    
    feature_name = 'TEXT_'+str(i)
    save_dir = '../RESULTS/PV-COMBINED-AV/' 
    random_forest = RandomForestCV(feature_name, X_train, y_train, X_dev, y_dev, save_dir)
    random_forest.run()
    print ("------"  * 10)


hyperparameters are not tuned yet

running the Grid Search for Random Forest CV classifier ...

final score for the tuned model
 0.41798941798941797

best hyperparameters for the tuned model
 {'criterion': 'entropy', 'max_depth': 8, 'max_features': 0.1, 'n_estimators': 800}

cross validation results (MEAN)
 [0.39400565 0.42782484 0.41888991 0.44487434 0.36962378 0.39497227
 0.36973189 0.38655881 0.41495854 0.39086284 0.38615817 0.40470849
 0.41354421 0.42190044 0.39028414 0.41552452 0.39062754 0.41377951
 0.42207341 0.40861187 0.36580052 0.396192   0.39683303 0.41093559
 0.38402396 0.36097502 0.40843381 0.44655703 0.39887057 0.41313848
 0.4211513  0.42932947 0.40085597 0.41712836 0.41086055 0.41250254]

cross validation results (STD)
 [0.09898925 0.09563343 0.06078618 0.05697607 0.08907156 0.08825231
 0.10961651 0.09902808 0.07284685 0.09061424 0.10819788 0.11148745
 0.08890736 0.12899902 0.08886897 0.0612755  0.09943719 0.06525882
 0.10906441 0.10143414 0.13458159 0.14561434 0.137450

  'precision', 'predicted', average, warn_for)



final score for the tuned model
 0.41269841269841273

best hyperparameters for the tuned model
 {'criterion': 'entropy', 'max_depth': 2, 'max_features': 0.2, 'n_estimators': 200}

cross validation results (MEAN)
 [0.34640822 0.34624288 0.37595772 0.37577966 0.35975402 0.42461844
 0.42289504 0.37497838 0.36973062 0.37140822 0.37894536 0.35558735
 0.35220035 0.38445258 0.38277371 0.39879935 0.38817791 0.38197243
 0.37872278 0.38312983 0.3545813  0.35163818 0.34610551 0.35125661
 0.3682616  0.33385099 0.37035384 0.37762261 0.36129045 0.34829187
 0.40115868 0.34987663 0.38667201 0.39595035 0.37266865 0.33402905]

cross validation results (STD)
 [0.02470239 0.02625602 0.02753434 0.04130001 0.08748357 0.04203987
 0.0464605  0.02963446 0.07404613 0.08608235 0.0536676  0.03361227
 0.06360514 0.06466809 0.06909502 0.05050852 0.07959968 0.04135613
 0.04533073 0.06002349 0.05865348 0.07201123 0.03859734 0.04156334
 0.07099678 0.0337607  0.05426447 0.0504656  0.04804607 0.04362916
 0.03815063 0.0


final score for the tuned model
 0.4682539682539682

best hyperparameters for the tuned model
 {'criterion': 'entropy', 'max_depth': 8, 'max_features': 0.2, 'n_estimators': 100}

cross validation results (MEAN)
 [0.3638253  0.31363706 0.35546907 0.33120803 0.32614723 0.35563441
 0.35489036 0.33102997 0.35260735 0.32895172 0.36498652 0.35429004
 0.3117356  0.35497176 0.35675748 0.34834402 0.31518747 0.33826949
 0.36304691 0.33271266 0.31049934 0.32881436 0.33935185 0.32618793
 0.28413334 0.35328907 0.35062704 0.36036325 0.37254782 0.36721357
 0.34220849 0.33578042 0.34533857 0.32544388 0.34896088 0.34691824]

cross validation results (STD)
 [0.06845308 0.05236207 0.06991181 0.08369373 0.05290243 0.07748764
 0.09682403 0.08053591 0.06510199 0.07314808 0.02181762 0.08687452
 0.11923196 0.10741581 0.0760869  0.06276231 0.04268125 0.026653
 0.04728227 0.03591697 0.03304009 0.04794924 0.04969848 0.05198965
 0.08418472 0.10273881 0.05658226 0.05863942 0.0639882  0.09265139
 0.1164663  0.0956

  'precision', 'predicted', average, warn_for)



hyperparameters are not tuned yet

running the Grid Search for Random Forest CV classifier ...

final score for the tuned model
 0.41534391534391535

best hyperparameters for the tuned model
 {'criterion': 'entropy', 'max_depth': 4, 'max_features': 0.1, 'n_estimators': 100}

cross validation results (MEAN)
 [0.34901557 0.3838141  0.34776277 0.34908552 0.29653795 0.31405169
 0.33203602 0.36609051 0.3433748  0.32825855 0.37695869 0.3846281
 0.38604243 0.36836717 0.33242394 0.35115232 0.33390313 0.35584809
 0.35609737 0.33358135 0.31649242 0.31831247 0.3089769  0.31641102
 0.34224155 0.37030296 0.36938085 0.34908552 0.37802961 0.35152498
 0.32522512 0.36973698 0.32627315 0.32324481 0.33790827 0.3686368 ]

cross validation results (STD)
 [0.05084415 0.0475688  0.06224279 0.07679146 0.07190102 0.06174754
 0.03779445 0.05703485 0.04313688 0.03932303 0.07498445 0.05057959
 0.08243271 0.07046889 0.06511878 0.06138472 0.04354478 0.0862992
 0.04130637 0.03525632 0.02875531 0.03657511 0.02591557

  'precision', 'predicted', average, warn_for)



final score for the tuned model
 0.3650793650793651

best hyperparameters for the tuned model
 {'criterion': 'entropy', 'max_depth': 4, 'max_features': 0.2, 'n_estimators': 100}

cross validation results (MEAN)
 [0.34193758 0.33375941 0.33530474 0.33650666 0.37136879 0.36209046
 0.3280932  0.34606481 0.35866784 0.38683354 0.35243564 0.34098876
 0.31081985 0.33377722 0.41069394 0.39272232 0.41846637 0.35540039
 0.3645579  0.36107168 0.31861645 0.35243564 0.32033985 0.34516687
 0.29068096 0.30212149 0.31712836 0.35275488 0.38917379 0.35129095
 0.3849448  0.37040725 0.4088866  0.33791082 0.35300163 0.32800926]

cross validation results (STD)
 [0.05390996 0.04979156 0.05957396 0.05111309 0.07576072 0.04749638
 0.05769966 0.04746633 0.05510119 0.03608023 0.04129244 0.07570731
 0.08120232 0.0626888  0.02803752 0.07242244 0.04991397 0.02366139
 0.04131722 0.05529063 0.02707125 0.04567963 0.06697069 0.03608023
 0.03049293 0.01041296 0.05201823 0.04233957 0.02161574 0.044411
 0.04230307 0.0572


final score for the tuned model
 0.41269841269841273

best hyperparameters for the tuned model
 {'criterion': 'entropy', 'max_depth': 2, 'max_features': 0.1, 'n_estimators': 800}

cross validation results (MEAN)
 [0.35510786 0.3799476  0.39562983 0.43808506 0.36087581 0.37770274
 0.41244404 0.38645961 0.37061203 0.37193478 0.388183   0.39585241
 0.42216372 0.3835521  0.37793803 0.395509   0.31984509 0.35832443
 0.37960419 0.37217008 0.38959224 0.39248703 0.39851445 0.38579696
 0.41438492 0.36954874 0.38418803 0.36846128 0.40164454 0.3992585
 0.4026684  0.36335597 0.36937068 0.38999796 0.38857855 0.36804792]

cross validation results (STD)
 [0.09453258 0.10761062 0.10354913 0.09286511 0.08173312 0.07915234
 0.06628236 0.06211113 0.04399172 0.0846136  0.0818289  0.06266996
 0.09425845 0.08922064 0.0740769  0.06748198 0.06298169 0.09384666
 0.07852704 0.08003568 0.03171583 0.07223441 0.07940734 0.08095477
 0.10437249 0.09601506 0.11022617 0.09538729 0.05946223 0.06313946
 0.08604745 0.07

  'precision', 'predicted', average, warn_for)



final score for the tuned model
 0.42857142857142855

best hyperparameters for the tuned model
 {'criterion': 'entropy', 'max_depth': 2, 'max_features': 0.1, 'n_estimators': 100}

cross validation results (MEAN)
 [0.40060668 0.37193478 0.34635099 0.32283399 0.36183354 0.37866046
 0.38382937 0.33869429 0.32663436 0.33406848 0.37449379 0.3182209
 0.35841092 0.36762566 0.3503981  0.34460597 0.36051079 0.32579365
 0.34420533 0.34983211 0.34955103 0.34792557 0.33093839 0.3372062
 0.30091829 0.39132072 0.36567969 0.34999746 0.37143239 0.36221764
 0.35574379 0.34221993 0.33949557 0.36153083 0.38917506 0.36319063]

cross validation results (STD)
 [0.10738234 0.08315513 0.07981802 0.08922152 0.06404693 0.04646075
 0.07469708 0.0883596  0.03812269 0.0743048  0.04961605 0.05931656
 0.09060262 0.04187506 0.05664318 0.07296563 0.02309703 0.10258928
 0.07212424 0.06348686 0.05328879 0.03301911 0.04149917 0.04337529
 0.08957698 0.03423905 0.0517858  0.06804709 0.03097045 0.07382884
 0.08159531 0.050

  'precision', 'predicted', average, warn_for)



final score for the tuned model
 0.4444444444444444

best hyperparameters for the tuned model
 {'criterion': 'entropy', 'max_depth': 2, 'max_features': 0.1, 'n_estimators': 200}

cross validation results (MEAN)
 [0.30524395 0.40096281 0.36121922 0.3273428  0.34613222 0.3019943
 0.30371769 0.29374618 0.352035   0.3239507  0.31026659 0.33515084
 0.37905601 0.26285231 0.27104319 0.36894205 0.34463395 0.33348469
 0.3483313  0.34414683 0.38344526 0.3562818  0.38338294 0.34961335
 0.3354828  0.37916412 0.39502951 0.37591321 0.37786427 0.36024751
 0.3124911  0.37188899 0.34810872 0.32819623 0.34327941 0.36863935]

cross validation results (STD)
 [0.14073206 0.07863415 0.092058   0.11645886 0.0930913  0.05967647
 0.08210576 0.10240101 0.04570146 0.08998969 0.11836935 0.09737946
 0.13532149 0.06306186 0.08761858 0.09160088 0.09238921 0.02139077
 0.03342972 0.07957909 0.05631452 0.0821502  0.10007142 0.05482036
 0.06094855 0.11441883 0.1274533  0.06495086 0.08415721 0.05772394
 0.10377061 0.059

In [55]:
models1 = [
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=25, window=5, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=25, window=5, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=25, window=5, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=25, window=10, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=25, window=10, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=25, window=10, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0),
    
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=50, window=5, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=50, window=5, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=50, window=5, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=50, window=10, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=50, window=10, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=50, window=10, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0),
    
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=75, window=5, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=75, window=5, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=75, window=5, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=75, window=10, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=75, window=10, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=75, window=10, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0),
    
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=100, window=5, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=100, window=5, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=100, window=5, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=100, window=10, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=100, window=10, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=1, dm_concat=1, vector_size=100, window=10, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0)
]

models2 = [
    doc2vec.Doc2Vec(dm=0, vector_size=25, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=0, vector_size=25, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=0, vector_size=25, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0),
    
    doc2vec.Doc2Vec(dm=0, vector_size=50, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=0, vector_size=50, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=0, vector_size=50, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0),
    
    doc2vec.Doc2Vec(dm=0, vector_size=75, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=0, vector_size=75, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=0, vector_size=75, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0),
    
    doc2vec.Doc2Vec(dm=0, vector_size=100, negative=5, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=0, vector_size=100, negative=10, hs=0, min_count=2, epochs=100, workers=cores,sample=0),
    doc2vec.Doc2Vec(dm=0, vector_size=100, negative=0, hs=1, min_count=2, epochs=100, workers=cores,sample=0),
    
]

In [56]:
from gensim.test.test_doc2vec import ConcatenatedDoc2Vec

models = [
    ConcatenatedDoc2Vec([models2[0], models1[0]]),
    ConcatenatedDoc2Vec([models2[1], models1[1]]),
    ConcatenatedDoc2Vec([models2[2], models1[2]]),
    ConcatenatedDoc2Vec([models2[0], models1[3]]),
    ConcatenatedDoc2Vec([models2[1], models1[4]]),
    ConcatenatedDoc2Vec([models2[2], models1[5]]),
    
    ConcatenatedDoc2Vec([models2[3], models1[6]]),
    ConcatenatedDoc2Vec([models2[4], models1[7]]),
    ConcatenatedDoc2Vec([models2[5], models1[8]]),
    ConcatenatedDoc2Vec([models2[3], models1[9]]),
    ConcatenatedDoc2Vec([models2[4], models1[10]]),
    ConcatenatedDoc2Vec([models2[5], models1[11]]),
    
    ConcatenatedDoc2Vec([models2[6], models1[12]]),
    ConcatenatedDoc2Vec([models2[7], models1[13]]),
    ConcatenatedDoc2Vec([models2[8], models1[14]]),
    ConcatenatedDoc2Vec([models2[6], models1[15]]),
    ConcatenatedDoc2Vec([models2[7], models1[16]]),
    ConcatenatedDoc2Vec([models2[8], models1[17]]),
    
    ConcatenatedDoc2Vec([models2[9], models1[18]]),
    ConcatenatedDoc2Vec([models2[10], models1[19]]),
    ConcatenatedDoc2Vec([models2[11], models1[20]]),
    ConcatenatedDoc2Vec([models2[9], models1[21]]),
    ConcatenatedDoc2Vec([models2[10], models1[22]]),
    ConcatenatedDoc2Vec([models2[11], models1[23]])

]

In [57]:
count = 0
for model in models1:
    print("\nbuilding vocabulary for doc2vec model %i" %count)
    model.build_vocab(all_docs)
    print("\nvocabulary scanned & built.")
    count += 1
    
count = 0
for model in models2:
    print("\nbuilding vocabulary for doc2vec model %i" %count)
    model.build_vocab(all_docs)
    print("\nvocabulary scanned & built.")
    count += 1


building vocabulary for doc2vec model 0

vocabulary scanned & built.

building vocabulary for doc2vec model 1

vocabulary scanned & built.

building vocabulary for doc2vec model 2

vocabulary scanned & built.

building vocabulary for doc2vec model 3

vocabulary scanned & built.

building vocabulary for doc2vec model 4

vocabulary scanned & built.

building vocabulary for doc2vec model 5

vocabulary scanned & built.

building vocabulary for doc2vec model 6

vocabulary scanned & built.

building vocabulary for doc2vec model 7

vocabulary scanned & built.

building vocabulary for doc2vec model 8

vocabulary scanned & built.

building vocabulary for doc2vec model 9

vocabulary scanned & built.

building vocabulary for doc2vec model 10

vocabulary scanned & built.

building vocabulary for doc2vec model 11

vocabulary scanned & built.

building vocabulary for doc2vec model 12

vocabulary scanned & built.

building vocabulary for doc2vec model 13

vocabulary scanned & built.

building vocabu

In [58]:
from gensim.models.callbacks import CallbackAny2Vec
class EpochLogger(CallbackAny2Vec):
    '''Callback to log information about training'''
    
    def __init__(self):
        self.epoch = 0
    
    def on_epoch_begin(self, model):
        print("Epoch #{} start".format(self.epoch))
    
    def on_epoch_end(self, model):
        print("Epoch #{} end".format(self.epoch))
        self.epoch += 1
        
epoch_logger = EpochLogger()
count = 0
for model in models1:
    model.train(all_docs, total_examples=len(all_docs), epochs=100)
    print("\nmodel %i trained" %count)
    count += 1
    
epoch_logger = EpochLogger()
count = 0
for model in models2:
    model.train(all_docs, total_examples=len(all_docs), epochs=100)
    print("\nmodel %i trained" %count)
    count += 1


model 0 trained

model 1 trained

model 2 trained

model 3 trained

model 4 trained

model 5 trained

model 6 trained

model 7 trained

model 8 trained

model 9 trained

model 10 trained

model 11 trained

model 12 trained

model 13 trained

model 14 trained

model 15 trained

model 16 trained

model 17 trained

model 18 trained

model 19 trained

model 20 trained

model 21 trained

model 22 trained

model 23 trained

model 0 trained

model 1 trained

model 2 trained

model 3 trained

model 4 trained

model 5 trained

model 6 trained

model 7 trained

model 8 trained

model 9 trained

model 10 trained

model 11 trained


In [59]:
count = 0
for model in models:
    infer_embedding_combined(model, models1[count], 'train', 'PV-COMBINED-CONC')
    infer_embedding_combined(model, models1[count], 'dev', 'PV-COMBINED-CONC')
    print("\nembedding inffered for model %i" %count)
    count += 1

saving in  ../RESULTS/PV-COMBINED-CONC
Directory  ../RESULTS/PV-COMBINED-CONC/25_5_5_0  Created 


  'See the migration notes for details: %s' % _MIGRATION_NOTES_URL



saving inferred vectors and labels to file
saving in  ../RESULTS/PV-COMBINED-CONC
Directory  ../RESULTS/PV-COMBINED-CONC/25_5_5_0  already exists

saving inferred vectors and labels to file

embedding inffered for model 0
saving in  ../RESULTS/PV-COMBINED-CONC
Directory  ../RESULTS/PV-COMBINED-CONC/25_5_10_0  Created 

saving inferred vectors and labels to file
saving in  ../RESULTS/PV-COMBINED-CONC
Directory  ../RESULTS/PV-COMBINED-CONC/25_5_10_0  already exists

saving inferred vectors and labels to file

embedding inffered for model 1
saving in  ../RESULTS/PV-COMBINED-CONC
Directory  ../RESULTS/PV-COMBINED-CONC/25_5_0_1  Created 

saving inferred vectors and labels to file
saving in  ../RESULTS/PV-COMBINED-CONC
Directory  ../RESULTS/PV-COMBINED-CONC/25_5_0_1  already exists

saving inferred vectors and labels to file

embedding inffered for model 2
saving in  ../RESULTS/PV-COMBINED-CONC
Directory  ../RESULTS/PV-COMBINED-CONC/25_10_5_0  Created 

saving inferred vectors and labels t

In [13]:
line = '../RESULTS/PV-COMBINED-CONC/' 
names = ['25_5_5_0', '25_5_10_0','25_5_0_1','25_10_5_0','25_10_10_0','25_10_0_1',
         '50_5_5_0', '50_5_10_0','50_5_0_1','50_10_5_0','50_10_10_0','50_10_0_1',
         '75_5_5_0', '75_5_10_0','75_5_0_1','75_10_5_0','75_10_10_0','75_10_0_1',
         '100_5_5_0', '100_5_10_0','100_5_0_1','100_10_5_0','100_10_10_0','100_10_0_1']
for i in names:
    path = os.path.join(line, i)
    X_train = np.load(os.path.join(path, 'vectors_train.npy'))
    X_dev = np.load(os.path.join(path, 'vectors_dev.npy'))
    y_train = np.load(os.path.join(path, 'labels_train.npy'))
    y_dev = np.load(os.path.join(path, 'labels_dev.npy'))
    y_train = np.ravel(y_train)
    y_dev = np.ravel(y_dev)
    
    feature_name = 'TEXT_'+str(i)
    save_dir = '../RESULTS/PV-COMBINED-CONC/' 
    random_forest = RandomForestCV(feature_name, X_train, y_train, X_dev, y_dev, save_dir)
    random_forest.run()
    print ("------"  * 10)


hyperparameters are not tuned yet

running the Grid Search for Random Forest CV classifier ...

final score for the tuned model
 0.41798941798941797

best hyperparameters for the tuned model
 {'criterion': 'entropy', 'max_depth': 8, 'max_features': 0.1, 'n_estimators': 800}

cross validation results (MEAN)
 [0.34795228 0.33643671 0.3534417  0.33621413 0.36608923 0.34532585
 0.37922009 0.37195131 0.36649751 0.34733415 0.35592567 0.35460292
 0.33853276 0.3549247  0.37930149 0.40304106 0.34802859 0.38675214
 0.40432311 0.38284875 0.39336335 0.38150438 0.36420177 0.38012439
 0.37443529 0.37700448 0.36532357 0.43936203 0.42796602 0.36744251
 0.37517933 0.39264728 0.40862714 0.39779838 0.37303368 0.42397741]

cross validation results (STD)
 [0.06275309 0.08641618 0.07264612 0.0737927  0.03721165 0.07650113
 0.12791404 0.11852791 0.08210765 0.09214296 0.09725729 0.09531692
 0.06556302 0.14635146 0.1337718  0.14182271 0.08308475 0.06816625
 0.07548971 0.07339079 0.06614477 0.05739646 0.072616


accuracy on training set: 1.0000

accuracy on development set: 0.4667

precision on dev set: 0.4444

recall on dev set: 0.4524

fscore on dev set: 0.4304
------------------------------------------------------------

hyperparameters are not tuned yet

running the Grid Search for Random Forest CV classifier ...

final score for the tuned model
 0.4021164021164021

best hyperparameters for the tuned model
 {'criterion': 'entropy', 'max_depth': 4, 'max_features': 0.1, 'n_estimators': 100}

cross validation results (MEAN)
 [0.36984    0.36593661 0.35207443 0.36048789 0.32817333 0.34109687
 0.34500025 0.38666692 0.38623194 0.36389398 0.38873372 0.3704187
 0.40138889 0.3486251  0.34730235 0.38015491 0.37105973 0.36100427
 0.38544338 0.35611645 0.33659951 0.35457112 0.38155652 0.35944114
 0.3756105  0.36912012 0.36837607 0.38015491 0.33437882 0.32560541
 0.34259768 0.33854548 0.36684854 0.39303266 0.37526709 0.36917735]

cross validation results (STD)
 [0.10063966 0.09156692 0.0616121  0.0718


final score for the tuned model
 0.35714285714285715

best hyperparameters for the tuned model
 {'criterion': 'entropy', 'max_depth': 8, 'max_features': 0.4, 'n_estimators': 400}

cross validation results (MEAN)
 [0.37261523 0.38841829 0.39330103 0.3624911  0.39312296 0.37635328
 0.37653134 0.3843661  0.36581578 0.40277778 0.38733084 0.38658679
 0.40634921 0.40119302 0.38773148 0.39277956 0.37266229 0.3573807
 0.40722553 0.37555199 0.4088052  0.42158501 0.3957443  0.40415776
 0.36294516 0.35778134 0.38262108 0.39023326 0.38035969 0.37148326
 0.39119989 0.38204238 0.42328678 0.40960648 0.43404558 0.42667379]

cross validation results (STD)
 [0.05890461 0.05889727 0.06238021 0.05000062 0.07041375 0.08320892
 0.06538069 0.07320707 0.04534485 0.07896636 0.08353806 0.1002099
 0.02555427 0.04771963 0.02796276 0.07467823 0.04282901 0.04952024
 0.04361504 0.04409022 0.07440494 0.10122546 0.06185249 0.06984438
 0.08304844 0.05548102 0.07028581 0.06957733 0.05407223 0.06223591
 0.06135258 0.065


final score for the tuned model
 0.4021164021164021

best hyperparameters for the tuned model
 {'criterion': 'entropy', 'max_depth': 8, 'max_features': 0.1, 'n_estimators': 200}

cross validation results (MEAN)
 [0.36214133 0.31978785 0.33679284 0.31541005 0.3099855  0.32629095
 0.33430377 0.32002315 0.32742292 0.33010277 0.31160969 0.31713345
 0.36377824 0.30627671 0.32531161 0.32777905 0.33099562 0.30459783
 0.32274751 0.35616606 0.35252213 0.32875839 0.31020808 0.32686966
 0.33840812 0.36944572 0.33190502 0.33150438 0.35196632 0.32895808
 0.35748881 0.32176816 0.31206883 0.32734534 0.34270452 0.34049654]

cross validation results (STD)
 [0.09952215 0.07999854 0.06180823 0.05024051 0.06816735 0.04393327
 0.05190584 0.02246744 0.0867697  0.09783986 0.08231219 0.04144021
 0.09206697 0.05335652 0.08502516 0.08383169 0.05479643 0.08494486
 0.06716688 0.06512481 0.08127841 0.06445947 0.07857012 0.0605003
 0.10820713 0.08383169 0.09245372 0.08439032 0.06151741 0.06074369
 0.05663035 0.084