Packages:
* [NLTK](http://www.nltk.org/howto/classify.html)
* [SpaCy](https://spacy.io/)
* [AllenNLP](https://allennlp.org/tutorials)

Articles:
* [A Comprehensive Guide to Understand and Implement Text Classification in Python](https://www.analyticsvidhya.com/blog/2018/04/a-comprehensive-guide-to-understand-and-implement-text-classification-in-python/)
* [Machine Learning, NLP: Text Classification using scikit-learn, python and NLTK](https://towardsdatascience.com/machine-learning-nlp-text-classification-using-scikit-learn-python-and-nltk-c52b92a7c73a)
* [State-of-the-Art Text Classification using BERT model: “Predict the Happiness” Challenge](https://appliedmachinelearning.blog/2019/03/04/state-of-the-art-text-classification-using-bert-model-predict-the-happiness-hackerearth-challenge/)

In [1]:
from gensim.utils import simple_preprocess
from gensim.parsing.preprocessing import STOPWORDS
from sklearn import model_selection, preprocessing
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn import decomposition, ensemble
from nltk.stem import WordNetLemmatizer, SnowballStemmer
from tqdm import tqdm
import numpy as np
import pandas as pd
import string
import time


stemmer = SnowballStemmer('english')
def lemmatize_stemming(text):
    return stemmer.stem(WordNetLemmatizer().lemmatize(text, pos='v'))

def preprocess(text):
    result = []
    result_stemmed = []
    for token in simple_preprocess(text, min_len = 2):
        result.append(token)
#         if token not in STOPWORDS:
        result_stemmed.append(lemmatize_stemming(token))
    
    return (result, result_stemmed)

with open("./data/raw/Hygiene/hygiene.dat.labels") as f:
    LABELS = [int(l) for l in f.readlines() if l[0].isdigit()]

ALL_RAW_TEXTS = []
ALL_TEXTS = []
ALL_STEMMED_TEXTS = []
ALL_CONCAT_STEMMED_TEXTS = []
LABELED_TEXTS = []
LABELED_CONCAT_TEXTS = []
LABELED_STEMMED_TEXTS = []
LABELED_CONCAT_STEMMED_TEXTS = []

with open("./data/raw/Hygiene/hygiene.dat") as f:
    ALL_RAW_TEXTS = f.readlines()

for _text in tqdm(ALL_RAW_TEXTS):
    _result, _result_stemmed = preprocess(_text)
    ALL_TEXTS.append(_result)
    ALL_STEMMED_TEXTS.append(_result_stemmed)

ALL_CONCAT_STEMMED_TEXTS = [" ".join(_text) for _text in ALL_STEMMED_TEXTS]

LABELED_TEXTS = ALL_TEXTS[0:len(LABELS)]
LABELED_CONCAT_TEXTS = [" ".join(_text) for _text in LABELED_TEXTS]

LABELED_STEMMED_TEXTS = ALL_STEMMED_TEXTS[0:len(LABELS)]
LABELED_CONCAT_STEMMED_TEXTS = [" ".join(_text) for _text in LABELED_STEMMED_TEXTS]

FEATURE_MORE =pd.read_csv("./data/raw/Hygiene/hygiene.dat.additional", header=None)

100%|██████████| 13299/13299 [15:03<00:00, 14.73it/s]  


In [2]:
# create a dataframe using texts and lables
labeled_df = pd.DataFrame()
labeled_df['concat_stemmed_text'] = LABELED_CONCAT_STEMMED_TEXTS
labeled_df['stemmed_text'] = LABELED_STEMMED_TEXTS
# labeled_df['concat_stemmed_text'] = LABELED_CONCAT_TEXTS
# labeled_df['stemmed_text'] = LABELED_TEXTS
labeled_df['label'] = LABELS

# split the dataset into training and validation datasets 
train_concat_stemmed_text, test_concat_stemmed_text, train_label, test_label = model_selection.train_test_split(labeled_df['concat_stemmed_text'], 
                                                                                  labeled_df['label'],
                                                                                  test_size = 0.2)
train_stemmed_text = labeled_df['stemmed_text'][train_concat_stemmed_text.index]
test_stemmed_text = labeled_df['stemmed_text'][test_concat_stemmed_text.index]

# # label encode the target variable 
# encoder = preprocessing.LabelEncoder()
# train_label = encoder.fit_transform(train_label)
# test_label = encoder.fit_transform(test_label)

In [3]:
# dictionary = corpora.Dictionary(processed_docs)
# print("Before prunn:%d"%(len(dictionary)))
# dictionary.filter_extremes(no_below = 2, no_above = 0.5)
# print("After prunn:%d"%(len(dictionary)))
# corpus = [dictionary.doc2bow(doc) for doc in processed_docs]

# Feature Engineering

## Count Vectors as features

In [4]:
class MyCountVectorizer:
    def __init__(self, max_df = 0.5):
        self.vect_model = CountVectorizer(analyzer='word', max_df = max_df)
        
    def fit(self, texts_concat):
        self.vect_model.fit(texts_concat)
    
    def transform(self, texts_concat):
        return self.vect_model.transform(texts_concat)

count_vect = MyCountVectorizer()
count_vect.fit(train_concat_stemmed_text)
train_count = count_vect.transform(train_concat_stemmed_text)
test_count =  count_vect.transform(test_concat_stemmed_text)

## TF-IDF Vectors as features

In [5]:
%%time

# word level tf-idf
class MyTfidfVectorizer(MyCountVectorizer):
    def __init__(self, analyzer='word', ngram_range = None, max_features=5000):
        if ngram_range is None:
            self.vect_model = TfidfVectorizer(analyzer = analyzer, max_features = max_features)
        else:
            self.vect_model = TfidfVectorizer(analyzer = analyzer, ngram_range = ngram_range, 
                                              max_features = max_features)
            
    def fit(self, texts_concat):   
        self.vect_model.fit(texts_concat)
        self.vocabulary = self.vect_model.vocabulary_


tfidf_vect = MyTfidfVectorizer()
tfidf_vect.fit(train_concat_stemmed_text)
train_tfidf =  tfidf_vect.transform(train_concat_stemmed_text)
test_tfidf =  tfidf_vect.transform(test_concat_stemmed_text)

# ngram level tf-idf 
tfidf_vect_ngram = MyTfidfVectorizer(ngram_range=(2,3))
tfidf_vect_ngram.fit(train_concat_stemmed_text)
train_tfidf_ngram =  tfidf_vect_ngram.transform(train_concat_stemmed_text)
test_tfidf_ngram =  tfidf_vect_ngram.transform(test_concat_stemmed_text)

# characters level tf-idf
tfidf_vect_ngram_chars = MyTfidfVectorizer(analyzer='char', ngram_range=(2,3))
tfidf_vect_ngram_chars.fit(train_concat_stemmed_text)
train_tfidf_ngram_chars =  tfidf_vect_ngram_chars.transform(train_concat_stemmed_text) 
test_tfidf_ngram_chars =  tfidf_vect_ngram_chars.transform(test_concat_stemmed_text) 

CPU times: user 14.4 s, sys: 227 ms, total: 14.6 s
Wall time: 14.1 s


## Word Embeddings

### Build from review corpus

In [6]:
# %%time

# from gensim.models.fasttext import FastText

# class MyFastTextTfidfVectorizer(MyCountVectorizer):
#     def __init__(self, tfidf_vectorizer, size = 100):
#         self.embedding_size = size
#         self.tfidf_vectorizer = tfidf_vectorizer
#         self.fasttext_model = FastText(size = size, window = 5, min_count = 5)

#     def tfidf2embedding(self, value_vector):
#         _weighted_value = np.zeros(self.embedding_size)
#         for key in self.tfidf_vectorizer.vocabulary:
#             _index = self.tfidf_vectorizer.vocabulary[key]
#             if value_vector[_index] != 0:
#                 _weighted_value += self.fasttext_model[key] * value_vector[_index]

#         return _weighted_value
    
#     def fit(self, texts):
#         _texts_concat = [" ".join(_text) for _text in texts]
#         self.tfidf_vectorizer = MyTfidfVectorizer()
#         self.tfidf_vectorizer.fit(_texts_concat)
        
#         self.fasttext_model.build_vocab(sentences = texts)
#         self.fasttext_model.train(sentences = texts, 
#                                   total_examples = len(texts), 
#                                   epochs=10)
        
#     def transform(self, texts):
#         _texts_concat = [" ".join(_text) for _text in texts]
#         _tfidf_values = self.tfidf_vectorizer.transform(_texts_concat)
#         return np.asarray([self.tfidf2embedding(_value.toarray()[0]) for _value in _tfidf_values])

# fasttext_tfidf_vect = MyFastTextTfidfVectorizer(tfidf_vect)
# fasttext_tfidf_vect.fit(ALL_STEMMED_TEXTS)
# train_fasttext_embedding = fasttext_tfidf_vect.transform(train_stemmed_text)
# test_fasttext_embedding = fasttext_tfidf_vect.transform(test_stemmed_text)

  from ipykernel import kernelapp as app


CPU times: user 22min 36s, sys: 13 s, total: 22min 49s
Wall time: 10min 29s


### Prebuilt Embedding

In [7]:
# %%time
# from keras.preprocessing import text, sequence
# from keras import layers, models, optimizers

# # load the pre-trained word-embedding vectors 
# embeddings_index = {}
# for i, line in enumerate(open('data/model/wiki-news-300d-1M.vec')):
#     values = line.split()
#     embeddings_index[values[0]] = np.asarray(values[1:], dtype='float32')

# # create a tokenizer 
# token = text.Tokenizer()
# token.fit_on_texts(LABELED_CONCAT_STEMMED_TEXTS)
# word_index = token.word_index

# # convert text to sequence of tokens and pad them to ensure equal length vectors 
# text_train_seq = sequence.pad_sequences(token.texts_to_sequences(train_text), maxlen=70)
# text_test_seq = sequence.pad_sequences(token.texts_to_sequences(test_text), maxlen=70)

# # create token-embedding mapping
# embedding_matrix = np.zeros((len(word_index) + 1, 300))
# for word, i in word_index.items():
#     embedding_vector = embeddings_index.get(word)
#     if embedding_vector is not None:
#         embedding_matrix[i] = embedding_vector

## Text / NLP based features

In [8]:
# %%time
# trainDF['char_count'] = trainDF['text'].apply(len)
# trainDF['word_count'] = trainDF['text'].apply(lambda x: len(x.split()))
# trainDF['word_density'] = trainDF['char_count'] / (trainDF['word_count']+1)
# trainDF['punctuation_count'] = trainDF['text'].apply(lambda x: len("".join(_ for _ in x if _ in string.punctuation))) 
# trainDF['title_word_count'] = trainDF['text'].apply(lambda x: len([wrd for wrd in x.split() if wrd.istitle()]))
# trainDF['upper_case_word_count'] = trainDF['text'].apply(lambda x: len([wrd for wrd in x.split() if wrd.isupper()]))

In [9]:
# %%time

# import textblob

# pos_family = {
#     'noun' : ['NN','NNS','NNP','NNPS'],
#     'pron' : ['PRP','PRP$','WP','WP$'],
#     'verb' : ['VB','VBD','VBG','VBN','VBP','VBZ'],
#     'adj' :  ['JJ','JJR','JJS'],
#     'adv' : ['RB','RBR','RBS','WRB']
# }

# # function to check and get the part of speech tag count of a words in a given sentence
# def check_pos_tag(x, flag):
#     cnt = 0
#     try:
#         wiki = textblob.TextBlob(x)
#         for tup in wiki.tags:
#             ppo = list(tup)[1]
#             if ppo in pos_family[flag]:
#                 cnt += 1
#     except:
#         pass
#     return cnt

# trainDF['noun_count'] = trainDF['text'].apply(lambda x: check_pos_tag(x, 'noun'))
# trainDF['verb_count'] = trainDF['text'].apply(lambda x: check_pos_tag(x, 'verb'))
# trainDF['adj_count'] = trainDF['text'].apply(lambda x: check_pos_tag(x, 'adj'))
# trainDF['adv_count'] = trainDF['text'].apply(lambda x: check_pos_tag(x, 'adv'))
# trainDF['pron_count'] = trainDF['text'].apply(lambda x: check_pos_tag(x, 'pron'))

## Topic Models as features

In [10]:
# # train a LDA Model
# lda_model = decomposition.LatentDirichletAllocation(n_components=20, learning_method='online', max_iter=20)
# X_topics = lda_model.fit_transform(text_train_count)
# topic_word = lda_model.components_
# vocab = count_vect.get_feature_names()

# # view the topic models
# n_top_words = 10
# topic_summaries = []
# for i, topic_dist in enumerate(topic_word):
#     topic_words = np.array(vocab)[np.argsort(topic_dist)][:-(n_top_words+1):-1]
#     topic_summaries.append(' '.join(topic_words))

In [11]:
%%time

import os
from gensim import corpora, models

class MyLDAVectorizer(MyCountVectorizer):
    mallet_path = ".." + os.sep + "mallet-2.0.8"+ os.sep + "bin" + os.sep +"mallet"
    
    def __init__(self, TOPIC_COUNT = 100):
        self.topic_count = TOPIC_COUNT
    
    def fit(self, texts):
        self.dictionary = corpora.Dictionary(texts)
        _corpus = [self.dictionary.doc2bow(_doc) for _doc in texts]
        self.tfidf_model = models.TfidfModel(_corpus)
        _tfidf_corpus = self.tfidf_model[_corpus]

#         self.vect_model = models.LdaModel(_tfidf_corpus, 
#                             num_topics = self.topic_count, 
#                             id2word = self.dictionary,
#                             random_state = 100,
#                             eval_every = 5, 
#                             alpha = 'auto', 
#                             gamma_threshold = 0.01)
        
        self.vect_model = models.wrappers.LdaMallet(self.mallet_path, 
                                                     corpus = _corpus, 
                                                     num_topics = self.topic_count, 
                                                     id2word = self.dictionary)
    
    def toarray(self, doc_topics):
        _doc_vect  = np.zeros((len(doc_topics), self.topic_count))
        
        for i, _doc in enumerate(doc_topics):
            for _topic, _weight in _doc:
                _doc_vect[i][_topic] = _weight
        
        return _doc_vect
        
    def transform(self, texts):
        _corpus = [self.dictionary.doc2bow(_doc) for _doc in texts]
#         _tfidf_corpus = self.tfidf_model[_corpus]
        
        return self.toarray(self.vect_model[_corpus])

lda_vect = MyLDAVectorizer()
lda_vect.fit(train_stemmed_text)
train_lda = lda_vect.transform(train_stemmed_text)
test_lda = lda_vect.transform(test_stemmed_text)

CPU times: user 5.57 s, sys: 211 ms, total: 5.78 s
Wall time: 57.1 s


## Doc2Vec

In [12]:
%%time

from gensim.models.doc2vec import Doc2Vec, TaggedDocument
import numpy as np

class MyDoc2Vectorizer(MyCountVectorizer):
    def __init__(self, size = 100):
        self.embedding_size = size
            
    def fit(self, texts):
        _docs = [TaggedDocument(_doc, [i]) for i, _doc in enumerate(texts)]
        self.vect_model = Doc2Vec(_docs, 
                                  vector_size = self.embedding_size, 
#                                   window = 8,
                                  epochs=40, 
                                  workers=4)
        
    def transform(self, texts):
        return np.asarray([self.vect_model.infer_vector(_text) for _text in texts])

doc2vec_vect = MyDoc2Vectorizer(size = 200)
# doc2vec_vect.fit(ALL_STEMMED_TEXTS)
doc2vec_vect.fit(train_stemmed_text)
train_doc2vec = doc2vec_vect.transform(train_stemmed_text)
test_doc2vec = doc2vec_vect.transform(test_stemmed_text)

CPU times: user 50 s, sys: 634 ms, total: 50.7 s
Wall time: 19.4 s


In [13]:
#Test quality

import collections

ranks = []
for doc_id in range(len(train_stemmed_text)):
    inferred_vector = doc2vec_vect.vect_model.infer_vector(train_stemmed_text[train_stemmed_text.index[doc_id]])
    sims = doc2vec_vect.vect_model.docvecs.most_similar([inferred_vector], topn=len(doc2vec_vect.vect_model.docvecs))
    rank = [docid for docid, sim in sims].index(doc_id)
    ranks.append(rank)

collections.Counter(ranks)

Counter({0: 436})

# Models

In [113]:
from sklearn import linear_model, naive_bayes, metrics, svm

def show_score(classifier_name, scores):
    print("Accuracy:%0.2f Precission:%0.2f Recall:%0.2f F1:%0.2f"%scores, "-> [%s]"%(classifier_name))
    
def train_model(classifier, train_feature, train_label, test_feature, test_label, is_neural_net=False):
#     print(train_feature)
    # fit the training dataset on the classifier
    classifier.fit(train_feature, train_label)
    
    # predict the labels on validation dataset
    predictions = classifier.predict(test_feature)
    
    if is_neural_net:
        predictions = predictions.argmax(axis=-1)
    print(metrics.confusion_matrix(predictions, test_label))
    return (metrics.accuracy_score(predictions, test_label),
            metrics.precision_score(predictions, test_label),
            metrics.recall_score(predictions, test_label),
            metrics.f1_score(predictions, test_label))

def run_model(classfier_configs):
    for _name in classfier_configs:
#         print(classfier_configs[_name].train)
        scores = train_model(classfier_configs[_name][0], classfier_configs[_name][1], train_label, classfier_configs[_name][2], test_label)
        show_score(_name, scores)
        
class ClassifierConfig:
     def __init__(self, classifier, train, test):
            self.classifier = classifier
            self.train = train
            self.test = test

## Naive Bayes 

In [114]:
nb_config = {
    "NB Count": (naive_bayes.MultinomialNB(), train_count, test_count),
    "NB TFIDF": (naive_bayes.MultinomialNB(), train_tfidf, test_tfidf),
    "NB TFIDF NGram": (naive_bayes.MultinomialNB(), train_tfidf_ngram, test_tfidf_ngram),
    "NB TFIDF NGram Chars": (naive_bayes.MultinomialNB(), train_tfidf_ngram_chars, test_tfidf_ngram_chars),
    "NB LDA": (naive_bayes.MultinomialNB(), train_lda, test_lda),
}

run_model(nb_config)

[[28 20]
 [30 32]]
Accuracy:0.55 Precission:0.62 Recall:0.52 F1:0.56 -> [NB Count]
[[10  6]
 [48 46]]
Accuracy:0.51 Precission:0.88 Recall:0.49 F1:0.63 -> [NB TFIDF]
[[27 22]
 [31 30]]
Accuracy:0.52 Precission:0.58 Recall:0.49 F1:0.53 -> [NB TFIDF NGram]
[[ 4  2]
 [54 50]]
Accuracy:0.49 Precission:0.96 Recall:0.48 F1:0.64 -> [NB TFIDF NGram Chars]
[[25 16]
 [33 36]]
Accuracy:0.55 Precission:0.69 Recall:0.52 F1:0.60 -> [NB LDA]


Accuracy:0.55 Precission:0.62 Recall:0.52 F1:0.56 -> [NB Count]
Accuracy:0.51 Precission:0.88 Recall:0.49 F1:0.63 -> [NB TFIDF]
Accuracy:0.52 Precission:0.58 Recall:0.49 F1:0.53 -> [NB TFIDF NGram]
Accuracy:0.49 Precission:0.96 Recall:0.48 F1:0.64 -> [NB TFIDF NGram Chars]
Accuracy:0.55 Precission:0.69 Recall:0.52 F1:0.60 -> [NB LDA]

##  Linear Classifier

In [138]:
from sklearn.linear_model import LogisticRegression

linear_parameters = {'penalty':('l1', 'l2'), 'C':[10, 1, 0.1, 0.01]}
linear_model = GridSearchCV(LogisticRegression(solver='saga'), scoring = "f1",param_grid = linear_parameters, cv=5)

linear_model.fit(train_count, train_label)

predictions = linear_model.predict(test_count)

(metrics.accuracy_score(predictions, test_label),
            metrics.precision_score(predictions, test_label),
            metrics.recall_score(predictions, test_label),
            metrics.f1_score(predictions, test_label))

print(linear_model.best_params_)



(0.5454545454545454,
 0.6730769230769231,
 0.5147058823529411,
 0.5833333333333334)

In [137]:
linear_config = {
    "Linear Count": (LogisticRegression(C= 0.1, penalty='l1', tol=0.1, solver='saga'), train_count, test_count)
    , "Linear TFIDF": (LogisticRegression(C= 0.1, penalty='l2', tol=0.1, solver='saga'), train_tfidf, test_tfidf)
    , "Linear TFIDF NGram": (LogisticRegression(C= 0.1, penalty='l2', tol=0.1, solver='saga'), train_tfidf_ngram, test_tfidf_ngram)
    , "Linear TFIDF NGram Chars": (LogisticRegression(C= 0.1, penalty='l2', tol=0.1, solver='saga'), train_tfidf_ngram_chars, test_tfidf_ngram_chars)
#     , "Linear FastText Embedding": (LogisticRegression(solver='lbfgs', max_iter=int(1e6)), train_fasttext_embedding, test_fasttext_embedding)
    , "Linear LDA": (LogisticRegression(solver='lbfgs'), train_lda, test_lda)
    , "Linear Doc2Vec": (LogisticRegression(solver='lbfgs'), train_doc2vec, test_doc2vec)
}

# linear_classifiers = {
#     "Linear Count": (LogisticRegression(), train_count, test_count)
#     , "Linear TFIDF": (LogisticRegression(), train_tfidf, test_tfidf)
#     , "Linear TFIDF NGram": (LogisticRegression(), train_tfidf_ngram, test_tfidf_ngram)
#     , "Linear TFIDF NGram Chars": (LogisticRegression(), train_tfidf_ngram_chars, test_tfidf_ngram_chars)
#     , "Linear FastText Embedding": (LogisticRegression(), train_fasttext_embedding, test_fasttext_embedding)
#     , "Linear LDA": (LogisticRegression(), train_lda, test_lda)
#     , "Linear Doc2Vec": (LogisticRegression(), train_doc2vec, test_doc2vec)
# }

run_model(linear_config)

[[ 2  1]
 [56 51]]
Accuracy:0.48 Precission:0.98 Recall:0.48 F1:0.64 -> [Linear Count]
[[25 18]
 [33 34]]
Accuracy:0.54 Precission:0.65 Recall:0.51 F1:0.57 -> [Linear TFIDF]
[[30 20]
 [28 32]]
Accuracy:0.56 Precission:0.62 Recall:0.53 F1:0.57 -> [Linear TFIDF NGram]
[[21 13]
 [37 39]]
Accuracy:0.55 Precission:0.75 Recall:0.51 F1:0.61 -> [Linear TFIDF NGram Chars]
[[35 27]
 [23 25]]
Accuracy:0.55 Precission:0.48 Recall:0.52 F1:0.50 -> [Linear FastText Embedding]
[[28 20]
 [30 32]]
Accuracy:0.55 Precission:0.62 Recall:0.52 F1:0.56 -> [Linear LDA]
[[28 34]
 [30 18]]
Accuracy:0.42 Precission:0.35 Recall:0.38 F1:0.36 -> [Linear Doc2Vec]


## SVM

In [153]:
from sklearn.svm import SVC

svm_parameters = {'kernel':('linear', 'rbf'), 'C':[0.1, 1, 10, 100], "gamma": np.logspace(-2, 2, 5)}
svm_model = GridSearchCV(SVC(), scoring = "f1", param_grid = svm_parameters, cv=5)

svm_model.fit(train_tfidf_ngram_chars, train_label)

predictions = svm_model.predict(test_tfidf_ngram_chars)

print(metrics.accuracy_score(predictions, test_label),
            metrics.precision_score(predictions, test_label),
            metrics.recall_score(predictions, test_label),
            metrics.f1_score(predictions, test_label))
print(svm_model.best_params_)



0.4818181818181818 0.8653846153846154 0.47368421052631576 0.6122448979591837
{'C': 1, 'gamma': 0.01, 'kernel': 'linear'}


In [158]:
svm_config = {
    "SVM Count": (SVC(C = 1, gamma = 0.1, kernel='rbf'), train_count, test_count),
    "SVM TFIDF": (SVC(C = 10, gamma = 0.01, kernel='rbf'), train_tfidf, test_tfidf),
    "SVM TFIDF NGram": (SVC(C = 1, gamma = 0.01, kernel='linear'), train_tfidf_ngram, test_tfidf_ngram),
    "SVM TFIDF NGram Chars": (SVC(C = 10, gamma = 0.01, kernel='rbf'), train_tfidf_ngram_chars, test_tfidf_ngram_chars),
#     "SVM FastText Embedding": (SVC(C = 1, gamma = 0.1, kernel='rbf'), train_fasttext_embedding, test_fasttext_embedding),
    "SVM LDA": (SVC(C = 1, gamma = 0.1, kernel='rbf'), train_lda, test_lda),
    "SVM Doc2Vec": (SVC(C = 1, gamma = 0.1, kernel='rbf'), train_doc2vec, test_doc2vec),
}
    
run_model(svm_config)

[[ 2  4]
 [56 48]]
Accuracy:0.45 Precission:0.92 Recall:0.46 F1:0.62 -> [SVM Count]
[[19  8]
 [39 44]]
Accuracy:0.57 Precission:0.85 Recall:0.53 F1:0.65 -> [SVM TFIDF]
[[42 25]
 [16 27]]
Accuracy:0.63 Precission:0.52 Recall:0.63 F1:0.57 -> [SVM TFIDF NGram]
[[ 0  0]
 [58 52]]
Accuracy:0.47 Precission:1.00 Recall:0.47 F1:0.64 -> [SVM TFIDF NGram Chars]
[[ 0  0]
 [58 52]]
Accuracy:0.47 Precission:1.00 Recall:0.47 F1:0.64 -> [SVM LDA]
[[16 11]
 [42 41]]
Accuracy:0.52 Precission:0.79 Recall:0.49 F1:0.61 -> [SVM Doc2Vec]


## Boosting

In [173]:
# %%time

# from xgboost import XGBClassifier

# xgb_parameters = {
#         'min_child_weight': [1, 5, 10],
#         'gamma': [0.5, 1, 1.5, 2, 5],
#         'subsample': [0.6, 0.8, 1.0],
#         'colsample_bytree': [0.6, 0.8, 1.0],
#         'max_depth': [3, 4, 5]
#         }

# xgb_model = GridSearchCV(XGBClassifier(), scoring = "f1", param_grid = xgb_parameters, cv=5, verbose = 3)

# xgb_model.fit(train_count, train_label)

# predictions = xgb_model.predict(test_count)

# print(metrics.accuracy_score(predictions, test_label),
#             metrics.precision_score(predictions, test_label),
#             metrics.recall_score(predictions, test_label),
#             metrics.f1_score(predictions, test_label))

# print(xgb_model.best_params_)

Fitting 5 folds for each of 405 candidates, totalling 2025 fits
[CV] colsample_bytree=0.6, gamma=0.5, max_depth=3, min_child_weight=1, subsample=0.6 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  colsample_bytree=0.6, gamma=0.5, max_depth=3, min_child_weight=1, subsample=0.6, score=0.5679012345679012, total=   3.0s
[CV] colsample_bytree=0.6, gamma=0.5, max_depth=3, min_child_weight=1, subsample=0.6 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    3.1s remaining:    0.0s


[CV]  colsample_bytree=0.6, gamma=0.5, max_depth=3, min_child_weight=1, subsample=0.6, score=0.651685393258427, total=   2.9s
[CV] colsample_bytree=0.6, gamma=0.5, max_depth=3, min_child_weight=1, subsample=0.6 


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    6.0s remaining:    0.0s


[CV]  colsample_bytree=0.6, gamma=0.5, max_depth=3, min_child_weight=1, subsample=0.6, score=0.5833333333333334, total=   2.9s
[CV] colsample_bytree=0.6, gamma=0.5, max_depth=3, min_child_weight=1, subsample=0.6 
[CV]  colsample_bytree=0.6, gamma=0.5, max_depth=3, min_child_weight=1, subsample=0.6, score=0.6363636363636364, total=   2.9s
[CV] colsample_bytree=0.6, gamma=0.5, max_depth=3, min_child_weight=1, subsample=0.6 
[CV]  colsample_bytree=0.6, gamma=0.5, max_depth=3, min_child_weight=1, subsample=0.6, score=0.6075949367088607, total=   2.9s
[CV] colsample_bytree=0.6, gamma=0.5, max_depth=3, min_child_weight=1, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=0.5, max_depth=3, min_child_weight=1, subsample=0.8, score=0.5432098765432098, total=   2.9s
[CV] colsample_bytree=0.6, gamma=0.5, max_depth=3, min_child_weight=1, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=0.5, max_depth=3, min_child_weight=1, subsample=0.8, score=0.6744186046511628, total=   2.8s
[CV] colsample_bytree

[CV]  colsample_bytree=0.6, gamma=0.5, max_depth=3, min_child_weight=10, subsample=1.0, score=0.6813186813186813, total=   3.8s
[CV] colsample_bytree=0.6, gamma=0.5, max_depth=3, min_child_weight=10, subsample=1.0 
[CV]  colsample_bytree=0.6, gamma=0.5, max_depth=3, min_child_weight=10, subsample=1.0, score=0.6105263157894737, total=   3.0s
[CV] colsample_bytree=0.6, gamma=0.5, max_depth=3, min_child_weight=10, subsample=1.0 
[CV]  colsample_bytree=0.6, gamma=0.5, max_depth=3, min_child_weight=10, subsample=1.0, score=0.6590909090909091, total=   3.0s
[CV] colsample_bytree=0.6, gamma=0.5, max_depth=3, min_child_weight=10, subsample=1.0 
[CV]  colsample_bytree=0.6, gamma=0.5, max_depth=3, min_child_weight=10, subsample=1.0, score=0.5569620253164557, total=   3.2s
[CV] colsample_bytree=0.6, gamma=0.5, max_depth=4, min_child_weight=1, subsample=0.6 
[CV]  colsample_bytree=0.6, gamma=0.5, max_depth=4, min_child_weight=1, subsample=0.6, score=0.5714285714285715, total=   4.2s
[CV] colsample

[CV]  colsample_bytree=0.6, gamma=0.5, max_depth=4, min_child_weight=10, subsample=0.8, score=0.5853658536585367, total=   3.4s
[CV] colsample_bytree=0.6, gamma=0.5, max_depth=4, min_child_weight=10, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=0.5, max_depth=4, min_child_weight=10, subsample=0.8, score=0.6292134831460675, total=   3.4s
[CV] colsample_bytree=0.6, gamma=0.5, max_depth=4, min_child_weight=10, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=0.5, max_depth=4, min_child_weight=10, subsample=0.8, score=0.6185567010309277, total=   3.5s
[CV] colsample_bytree=0.6, gamma=0.5, max_depth=4, min_child_weight=10, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=0.5, max_depth=4, min_child_weight=10, subsample=0.8, score=0.6206896551724139, total=   3.4s
[CV] colsample_bytree=0.6, gamma=0.5, max_depth=4, min_child_weight=10, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=0.5, max_depth=4, min_child_weight=10, subsample=0.8, score=0.5822784810126581, total=   3.4s
[CV] colsamp

[CV]  colsample_bytree=0.6, gamma=0.5, max_depth=5, min_child_weight=5, subsample=1.0, score=0.6097560975609756, total=   4.4s
[CV] colsample_bytree=0.6, gamma=0.5, max_depth=5, min_child_weight=10, subsample=0.6 
[CV]  colsample_bytree=0.6, gamma=0.5, max_depth=5, min_child_weight=10, subsample=0.6, score=0.6, total=   3.0s
[CV] colsample_bytree=0.6, gamma=0.5, max_depth=5, min_child_weight=10, subsample=0.6 
[CV]  colsample_bytree=0.6, gamma=0.5, max_depth=5, min_child_weight=10, subsample=0.6, score=0.6521739130434783, total=   2.9s
[CV] colsample_bytree=0.6, gamma=0.5, max_depth=5, min_child_weight=10, subsample=0.6 
[CV]  colsample_bytree=0.6, gamma=0.5, max_depth=5, min_child_weight=10, subsample=0.6, score=0.5434782608695652, total=   2.9s
[CV] colsample_bytree=0.6, gamma=0.5, max_depth=5, min_child_weight=10, subsample=0.6 
[CV]  colsample_bytree=0.6, gamma=0.5, max_depth=5, min_child_weight=10, subsample=0.6, score=0.6436781609195402, total=   2.9s
[CV] colsample_bytree=0.6, g

[CV]  colsample_bytree=0.6, gamma=1, max_depth=3, min_child_weight=5, subsample=0.8, score=0.6136363636363636, total=   2.8s
[CV] colsample_bytree=0.6, gamma=1, max_depth=3, min_child_weight=5, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=1, max_depth=3, min_child_weight=5, subsample=0.8, score=0.5853658536585366, total=   2.8s
[CV] colsample_bytree=0.6, gamma=1, max_depth=3, min_child_weight=5, subsample=1.0 
[CV]  colsample_bytree=0.6, gamma=1, max_depth=3, min_child_weight=5, subsample=1.0, score=0.5301204819277109, total=   2.9s
[CV] colsample_bytree=0.6, gamma=1, max_depth=3, min_child_weight=5, subsample=1.0 
[CV]  colsample_bytree=0.6, gamma=1, max_depth=3, min_child_weight=5, subsample=1.0, score=0.6588235294117647, total=   2.8s
[CV] colsample_bytree=0.6, gamma=1, max_depth=3, min_child_weight=5, subsample=1.0 
[CV]  colsample_bytree=0.6, gamma=1, max_depth=3, min_child_weight=5, subsample=1.0, score=0.5894736842105264, total=   2.8s
[CV] colsample_bytree=0.6, gamma=1, max

[CV]  colsample_bytree=0.6, gamma=1, max_depth=4, min_child_weight=5, subsample=0.6, score=0.6067415730337078, total=   3.6s
[CV] colsample_bytree=0.6, gamma=1, max_depth=4, min_child_weight=5, subsample=0.6 
[CV]  colsample_bytree=0.6, gamma=1, max_depth=4, min_child_weight=5, subsample=0.6, score=0.5714285714285715, total=   3.6s
[CV] colsample_bytree=0.6, gamma=1, max_depth=4, min_child_weight=5, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=1, max_depth=4, min_child_weight=5, subsample=0.8, score=0.5238095238095238, total=   3.8s
[CV] colsample_bytree=0.6, gamma=1, max_depth=4, min_child_weight=5, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=1, max_depth=4, min_child_weight=5, subsample=0.8, score=0.6511627906976744, total=   3.7s
[CV] colsample_bytree=0.6, gamma=1, max_depth=4, min_child_weight=5, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=1, max_depth=4, min_child_weight=5, subsample=0.8, score=0.6391752577319587, total=   3.6s
[CV] colsample_bytree=0.6, gamma=1, max

[CV]  colsample_bytree=0.6, gamma=1, max_depth=5, min_child_weight=1, subsample=1.0, score=0.6304347826086956, total=   4.5s
[CV] colsample_bytree=0.6, gamma=1, max_depth=5, min_child_weight=1, subsample=1.0 
[CV]  colsample_bytree=0.6, gamma=1, max_depth=5, min_child_weight=1, subsample=1.0, score=0.6341463414634148, total=   4.6s
[CV] colsample_bytree=0.6, gamma=1, max_depth=5, min_child_weight=5, subsample=0.6 
[CV]  colsample_bytree=0.6, gamma=1, max_depth=5, min_child_weight=5, subsample=0.6, score=0.5499999999999999, total=   4.2s
[CV] colsample_bytree=0.6, gamma=1, max_depth=5, min_child_weight=5, subsample=0.6 
[CV]  colsample_bytree=0.6, gamma=1, max_depth=5, min_child_weight=5, subsample=0.6, score=0.7096774193548386, total=   4.2s
[CV] colsample_bytree=0.6, gamma=1, max_depth=5, min_child_weight=5, subsample=0.6 
[CV]  colsample_bytree=0.6, gamma=1, max_depth=5, min_child_weight=5, subsample=0.6, score=0.6185567010309277, total=   4.2s
[CV] colsample_bytree=0.6, gamma=1, max

[CV]  colsample_bytree=0.6, gamma=1.5, max_depth=3, min_child_weight=1, subsample=0.8, score=0.5714285714285713, total=   3.1s
[CV] colsample_bytree=0.6, gamma=1.5, max_depth=3, min_child_weight=1, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=1.5, max_depth=3, min_child_weight=1, subsample=0.8, score=0.5822784810126581, total=   2.9s
[CV] colsample_bytree=0.6, gamma=1.5, max_depth=3, min_child_weight=1, subsample=1.0 
[CV]  colsample_bytree=0.6, gamma=1.5, max_depth=3, min_child_weight=1, subsample=1.0, score=0.5542168674698796, total=   2.9s
[CV] colsample_bytree=0.6, gamma=1.5, max_depth=3, min_child_weight=1, subsample=1.0 
[CV]  colsample_bytree=0.6, gamma=1.5, max_depth=3, min_child_weight=1, subsample=1.0, score=0.6279069767441862, total=   3.0s
[CV] colsample_bytree=0.6, gamma=1.5, max_depth=3, min_child_weight=1, subsample=1.0 
[CV]  colsample_bytree=0.6, gamma=1.5, max_depth=3, min_child_weight=1, subsample=1.0, score=0.6249999999999999, total=   3.0s
[CV] colsample_bytree

[CV]  colsample_bytree=0.6, gamma=1.5, max_depth=4, min_child_weight=1, subsample=0.6, score=0.6666666666666667, total=   3.8s
[CV] colsample_bytree=0.6, gamma=1.5, max_depth=4, min_child_weight=1, subsample=0.6 
[CV]  colsample_bytree=0.6, gamma=1.5, max_depth=4, min_child_weight=1, subsample=0.6, score=0.6292134831460675, total=   3.8s
[CV] colsample_bytree=0.6, gamma=1.5, max_depth=4, min_child_weight=1, subsample=0.6 
[CV]  colsample_bytree=0.6, gamma=1.5, max_depth=4, min_child_weight=1, subsample=0.6, score=0.5569620253164557, total=   3.8s
[CV] colsample_bytree=0.6, gamma=1.5, max_depth=4, min_child_weight=1, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=1.5, max_depth=4, min_child_weight=1, subsample=0.8, score=0.5647058823529412, total=   3.8s
[CV] colsample_bytree=0.6, gamma=1.5, max_depth=4, min_child_weight=1, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=1.5, max_depth=4, min_child_weight=1, subsample=0.8, score=0.6588235294117647, total=   3.7s
[CV] colsample_bytree

[CV]  colsample_bytree=0.6, gamma=1.5, max_depth=4, min_child_weight=10, subsample=1.0, score=0.6590909090909091, total=   3.9s
[CV] colsample_bytree=0.6, gamma=1.5, max_depth=4, min_child_weight=10, subsample=1.0 
[CV]  colsample_bytree=0.6, gamma=1.5, max_depth=4, min_child_weight=10, subsample=1.0, score=0.6391752577319587, total=   3.9s
[CV] colsample_bytree=0.6, gamma=1.5, max_depth=4, min_child_weight=10, subsample=1.0 
[CV]  colsample_bytree=0.6, gamma=1.5, max_depth=4, min_child_weight=10, subsample=1.0, score=0.6304347826086956, total=   3.7s
[CV] colsample_bytree=0.6, gamma=1.5, max_depth=4, min_child_weight=10, subsample=1.0 
[CV]  colsample_bytree=0.6, gamma=1.5, max_depth=4, min_child_weight=10, subsample=1.0, score=0.5647058823529411, total=   3.7s
[CV] colsample_bytree=0.6, gamma=1.5, max_depth=5, min_child_weight=1, subsample=0.6 
[CV]  colsample_bytree=0.6, gamma=1.5, max_depth=5, min_child_weight=1, subsample=0.6, score=0.5853658536585367, total=   4.7s
[CV] colsample

[CV]  colsample_bytree=0.6, gamma=1.5, max_depth=5, min_child_weight=10, subsample=0.8, score=0.5499999999999999, total=   3.8s
[CV] colsample_bytree=0.6, gamma=1.5, max_depth=5, min_child_weight=10, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=1.5, max_depth=5, min_child_weight=10, subsample=0.8, score=0.6363636363636364, total=   3.6s
[CV] colsample_bytree=0.6, gamma=1.5, max_depth=5, min_child_weight=10, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=1.5, max_depth=5, min_child_weight=10, subsample=0.8, score=0.6185567010309277, total=   3.6s
[CV] colsample_bytree=0.6, gamma=1.5, max_depth=5, min_child_weight=10, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=1.5, max_depth=5, min_child_weight=10, subsample=0.8, score=0.5882352941176471, total=   3.6s
[CV] colsample_bytree=0.6, gamma=1.5, max_depth=5, min_child_weight=10, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=1.5, max_depth=5, min_child_weight=10, subsample=0.8, score=0.6190476190476191, total=   3.6s
[CV] colsamp

[CV]  colsample_bytree=0.6, gamma=2, max_depth=3, min_child_weight=10, subsample=0.6, score=0.6172839506172839, total=   2.8s
[CV] colsample_bytree=0.6, gamma=2, max_depth=3, min_child_weight=10, subsample=0.6 
[CV]  colsample_bytree=0.6, gamma=2, max_depth=3, min_child_weight=10, subsample=0.6, score=0.6170212765957446, total=   2.8s
[CV] colsample_bytree=0.6, gamma=2, max_depth=3, min_child_weight=10, subsample=0.6 
[CV]  colsample_bytree=0.6, gamma=2, max_depth=3, min_child_weight=10, subsample=0.6, score=0.5714285714285714, total=   2.7s
[CV] colsample_bytree=0.6, gamma=2, max_depth=3, min_child_weight=10, subsample=0.6 
[CV]  colsample_bytree=0.6, gamma=2, max_depth=3, min_child_weight=10, subsample=0.6, score=0.6279069767441862, total=   2.8s
[CV] colsample_bytree=0.6, gamma=2, max_depth=3, min_child_weight=10, subsample=0.6 
[CV]  colsample_bytree=0.6, gamma=2, max_depth=3, min_child_weight=10, subsample=0.6, score=0.6190476190476191, total=   2.8s
[CV] colsample_bytree=0.6, gam

[CV]  colsample_bytree=0.6, gamma=2, max_depth=4, min_child_weight=5, subsample=1.0, score=0.5060240963855421, total=   3.7s
[CV] colsample_bytree=0.6, gamma=2, max_depth=4, min_child_weight=5, subsample=1.0 
[CV]  colsample_bytree=0.6, gamma=2, max_depth=4, min_child_weight=5, subsample=1.0, score=0.6265060240963856, total=   3.7s
[CV] colsample_bytree=0.6, gamma=2, max_depth=4, min_child_weight=5, subsample=1.0 
[CV]  colsample_bytree=0.6, gamma=2, max_depth=4, min_child_weight=5, subsample=1.0, score=0.5773195876288659, total=   3.8s
[CV] colsample_bytree=0.6, gamma=2, max_depth=4, min_child_weight=5, subsample=1.0 
[CV]  colsample_bytree=0.6, gamma=2, max_depth=4, min_child_weight=5, subsample=1.0, score=0.6279069767441862, total=   3.9s
[CV] colsample_bytree=0.6, gamma=2, max_depth=4, min_child_weight=5, subsample=1.0 
[CV]  colsample_bytree=0.6, gamma=2, max_depth=4, min_child_weight=5, subsample=1.0, score=0.5569620253164557, total=   3.7s
[CV] colsample_bytree=0.6, gamma=2, max

[CV]  colsample_bytree=0.6, gamma=2, max_depth=5, min_child_weight=5, subsample=0.8, score=0.5783132530120482, total=   4.5s
[CV] colsample_bytree=0.6, gamma=2, max_depth=5, min_child_weight=5, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=2, max_depth=5, min_child_weight=5, subsample=0.8, score=0.6511627906976744, total=   4.6s
[CV] colsample_bytree=0.6, gamma=2, max_depth=5, min_child_weight=5, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=2, max_depth=5, min_child_weight=5, subsample=0.8, score=0.6262626262626263, total=   4.8s
[CV] colsample_bytree=0.6, gamma=2, max_depth=5, min_child_weight=5, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=2, max_depth=5, min_child_weight=5, subsample=0.8, score=0.6279069767441862, total=   4.4s
[CV] colsample_bytree=0.6, gamma=2, max_depth=5, min_child_weight=5, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=2, max_depth=5, min_child_weight=5, subsample=0.8, score=0.6024096385542169, total=   4.4s
[CV] colsample_bytree=0.6, gamma=2, max

[CV]  colsample_bytree=0.6, gamma=5, max_depth=3, min_child_weight=5, subsample=0.6, score=0.5333333333333333, total=   3.0s
[CV] colsample_bytree=0.6, gamma=5, max_depth=3, min_child_weight=5, subsample=0.6 
[CV]  colsample_bytree=0.6, gamma=5, max_depth=3, min_child_weight=5, subsample=0.6, score=0.7126436781609196, total=   2.9s
[CV] colsample_bytree=0.6, gamma=5, max_depth=3, min_child_weight=5, subsample=0.6 
[CV]  colsample_bytree=0.6, gamma=5, max_depth=3, min_child_weight=5, subsample=0.6, score=0.6153846153846154, total=   3.1s
[CV] colsample_bytree=0.6, gamma=5, max_depth=3, min_child_weight=5, subsample=0.6 
[CV]  colsample_bytree=0.6, gamma=5, max_depth=3, min_child_weight=5, subsample=0.6, score=0.6046511627906977, total=   3.0s
[CV] colsample_bytree=0.6, gamma=5, max_depth=3, min_child_weight=5, subsample=0.6 
[CV]  colsample_bytree=0.6, gamma=5, max_depth=3, min_child_weight=5, subsample=0.6, score=0.6046511627906977, total=   3.1s
[CV] colsample_bytree=0.6, gamma=5, max

[CV]  colsample_bytree=0.6, gamma=5, max_depth=4, min_child_weight=1, subsample=1.0, score=0.5952380952380952, total=   4.8s
[CV] colsample_bytree=0.6, gamma=5, max_depth=4, min_child_weight=1, subsample=1.0 
[CV]  colsample_bytree=0.6, gamma=5, max_depth=4, min_child_weight=1, subsample=1.0, score=0.6428571428571429, total=   4.3s
[CV] colsample_bytree=0.6, gamma=5, max_depth=4, min_child_weight=1, subsample=1.0 
[CV]  colsample_bytree=0.6, gamma=5, max_depth=4, min_child_weight=1, subsample=1.0, score=0.6666666666666667, total=   4.2s
[CV] colsample_bytree=0.6, gamma=5, max_depth=4, min_child_weight=1, subsample=1.0 
[CV]  colsample_bytree=0.6, gamma=5, max_depth=4, min_child_weight=1, subsample=1.0, score=0.6808510638297872, total=   4.4s
[CV] colsample_bytree=0.6, gamma=5, max_depth=4, min_child_weight=1, subsample=1.0 
[CV]  colsample_bytree=0.6, gamma=5, max_depth=4, min_child_weight=1, subsample=1.0, score=0.5569620253164557, total=   4.3s
[CV] colsample_bytree=0.6, gamma=5, max

[CV]  colsample_bytree=0.6, gamma=5, max_depth=5, min_child_weight=1, subsample=0.8, score=0.5714285714285715, total=   4.6s
[CV] colsample_bytree=0.6, gamma=5, max_depth=5, min_child_weight=1, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=5, max_depth=5, min_child_weight=1, subsample=0.8, score=0.6511627906976744, total=   4.6s
[CV] colsample_bytree=0.6, gamma=5, max_depth=5, min_child_weight=1, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=5, max_depth=5, min_child_weight=1, subsample=0.8, score=0.6526315789473683, total=   4.6s
[CV] colsample_bytree=0.6, gamma=5, max_depth=5, min_child_weight=1, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=5, max_depth=5, min_child_weight=1, subsample=0.8, score=0.6666666666666666, total=   4.6s
[CV] colsample_bytree=0.6, gamma=5, max_depth=5, min_child_weight=1, subsample=0.8 
[CV]  colsample_bytree=0.6, gamma=5, max_depth=5, min_child_weight=1, subsample=0.8, score=0.5822784810126581, total=   4.7s
[CV] colsample_bytree=0.6, gamma=5, max

[CV]  colsample_bytree=0.8, gamma=0.5, max_depth=3, min_child_weight=1, subsample=0.6, score=0.5499999999999999, total=   3.8s
[CV] colsample_bytree=0.8, gamma=0.5, max_depth=3, min_child_weight=1, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=0.5, max_depth=3, min_child_weight=1, subsample=0.6, score=0.688888888888889, total=   3.8s
[CV] colsample_bytree=0.8, gamma=0.5, max_depth=3, min_child_weight=1, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=0.5, max_depth=3, min_child_weight=1, subsample=0.6, score=0.6530612244897959, total=   3.8s
[CV] colsample_bytree=0.8, gamma=0.5, max_depth=3, min_child_weight=1, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=0.5, max_depth=3, min_child_weight=1, subsample=0.6, score=0.6444444444444444, total=   3.8s
[CV] colsample_bytree=0.8, gamma=0.5, max_depth=3, min_child_weight=1, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=0.5, max_depth=3, min_child_weight=1, subsample=0.6, score=0.6419753086419754, total=   3.8s
[CV] colsample_bytree=

[CV]  colsample_bytree=0.8, gamma=0.5, max_depth=3, min_child_weight=10, subsample=0.8, score=0.5316455696202532, total=   3.9s
[CV] colsample_bytree=0.8, gamma=0.5, max_depth=3, min_child_weight=10, subsample=1.0 
[CV]  colsample_bytree=0.8, gamma=0.5, max_depth=3, min_child_weight=10, subsample=1.0, score=0.5853658536585367, total=   3.7s
[CV] colsample_bytree=0.8, gamma=0.5, max_depth=3, min_child_weight=10, subsample=1.0 
[CV]  colsample_bytree=0.8, gamma=0.5, max_depth=3, min_child_weight=10, subsample=1.0, score=0.7045454545454546, total=   3.7s
[CV] colsample_bytree=0.8, gamma=0.5, max_depth=3, min_child_weight=10, subsample=1.0 
[CV]  colsample_bytree=0.8, gamma=0.5, max_depth=3, min_child_weight=10, subsample=1.0, score=0.6382978723404256, total=   3.8s
[CV] colsample_bytree=0.8, gamma=0.5, max_depth=3, min_child_weight=10, subsample=1.0 
[CV]  colsample_bytree=0.8, gamma=0.5, max_depth=3, min_child_weight=10, subsample=1.0, score=0.6444444444444444, total=   3.8s
[CV] colsamp

[CV]  colsample_bytree=0.8, gamma=0.5, max_depth=4, min_child_weight=10, subsample=0.6, score=0.6190476190476191, total=   4.1s
[CV] colsample_bytree=0.8, gamma=0.5, max_depth=4, min_child_weight=10, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=0.5, max_depth=4, min_child_weight=10, subsample=0.6, score=0.5897435897435898, total=   4.0s
[CV] colsample_bytree=0.8, gamma=0.5, max_depth=4, min_child_weight=10, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=0.5, max_depth=4, min_child_weight=10, subsample=0.8, score=0.5882352941176471, total=   4.9s
[CV] colsample_bytree=0.8, gamma=0.5, max_depth=4, min_child_weight=10, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=0.5, max_depth=4, min_child_weight=10, subsample=0.8, score=0.6352941176470588, total=   4.5s
[CV] colsample_bytree=0.8, gamma=0.5, max_depth=4, min_child_weight=10, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=0.5, max_depth=4, min_child_weight=10, subsample=0.8, score=0.6304347826086956, total=   4.6s
[CV] colsamp

[CV]  colsample_bytree=0.8, gamma=0.5, max_depth=5, min_child_weight=5, subsample=1.0, score=0.6249999999999999, total=   5.9s
[CV] colsample_bytree=0.8, gamma=0.5, max_depth=5, min_child_weight=5, subsample=1.0 
[CV]  colsample_bytree=0.8, gamma=0.5, max_depth=5, min_child_weight=5, subsample=1.0, score=0.6593406593406593, total=   5.9s
[CV] colsample_bytree=0.8, gamma=0.5, max_depth=5, min_child_weight=5, subsample=1.0 
[CV]  colsample_bytree=0.8, gamma=0.5, max_depth=5, min_child_weight=5, subsample=1.0, score=0.6352941176470588, total=   5.9s
[CV] colsample_bytree=0.8, gamma=0.5, max_depth=5, min_child_weight=10, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=0.5, max_depth=5, min_child_weight=10, subsample=0.6, score=0.5641025641025641, total=   3.8s
[CV] colsample_bytree=0.8, gamma=0.5, max_depth=5, min_child_weight=10, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=0.5, max_depth=5, min_child_weight=10, subsample=0.6, score=0.6956521739130435, total=   3.8s
[CV] colsample_by

[CV]  colsample_bytree=0.8, gamma=1, max_depth=3, min_child_weight=5, subsample=0.8, score=0.6506024096385543, total=   3.8s
[CV] colsample_bytree=0.8, gamma=1, max_depth=3, min_child_weight=5, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=1, max_depth=3, min_child_weight=5, subsample=0.8, score=0.6236559139784946, total=   3.9s
[CV] colsample_bytree=0.8, gamma=1, max_depth=3, min_child_weight=5, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=1, max_depth=3, min_child_weight=5, subsample=0.8, score=0.5647058823529411, total=   4.0s
[CV] colsample_bytree=0.8, gamma=1, max_depth=3, min_child_weight=5, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=1, max_depth=3, min_child_weight=5, subsample=0.8, score=0.5542168674698795, total=   3.8s
[CV] colsample_bytree=0.8, gamma=1, max_depth=3, min_child_weight=5, subsample=1.0 
[CV]  colsample_bytree=0.8, gamma=1, max_depth=3, min_child_weight=5, subsample=1.0, score=0.5365853658536586, total=   3.8s
[CV] colsample_bytree=0.8, gamma=1, max

[CV]  colsample_bytree=0.8, gamma=1, max_depth=4, min_child_weight=5, subsample=0.6, score=0.711111111111111, total=   5.0s
[CV] colsample_bytree=0.8, gamma=1, max_depth=4, min_child_weight=5, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=1, max_depth=4, min_child_weight=5, subsample=0.6, score=0.6105263157894737, total=   4.9s
[CV] colsample_bytree=0.8, gamma=1, max_depth=4, min_child_weight=5, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=1, max_depth=4, min_child_weight=5, subsample=0.6, score=0.6363636363636364, total=   4.7s
[CV] colsample_bytree=0.8, gamma=1, max_depth=4, min_child_weight=5, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=1, max_depth=4, min_child_weight=5, subsample=0.6, score=0.6341463414634148, total=   4.9s
[CV] colsample_bytree=0.8, gamma=1, max_depth=4, min_child_weight=5, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=1, max_depth=4, min_child_weight=5, subsample=0.8, score=0.5432098765432098, total=   4.9s
[CV] colsample_bytree=0.8, gamma=1, max_

[CV]  colsample_bytree=0.8, gamma=1, max_depth=5, min_child_weight=1, subsample=1.0, score=0.6666666666666666, total=   6.3s
[CV] colsample_bytree=0.8, gamma=1, max_depth=5, min_child_weight=1, subsample=1.0 
[CV]  colsample_bytree=0.8, gamma=1, max_depth=5, min_child_weight=1, subsample=1.0, score=0.6666666666666667, total=   6.3s
[CV] colsample_bytree=0.8, gamma=1, max_depth=5, min_child_weight=1, subsample=1.0 
[CV]  colsample_bytree=0.8, gamma=1, max_depth=5, min_child_weight=1, subsample=1.0, score=0.6956521739130435, total=   6.4s
[CV] colsample_bytree=0.8, gamma=1, max_depth=5, min_child_weight=1, subsample=1.0 
[CV]  colsample_bytree=0.8, gamma=1, max_depth=5, min_child_weight=1, subsample=1.0, score=0.5977011494252873, total=   6.3s
[CV] colsample_bytree=0.8, gamma=1, max_depth=5, min_child_weight=5, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=1, max_depth=5, min_child_weight=5, subsample=0.6, score=0.574712643678161, total=   5.7s
[CV] colsample_bytree=0.8, gamma=1, max_

[CV]  colsample_bytree=0.8, gamma=1.5, max_depth=3, min_child_weight=1, subsample=0.8, score=0.5714285714285713, total=   3.9s
[CV] colsample_bytree=0.8, gamma=1.5, max_depth=3, min_child_weight=1, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=1.5, max_depth=3, min_child_weight=1, subsample=0.8, score=0.5957446808510639, total=   3.8s
[CV] colsample_bytree=0.8, gamma=1.5, max_depth=3, min_child_weight=1, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=1.5, max_depth=3, min_child_weight=1, subsample=0.8, score=0.5813953488372093, total=   3.8s
[CV] colsample_bytree=0.8, gamma=1.5, max_depth=3, min_child_weight=1, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=1.5, max_depth=3, min_child_weight=1, subsample=0.8, score=0.5542168674698795, total=   3.8s
[CV] colsample_bytree=0.8, gamma=1.5, max_depth=3, min_child_weight=1, subsample=1.0 
[CV]  colsample_bytree=0.8, gamma=1.5, max_depth=3, min_child_weight=1, subsample=1.0, score=0.5249999999999999, total=   3.8s
[CV] colsample_bytree

[CV]  colsample_bytree=0.8, gamma=1.5, max_depth=4, min_child_weight=1, subsample=0.6, score=0.6024096385542169, total=   5.0s
[CV] colsample_bytree=0.8, gamma=1.5, max_depth=4, min_child_weight=1, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=1.5, max_depth=4, min_child_weight=1, subsample=0.6, score=0.6666666666666666, total=   5.0s
[CV] colsample_bytree=0.8, gamma=1.5, max_depth=4, min_child_weight=1, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=1.5, max_depth=4, min_child_weight=1, subsample=0.6, score=0.6041666666666667, total=   4.9s
[CV] colsample_bytree=0.8, gamma=1.5, max_depth=4, min_child_weight=1, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=1.5, max_depth=4, min_child_weight=1, subsample=0.6, score=0.6736842105263158, total=   5.0s
[CV] colsample_bytree=0.8, gamma=1.5, max_depth=4, min_child_weight=1, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=1.5, max_depth=4, min_child_weight=1, subsample=0.6, score=0.5714285714285713, total=   5.2s
[CV] colsample_bytree

[CV]  colsample_bytree=0.8, gamma=1.5, max_depth=4, min_child_weight=10, subsample=0.8, score=0.5853658536585366, total=   4.5s
[CV] colsample_bytree=0.8, gamma=1.5, max_depth=4, min_child_weight=10, subsample=1.0 
[CV]  colsample_bytree=0.8, gamma=1.5, max_depth=4, min_child_weight=10, subsample=1.0, score=0.5647058823529412, total=   4.7s
[CV] colsample_bytree=0.8, gamma=1.5, max_depth=4, min_child_weight=10, subsample=1.0 
[CV]  colsample_bytree=0.8, gamma=1.5, max_depth=4, min_child_weight=10, subsample=1.0, score=0.6352941176470588, total=   4.8s
[CV] colsample_bytree=0.8, gamma=1.5, max_depth=4, min_child_weight=10, subsample=1.0 
[CV]  colsample_bytree=0.8, gamma=1.5, max_depth=4, min_child_weight=10, subsample=1.0, score=0.6236559139784946, total=   4.7s
[CV] colsample_bytree=0.8, gamma=1.5, max_depth=4, min_child_weight=10, subsample=1.0 
[CV]  colsample_bytree=0.8, gamma=1.5, max_depth=4, min_child_weight=10, subsample=1.0, score=0.688888888888889, total=   4.7s
[CV] colsampl

[CV]  colsample_bytree=0.8, gamma=1.5, max_depth=5, min_child_weight=10, subsample=0.6, score=0.6588235294117647, total=   3.9s
[CV] colsample_bytree=0.8, gamma=1.5, max_depth=5, min_child_weight=10, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=1.5, max_depth=5, min_child_weight=10, subsample=0.6, score=0.617283950617284, total=   4.0s
[CV] colsample_bytree=0.8, gamma=1.5, max_depth=5, min_child_weight=10, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=1.5, max_depth=5, min_child_weight=10, subsample=0.8, score=0.5647058823529412, total=   4.8s
[CV] colsample_bytree=0.8, gamma=1.5, max_depth=5, min_child_weight=10, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=1.5, max_depth=5, min_child_weight=10, subsample=0.8, score=0.6590909090909091, total=   4.7s
[CV] colsample_bytree=0.8, gamma=1.5, max_depth=5, min_child_weight=10, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=1.5, max_depth=5, min_child_weight=10, subsample=0.8, score=0.6304347826086956, total=   4.8s
[CV] colsampl

[CV]  colsample_bytree=0.8, gamma=2, max_depth=3, min_child_weight=5, subsample=1.0, score=0.651685393258427, total=   3.9s
[CV] colsample_bytree=0.8, gamma=2, max_depth=3, min_child_weight=5, subsample=1.0 
[CV]  colsample_bytree=0.8, gamma=2, max_depth=3, min_child_weight=5, subsample=1.0, score=0.5263157894736842, total=   4.1s
[CV] colsample_bytree=0.8, gamma=2, max_depth=3, min_child_weight=10, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=2, max_depth=3, min_child_weight=10, subsample=0.6, score=0.5454545454545454, total=   4.1s
[CV] colsample_bytree=0.8, gamma=2, max_depth=3, min_child_weight=10, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=2, max_depth=3, min_child_weight=10, subsample=0.6, score=0.6593406593406593, total=   3.8s
[CV] colsample_bytree=0.8, gamma=2, max_depth=3, min_child_weight=10, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=2, max_depth=3, min_child_weight=10, subsample=0.6, score=0.5591397849462366, total=   3.7s
[CV] colsample_bytree=0.8, gamma=2

[CV]  colsample_bytree=0.8, gamma=2, max_depth=4, min_child_weight=5, subsample=0.8, score=0.611764705882353, total=   4.9s
[CV] colsample_bytree=0.8, gamma=2, max_depth=4, min_child_weight=5, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=2, max_depth=4, min_child_weight=5, subsample=0.8, score=0.5783132530120482, total=   4.9s
[CV] colsample_bytree=0.8, gamma=2, max_depth=4, min_child_weight=5, subsample=1.0 
[CV]  colsample_bytree=0.8, gamma=2, max_depth=4, min_child_weight=5, subsample=1.0, score=0.5301204819277109, total=   4.8s
[CV] colsample_bytree=0.8, gamma=2, max_depth=4, min_child_weight=5, subsample=1.0 
[CV]  colsample_bytree=0.8, gamma=2, max_depth=4, min_child_weight=5, subsample=1.0, score=0.651685393258427, total=   4.7s
[CV] colsample_bytree=0.8, gamma=2, max_depth=4, min_child_weight=5, subsample=1.0 
[CV]  colsample_bytree=0.8, gamma=2, max_depth=4, min_child_weight=5, subsample=1.0, score=0.6122448979591836, total=   4.8s
[CV] colsample_bytree=0.8, gamma=2, max_d

[CV]  colsample_bytree=0.8, gamma=2, max_depth=5, min_child_weight=5, subsample=0.6, score=0.6304347826086956, total=   5.9s
[CV] colsample_bytree=0.8, gamma=2, max_depth=5, min_child_weight=5, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=2, max_depth=5, min_child_weight=5, subsample=0.6, score=0.5679012345679012, total=   5.8s
[CV] colsample_bytree=0.8, gamma=2, max_depth=5, min_child_weight=5, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=2, max_depth=5, min_child_weight=5, subsample=0.8, score=0.5853658536585367, total=   6.0s
[CV] colsample_bytree=0.8, gamma=2, max_depth=5, min_child_weight=5, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=2, max_depth=5, min_child_weight=5, subsample=0.8, score=0.6741573033707865, total=   5.8s
[CV] colsample_bytree=0.8, gamma=2, max_depth=5, min_child_weight=5, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=2, max_depth=5, min_child_weight=5, subsample=0.8, score=0.6185567010309277, total=   5.9s
[CV] colsample_bytree=0.8, gamma=2, max

[CV]  colsample_bytree=0.8, gamma=5, max_depth=3, min_child_weight=1, subsample=1.0, score=0.651685393258427, total=   4.0s
[CV] colsample_bytree=0.8, gamma=5, max_depth=3, min_child_weight=1, subsample=1.0 
[CV]  colsample_bytree=0.8, gamma=5, max_depth=3, min_child_weight=1, subsample=1.0, score=0.5925925925925926, total=   4.2s
[CV] colsample_bytree=0.8, gamma=5, max_depth=3, min_child_weight=5, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=5, max_depth=3, min_child_weight=5, subsample=0.6, score=0.5714285714285714, total=   3.9s
[CV] colsample_bytree=0.8, gamma=5, max_depth=3, min_child_weight=5, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=5, max_depth=3, min_child_weight=5, subsample=0.6, score=0.6666666666666666, total=   3.7s
[CV] colsample_bytree=0.8, gamma=5, max_depth=3, min_child_weight=5, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=5, max_depth=3, min_child_weight=5, subsample=0.6, score=0.6067415730337078, total=   3.7s
[CV] colsample_bytree=0.8, gamma=5, max_

[CV]  colsample_bytree=0.8, gamma=5, max_depth=4, min_child_weight=1, subsample=0.8, score=0.6666666666666666, total=   5.2s
[CV] colsample_bytree=0.8, gamma=5, max_depth=4, min_child_weight=1, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=5, max_depth=4, min_child_weight=1, subsample=0.8, score=0.5609756097560975, total=   4.9s
[CV] colsample_bytree=0.8, gamma=5, max_depth=4, min_child_weight=1, subsample=1.0 
[CV]  colsample_bytree=0.8, gamma=5, max_depth=4, min_child_weight=1, subsample=1.0, score=0.5679012345679012, total=   4.9s
[CV] colsample_bytree=0.8, gamma=5, max_depth=4, min_child_weight=1, subsample=1.0 
[CV]  colsample_bytree=0.8, gamma=5, max_depth=4, min_child_weight=1, subsample=1.0, score=0.6190476190476191, total=   4.9s
[CV] colsample_bytree=0.8, gamma=5, max_depth=4, min_child_weight=1, subsample=1.0 
[CV]  colsample_bytree=0.8, gamma=5, max_depth=4, min_child_weight=1, subsample=1.0, score=0.6739130434782609, total=   5.0s
[CV] colsample_bytree=0.8, gamma=5, max

[CV]  colsample_bytree=0.8, gamma=5, max_depth=5, min_child_weight=1, subsample=0.6, score=0.6741573033707865, total=   6.1s
[CV] colsample_bytree=0.8, gamma=5, max_depth=5, min_child_weight=1, subsample=0.6 
[CV]  colsample_bytree=0.8, gamma=5, max_depth=5, min_child_weight=1, subsample=0.6, score=0.575, total=   6.2s
[CV] colsample_bytree=0.8, gamma=5, max_depth=5, min_child_weight=1, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=5, max_depth=5, min_child_weight=1, subsample=0.8, score=0.5813953488372093, total=   6.4s
[CV] colsample_bytree=0.8, gamma=5, max_depth=5, min_child_weight=1, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=5, max_depth=5, min_child_weight=1, subsample=0.8, score=0.6741573033707865, total=   6.4s
[CV] colsample_bytree=0.8, gamma=5, max_depth=5, min_child_weight=1, subsample=0.8 
[CV]  colsample_bytree=0.8, gamma=5, max_depth=5, min_child_weight=1, subsample=0.8, score=0.6249999999999999, total=   6.5s
[CV] colsample_bytree=0.8, gamma=5, max_depth=5, min

[CV]  colsample_bytree=0.8, gamma=5, max_depth=5, min_child_weight=10, subsample=1.0, score=0.6279069767441862, total=   6.3s
[CV] colsample_bytree=0.8, gamma=5, max_depth=5, min_child_weight=10, subsample=1.0 
[CV]  colsample_bytree=0.8, gamma=5, max_depth=5, min_child_weight=10, subsample=1.0, score=0.5454545454545454, total=   5.9s
[CV] colsample_bytree=1.0, gamma=0.5, max_depth=3, min_child_weight=1, subsample=0.6 
[CV]  colsample_bytree=1.0, gamma=0.5, max_depth=3, min_child_weight=1, subsample=0.6, score=0.6329113924050633, total=   4.5s
[CV] colsample_bytree=1.0, gamma=0.5, max_depth=3, min_child_weight=1, subsample=0.6 
[CV]  colsample_bytree=1.0, gamma=0.5, max_depth=3, min_child_weight=1, subsample=0.6, score=0.5647058823529411, total=   4.6s
[CV] colsample_bytree=1.0, gamma=0.5, max_depth=3, min_child_weight=1, subsample=0.6 
[CV]  colsample_bytree=1.0, gamma=0.5, max_depth=3, min_child_weight=1, subsample=0.6, score=0.6086956521739131, total=   4.8s
[CV] colsample_bytree=1.

[CV]  colsample_bytree=1.0, gamma=0.5, max_depth=3, min_child_weight=10, subsample=0.8, score=0.6021505376344085, total=   4.5s
[CV] colsample_bytree=1.0, gamma=0.5, max_depth=3, min_child_weight=10, subsample=0.8 
[CV]  colsample_bytree=1.0, gamma=0.5, max_depth=3, min_child_weight=10, subsample=0.8, score=0.6363636363636364, total=   4.7s
[CV] colsample_bytree=1.0, gamma=0.5, max_depth=3, min_child_weight=10, subsample=0.8 
[CV]  colsample_bytree=1.0, gamma=0.5, max_depth=3, min_child_weight=10, subsample=0.8, score=0.617283950617284, total=   4.7s
[CV] colsample_bytree=1.0, gamma=0.5, max_depth=3, min_child_weight=10, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=0.5, max_depth=3, min_child_weight=10, subsample=1.0, score=0.5897435897435898, total=   4.8s
[CV] colsample_bytree=1.0, gamma=0.5, max_depth=3, min_child_weight=10, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=0.5, max_depth=3, min_child_weight=10, subsample=1.0, score=0.6818181818181818, total=   4.5s
[CV] colsampl

[CV]  colsample_bytree=1.0, gamma=0.5, max_depth=4, min_child_weight=10, subsample=0.6, score=0.6136363636363636, total=   4.6s
[CV] colsample_bytree=1.0, gamma=0.5, max_depth=4, min_child_weight=10, subsample=0.6 
[CV]  colsample_bytree=1.0, gamma=0.5, max_depth=4, min_child_weight=10, subsample=0.6, score=0.6170212765957446, total=   4.5s
[CV] colsample_bytree=1.0, gamma=0.5, max_depth=4, min_child_weight=10, subsample=0.6 
[CV]  colsample_bytree=1.0, gamma=0.5, max_depth=4, min_child_weight=10, subsample=0.6, score=0.6190476190476191, total=   4.4s
[CV] colsample_bytree=1.0, gamma=0.5, max_depth=4, min_child_weight=10, subsample=0.6 
[CV]  colsample_bytree=1.0, gamma=0.5, max_depth=4, min_child_weight=10, subsample=0.6, score=0.6190476190476191, total=   4.7s
[CV] colsample_bytree=1.0, gamma=0.5, max_depth=4, min_child_weight=10, subsample=0.8 
[CV]  colsample_bytree=1.0, gamma=0.5, max_depth=4, min_child_weight=10, subsample=0.8, score=0.5432098765432098, total=   5.5s
[CV] colsamp

[CV]  colsample_bytree=1.0, gamma=0.5, max_depth=5, min_child_weight=5, subsample=1.0, score=0.5176470588235293, total=   7.1s
[CV] colsample_bytree=1.0, gamma=0.5, max_depth=5, min_child_weight=5, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=0.5, max_depth=5, min_child_weight=5, subsample=1.0, score=0.6511627906976744, total=   7.0s
[CV] colsample_bytree=1.0, gamma=0.5, max_depth=5, min_child_weight=5, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=0.5, max_depth=5, min_child_weight=5, subsample=1.0, score=0.6249999999999999, total=   7.0s
[CV] colsample_bytree=1.0, gamma=0.5, max_depth=5, min_child_weight=5, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=0.5, max_depth=5, min_child_weight=5, subsample=1.0, score=0.6222222222222223, total=   7.1s
[CV] colsample_bytree=1.0, gamma=0.5, max_depth=5, min_child_weight=5, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=0.5, max_depth=5, min_child_weight=5, subsample=1.0, score=0.5977011494252873, total=   7.1s
[CV] colsample_bytree

[CV]  colsample_bytree=1.0, gamma=1, max_depth=3, min_child_weight=5, subsample=0.6, score=0.6097560975609756, total=   4.4s
[CV] colsample_bytree=1.0, gamma=1, max_depth=3, min_child_weight=5, subsample=0.8 
[CV]  colsample_bytree=1.0, gamma=1, max_depth=3, min_child_weight=5, subsample=0.8, score=0.5432098765432098, total=   4.4s
[CV] colsample_bytree=1.0, gamma=1, max_depth=3, min_child_weight=5, subsample=0.8 
[CV]  colsample_bytree=1.0, gamma=1, max_depth=3, min_child_weight=5, subsample=0.8, score=0.6744186046511628, total=   4.3s
[CV] colsample_bytree=1.0, gamma=1, max_depth=3, min_child_weight=5, subsample=0.8 
[CV]  colsample_bytree=1.0, gamma=1, max_depth=3, min_child_weight=5, subsample=0.8, score=0.6236559139784946, total=   4.4s
[CV] colsample_bytree=1.0, gamma=1, max_depth=3, min_child_weight=5, subsample=0.8 
[CV]  colsample_bytree=1.0, gamma=1, max_depth=3, min_child_weight=5, subsample=0.8, score=0.6046511627906977, total=   4.4s
[CV] colsample_bytree=1.0, gamma=1, max

[CV]  colsample_bytree=1.0, gamma=1, max_depth=4, min_child_weight=1, subsample=1.0, score=0.5714285714285715, total=   5.9s
[CV] colsample_bytree=1.0, gamma=1, max_depth=4, min_child_weight=5, subsample=0.6 
[CV]  colsample_bytree=1.0, gamma=1, max_depth=4, min_child_weight=5, subsample=0.6, score=0.5925925925925926, total=   5.7s
[CV] colsample_bytree=1.0, gamma=1, max_depth=4, min_child_weight=5, subsample=0.6 
[CV]  colsample_bytree=1.0, gamma=1, max_depth=4, min_child_weight=5, subsample=0.6, score=0.717391304347826, total=   5.7s
[CV] colsample_bytree=1.0, gamma=1, max_depth=4, min_child_weight=5, subsample=0.6 
[CV]  colsample_bytree=1.0, gamma=1, max_depth=4, min_child_weight=5, subsample=0.6, score=0.6041666666666667, total=   5.7s
[CV] colsample_bytree=1.0, gamma=1, max_depth=4, min_child_weight=5, subsample=0.6 
[CV]  colsample_bytree=1.0, gamma=1, max_depth=4, min_child_weight=5, subsample=0.6, score=0.6666666666666666, total=   5.7s
[CV] colsample_bytree=1.0, gamma=1, max_

[CV]  colsample_bytree=1.0, gamma=1, max_depth=5, min_child_weight=1, subsample=0.8, score=0.5853658536585366, total=   7.3s
[CV] colsample_bytree=1.0, gamma=1, max_depth=5, min_child_weight=1, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=1, max_depth=5, min_child_weight=1, subsample=1.0, score=0.5783132530120482, total=   7.4s
[CV] colsample_bytree=1.0, gamma=1, max_depth=5, min_child_weight=1, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=1, max_depth=5, min_child_weight=1, subsample=1.0, score=0.611764705882353, total=   7.3s
[CV] colsample_bytree=1.0, gamma=1, max_depth=5, min_child_weight=1, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=1, max_depth=5, min_child_weight=1, subsample=1.0, score=0.64, total=   7.3s
[CV] colsample_bytree=1.0, gamma=1, max_depth=5, min_child_weight=1, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=1, max_depth=5, min_child_weight=1, subsample=1.0, score=0.6741573033707865, total=   7.3s
[CV] colsample_bytree=1.0, gamma=1, max_depth=5, min_c

[CV]  colsample_bytree=1.0, gamma=1.5, max_depth=3, min_child_weight=1, subsample=0.6, score=0.617283950617284, total=   4.6s
[CV] colsample_bytree=1.0, gamma=1.5, max_depth=3, min_child_weight=1, subsample=0.8 
[CV]  colsample_bytree=1.0, gamma=1.5, max_depth=3, min_child_weight=1, subsample=0.8, score=0.5185185185185186, total=   4.5s
[CV] colsample_bytree=1.0, gamma=1.5, max_depth=3, min_child_weight=1, subsample=0.8 
[CV]  colsample_bytree=1.0, gamma=1.5, max_depth=3, min_child_weight=1, subsample=0.8, score=0.5813953488372093, total=   4.5s
[CV] colsample_bytree=1.0, gamma=1.5, max_depth=3, min_child_weight=1, subsample=0.8 
[CV]  colsample_bytree=1.0, gamma=1.5, max_depth=3, min_child_weight=1, subsample=0.8, score=0.6382978723404256, total=   4.5s
[CV] colsample_bytree=1.0, gamma=1.5, max_depth=3, min_child_weight=1, subsample=0.8 
[CV]  colsample_bytree=1.0, gamma=1.5, max_depth=3, min_child_weight=1, subsample=0.8, score=0.6292134831460675, total=   4.5s
[CV] colsample_bytree=

[CV]  colsample_bytree=1.0, gamma=1.5, max_depth=3, min_child_weight=10, subsample=1.0, score=0.6593406593406593, total=   8.5s
[CV] colsample_bytree=1.0, gamma=1.5, max_depth=3, min_child_weight=10, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=1.5, max_depth=3, min_child_weight=10, subsample=1.0, score=0.6024096385542169, total=   8.4s
[CV] colsample_bytree=1.0, gamma=1.5, max_depth=4, min_child_weight=1, subsample=0.6 
[CV]  colsample_bytree=1.0, gamma=1.5, max_depth=4, min_child_weight=1, subsample=0.6, score=0.5316455696202531, total=  11.1s
[CV] colsample_bytree=1.0, gamma=1.5, max_depth=4, min_child_weight=1, subsample=0.6 
[CV]  colsample_bytree=1.0, gamma=1.5, max_depth=4, min_child_weight=1, subsample=0.6, score=0.7032967032967034, total=  11.0s
[CV] colsample_bytree=1.0, gamma=1.5, max_depth=4, min_child_weight=1, subsample=0.6 
[CV]  colsample_bytree=1.0, gamma=1.5, max_depth=4, min_child_weight=1, subsample=0.6, score=0.6021505376344085, total=  10.9s
[CV] colsample_byt

[CV]  colsample_bytree=1.0, gamma=1.5, max_depth=4, min_child_weight=10, subsample=0.8, score=0.6185567010309277, total=  11.5s
[CV] colsample_bytree=1.0, gamma=1.5, max_depth=4, min_child_weight=10, subsample=0.8 
[CV]  colsample_bytree=1.0, gamma=1.5, max_depth=4, min_child_weight=10, subsample=0.8, score=0.611764705882353, total=  12.1s
[CV] colsample_bytree=1.0, gamma=1.5, max_depth=4, min_child_weight=10, subsample=0.8 
[CV]  colsample_bytree=1.0, gamma=1.5, max_depth=4, min_child_weight=10, subsample=0.8, score=0.6190476190476191, total=  11.7s
[CV] colsample_bytree=1.0, gamma=1.5, max_depth=4, min_child_weight=10, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=1.5, max_depth=4, min_child_weight=10, subsample=1.0, score=0.6024096385542169, total=  11.9s
[CV] colsample_bytree=1.0, gamma=1.5, max_depth=4, min_child_weight=10, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=1.5, max_depth=4, min_child_weight=10, subsample=1.0, score=0.6666666666666666, total=  11.8s
[CV] colsampl

[CV]  colsample_bytree=1.0, gamma=1.5, max_depth=5, min_child_weight=10, subsample=0.6, score=0.651685393258427, total=  19.4s
[CV] colsample_bytree=1.0, gamma=1.5, max_depth=5, min_child_weight=10, subsample=0.6 
[CV]  colsample_bytree=1.0, gamma=1.5, max_depth=5, min_child_weight=10, subsample=0.6, score=0.6236559139784946, total=  18.9s
[CV] colsample_bytree=1.0, gamma=1.5, max_depth=5, min_child_weight=10, subsample=0.6 
[CV]  colsample_bytree=1.0, gamma=1.5, max_depth=5, min_child_weight=10, subsample=0.6, score=0.611764705882353, total=  18.7s
[CV] colsample_bytree=1.0, gamma=1.5, max_depth=5, min_child_weight=10, subsample=0.6 
[CV]  colsample_bytree=1.0, gamma=1.5, max_depth=5, min_child_weight=10, subsample=0.6, score=0.6097560975609756, total=  19.4s
[CV] colsample_bytree=1.0, gamma=1.5, max_depth=5, min_child_weight=10, subsample=0.8 
[CV]  colsample_bytree=1.0, gamma=1.5, max_depth=5, min_child_weight=10, subsample=0.8, score=0.5542168674698796, total=  24.1s
[CV] colsample

[CV]  colsample_bytree=1.0, gamma=2, max_depth=3, min_child_weight=5, subsample=1.0, score=0.6976744186046512, total=  18.2s
[CV] colsample_bytree=1.0, gamma=2, max_depth=3, min_child_weight=5, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=2, max_depth=3, min_child_weight=5, subsample=1.0, score=0.6262626262626263, total=  18.6s
[CV] colsample_bytree=1.0, gamma=2, max_depth=3, min_child_weight=5, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=2, max_depth=3, min_child_weight=5, subsample=1.0, score=0.651685393258427, total=  18.1s
[CV] colsample_bytree=1.0, gamma=2, max_depth=3, min_child_weight=5, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=2, max_depth=3, min_child_weight=5, subsample=1.0, score=0.575, total=  18.3s
[CV] colsample_bytree=1.0, gamma=2, max_depth=3, min_child_weight=10, subsample=0.6 
[CV]  colsample_bytree=1.0, gamma=2, max_depth=3, min_child_weight=10, subsample=0.6, score=0.5526315789473684, total=  18.3s
[CV] colsample_bytree=1.0, gamma=2, max_depth=3, mi

[CV]  colsample_bytree=1.0, gamma=2, max_depth=4, min_child_weight=5, subsample=0.8, score=0.6966292134831461, total=   5.7s
[CV] colsample_bytree=1.0, gamma=2, max_depth=4, min_child_weight=5, subsample=0.8 
[CV]  colsample_bytree=1.0, gamma=2, max_depth=4, min_child_weight=5, subsample=0.8, score=0.6526315789473683, total=   5.7s
[CV] colsample_bytree=1.0, gamma=2, max_depth=4, min_child_weight=5, subsample=0.8 
[CV]  colsample_bytree=1.0, gamma=2, max_depth=4, min_child_weight=5, subsample=0.8, score=0.5882352941176471, total=   5.7s
[CV] colsample_bytree=1.0, gamma=2, max_depth=4, min_child_weight=5, subsample=0.8 
[CV]  colsample_bytree=1.0, gamma=2, max_depth=4, min_child_weight=5, subsample=0.8, score=0.617283950617284, total=   5.8s
[CV] colsample_bytree=1.0, gamma=2, max_depth=4, min_child_weight=5, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=2, max_depth=4, min_child_weight=5, subsample=1.0, score=0.5517241379310344, total=   5.7s
[CV] colsample_bytree=1.0, gamma=2, max_

[CV]  colsample_bytree=1.0, gamma=2, max_depth=5, min_child_weight=5, subsample=0.6, score=0.6521739130434783, total=   6.6s
[CV] colsample_bytree=1.0, gamma=2, max_depth=5, min_child_weight=5, subsample=0.6 
[CV]  colsample_bytree=1.0, gamma=2, max_depth=5, min_child_weight=5, subsample=0.6, score=0.6105263157894737, total=   6.5s
[CV] colsample_bytree=1.0, gamma=2, max_depth=5, min_child_weight=5, subsample=0.6 
[CV]  colsample_bytree=1.0, gamma=2, max_depth=5, min_child_weight=5, subsample=0.6, score=0.6451612903225806, total=   6.6s
[CV] colsample_bytree=1.0, gamma=2, max_depth=5, min_child_weight=5, subsample=0.6 
[CV]  colsample_bytree=1.0, gamma=2, max_depth=5, min_child_weight=5, subsample=0.6, score=0.5454545454545454, total=   6.7s
[CV] colsample_bytree=1.0, gamma=2, max_depth=5, min_child_weight=5, subsample=0.8 
[CV]  colsample_bytree=1.0, gamma=2, max_depth=5, min_child_weight=5, subsample=0.8, score=0.5542168674698796, total=   7.0s
[CV] colsample_bytree=1.0, gamma=2, max

[CV]  colsample_bytree=1.0, gamma=5, max_depth=3, min_child_weight=1, subsample=1.0, score=0.674698795180723, total=   4.4s
[CV] colsample_bytree=1.0, gamma=5, max_depth=3, min_child_weight=1, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=5, max_depth=3, min_child_weight=1, subsample=1.0, score=0.6881720430107526, total=   4.4s
[CV] colsample_bytree=1.0, gamma=5, max_depth=3, min_child_weight=1, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=5, max_depth=3, min_child_weight=1, subsample=1.0, score=0.6956521739130435, total=   4.4s
[CV] colsample_bytree=1.0, gamma=5, max_depth=3, min_child_weight=1, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=5, max_depth=3, min_child_weight=1, subsample=1.0, score=0.6, total=   4.4s
[CV] colsample_bytree=1.0, gamma=5, max_depth=3, min_child_weight=5, subsample=0.6 
[CV]  colsample_bytree=1.0, gamma=5, max_depth=3, min_child_weight=5, subsample=0.6, score=0.5925925925925926, total=   4.4s
[CV] colsample_bytree=1.0, gamma=5, max_depth=3, min_ch

[CV]  colsample_bytree=1.0, gamma=5, max_depth=4, min_child_weight=1, subsample=0.8, score=0.6352941176470588, total=   5.9s
[CV] colsample_bytree=1.0, gamma=5, max_depth=4, min_child_weight=1, subsample=0.8 
[CV]  colsample_bytree=1.0, gamma=5, max_depth=4, min_child_weight=1, subsample=0.8, score=0.6249999999999999, total=   5.9s
[CV] colsample_bytree=1.0, gamma=5, max_depth=4, min_child_weight=1, subsample=0.8 
[CV]  colsample_bytree=1.0, gamma=5, max_depth=4, min_child_weight=1, subsample=0.8, score=0.6666666666666666, total=   5.9s
[CV] colsample_bytree=1.0, gamma=5, max_depth=4, min_child_weight=1, subsample=0.8 
[CV]  colsample_bytree=1.0, gamma=5, max_depth=4, min_child_weight=1, subsample=0.8, score=0.575, total=   5.9s
[CV] colsample_bytree=1.0, gamma=5, max_depth=4, min_child_weight=1, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=5, max_depth=4, min_child_weight=1, subsample=1.0, score=0.5542168674698796, total=   5.9s
[CV] colsample_bytree=1.0, gamma=5, max_depth=4, min

[CV]  colsample_bytree=1.0, gamma=5, max_depth=5, min_child_weight=1, subsample=0.6, score=0.6352941176470588, total=   7.3s
[CV] colsample_bytree=1.0, gamma=5, max_depth=5, min_child_weight=1, subsample=0.6 
[CV]  colsample_bytree=1.0, gamma=5, max_depth=5, min_child_weight=1, subsample=0.6, score=0.6595744680851063, total=   7.3s
[CV] colsample_bytree=1.0, gamma=5, max_depth=5, min_child_weight=1, subsample=0.6 
[CV]  colsample_bytree=1.0, gamma=5, max_depth=5, min_child_weight=1, subsample=0.6, score=0.6590909090909091, total=   7.3s
[CV] colsample_bytree=1.0, gamma=5, max_depth=5, min_child_weight=1, subsample=0.6 
[CV]  colsample_bytree=1.0, gamma=5, max_depth=5, min_child_weight=1, subsample=0.6, score=0.6024096385542169, total=   7.3s
[CV] colsample_bytree=1.0, gamma=5, max_depth=5, min_child_weight=1, subsample=0.8 
[CV]  colsample_bytree=1.0, gamma=5, max_depth=5, min_child_weight=1, subsample=0.8, score=0.5301204819277109, total=   7.5s
[CV] colsample_bytree=1.0, gamma=5, max

[CV]  colsample_bytree=1.0, gamma=5, max_depth=5, min_child_weight=10, subsample=1.0, score=0.6206896551724139, total=   7.0s
[CV] colsample_bytree=1.0, gamma=5, max_depth=5, min_child_weight=10, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=5, max_depth=5, min_child_weight=10, subsample=1.0, score=0.6808510638297872, total=   6.9s
[CV] colsample_bytree=1.0, gamma=5, max_depth=5, min_child_weight=10, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=5, max_depth=5, min_child_weight=10, subsample=1.0, score=0.6279069767441862, total=   7.0s
[CV] colsample_bytree=1.0, gamma=5, max_depth=5, min_child_weight=10, subsample=1.0 
[CV]  colsample_bytree=1.0, gamma=5, max_depth=5, min_child_weight=10, subsample=1.0, score=0.5822784810126581, total=   7.0s


[Parallel(n_jobs=1)]: Done 2025 out of 2025 | elapsed: 220.1min finished


0.5727272727272728 0.5384615384615384 0.5490196078431373 0.5436893203883495
{'colsample_bytree': 0.8, 'gamma': 2, 'max_depth': 4, 'min_child_weight': 1, 'subsample': 1.0}
CPU times: user 3h 5min 25s, sys: 25.6 s, total: 3h 5min 51s
Wall time: 3h 40min 8s


In [180]:
# xgboost_config = {
#     "XGBoost Count": (XGBClassifier(colsample_bytree = 0.8, gamma= 2, max_depth = 4, min_child_weight = 1, subsample =1.0), train_count, test_count)
#     , "XGBoost TFIDF": (XGBClassifier(colsample_bytree = 0.8, gamma= 2, max_depth = 4, min_child_weight = 1, subsample =1.0), train_tfidf, test_tfidf)
#     , "XGBoost TFIDF NGram": (XGBClassifier(colsample_bytree = 0.8, gamma= 2, max_depth = 4, min_child_weight = 1, subsample =1.0), train_tfidf_ngram.tocsc(), test_tfidf_ngram.tocsc())
#     , "XGBoost TFIDF NGram Chars": (XGBClassifier(colsample_bytree = 0.8, gamma= 2, max_depth = 4, min_child_weight = 1, subsample =1.0), train_tfidf_ngram_chars, test_tfidf_ngram_chars)
#     , "XGBoost LDA": (XGBClassifier(colsample_bytree = 0.8, gamma= 2, max_depth = 4, min_child_weight = 1, subsample =1.0), train_lda, test_lda)
#     , "XGBoost Doc2Vec": (XGBClassifier(colsample_bytree = 0.8, gamma= 2, max_depth = 4, min_child_weight = 1, subsample =1.0), train_doc2vec, test_doc2vec)
# }

xgboost_config = {
    "XGBoost Count": (XGBClassifier(), train_count, test_count)
    , "XGBoost TFIDF": (XGBClassifier(), train_tfidf, test_tfidf)
    , "XGBoost TFIDF NGram": (XGBClassifier(), train_tfidf_ngram.tocsc(), test_tfidf_ngram.tocsc())
    , "XGBoost TFIDF NGram Chars": (XGBClassifier(), train_tfidf_ngram_chars, test_tfidf_ngram_chars)
    , "XGBoost LDA": (XGBClassifier(), train_lda, test_lda)
    , "XGBoost Doc2Vec": (XGBClassifier(), train_doc2vec, test_doc2vec)
}

run_model(xgboost_config)

[[34 26]
 [24 26]]
Accuracy:0.55 Precission:0.50 Recall:0.52 F1:0.51 -> [XGBoost Count]
[[38 24]
 [20 28]]
Accuracy:0.60 Precission:0.54 Recall:0.58 F1:0.56 -> [XGBoost TFIDF]
[[34 27]
 [24 25]]
Accuracy:0.54 Precission:0.48 Recall:0.51 F1:0.50 -> [XGBoost TFIDF NGram]
[[29 23]
 [29 29]]
Accuracy:0.53 Precission:0.56 Recall:0.50 F1:0.53 -> [XGBoost TFIDF NGram Chars]
[[38 25]
 [20 27]]
Accuracy:0.59 Precission:0.52 Recall:0.57 F1:0.55 -> [XGBoost LDA]
[[34 30]
 [24 22]]
Accuracy:0.51 Precission:0.42 Recall:0.48 F1:0.45 -> [XGBoost Doc2Vec]
