In [4]:
import pandas as pd
import numpy as np

from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.stem.snowball import SnowballStemmer

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

import re

In [5]:
np.random.seed(100)

In [6]:
data = pd.read_csv('../../Dataset/jigsaw-toxic-comment-classification-challenge/train.csv')

In [7]:
data.head(2)

Unnamed: 0,id,comment_text,toxic,severe_toxic,obscene,threat,insult,identity_hate
0,0000997932d777bf,Explanation\nWhy the edits made under my usern...,0,0,0,0,0,0
1,000103f0d9cfb60f,D'aww! He matches this background colour I'm s...,0,0,0,0,0,0


In [8]:
combine = data[['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']].sum(axis=1)

In [9]:
print('There are %.2f%% data has labels.' %(sum(combine>1)/data.shape[0]*100))

There are 6.18% data has labels.


In [10]:
data[combine>1].head(5)

Unnamed: 0,id,comment_text,toxic,severe_toxic,obscene,threat,insult,identity_hate
6,0002bcb3da6cb337,COCKSUCKER BEFORE YOU PISS AROUND ON MY WORK,1,1,1,0,1,0
42,001810bf8c45bf5f,You are gay or antisemmitian? \n\nArchangel WH...,1,0,1,0,1,1
43,00190820581d90ce,"FUCK YOUR FILTHY MOTHER IN THE ASS, DRY!",1,0,1,0,1,0
51,001dc38a83d420cf,GET FUCKED UP. GET FUCKEEED UP. GOT A DRINK T...,1,0,1,0,0,0
55,0020e7119b96eeeb,Stupid peace of shit stop deleting my stuff as...,1,1,1,0,1,0


## Baseline Approach
We can treat this as a multiple binary classification problem

In [11]:
tox = data[['id', 'comment_text', 'toxic']]

In [12]:
tox.head(3)

Unnamed: 0,id,comment_text,toxic
0,0000997932d777bf,Explanation\nWhy the edits made under my usern...,0
1,000103f0d9cfb60f,D'aww! He matches this background colour I'm s...,0
2,000113f07ec002fd,"Hey man, I'm really not trying to edit war. It...",0


In [13]:
print('%.2f%% of data has label as toxic.' %(sum(tox['toxic']==1)/tox.shape[0]*100))

9.58% of data has label as toxic.


### Imbalanced Classification!!!!

In [14]:
X_train, X_test, y_train, y_test = train_test_split(tox.iloc[:,:2], tox.iloc[:,2], test_size=0.1, 
                                                    random_state=100, stratify = tox['toxic'])

In [15]:
print(X_train.shape)
print(X_test.shape)

(143613, 2)
(15958, 2)


In [16]:
print('Train ratio %.2f%%' %(sum(y_train == 1)/X_train.shape[0]*100))
print('Test ratio %.2f%%' %(sum(y_test == 1)/X_test.shape[0]*100))

Train ratio 9.58%
Test ratio 9.58%


### Token and Stem Text

In [17]:
sample = X_train.loc[3,'comment_text']

In [18]:
sample

'"\nMore\nI can\'t make any real suggestions on improvement - I wondered if the section statistics should be later on, or a subsection of ""types of accidents""  -I think the references may need tidying so that they are all in the exact same format ie date format etc. I can do that later on, if no-one else does first - if you have any preferences for formatting style on references or want to do it yourself please let me know.\n\nThere appears to be a backlog on articles for review so I guess there may be a delay until a reviewer turns up. It\'s listed in the relevant form eg Wikipedia:Good_article_nominations#Transport  "'

In [19]:
w = [w for sent in sent_tokenize(sample) for w in word_tokenize(sent)]

In [20]:
# remove non-word 
filtered = [filt for filt in w if re.search('[A-Za-z]', filt)]

In [21]:
# Stem
snowballStemmer = SnowballStemmer('english')

stemmed = [snowballStemmer.stem(word) for word in filtered]

In [22]:
# Build a function to combine 3 into 1
def token_stem(text):
    word = [w for sent in sent_tokenize(text) for w in word_tokenize(sent)]
    filtered = [filt for filt in word if re.search('[A-Za-z]', filt)]
    stemmed = [snowballStemmer.stem(word) for word in filtered]
    
    return stemmed

### Tfidf

In [23]:
# Deal with stopwords warnings
from nltk.corpus import stopwords

stop_w_list = stopwords.words('english')

In [24]:
preprocess_stop = [snowballStemmer.stem(word) for word in stop_w_list]
preprocess_stop = preprocess_stop + ["'d", 'could', 'might', 'must', "n't", 'need', 'sha', 'wo', 'would']

In [117]:
# Should not use lower case cuz toxic comment seems like having many upper case words
# Toxic words seems combine into 2 together, so use bi-gram
# Consider imbalanced labels, min_df should not be too small
# toxic comment is not that comman so max_df should not be too large
# toxic word should be so many. There are really just a few words can be used toxically!!!
# max_features should not be too large (also avoid curse of dimenality)
tfidf = TfidfVectorizer(max_df=0.7, max_features=5000, lowercase=False,
                                 min_df=10, stop_words=preprocess_stop,
                                 use_idf=True, tokenizer=token_stem,
                                 ngram_range=(1,2))

In [118]:
tfidf_features = tfidf.fit_transform(X_train['comment_text'])

  'stop_words.' % sorted(inconsistent))


### Baseline Model: logistic regression

In [27]:
lr = LogisticRegression(random_state=100, n_jobs=-1)

In [28]:
lr.fit(tfidf_features, y_train)

  " = {}.".format(effective_n_jobs(self.n_jobs)))


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn', n_jobs=-1,
          penalty='l2', random_state=100, solver='warn', tol=0.0001,
          verbose=0, warm_start=False)

In [29]:
lr.score(tfidf_features, y_train)

0.9605537103187037

In [30]:
pd.DataFrame(confusion_matrix(y_train, lr.predict(tfidf_features), labels=[0,1]), index=['Non-Toxic', 'Toxic'], 
             columns=['Pred-Non-Toxic', 'Pred-Toxc'])

Unnamed: 0,Pred-Non-Toxic,Pred-Toxc
Non-Toxic,129033,815
Toxic,4850,8915


In [31]:
print(classification_report(y_train, lr.predict(tfidf_features), target_names=['Non-Toxic', 'Toxic']))

              precision    recall  f1-score   support

   Non-Toxic       0.96      0.99      0.98    129848
       Toxic       0.92      0.65      0.76     13765

   micro avg       0.96      0.96      0.96    143613
   macro avg       0.94      0.82      0.87    143613
weighted avg       0.96      0.96      0.96    143613



Low sensitivity caused by imbalanced labels!!!!!!!!!!!

### Baseline try on test dataset

In [32]:
test_features = tfidf.transform(X_test['comment_text'])

In [33]:
print(classification_report(y_test, lr.predict(test_features), target_names=['Non-Toxic', 'Toxic']))

              precision    recall  f1-score   support

   Non-Toxic       0.96      0.99      0.98     14429
       Toxic       0.89      0.61      0.73      1529

   micro avg       0.96      0.96      0.96     15958
   macro avg       0.92      0.80      0.85     15958
weighted avg       0.95      0.96      0.95     15958



### What about downsampling to eliminate the problem?

In [58]:
toxic_size = len(np.where(y_train==1)[0])
non_toxic_idx = np.where(y_train!=1)[0]
toxic_idx = np.where(y_train==1)[0]

In [59]:
# random select downsample size
down_size_indx = np.random.choice(non_toxic_idx, toxic_size, replace=False)

In [65]:
# Concat two indexes to get new train sample
merged_inx = np.hstack([toxic_idx, down_size_indx]).tolist()

In [82]:
down_X_train = X_train.iloc[merged_inx,]
down_y_train = y_train.iloc[merged_inx]

### Rebuild a model on down_sample

In [100]:
tfidf.fit(X_train['comment_text'])
down_tfidf_features = tfidf.transform(down_X_train['comment_text'])

In [101]:
# re-train the model
lr2 = LogisticRegression(random_state=100, n_jobs=-1)

In [102]:
lr2.fit(down_tfidf_features, down_y_train)

  " = {}.".format(effective_n_jobs(self.n_jobs)))


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn', n_jobs=-1,
          penalty='l2', random_state=100, solver='warn', tol=0.0001,
          verbose=0, warm_start=False)

In [103]:
print(classification_report(down_y_train, lr2.predict(down_tfidf_features), target_names=['Non-Toxic', 'Toxic']))

              precision    recall  f1-score   support

   Non-Toxic       0.90      0.94      0.92     13765
       Toxic       0.94      0.89      0.91     13765

   micro avg       0.92      0.92      0.92     27530
   macro avg       0.92      0.92      0.92     27530
weighted avg       0.92      0.92      0.92     27530



In [104]:
down_test_tfidf_features = tfidf.transform(X_test['comment_text'])

In [105]:
print(classification_report(y_test, lr2.predict(down_test_tfidf_features), target_names=['Non-Toxic', 'Toxic']))

              precision    recall  f1-score   support

   Non-Toxic       0.98      0.93      0.95     14429
       Toxic       0.55      0.86      0.67      1529

   micro avg       0.92      0.92      0.92     15958
   macro avg       0.77      0.89      0.81     15958
weighted avg       0.94      0.92      0.93     15958



- f1 score and precision dropped for Toxic but recall improved. 
- We need to consider precision VS recall now?
- We lost a lot information during downsampling, and precision is too low
- Try GridSearch Parameter and upsampling method!

### Try better model

In [110]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

In [112]:
?RandomForestClassifier

In [113]:
param_grid = {"n_estimators": [10, 50, 100, 200],
             "max_depth": [2,4,6,8],
             "max_features": ['auto', 'sqrt']}

In [None]:
gsc = GridSearchCV(estimator=RandomForestClassifier(),
                  param_grid=param_grid,
                  scoring='f1',
                  cv=5,
                  verbose=2
                  )

In [None]:
gsc.fit(tfidf_features, y_train)

Fitting 5 folds for each of 32 candidates, totalling 160 fits
[CV] max_depth=2, max_features=auto, n_estimators=10 .................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV] .. max_depth=2, max_features=auto, n_estimators=10, total=   0.5s
[CV] max_depth=2, max_features=auto, n_estimators=10 .................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.8s remaining:    0.0s
  'precision', 'predicted', average, warn_for)


[CV] .. max_depth=2, max_features=auto, n_estimators=10, total=   0.6s
[CV] max_depth=2, max_features=auto, n_estimators=10 .................


  'precision', 'predicted', average, warn_for)


[CV] .. max_depth=2, max_features=auto, n_estimators=10, total=   0.5s
[CV] max_depth=2, max_features=auto, n_estimators=10 .................


  'precision', 'predicted', average, warn_for)


[CV] .. max_depth=2, max_features=auto, n_estimators=10, total=   0.4s
[CV] max_depth=2, max_features=auto, n_estimators=10 .................


  'precision', 'predicted', average, warn_for)


[CV] .. max_depth=2, max_features=auto, n_estimators=10, total=   0.4s
[CV] max_depth=2, max_features=auto, n_estimators=50 .................


  'precision', 'predicted', average, warn_for)


[CV] .. max_depth=2, max_features=auto, n_estimators=50, total=   1.4s
[CV] max_depth=2, max_features=auto, n_estimators=50 .................


  'precision', 'predicted', average, warn_for)


[CV] .. max_depth=2, max_features=auto, n_estimators=50, total=   1.8s
[CV] max_depth=2, max_features=auto, n_estimators=50 .................


  'precision', 'predicted', average, warn_for)


[CV] .. max_depth=2, max_features=auto, n_estimators=50, total=   1.6s
[CV] max_depth=2, max_features=auto, n_estimators=50 .................


  'precision', 'predicted', average, warn_for)


[CV] .. max_depth=2, max_features=auto, n_estimators=50, total=   1.4s
[CV] max_depth=2, max_features=auto, n_estimators=50 .................


  'precision', 'predicted', average, warn_for)


[CV] .. max_depth=2, max_features=auto, n_estimators=50, total=   1.5s
[CV] max_depth=2, max_features=auto, n_estimators=100 ................


  'precision', 'predicted', average, warn_for)


[CV] . max_depth=2, max_features=auto, n_estimators=100, total=   2.9s
[CV] max_depth=2, max_features=auto, n_estimators=100 ................


  'precision', 'predicted', average, warn_for)


[CV] . max_depth=2, max_features=auto, n_estimators=100, total=   2.5s
[CV] max_depth=2, max_features=auto, n_estimators=100 ................


  'precision', 'predicted', average, warn_for)


[CV] . max_depth=2, max_features=auto, n_estimators=100, total=   2.8s
[CV] max_depth=2, max_features=auto, n_estimators=100 ................


  'precision', 'predicted', average, warn_for)


[CV] . max_depth=2, max_features=auto, n_estimators=100, total=   2.5s
[CV] max_depth=2, max_features=auto, n_estimators=100 ................


  'precision', 'predicted', average, warn_for)


[CV] . max_depth=2, max_features=auto, n_estimators=100, total=   2.9s
[CV] max_depth=2, max_features=auto, n_estimators=200 ................


  'precision', 'predicted', average, warn_for)


[CV] . max_depth=2, max_features=auto, n_estimators=200, total=   7.3s
[CV] max_depth=2, max_features=auto, n_estimators=200 ................


  'precision', 'predicted', average, warn_for)


[CV] . max_depth=2, max_features=auto, n_estimators=200, total=   5.3s
[CV] max_depth=2, max_features=auto, n_estimators=200 ................


  'precision', 'predicted', average, warn_for)


[CV] . max_depth=2, max_features=auto, n_estimators=200, total=   6.1s
[CV] max_depth=2, max_features=auto, n_estimators=200 ................


  'precision', 'predicted', average, warn_for)


[CV] . max_depth=2, max_features=auto, n_estimators=200, total=   5.2s
[CV] max_depth=2, max_features=auto, n_estimators=200 ................


  'precision', 'predicted', average, warn_for)


[CV] . max_depth=2, max_features=auto, n_estimators=200, total=   5.3s
[CV] max_depth=2, max_features=sqrt, n_estimators=10 .................


  'precision', 'predicted', average, warn_for)


[CV] .. max_depth=2, max_features=sqrt, n_estimators=10, total=   0.4s
[CV] max_depth=2, max_features=sqrt, n_estimators=10 .................


  'precision', 'predicted', average, warn_for)


[CV] .. max_depth=2, max_features=sqrt, n_estimators=10, total=   0.4s
[CV] max_depth=2, max_features=sqrt, n_estimators=10 .................


  'precision', 'predicted', average, warn_for)


[CV] .. max_depth=2, max_features=sqrt, n_estimators=10, total=   0.4s
[CV] max_depth=2, max_features=sqrt, n_estimators=10 .................


  'precision', 'predicted', average, warn_for)


[CV] .. max_depth=2, max_features=sqrt, n_estimators=10, total=   0.4s
[CV] max_depth=2, max_features=sqrt, n_estimators=10 .................


  'precision', 'predicted', average, warn_for)


[CV] .. max_depth=2, max_features=sqrt, n_estimators=10, total=   0.4s
[CV] max_depth=2, max_features=sqrt, n_estimators=50 .................


  'precision', 'predicted', average, warn_for)


[CV] .. max_depth=2, max_features=sqrt, n_estimators=50, total=   1.7s
[CV] max_depth=2, max_features=sqrt, n_estimators=50 .................


  'precision', 'predicted', average, warn_for)


[CV] .. max_depth=2, max_features=sqrt, n_estimators=50, total=   1.5s
[CV] max_depth=2, max_features=sqrt, n_estimators=50 .................


  'precision', 'predicted', average, warn_for)


[CV] .. max_depth=2, max_features=sqrt, n_estimators=50, total=   1.5s
[CV] max_depth=2, max_features=sqrt, n_estimators=50 .................


  'precision', 'predicted', average, warn_for)


[CV] .. max_depth=2, max_features=sqrt, n_estimators=50, total=   1.6s
[CV] max_depth=2, max_features=sqrt, n_estimators=50 .................


  'precision', 'predicted', average, warn_for)


[CV] .. max_depth=2, max_features=sqrt, n_estimators=50, total=   1.4s
[CV] max_depth=2, max_features=sqrt, n_estimators=100 ................


  'precision', 'predicted', average, warn_for)


[CV] . max_depth=2, max_features=sqrt, n_estimators=100, total=   2.6s
[CV] max_depth=2, max_features=sqrt, n_estimators=100 ................


  'precision', 'predicted', average, warn_for)


[CV] . max_depth=2, max_features=sqrt, n_estimators=100, total=   3.0s
[CV] max_depth=2, max_features=sqrt, n_estimators=100 ................


  'precision', 'predicted', average, warn_for)


[CV] . max_depth=2, max_features=sqrt, n_estimators=100, total=   2.8s
[CV] max_depth=2, max_features=sqrt, n_estimators=100 ................


  'precision', 'predicted', average, warn_for)


[CV] . max_depth=2, max_features=sqrt, n_estimators=100, total=   2.4s
[CV] max_depth=2, max_features=sqrt, n_estimators=100 ................


  'precision', 'predicted', average, warn_for)


[CV] . max_depth=2, max_features=sqrt, n_estimators=100, total=   2.7s
[CV] max_depth=2, max_features=sqrt, n_estimators=200 ................


  'precision', 'predicted', average, warn_for)


[CV] . max_depth=2, max_features=sqrt, n_estimators=200, total=   5.4s
[CV] max_depth=2, max_features=sqrt, n_estimators=200 ................


  'precision', 'predicted', average, warn_for)


[CV] . max_depth=2, max_features=sqrt, n_estimators=200, total=   6.1s
[CV] max_depth=2, max_features=sqrt, n_estimators=200 ................


  'precision', 'predicted', average, warn_for)


[CV] . max_depth=2, max_features=sqrt, n_estimators=200, total=   5.8s
[CV] max_depth=2, max_features=sqrt, n_estimators=200 ................


  'precision', 'predicted', average, warn_for)


[CV] . max_depth=2, max_features=sqrt, n_estimators=200, total=   8.1s
[CV] max_depth=2, max_features=sqrt, n_estimators=200 ................


  'precision', 'predicted', average, warn_for)


[CV] . max_depth=2, max_features=sqrt, n_estimators=200, total=   5.7s
[CV] max_depth=4, max_features=auto, n_estimators=10 .................
[CV] .. max_depth=4, max_features=auto, n_estimators=10, total=   0.5s
[CV] max_depth=4, max_features=auto, n_estimators=10 .................


  'precision', 'predicted', average, warn_for)


[CV] .. max_depth=4, max_features=auto, n_estimators=10, total=   0.5s
[CV] max_depth=4, max_features=auto, n_estimators=10 .................
[CV] .. max_depth=4, max_features=auto, n_estimators=10, total=   0.5s
[CV] max_depth=4, max_features=auto, n_estimators=10 .................
[CV] .. max_depth=4, max_features=auto, n_estimators=10, total=   0.5s
[CV] max_depth=4, max_features=auto, n_estimators=10 .................
[CV] .. max_depth=4, max_features=auto, n_estimators=10, total=   0.5s
[CV] max_depth=4, max_features=auto, n_estimators=50 .................


  'precision', 'predicted', average, warn_for)


[CV] .. max_depth=4, max_features=auto, n_estimators=50, total=   1.8s
[CV] max_depth=4, max_features=auto, n_estimators=50 .................


  'precision', 'predicted', average, warn_for)


[CV] .. max_depth=4, max_features=auto, n_estimators=50, total=   1.8s
[CV] max_depth=4, max_features=auto, n_estimators=50 .................


  'precision', 'predicted', average, warn_for)


[CV] .. max_depth=4, max_features=auto, n_estimators=50, total=   2.1s
[CV] max_depth=4, max_features=auto, n_estimators=50 .................


  'precision', 'predicted', average, warn_for)


[CV] .. max_depth=4, max_features=auto, n_estimators=50, total=   2.4s
[CV] max_depth=4, max_features=auto, n_estimators=50 .................
[CV] .. max_depth=4, max_features=auto, n_estimators=50, total=   2.2s
[CV] max_depth=4, max_features=auto, n_estimators=100 ................


  'precision', 'predicted', average, warn_for)


[CV] . max_depth=4, max_features=auto, n_estimators=100, total=   4.0s
[CV] max_depth=4, max_features=auto, n_estimators=100 ................


  'precision', 'predicted', average, warn_for)


[CV] . max_depth=4, max_features=auto, n_estimators=100, total=   3.3s
[CV] max_depth=4, max_features=auto, n_estimators=100 ................
[CV] . max_depth=4, max_features=auto, n_estimators=100, total=   3.6s
[CV] max_depth=4, max_features=auto, n_estimators=100 ................
[CV] . max_depth=4, max_features=auto, n_estimators=100, total=   3.6s
[CV] max_depth=4, max_features=auto, n_estimators=100 ................


  'precision', 'predicted', average, warn_for)


[CV] . max_depth=4, max_features=auto, n_estimators=100, total=   4.1s
[CV] max_depth=4, max_features=auto, n_estimators=200 ................
[CV] . max_depth=4, max_features=auto, n_estimators=200, total=   7.4s
[CV] max_depth=4, max_features=auto, n_estimators=200 ................


  'precision', 'predicted', average, warn_for)


[CV] . max_depth=4, max_features=auto, n_estimators=200, total=   9.7s
[CV] max_depth=4, max_features=auto, n_estimators=200 ................


  'precision', 'predicted', average, warn_for)


[CV] . max_depth=4, max_features=auto, n_estimators=200, total=   8.1s
[CV] max_depth=4, max_features=auto, n_estimators=200 ................


  'precision', 'predicted', average, warn_for)


[CV] . max_depth=4, max_features=auto, n_estimators=200, total=   9.4s
[CV] max_depth=4, max_features=auto, n_estimators=200 ................


  'precision', 'predicted', average, warn_for)


[CV] . max_depth=4, max_features=auto, n_estimators=200, total=   8.0s
[CV] max_depth=4, max_features=sqrt, n_estimators=10 .................
[CV] .. max_depth=4, max_features=sqrt, n_estimators=10, total=   0.5s
[CV] max_depth=4, max_features=sqrt, n_estimators=10 .................


  'precision', 'predicted', average, warn_for)


[CV] .. max_depth=4, max_features=sqrt, n_estimators=10, total=   0.5s
[CV] max_depth=4, max_features=sqrt, n_estimators=10 .................
[CV] .. max_depth=4, max_features=sqrt, n_estimators=10, total=   0.5s
[CV] max_depth=4, max_features=sqrt, n_estimators=10 .................
[CV] .. max_depth=4, max_features=sqrt, n_estimators=10, total=   0.5s
[CV] max_depth=4, max_features=sqrt, n_estimators=10 .................


  'precision', 'predicted', average, warn_for)


[CV] .. max_depth=4, max_features=sqrt, n_estimators=10, total=   0.5s
[CV] max_depth=4, max_features=sqrt, n_estimators=50 .................
[CV] .. max_depth=4, max_features=sqrt, n_estimators=50, total=   1.9s
[CV] max_depth=4, max_features=sqrt, n_estimators=50 .................


  'precision', 'predicted', average, warn_for)


[CV] .. max_depth=4, max_features=sqrt, n_estimators=50, total=   2.0s
[CV] max_depth=4, max_features=sqrt, n_estimators=50 .................


  'precision', 'predicted', average, warn_for)


[CV] .. max_depth=4, max_features=sqrt, n_estimators=50, total=   2.1s
[CV] max_depth=4, max_features=sqrt, n_estimators=50 .................


  'precision', 'predicted', average, warn_for)


[CV] .. max_depth=4, max_features=sqrt, n_estimators=50, total=   2.2s
[CV] max_depth=4, max_features=sqrt, n_estimators=50 .................
[CV] .. max_depth=4, max_features=sqrt, n_estimators=50, total=   2.3s
[CV] max_depth=4, max_features=sqrt, n_estimators=100 ................


  'precision', 'predicted', average, warn_for)


[CV] . max_depth=4, max_features=sqrt, n_estimators=100, total=   3.8s
[CV] max_depth=4, max_features=sqrt, n_estimators=100 ................


  'precision', 'predicted', average, warn_for)


[CV] . max_depth=4, max_features=sqrt, n_estimators=100, total=   4.9s
[CV] max_depth=4, max_features=sqrt, n_estimators=100 ................


  'precision', 'predicted', average, warn_for)


[CV] . max_depth=4, max_features=sqrt, n_estimators=100, total=   3.6s
[CV] max_depth=4, max_features=sqrt, n_estimators=100 ................


  'precision', 'predicted', average, warn_for)


[CV] . max_depth=4, max_features=sqrt, n_estimators=100, total=   3.6s
[CV] max_depth=4, max_features=sqrt, n_estimators=100 ................


  'precision', 'predicted', average, warn_for)


[CV] . max_depth=4, max_features=sqrt, n_estimators=100, total=   3.6s
[CV] max_depth=4, max_features=sqrt, n_estimators=200 ................


  'precision', 'predicted', average, warn_for)


[CV] . max_depth=4, max_features=sqrt, n_estimators=200, total=   6.9s
[CV] max_depth=4, max_features=sqrt, n_estimators=200 ................


  'precision', 'predicted', average, warn_for)


[CV] . max_depth=4, max_features=sqrt, n_estimators=200, total=   6.5s
[CV] max_depth=4, max_features=sqrt, n_estimators=200 ................


  'precision', 'predicted', average, warn_for)


[CV] . max_depth=4, max_features=sqrt, n_estimators=200, total=   8.7s
[CV] max_depth=4, max_features=sqrt, n_estimators=200 ................


  'precision', 'predicted', average, warn_for)


[CV] . max_depth=4, max_features=sqrt, n_estimators=200, total=   6.8s
[CV] max_depth=4, max_features=sqrt, n_estimators=200 ................
[CV] . max_depth=4, max_features=sqrt, n_estimators=200, total=   6.8s
[CV] max_depth=6, max_features=auto, n_estimators=10 .................
[CV] .. max_depth=6, max_features=auto, n_estimators=10, total=   0.6s
[CV] max_depth=6, max_features=auto, n_estimators=10 .................
[CV] .. max_depth=6, max_features=auto, n_estimators=10, total=   0.6s
[CV] max_depth=6, max_features=auto, n_estimators=10 .................
[CV] .. max_depth=6, max_features=auto, n_estimators=10, total=   0.6s
[CV] max_depth=6, max_features=auto, n_estimators=10 .................
[CV] .. max_depth=6, max_features=auto, n_estimators=10, total=   0.6s
[CV] max_depth=6, max_features=auto, n_estimators=10 .................
[CV] .. max_depth=6, max_features=auto, n_estimators=10, total=   0.6s
[CV] max_depth=6, max_features=auto, n_estimators=50 .................
[CV] .

  'precision', 'predicted', average, warn_for)


[CV] . max_depth=6, max_features=auto, n_estimators=200, total=   8.7s
[CV] max_depth=6, max_features=auto, n_estimators=200 ................
[CV] . max_depth=6, max_features=auto, n_estimators=200, total=   9.4s
[CV] max_depth=6, max_features=auto, n_estimators=200 ................
[CV] . max_depth=6, max_features=auto, n_estimators=200, total= 6.9min
[CV] max_depth=6, max_features=auto, n_estimators=200 ................
[CV] . max_depth=6, max_features=auto, n_estimators=200, total=   9.3s
[CV] max_depth=6, max_features=sqrt, n_estimators=10 .................
[CV] .. max_depth=6, max_features=sqrt, n_estimators=10, total=   0.6s
[CV] max_depth=6, max_features=sqrt, n_estimators=10 .................
[CV] .. max_depth=6, max_features=sqrt, n_estimators=10, total=   0.6s
[CV] max_depth=6, max_features=sqrt, n_estimators=10 .................
[CV] .. max_depth=6, max_features=sqrt, n_estimators=10, total=   0.7s
[CV] max_depth=6, max_features=sqrt, n_estimators=10 .................
[CV] .

  'precision', 'predicted', average, warn_for)


[CV] . max_depth=6, max_features=sqrt, n_estimators=200, total=   8.7s
[CV] max_depth=6, max_features=sqrt, n_estimators=200 ................
[CV] . max_depth=6, max_features=sqrt, n_estimators=200, total=   8.8s
[CV] max_depth=6, max_features=sqrt, n_estimators=200 ................
[CV] . max_depth=6, max_features=sqrt, n_estimators=200, total=   8.6s
[CV] max_depth=6, max_features=sqrt, n_estimators=200 ................
[CV] . max_depth=6, max_features=sqrt, n_estimators=200, total=   8.8s
[CV] max_depth=8, max_features=auto, n_estimators=10 .................
[CV] .. max_depth=8, max_features=auto, n_estimators=10, total=   0.7s
[CV] max_depth=8, max_features=auto, n_estimators=10 .................
[CV] .. max_depth=8, max_features=auto, n_estimators=10, total=   0.7s
[CV] max_depth=8, max_features=auto, n_estimators=10 .................
[CV] .. max_depth=8, max_features=auto, n_estimators=10, total=   0.7s
[CV] max_depth=8, max_features=auto, n_estimators=10 .................
[CV] .

KeyboardInterrupt: 