In [40]:
import pandas as pd 
import nltk
from collections import Counter
from nltk.corpus import stopwords
import string
import matplotlib.pyplot as plt
import seaborn as sns 
import numpy as np

In [51]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split, cross_val_score, cross_validate, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder

In [38]:
from gensim.models import Word2Vec

In [2]:
data = pd.read_pickle('data_cleaned.pkl')

In [3]:
data.head()

Unnamed: 0,review,sentiment,tokens
0,One of the other reviewers has mentioned that ...,positive,"[one, reviewers, mentioned, watching, 1, oz, e..."
1,A wonderful little production. <br /><br />The...,positive,"[wonderful, little, production, filming, techn..."
2,I thought this was a wonderful way to spend ti...,positive,"[thought, wonderful, way, spend, time, hot, su..."
3,Basically there's a family where a little boy ...,negative,"[basically, theres, family, little, boy, jake,..."
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive,"[petter, matteis, love, time, money, visually,..."


In [21]:
#The first model approach is to use TF IDF, which is simple way to analyze text for a machine learning program
#I also encode the sentiment column so that positve = 1 and negative = 0
#Additionally, I remove english stop words since they don't add much value
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(data['sentiment'])
vectorizer = TfidfVectorizer(stop_words = 'english')
X_tfidf = vectorizer.fit_transform(data['review'])

In [54]:
X_train, X_test, y_train, y_test = train_test_split(X_tfidf, y, test_size=0.2, random_state=34)

In [55]:
#I am then going to use a random forest classifier to learn from the tfidf vectors and be able to determine sentiment for thes test set
clf = RandomForestClassifier(verbose = 1)

In [56]:
clf.fit(X_train, y_train)

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:  1.2min


In [57]:
#taking a look at the otb random forest model and the results look promising with 85% accuracy
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred, target_names = label_encoder.classes_))

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.1s


              precision    recall  f1-score   support

    negative       0.85      0.86      0.85      5045
    positive       0.86      0.84      0.85      4955

    accuracy                           0.85     10000
   macro avg       0.85      0.85      0.85     10000
weighted avg       0.85      0.85      0.85     10000



In [53]:
#lets see if hyper parameter tuning can have any effect: it does not as the highest accuracy is still 85%
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [10,20,50,100],
    'min_samples_split' : [2,5,10]}
grid_search = GridSearchCV(estimator=clf, param_grid=param_grid, scoring='accuracy',verbose=2)
grid_search.fit(X_train, y_train)

print("Best parameters:", grid_search.best_params_)
print("Best score:", grid_search.best_score_)

Fitting 5 folds for each of 36 candidates, totalling 180 fits


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.6s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END .max_depth=10, min_samples_split=2, n_estimators=50; total time=   9.9s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.4s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END .max_depth=10, min_samples_split=2, n_estimators=50; total time=   9.6s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.6s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END .max_depth=10, min_samples_split=2, n_estimators=50; total time=   9.8s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.4s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END .max_depth=10, min_samples_split=2, n_estimators=50; total time=   9.7s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END .max_depth=10, min_samples_split=2, n_estimators=50; total time=   9.7s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=10, min_samples_split=2, n_estimators=100; total time=  19.5s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=10, min_samples_split=2, n_estimators=100; total time=  19.6s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=10, min_samples_split=2, n_estimators=100; total time=  19.6s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=10, min_samples_split=2, n_estimators=100; total time=  19.6s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=10, min_samples_split=2, n_estimators=100; total time=  19.6s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.5s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   38.9s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s


[CV] END max_depth=10, min_samples_split=2, n_estimators=200; total time=  39.2s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.5s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   39.1s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s


[CV] END max_depth=10, min_samples_split=2, n_estimators=200; total time=  39.4s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.7s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   39.2s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s


[CV] END max_depth=10, min_samples_split=2, n_estimators=200; total time=  39.5s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.4s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   38.9s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s


[CV] END max_depth=10, min_samples_split=2, n_estimators=200; total time=  39.2s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.5s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   38.7s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s


[CV] END max_depth=10, min_samples_split=2, n_estimators=200; total time=  39.1s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.4s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END .max_depth=10, min_samples_split=5, n_estimators=50; total time=   9.7s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END .max_depth=10, min_samples_split=5, n_estimators=50; total time=   9.8s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.4s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END .max_depth=10, min_samples_split=5, n_estimators=50; total time=   9.7s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END .max_depth=10, min_samples_split=5, n_estimators=50; total time=   9.8s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END .max_depth=10, min_samples_split=5, n_estimators=50; total time=   9.7s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=10, min_samples_split=5, n_estimators=100; total time=  19.6s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=10, min_samples_split=5, n_estimators=100; total time=  19.6s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=10, min_samples_split=5, n_estimators=100; total time=  19.5s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.6s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=10, min_samples_split=5, n_estimators=100; total time=  19.7s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=10, min_samples_split=5, n_estimators=100; total time=  19.7s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.7s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   39.1s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s


[CV] END max_depth=10, min_samples_split=5, n_estimators=200; total time=  39.5s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.6s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   39.0s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s


[CV] END max_depth=10, min_samples_split=5, n_estimators=200; total time=  39.4s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.5s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   38.9s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s


[CV] END max_depth=10, min_samples_split=5, n_estimators=200; total time=  39.3s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.5s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   39.0s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s


[CV] END max_depth=10, min_samples_split=5, n_estimators=200; total time=  39.3s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.6s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   39.0s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s


[CV] END max_depth=10, min_samples_split=5, n_estimators=200; total time=  39.3s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=10, min_samples_split=10, n_estimators=50; total time=   9.7s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.4s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=10, min_samples_split=10, n_estimators=50; total time=   9.7s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=10, min_samples_split=10, n_estimators=50; total time=   9.7s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=10, min_samples_split=10, n_estimators=50; total time=   9.8s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.4s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=10, min_samples_split=10, n_estimators=50; total time=   9.7s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=10, min_samples_split=10, n_estimators=100; total time=  19.6s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=10, min_samples_split=10, n_estimators=100; total time=  19.6s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.4s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=10, min_samples_split=10, n_estimators=100; total time=  19.5s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=10, min_samples_split=10, n_estimators=100; total time=  19.6s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=10, min_samples_split=10, n_estimators=100; total time=  19.4s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.5s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   38.8s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s


[CV] END max_depth=10, min_samples_split=10, n_estimators=200; total time=  39.1s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.4s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   38.9s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s


[CV] END max_depth=10, min_samples_split=10, n_estimators=200; total time=  39.2s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.5s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   38.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s


[CV] END max_depth=10, min_samples_split=10, n_estimators=200; total time=  38.8s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.2s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   37.6s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s


[CV] END max_depth=10, min_samples_split=10, n_estimators=200; total time=  38.0s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.1s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   37.3s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s


[CV] END max_depth=10, min_samples_split=10, n_estimators=200; total time=  37.7s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.6s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END .max_depth=20, min_samples_split=2, n_estimators=50; total time=  13.0s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.7s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END .max_depth=20, min_samples_split=2, n_estimators=50; total time=  13.0s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.6s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END .max_depth=20, min_samples_split=2, n_estimators=50; total time=  13.0s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.6s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END .max_depth=20, min_samples_split=2, n_estimators=50; total time=  13.0s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.7s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END .max_depth=20, min_samples_split=2, n_estimators=50; total time=  13.0s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.6s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=20, min_samples_split=2, n_estimators=100; total time=  25.9s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.6s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=20, min_samples_split=2, n_estimators=100; total time=  26.0s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.6s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=20, min_samples_split=2, n_estimators=100; total time=  25.9s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.6s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=20, min_samples_split=2, n_estimators=100; total time=  25.9s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.7s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=20, min_samples_split=2, n_estimators=100; total time=  26.0s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.5s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   51.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=20, min_samples_split=2, n_estimators=200; total time=  52.0s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.6s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   51.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=20, min_samples_split=2, n_estimators=200; total time=  52.0s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.6s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   51.4s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=20, min_samples_split=2, n_estimators=200; total time=  51.9s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.6s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   51.6s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=20, min_samples_split=2, n_estimators=200; total time=  52.1s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.7s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   51.6s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=20, min_samples_split=2, n_estimators=200; total time=  52.1s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END .max_depth=20, min_samples_split=5, n_estimators=50; total time=  12.8s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END .max_depth=20, min_samples_split=5, n_estimators=50; total time=  12.9s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.6s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END .max_depth=20, min_samples_split=5, n_estimators=50; total time=  12.9s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.6s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END .max_depth=20, min_samples_split=5, n_estimators=50; total time=  13.0s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.6s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END .max_depth=20, min_samples_split=5, n_estimators=50; total time=  13.0s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.6s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=20, min_samples_split=5, n_estimators=100; total time=  25.8s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=20, min_samples_split=5, n_estimators=100; total time=  25.7s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=20, min_samples_split=5, n_estimators=100; total time=  25.7s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.6s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=20, min_samples_split=5, n_estimators=100; total time=  25.8s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=20, min_samples_split=5, n_estimators=100; total time=  25.8s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.5s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   51.1s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=20, min_samples_split=5, n_estimators=200; total time=  51.6s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.5s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   51.3s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=20, min_samples_split=5, n_estimators=200; total time=  51.8s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.6s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   51.2s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=20, min_samples_split=5, n_estimators=200; total time=  51.7s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.6s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   51.4s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=20, min_samples_split=5, n_estimators=200; total time=  51.9s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.6s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   51.2s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=20, min_samples_split=5, n_estimators=200; total time=  51.6s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.4s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=20, min_samples_split=10, n_estimators=50; total time=  12.7s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=20, min_samples_split=10, n_estimators=50; total time=  12.8s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=20, min_samples_split=10, n_estimators=50; total time=  12.8s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=20, min_samples_split=10, n_estimators=50; total time=  12.8s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=20, min_samples_split=10, n_estimators=50; total time=  12.8s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.4s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=20, min_samples_split=10, n_estimators=100; total time=  25.5s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.4s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=20, min_samples_split=10, n_estimators=100; total time=  25.7s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.4s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=20, min_samples_split=10, n_estimators=100; total time=  25.5s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=20, min_samples_split=10, n_estimators=100; total time=  25.7s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.6s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=20, min_samples_split=10, n_estimators=100; total time=  25.8s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.5s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   50.9s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=20, min_samples_split=10, n_estimators=200; total time=  51.4s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.4s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   50.9s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=20, min_samples_split=10, n_estimators=200; total time=  51.3s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.5s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   50.9s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=20, min_samples_split=10, n_estimators=200; total time=  51.4s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.5s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   51.0s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=20, min_samples_split=10, n_estimators=200; total time=  51.5s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.6s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   51.2s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=20, min_samples_split=10, n_estimators=200; total time=  51.7s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.8s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END .max_depth=50, min_samples_split=2, n_estimators=50; total time=  13.1s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.9s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END .max_depth=50, min_samples_split=2, n_estimators=50; total time=  13.2s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.8s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END .max_depth=50, min_samples_split=2, n_estimators=50; total time=  13.1s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.8s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END .max_depth=50, min_samples_split=2, n_estimators=50; total time=  13.1s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.9s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END .max_depth=50, min_samples_split=2, n_estimators=50; total time=  13.2s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.8s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=50, min_samples_split=2, n_estimators=100; total time=  26.3s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.8s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=50, min_samples_split=2, n_estimators=100; total time=  26.4s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.8s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=50, min_samples_split=2, n_estimators=100; total time=  26.3s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.8s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=50, min_samples_split=2, n_estimators=100; total time=  26.3s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.9s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=50, min_samples_split=2, n_estimators=100; total time=  26.5s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.8s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   52.3s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=50, min_samples_split=2, n_estimators=200; total time=  52.8s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.8s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   52.3s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=50, min_samples_split=2, n_estimators=200; total time=  52.8s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.8s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   52.3s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=50, min_samples_split=2, n_estimators=200; total time=  52.8s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   13.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   53.3s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=50, min_samples_split=2, n_estimators=200; total time=  53.8s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   13.1s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   53.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=50, min_samples_split=2, n_estimators=200; total time=  54.0s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   13.0s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END .max_depth=50, min_samples_split=5, n_estimators=50; total time=  13.3s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.9s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END .max_depth=50, min_samples_split=5, n_estimators=50; total time=  13.3s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.7s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END .max_depth=50, min_samples_split=5, n_estimators=50; total time=  13.1s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.9s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END .max_depth=50, min_samples_split=5, n_estimators=50; total time=  13.2s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.8s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END .max_depth=50, min_samples_split=5, n_estimators=50; total time=  13.2s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.7s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=50, min_samples_split=5, n_estimators=100; total time=  26.5s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.8s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=50, min_samples_split=5, n_estimators=100; total time=  26.5s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.9s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=50, min_samples_split=5, n_estimators=100; total time=  26.6s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.9s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=50, min_samples_split=5, n_estimators=100; total time=  26.5s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.9s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=50, min_samples_split=5, n_estimators=100; total time=  26.5s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.9s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   52.3s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=50, min_samples_split=5, n_estimators=200; total time=  52.8s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.8s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   52.3s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=50, min_samples_split=5, n_estimators=200; total time=  52.8s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.8s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   52.4s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=50, min_samples_split=5, n_estimators=200; total time=  53.0s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   13.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   52.6s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=50, min_samples_split=5, n_estimators=200; total time=  53.1s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.9s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   52.8s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=50, min_samples_split=5, n_estimators=200; total time=  53.3s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.7s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=50, min_samples_split=10, n_estimators=50; total time=  13.1s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.7s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=50, min_samples_split=10, n_estimators=50; total time=  13.0s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.7s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=50, min_samples_split=10, n_estimators=50; total time=  13.0s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.7s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=50, min_samples_split=10, n_estimators=50; total time=  13.0s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.7s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=50, min_samples_split=10, n_estimators=50; total time=  13.1s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.7s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=50, min_samples_split=10, n_estimators=100; total time=  26.0s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.7s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=50, min_samples_split=10, n_estimators=100; total time=  26.1s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.7s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=50, min_samples_split=10, n_estimators=100; total time=  26.1s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.8s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=50, min_samples_split=10, n_estimators=100; total time=  26.3s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.7s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=50, min_samples_split=10, n_estimators=100; total time=  26.3s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.7s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   51.9s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=50, min_samples_split=10, n_estimators=200; total time=  52.4s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.7s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   51.9s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=50, min_samples_split=10, n_estimators=200; total time=  52.4s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.6s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   51.8s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=50, min_samples_split=10, n_estimators=200; total time=  52.3s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.8s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   51.9s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=50, min_samples_split=10, n_estimators=200; total time=  52.4s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.8s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   51.9s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=50, min_samples_split=10, n_estimators=200; total time=  52.4s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.8s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=100, min_samples_split=2, n_estimators=50; total time=  13.1s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.8s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=100, min_samples_split=2, n_estimators=50; total time=  13.2s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.8s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=100, min_samples_split=2, n_estimators=50; total time=  13.1s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.7s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=100, min_samples_split=2, n_estimators=50; total time=  13.1s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.9s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=100, min_samples_split=2, n_estimators=50; total time=  13.3s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.7s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=100, min_samples_split=2, n_estimators=100; total time=  26.3s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.9s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=100, min_samples_split=2, n_estimators=100; total time=  26.5s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.7s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=100, min_samples_split=2, n_estimators=100; total time=  26.2s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.9s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=100, min_samples_split=2, n_estimators=100; total time=  26.5s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.9s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=100, min_samples_split=2, n_estimators=100; total time=  26.6s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.8s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   52.3s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=100, min_samples_split=2, n_estimators=200; total time=  52.8s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.9s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   52.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=100, min_samples_split=2, n_estimators=200; total time=  53.0s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.8s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   52.4s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=100, min_samples_split=2, n_estimators=200; total time=  52.9s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.9s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   52.7s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=100, min_samples_split=2, n_estimators=200; total time=  53.2s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.8s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   52.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=100, min_samples_split=2, n_estimators=200; total time=  53.0s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.8s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=100, min_samples_split=5, n_estimators=50; total time=  13.1s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.8s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=100, min_samples_split=5, n_estimators=50; total time=  13.1s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.8s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=100, min_samples_split=5, n_estimators=50; total time=  13.1s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.8s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=100, min_samples_split=5, n_estimators=50; total time=  13.1s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.8s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=100, min_samples_split=5, n_estimators=50; total time=  13.1s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.9s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=100, min_samples_split=5, n_estimators=100; total time=  26.4s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.8s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=100, min_samples_split=5, n_estimators=100; total time=  26.2s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.8s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=100, min_samples_split=5, n_estimators=100; total time=  26.3s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.7s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=100, min_samples_split=5, n_estimators=100; total time=  26.3s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   13.0s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=100, min_samples_split=5, n_estimators=100; total time=  26.6s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.8s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   52.3s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=100, min_samples_split=5, n_estimators=200; total time=  52.8s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.7s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   52.1s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=100, min_samples_split=5, n_estimators=200; total time=  52.6s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.8s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   52.3s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=100, min_samples_split=5, n_estimators=200; total time=  52.8s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.8s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   52.3s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=100, min_samples_split=5, n_estimators=200; total time=  52.8s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.9s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   52.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=100, min_samples_split=5, n_estimators=200; total time=  52.9s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.7s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=100, min_samples_split=10, n_estimators=50; total time=  13.1s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.7s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=100, min_samples_split=10, n_estimators=50; total time=  13.0s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.7s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=100, min_samples_split=10, n_estimators=50; total time=  13.0s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.8s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=100, min_samples_split=10, n_estimators=50; total time=  13.1s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.7s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=100, min_samples_split=10, n_estimators=50; total time=  13.1s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.7s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=100, min_samples_split=10, n_estimators=100; total time=  26.0s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.7s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=100, min_samples_split=10, n_estimators=100; total time=  26.0s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.6s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=100, min_samples_split=10, n_estimators=100; total time=  26.0s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.7s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=100, min_samples_split=10, n_estimators=100; total time=  26.2s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.8s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


[CV] END max_depth=100, min_samples_split=10, n_estimators=100; total time=  26.4s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.7s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   51.7s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=100, min_samples_split=10, n_estimators=200; total time=  52.2s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.7s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   52.0s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=100, min_samples_split=10, n_estimators=200; total time=  52.5s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.6s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   51.9s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=100, min_samples_split=10, n_estimators=200; total time=  52.4s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.7s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   51.9s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=100, min_samples_split=10, n_estimators=200; total time=  52.3s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.7s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:   52.1s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


[CV] END max_depth=100, min_samples_split=10, n_estimators=200; total time=  52.6s


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   16.3s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:  1.1min


Best parameters: {'max_depth': 20, 'min_samples_split': 10, 'n_estimators': 200}
Best score: 0.8413749999999999


In [33]:
#I also wanted to test out nearest neighbor
knn = KNeighborsClassifier(n_neighbors = 3)

In [34]:
knn.fit(X_train, y_train)

In [35]:
y_pred = knn.predict(X_test)

In [36]:
#accuracy is significantly lower
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

    negative       0.79      0.73      0.76      4961
    positive       0.75      0.80      0.78      5039

    accuracy                           0.77     10000
   macro avg       0.77      0.77      0.77     10000
weighted avg       0.77      0.77      0.77     10000



In [41]:
#Finally I am testing out a word embeddings approach with word2vec
model = Word2Vec(sentences = data['tokens'])

In [42]:
#function to create a unique vector of a movie review and then the tokens column in passed into this function
def review_to_vector(tokens, model):
    vectors = [model.wv[word] for word in tokens if word in model.wv]
    if len(vectors) == 0:
        return np.zeros(model.vector_size)
    return np.mean(vectors, axis=0)

data['review_vector'] = data['tokens'].apply(lambda tokens: review_to_vector(tokens, model))

In [43]:
X = np.array(data['review_vector'].tolist())

In [44]:
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

In [45]:
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   16.8s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


In [46]:
#accuracy is similar to the random forest approach
report = classification_report(y_test, y_pred, target_names=label_encoder.classes_)
print(report)

              precision    recall  f1-score   support

    negative       0.85      0.82      0.83      4961
    positive       0.83      0.86      0.84      5039

    accuracy                           0.84     10000
   macro avg       0.84      0.84      0.84     10000
weighted avg       0.84      0.84      0.84     10000



In [None]:
#with these results, I would move forward with the random forest model.  It is the simplist model with the highest accuracy.