## Working Notebook

In [1]:
import pandas as pd
from nltk.corpus import stopwords
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from tqdm import tqdm
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import GridSearchCV

In [2]:
# Raw tweet file, decoded as ISO-8859-1; preview the first five rows.
df1 = pd.read_csv(
    filepath_or_buffer='data/tweets.csv',
    encoding='ISO-8859-1',
)
df1.head(n=5)

Unnamed: 0,tweet_text,emotion_in_tweet_is_directed_at,is_there_an_emotion_directed_at_a_brand_or_product
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,iPhone,Negative emotion
1,@jessedee Know about @fludapp ? Awesome iPad/i...,iPad or iPhone App,Positive emotion
2,@swonderlin Can not wait for #iPad 2 also. The...,iPad,Positive emotion
3,@sxsw I hope this year's festival isn't as cra...,iPad or iPhone App,Negative emotion
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,Google,Positive emotion


In [3]:
# Row count per label in the raw file's emotion column.
df1.loc[:, 'is_there_an_emotion_directed_at_a_brand_or_product'].value_counts()

No emotion toward brand or product    5389
Positive emotion                      2978
Negative emotion                       570
I can't tell                           156
Name: is_there_an_emotion_directed_at_a_brand_or_product, dtype: int64

## Load in Processed Text

In [4]:
# Load the processed tweets (first CSV column is the index) and discard rows
# that have no value in "unprocessed_tweet".
df = (
    pd.read_csv("data/processed_tweets.csv", index_col=0)
    .dropna(subset=["unprocessed_tweet"])
)

# Share of all remaining rows that falls under each emotion label.
df["emotion"].value_counts().div(len(df))

No emotion toward brand or product    0.592609
Positive emotion                      0.327541
Negative emotion                      0.062692
I can't tell                          0.017158
Name: emotion, dtype: float64

In [5]:
# Keep every row whose label is not "I can't tell"; copy so that later column
# assignments do not write into a view of `df`.
is_labelled = df["emotion"].ne("I can't tell")
df_multi_dropped = df.loc[is_labelled].copy()
df_multi_dropped.shape

(8936, 4)

In [6]:
# Row count per remaining emotion label.
df_multi_dropped.loc[:, "emotion"].value_counts()

No emotion toward brand or product    5388
Positive emotion                      2978
Negative emotion                       570
Name: emotion, dtype: int64

In [7]:
# Integer code assigned to each emotion label.
emotion_map = dict(
    [
        ("No emotion toward brand or product", 2),
        ("Positive emotion", 1),
        ("Negative emotion", 0),
    ]
)

# Add the encoded label as a new column, then check the count per code.
df_multi_dropped["emotion_encoded"] = df_multi_dropped.emotion.map(emotion_map)
df_multi_dropped.emotion_encoded.value_counts()

2    5388
1    2978
0     570
Name: emotion_encoded, dtype: int64

In [17]:
# NLTK's English stop-word list with apostrophes removed ("don't" -> "dont").
# Presumably this mirrors how the processed tweets were cleaned -- TODO confirm.
# The comprehension must iterate over `sw`: the previous version referenced
# `stop_words`, a name that is never defined in this notebook, so the cell
# raised NameError on a fresh kernel.
sw = stopwords.words("english")
sw = [word.replace("'", "") for word in sw]

In [18]:
# Model input is the `processed_tweet` column; the target is the integer-encoded emotion.
X, y = df_multi_dropped["processed_tweet"], df_multi_dropped["emotion_encoded"]

# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.3)

# Shapes of the four partitions, in the order train/test features, train/test targets.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

((6255,), (2681,), (6255,), (2681,))

In [19]:
# Baseline pipelines: each one pairs its own TF-IDF vectorizer (using the custom
# stop-word list `sw`) with a different classifier. The two tree-based models are seeded.
pipe_dt = Pipeline(
    steps=[
        ("vec", TfidfVectorizer(stop_words=sw)),
        ("dt", DecisionTreeClassifier(random_state=42)),
    ]
)
pipe_rf = Pipeline(
    steps=[
        ("vec", TfidfVectorizer(stop_words=sw)),
        ("rf", RandomForestClassifier(random_state=42)),
    ]
)
pipe_knn = Pipeline(
    steps=[
        ("vec", TfidfVectorizer(stop_words=sw)),
        ("knn", KNeighborsClassifier()),
    ]
)

# `names` and `pipes` are parallel lists: names[i] labels pipes[i].
names = ["dt", "rf", "knn"]
pipes = [pipe_dt, pipe_rf, pipe_knn]

# Fit every pipeline on the training split, with a progress bar.
for pipeline in tqdm(pipes):
    pipeline.fit(X_train, y_train)

100%|██████████| 3/3 [00:08<00:00,  2.97s/it]


In [11]:
# Start with one empty (None) slot per model name ...
preds = dict.fromkeys(names)

# ... then store each fitted pipeline's predictions for the test split under its name.
for name, pipe in tqdm(zip(names, pipes)):
    preds.update({name: pipe.predict(X_test)})


3it [00:01,  1.95it/s]


In [12]:
# For each model: upper-cased name, its classification report on the test
# split, then a blank separator line.
for name, y_pred in preds.items():
    print(name.upper(), classification_report(y_test, y_pred), "", sep="\n")

DT
              precision    recall  f1-score   support

           0       0.32      0.18      0.23       189
           1       0.48      0.48      0.48       880
           2       0.68      0.72      0.70      1612

    accuracy                           0.60      2681
   macro avg       0.50      0.46      0.47      2681
weighted avg       0.59      0.60      0.59      2681


RF
              precision    recall  f1-score   support

           0       0.71      0.15      0.25       189
           1       0.61      0.39      0.47       880
           2       0.68      0.88      0.76      1612

    accuracy                           0.67      2681
   macro avg       0.67      0.47      0.50      2681
weighted avg       0.66      0.67      0.63      2681


KNN
              precision    recall  f1-score   support

           0       0.29      0.11      0.16       189
           1       0.53      0.38      0.44       880
           2       0.67      0.83      0.75      1612

    accu

## Multi-Class Pipeline GridSearch

### GridSearch on Decision Tree Model

In [None]:
# Fresh, unfitted decision-tree pipeline to hand to the grid search.
pipe_dt = Pipeline(
    steps=[
        ("vec", TfidfVectorizer(stop_words=sw)),
        ("dt", DecisionTreeClassifier(random_state=42)),
    ]
)

In [26]:
# Search space for the decision-tree pipeline. Keys are "<step>__<parameter>",
# where the step names ("dt", "vec") are those used in pipe_dt.
grid_dt = {
    # None (the Python object, not the string 'None') means no depth limit.
    # The string 'None' is not a valid max_depth, so every fit that used it failed.
    'dt__max_depth': [None, 2, 5, 10],
    'dt__min_samples_split': [2, 5],
    'vec__ngram_range': [(1, 1), (1, 2)],
    'vec__max_df': [.8, .9, .99],
    'vec__min_df': [.01, .05],
}

In [27]:
# Exhaustive search over grid_dt for the decision-tree pipeline.
# verbose=2 logs one "[CV] END ..." line per individual fit.
gs = GridSearchCV(estimator=pipe_dt, param_grid=grid_dt, verbose=2)

In [28]:
# Run the cross-validated search on the training split only; the test split is not touched here.
gs.fit(X_train, y_train)

Fitting 5 folds for each of 48 candidates, totalling 240 fits
[CV] END dt__max_depth=2, dt__min_samples_split=2, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END dt__max_depth=2, dt__min_samples_split=2, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END dt__max_depth=2, dt__min_samples_split=2, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END dt__max_depth=2, dt__min_samples_split=2, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END dt__max_depth=2, dt__min_samples_split=2, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END dt__max_depth=2, dt__min_samples_split=2, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 2); total time=   0.2s
[CV] END dt__max_depth=2, dt__min_samples_split=2, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 2); total time=   0.2s
[CV] END dt__max_depth=2, dt

[CV] END dt__max_depth=2, dt__min_samples_split=5, vec__max_df=0.99, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END dt__max_depth=2, dt__min_samples_split=5, vec__max_df=0.99, vec__min_df=0.01, vec__ngram_range=(1, 2); total time=   0.2s
[CV] END dt__max_depth=2, dt__min_samples_split=5, vec__max_df=0.99, vec__min_df=0.01, vec__ngram_range=(1, 2); total time=   0.2s
[CV] END dt__max_depth=2, dt__min_samples_split=5, vec__max_df=0.99, vec__min_df=0.01, vec__ngram_range=(1, 2); total time=   0.2s
[CV] END dt__max_depth=2, dt__min_samples_split=5, vec__max_df=0.99, vec__min_df=0.01, vec__ngram_range=(1, 2); total time=   0.2s
[CV] END dt__max_depth=2, dt__min_samples_split=5, vec__max_df=0.99, vec__min_df=0.01, vec__ngram_range=(1, 2); total time=   0.2s
[CV] END dt__max_depth=2, dt__min_samples_split=5, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END dt__max_depth=2, dt__min_samples_split=5, vec__max_df=0.99, vec__min_df=0.

[CV] END dt__max_depth=5, dt__min_samples_split=5, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 2); total time=   0.2s
[CV] END dt__max_depth=5, dt__min_samples_split=5, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 2); total time=   0.2s
[CV] END dt__max_depth=5, dt__min_samples_split=5, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 2); total time=   0.2s
[CV] END dt__max_depth=5, dt__min_samples_split=5, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END dt__max_depth=5, dt__min_samples_split=5, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END dt__max_depth=5, dt__min_samples_split=5, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END dt__max_depth=5, dt__min_samples_split=5, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END dt__max_depth=5, dt__min_samples_split=5, vec__max_df=0.9, vec__min_df=0.05, vec_

[CV] END dt__max_depth=10, dt__min_samples_split=2, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END dt__max_depth=10, dt__min_samples_split=2, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END dt__max_depth=10, dt__min_samples_split=2, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END dt__max_depth=10, dt__min_samples_split=2, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END dt__max_depth=10, dt__min_samples_split=2, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.2s
[CV] END dt__max_depth=10, dt__min_samples_split=2, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.2s
[CV] END dt__max_depth=10, dt__min_samples_split=2, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.2s
[CV] END dt__max_depth=10, dt__min_samples_split=2, vec__max_df=0.99, vec__m

In [29]:
# Parameter combination that the decision-tree search selected as best.
gs.best_params_

{'dt__max_depth': 5,
 'dt__min_samples_split': 2,
 'vec__max_df': 0.9,
 'vec__min_df': 0.01,
 'vec__ngram_range': (1, 2)}

In [30]:
# Cross-validated score of that best combination (computed on folds of the training split).
gs.best_score_

0.634052757793765

**Experimenting with different hyperparameters**

In [53]:
# Decision-tree pipeline with hand-set hyperparameters; the values are the ones
# the decision-tree grid search reported as best.
pipe_dt2 = Pipeline(
    steps=[
        (
            "vec",
            TfidfVectorizer(stop_words=sw, ngram_range=(1, 2), min_df=0.01, max_df=0.9),
        ),
        (
            "dt",
            DecisionTreeClassifier(random_state=42, min_samples_split=2, max_depth=5),
        ),
    ]
)

In [54]:
# Fit the hand-configured pipeline on the training split.
pipe_dt2.fit(X_train, y_train)

# NOTE: scored on the same rows the pipeline was fitted on (X_train, y_train),
# so this is a training score, not a held-out estimate.
pipe_dt2.score(X_train, y_train)

0.640607513988809

### GridSearch on RF

In [None]:
# No new pipeline is built in this section: the search below reuses `pipe_rf`
# from the baseline cell, whose final step is
# RandomForestClassifier(random_state=42)

In [71]:
# Search space for the random-forest pipeline; keyword names follow the
# "<step>__<parameter>" convention with the step names "rf" and "vec".
grid_rf = dict(
    rf__n_estimators=[10, 50, 100],
    rf__max_depth=[2, 5, 10],
    rf__min_samples_split=[2, 3, 4],
    vec__ngram_range=[(1, 1), (1, 2)],
    vec__max_df=[0.9, 0.99],
    vec__min_df=[0.01, 0.05],
)

In [72]:
# Exhaustive search over grid_rf for the random-forest pipeline (`pipe_rf` comes from the baseline cell).
# verbose=2 logs one "[CV] END ..." line per individual fit.
gs_rf = GridSearchCV(pipe_rf, param_grid=grid_rf, verbose=2)

In [73]:
# Run the cross-validated search on the training split only.
gs_rf.fit(X_train, y_train)

Fitting 5 folds for each of 216 candidates, totalling 1080 fits
[CV] END rf__max_depth=2, rf__min_samples_split=2, rf__n_estimators=10, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END rf__max_depth=2, rf__min_samples_split=2, rf__n_estimators=10, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END rf__max_depth=2, rf__min_samples_split=2, rf__n_estimators=10, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END rf__max_depth=2, rf__min_samples_split=2, rf__n_estimators=10, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END rf__max_depth=2, rf__min_samples_split=2, rf__n_estimators=10, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END rf__max_depth=2, rf__min_samples_split=2, rf__n_estimators=10, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 2); total time=   0.2s
[CV] END rf__max_depth=2, rf__

[CV] END rf__max_depth=2, rf__min_samples_split=2, rf__n_estimators=50, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.2s
[CV] END rf__max_depth=2, rf__min_samples_split=2, rf__n_estimators=50, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.3s
[CV] END rf__max_depth=2, rf__min_samples_split=2, rf__n_estimators=50, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.3s
[CV] END rf__max_depth=2, rf__min_samples_split=2, rf__n_estimators=50, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.3s
[CV] END rf__max_depth=2, rf__min_samples_split=2, rf__n_estimators=50, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.3s
[CV] END rf__max_depth=2, rf__min_samples_split=2, rf__n_estimators=50, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.3s
[CV] END rf__max_depth=2, rf__min_samples_split=2, rf__n_estimators=50, vec__max_df=0.99, vec_

[CV] END rf__max_depth=2, rf__min_samples_split=2, rf__n_estimators=100, vec__max_df=0.99, vec__min_df=0.01, vec__ngram_range=(1, 2); total time=   0.4s
[CV] END rf__max_depth=2, rf__min_samples_split=2, rf__n_estimators=100, vec__max_df=0.99, vec__min_df=0.01, vec__ngram_range=(1, 2); total time=   0.4s
[CV] END rf__max_depth=2, rf__min_samples_split=2, rf__n_estimators=100, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.3s
[CV] END rf__max_depth=2, rf__min_samples_split=2, rf__n_estimators=100, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.4s
[CV] END rf__max_depth=2, rf__min_samples_split=2, rf__n_estimators=100, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.3s
[CV] END rf__max_depth=2, rf__min_samples_split=2, rf__n_estimators=100, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.3s
[CV] END rf__max_depth=2, rf__min_samples_split=2, rf__n_estimators=100, vec__max_

[CV] END rf__max_depth=2, rf__min_samples_split=3, rf__n_estimators=50, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.2s
[CV] END rf__max_depth=2, rf__min_samples_split=3, rf__n_estimators=50, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.2s
[CV] END rf__max_depth=2, rf__min_samples_split=3, rf__n_estimators=50, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.2s
[CV] END rf__max_depth=2, rf__min_samples_split=3, rf__n_estimators=50, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 2); total time=   0.3s
[CV] END rf__max_depth=2, rf__min_samples_split=3, rf__n_estimators=50, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 2); total time=   0.2s
[CV] END rf__max_depth=2, rf__min_samples_split=3, rf__n_estimators=50, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 2); total time=   0.3s
[CV] END rf__max_depth=2, rf__min_samples_split=3, rf__n_estimators=50, vec__max_df=0.9, vec__

[CV] END rf__max_depth=2, rf__min_samples_split=3, rf__n_estimators=100, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.4s
[CV] END rf__max_depth=2, rf__min_samples_split=3, rf__n_estimators=100, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.4s
[CV] END rf__max_depth=2, rf__min_samples_split=3, rf__n_estimators=100, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.4s
[CV] END rf__max_depth=2, rf__min_samples_split=3, rf__n_estimators=100, vec__max_df=0.99, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.3s
[CV] END rf__max_depth=2, rf__min_samples_split=3, rf__n_estimators=100, vec__max_df=0.99, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.3s
[CV] END rf__max_depth=2, rf__min_samples_split=3, rf__n_estimators=100, vec__max_df=0.99, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.3s
[CV] END rf__max_depth=2, rf__min_samples_split=3, rf__n_estimators=100, vec__max_df=

[CV] END rf__max_depth=2, rf__min_samples_split=4, rf__n_estimators=10, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.3s
[CV] END rf__max_depth=2, rf__min_samples_split=4, rf__n_estimators=10, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END rf__max_depth=2, rf__min_samples_split=4, rf__n_estimators=10, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END rf__max_depth=2, rf__min_samples_split=4, rf__n_estimators=10, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END rf__max_depth=2, rf__min_samples_split=4, rf__n_estimators=10, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.2s
[CV] END rf__max_depth=2, rf__min_samples_split=4, rf__n_estimators=10, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.3s
[CV] END rf__max_depth=2, rf__min_samples_split=4, rf__n_estimators=10, vec__max_df=0.99

[CV] END rf__max_depth=2, rf__min_samples_split=4, rf__n_estimators=100, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 2); total time=   0.4s
[CV] END rf__max_depth=2, rf__min_samples_split=4, rf__n_estimators=100, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 2); total time=   0.4s
[CV] END rf__max_depth=2, rf__min_samples_split=4, rf__n_estimators=100, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 2); total time=   0.4s
[CV] END rf__max_depth=2, rf__min_samples_split=4, rf__n_estimators=100, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 2); total time=   0.4s
[CV] END rf__max_depth=2, rf__min_samples_split=4, rf__n_estimators=100, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.3s
[CV] END rf__max_depth=2, rf__min_samples_split=4, rf__n_estimators=100, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.3s
[CV] END rf__max_depth=2, rf__min_samples_split=4, rf__n_estimators=100, vec__max_df=0.9

[CV] END rf__max_depth=5, rf__min_samples_split=2, rf__n_estimators=10, vec__max_df=0.99, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END rf__max_depth=5, rf__min_samples_split=2, rf__n_estimators=10, vec__max_df=0.99, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END rf__max_depth=5, rf__min_samples_split=2, rf__n_estimators=10, vec__max_df=0.99, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END rf__max_depth=5, rf__min_samples_split=2, rf__n_estimators=10, vec__max_df=0.99, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END rf__max_depth=5, rf__min_samples_split=2, rf__n_estimators=10, vec__max_df=0.99, vec__min_df=0.01, vec__ngram_range=(1, 2); total time=   0.2s
[CV] END rf__max_depth=5, rf__min_samples_split=2, rf__n_estimators=10, vec__max_df=0.99, vec__min_df=0.01, vec__ngram_range=(1, 2); total time=   0.2s
[CV] END rf__max_depth=5, rf__min_samples_split=2, rf__n_estimators=10, vec__max_df=0.99

[CV] END rf__max_depth=5, rf__min_samples_split=2, rf__n_estimators=50, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.3s
[CV] END rf__max_depth=5, rf__min_samples_split=2, rf__n_estimators=50, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.3s
[CV] END rf__max_depth=5, rf__min_samples_split=2, rf__n_estimators=50, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.3s
[CV] END rf__max_depth=5, rf__min_samples_split=2, rf__n_estimators=50, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.3s
[CV] END rf__max_depth=5, rf__min_samples_split=2, rf__n_estimators=100, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.3s
[CV] END rf__max_depth=5, rf__min_samples_split=2, rf__n_estimators=100, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.4s
[CV] END rf__max_depth=5, rf__min_samples_split=2, rf__n_estimators=100, vec__max_df=0.9

[CV] END rf__max_depth=5, rf__min_samples_split=3, rf__n_estimators=10, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END rf__max_depth=5, rf__min_samples_split=3, rf__n_estimators=10, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.2s
[CV] END rf__max_depth=5, rf__min_samples_split=3, rf__n_estimators=10, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END rf__max_depth=5, rf__min_samples_split=3, rf__n_estimators=10, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END rf__max_depth=5, rf__min_samples_split=3, rf__n_estimators=10, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END rf__max_depth=5, rf__min_samples_split=3, rf__n_estimators=10, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.2s
[CV] END rf__max_depth=5, rf__min_samples_split=3, rf__n_estimators=10, vec__max_df=0.9, vec__

[CV] END rf__max_depth=5, rf__min_samples_split=3, rf__n_estimators=50, vec__max_df=0.99, vec__min_df=0.01, vec__ngram_range=(1, 2); total time=   0.3s
[CV] END rf__max_depth=5, rf__min_samples_split=3, rf__n_estimators=50, vec__max_df=0.99, vec__min_df=0.01, vec__ngram_range=(1, 2); total time=   0.3s
[CV] END rf__max_depth=5, rf__min_samples_split=3, rf__n_estimators=50, vec__max_df=0.99, vec__min_df=0.01, vec__ngram_range=(1, 2); total time=   0.3s
[CV] END rf__max_depth=5, rf__min_samples_split=3, rf__n_estimators=50, vec__max_df=0.99, vec__min_df=0.01, vec__ngram_range=(1, 2); total time=   0.3s
[CV] END rf__max_depth=5, rf__min_samples_split=3, rf__n_estimators=50, vec__max_df=0.99, vec__min_df=0.01, vec__ngram_range=(1, 2); total time=   0.3s
[CV] END rf__max_depth=5, rf__min_samples_split=3, rf__n_estimators=50, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.2s
[CV] END rf__max_depth=5, rf__min_samples_split=3, rf__n_estimators=50, vec__max_df=0.99

[CV] END rf__max_depth=5, rf__min_samples_split=3, rf__n_estimators=100, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.5s
[CV] END rf__max_depth=5, rf__min_samples_split=4, rf__n_estimators=10, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END rf__max_depth=5, rf__min_samples_split=4, rf__n_estimators=10, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END rf__max_depth=5, rf__min_samples_split=4, rf__n_estimators=10, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END rf__max_depth=5, rf__min_samples_split=4, rf__n_estimators=10, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END rf__max_depth=5, rf__min_samples_split=4, rf__n_estimators=10, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END rf__max_depth=5, rf__min_samples_split=4, rf__n_estimators=10, vec__max_df=0.9, vec

[CV] END rf__max_depth=5, rf__min_samples_split=4, rf__n_estimators=50, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.2s
[CV] END rf__max_depth=5, rf__min_samples_split=4, rf__n_estimators=50, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.3s
[CV] END rf__max_depth=5, rf__min_samples_split=4, rf__n_estimators=50, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.4s
[CV] END rf__max_depth=5, rf__min_samples_split=4, rf__n_estimators=50, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.3s
[CV] END rf__max_depth=5, rf__min_samples_split=4, rf__n_estimators=50, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.5s
[CV] END rf__max_depth=5, rf__min_samples_split=4, rf__n_estimators=50, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.3s
[CV] END rf__max_depth=5, rf__min_samples_split=4, rf__n_estimators=50, vec__max_df=0.99, vec_

[CV] END rf__max_depth=5, rf__min_samples_split=4, rf__n_estimators=100, vec__max_df=0.99, vec__min_df=0.01, vec__ngram_range=(1, 2); total time=   0.4s
[CV] END rf__max_depth=5, rf__min_samples_split=4, rf__n_estimators=100, vec__max_df=0.99, vec__min_df=0.01, vec__ngram_range=(1, 2); total time=   0.4s
[CV] END rf__max_depth=5, rf__min_samples_split=4, rf__n_estimators=100, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.4s
[CV] END rf__max_depth=5, rf__min_samples_split=4, rf__n_estimators=100, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.4s
[CV] END rf__max_depth=5, rf__min_samples_split=4, rf__n_estimators=100, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.4s
[CV] END rf__max_depth=5, rf__min_samples_split=4, rf__n_estimators=100, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.4s
[CV] END rf__max_depth=5, rf__min_samples_split=4, rf__n_estimators=100, vec__max_

[CV] END rf__max_depth=10, rf__min_samples_split=2, rf__n_estimators=50, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.3s
[CV] END rf__max_depth=10, rf__min_samples_split=2, rf__n_estimators=50, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.3s
[CV] END rf__max_depth=10, rf__min_samples_split=2, rf__n_estimators=50, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.3s
[CV] END rf__max_depth=10, rf__min_samples_split=2, rf__n_estimators=50, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 2); total time=   0.4s
[CV] END rf__max_depth=10, rf__min_samples_split=2, rf__n_estimators=50, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 2); total time=   0.4s
[CV] END rf__max_depth=10, rf__min_samples_split=2, rf__n_estimators=50, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 2); total time=   0.4s
[CV] END rf__max_depth=10, rf__min_samples_split=2, rf__n_estimators=50, vec__max_df=0.9

[CV] END rf__max_depth=10, rf__min_samples_split=2, rf__n_estimators=100, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.7s
[CV] END rf__max_depth=10, rf__min_samples_split=2, rf__n_estimators=100, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.7s
[CV] END rf__max_depth=10, rf__min_samples_split=2, rf__n_estimators=100, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.9s
[CV] END rf__max_depth=10, rf__min_samples_split=2, rf__n_estimators=100, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.8s
[CV] END rf__max_depth=10, rf__min_samples_split=2, rf__n_estimators=100, vec__max_df=0.99, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.6s
[CV] END rf__max_depth=10, rf__min_samples_split=2, rf__n_estimators=100, vec__max_df=0.99, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.6s
[CV] END rf__max_depth=10, rf__min_samples_split=2, rf__n_estimators=100, vec__m

[CV] END rf__max_depth=10, rf__min_samples_split=3, rf__n_estimators=10, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END rf__max_depth=10, rf__min_samples_split=3, rf__n_estimators=10, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END rf__max_depth=10, rf__min_samples_split=3, rf__n_estimators=10, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END rf__max_depth=10, rf__min_samples_split=3, rf__n_estimators=10, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END rf__max_depth=10, rf__min_samples_split=3, rf__n_estimators=10, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.3s
[CV] END rf__max_depth=10, rf__min_samples_split=3, rf__n_estimators=10, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.3s
[CV] END rf__max_depth=10, rf__min_samples_split=3, rf__n_estimators=10, vec__max_

[CV] END rf__max_depth=10, rf__min_samples_split=3, rf__n_estimators=100, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 2); total time=   0.7s
[CV] END rf__max_depth=10, rf__min_samples_split=3, rf__n_estimators=100, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 2); total time=   0.6s
[CV] END rf__max_depth=10, rf__min_samples_split=3, rf__n_estimators=100, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 2); total time=   0.6s
[CV] END rf__max_depth=10, rf__min_samples_split=3, rf__n_estimators=100, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 2); total time=   0.6s
[CV] END rf__max_depth=10, rf__min_samples_split=3, rf__n_estimators=100, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 2); total time=   0.6s
[CV] END rf__max_depth=10, rf__min_samples_split=3, rf__n_estimators=100, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.7s
[CV] END rf__max_depth=10, rf__min_samples_split=3, rf__n_estimators=100, vec__max

[CV] END rf__max_depth=10, rf__min_samples_split=4, rf__n_estimators=10, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.2s
[CV] END rf__max_depth=10, rf__min_samples_split=4, rf__n_estimators=10, vec__max_df=0.99, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END rf__max_depth=10, rf__min_samples_split=4, rf__n_estimators=10, vec__max_df=0.99, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END rf__max_depth=10, rf__min_samples_split=4, rf__n_estimators=10, vec__max_df=0.99, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END rf__max_depth=10, rf__min_samples_split=4, rf__n_estimators=10, vec__max_df=0.99, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END rf__max_depth=10, rf__min_samples_split=4, rf__n_estimators=10, vec__max_df=0.99, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.1s
[CV] END rf__max_depth=10, rf__min_samples_split=4, rf__n_estimators=10, vec__max_d

[CV] END rf__max_depth=10, rf__min_samples_split=4, rf__n_estimators=50, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.5s
[CV] END rf__max_depth=10, rf__min_samples_split=4, rf__n_estimators=50, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.5s
[CV] END rf__max_depth=10, rf__min_samples_split=4, rf__n_estimators=50, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.7s
[CV] END rf__max_depth=10, rf__min_samples_split=4, rf__n_estimators=50, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.6s
[CV] END rf__max_depth=10, rf__min_samples_split=4, rf__n_estimators=50, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   1.0s
[CV] END rf__max_depth=10, rf__min_samples_split=4, rf__n_estimators=50, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   1.5s
[CV] END rf__max_depth=10, rf__min_samples_split=4, rf__n_estimators=50, vec__max_

In [74]:
# Parameter combination that the random-forest search selected as best.
gs_rf.best_params_

{'rf__max_depth': 10,
 'rf__min_samples_split': 3,
 'rf__n_estimators': 100,
 'vec__max_df': 0.9,
 'vec__min_df': 0.01,
 'vec__ngram_range': (1, 1)}

In [75]:
# Cross-validated score of that best combination (computed on folds of the training split).
gs_rf.best_score_

0.641726618705036

### GridSearch on KNN

In [None]:
# Fresh, unfitted KNN pipeline to hand to the grid search.
pipe_knn = Pipeline(
    steps=[
        ("vec", TfidfVectorizer(stop_words=sw)),
        ("knn", KNeighborsClassifier()),
    ]
)

In [55]:
# Search space for the KNN pipeline; keyword names follow the
# "<step>__<parameter>" convention with the step names "knn" and "vec".
grid_knn = dict(
    knn__n_neighbors=[2, 4, 6],
    knn__weights=['uniform', 'distance'],
    knn__p=[1, 2],
    vec__ngram_range=[(1, 1), (1, 2)],
    vec__max_df=[0.9, 0.99],
    vec__min_df=[0.01, 0.05],
)

In [56]:
# Exhaustive search for the KNN pipeline. The grid has to be grid_knn: its keys
# use the "knn__" / "vec__" prefixes that match pipe_knn's step names, whereas
# grid_rf's "rf__" keys refer to a step this pipeline does not have.
# verbose=2 logs one "[CV] END ..." line per individual fit.
gs_knn = GridSearchCV(pipe_knn, param_grid=grid_knn, verbose=2)

In [57]:
# Run the cross-validated search on the training split only.
gs_knn.fit(X_train, y_train)

Fitting 5 folds for each of 96 candidates, totalling 480 fits
[CV] END knn__n_neighbors=2, knn__p=1, knn__weights=uniform, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.4s
[CV] END knn__n_neighbors=2, knn__p=1, knn__weights=uniform, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.3s
[CV] END knn__n_neighbors=2, knn__p=1, knn__weights=uniform, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.3s
[CV] END knn__n_neighbors=2, knn__p=1, knn__weights=uniform, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.3s
[CV] END knn__n_neighbors=2, knn__p=1, knn__weights=uniform, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.3s
[CV] END knn__n_neighbors=2, knn__p=1, knn__weights=uniform, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 2); total time=   0.5s
[CV] END knn__n_neighbors=2, knn__p=1, knn__weights=uniform, vec__max_df=0.9, vec__min_df=0.01, ve

[CV] END knn__n_neighbors=2, knn__p=1, knn__weights=distance, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.4s
[CV] END knn__n_neighbors=2, knn__p=1, knn__weights=distance, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.4s
[CV] END knn__n_neighbors=2, knn__p=1, knn__weights=distance, vec__max_df=0.99, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.3s
[CV] END knn__n_neighbors=2, knn__p=1, knn__weights=distance, vec__max_df=0.99, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.3s
[CV] END knn__n_neighbors=2, knn__p=1, knn__weights=distance, vec__max_df=0.99, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.3s
[CV] END knn__n_neighbors=2, knn__p=1, knn__weights=distance, vec__max_df=0.99, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.3s
[CV] END knn__n_neighbors=2, knn__p=1, knn__weights=distance, vec__max_df=0.99, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.3s
[CV] END

[CV] END knn__n_neighbors=2, knn__p=2, knn__weights=uniform, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.4s
[CV] END knn__n_neighbors=2, knn__p=2, knn__weights=uniform, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   1.9s
[CV] END knn__n_neighbors=2, knn__p=2, knn__weights=uniform, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   1.5s
[CV] END knn__n_neighbors=2, knn__p=2, knn__weights=distance, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   1.2s
[CV] END knn__n_neighbors=2, knn__p=2, knn__weights=distance, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.6s
[CV] END knn__n_neighbors=2, knn__p=2, knn__weights=distance, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.9s
[CV] END knn__n_neighbors=2, knn__p=2, knn__weights=distance, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.6s
[CV] END knn_

[CV] END knn__n_neighbors=4, knn__p=1, knn__weights=uniform, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.4s
[CV] END knn__n_neighbors=4, knn__p=1, knn__weights=uniform, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.4s
[CV] END knn__n_neighbors=4, knn__p=1, knn__weights=uniform, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.4s
[CV] END knn__n_neighbors=4, knn__p=1, knn__weights=uniform, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.4s
[CV] END knn__n_neighbors=4, knn__p=1, knn__weights=uniform, vec__max_df=0.99, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.3s
[CV] END knn__n_neighbors=4, knn__p=1, knn__weights=uniform, vec__max_df=0.99, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.3s
[CV] END knn__n_neighbors=4, knn__p=1, knn__weights=uniform, vec__max_df=0.99, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.3s
[CV] END knn__n_n

[CV] END knn__n_neighbors=4, knn__p=1, knn__weights=distance, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.4s
[CV] END knn__n_neighbors=4, knn__p=1, knn__weights=distance, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.4s
[CV] END knn__n_neighbors=4, knn__p=1, knn__weights=distance, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.4s
[CV] END knn__n_neighbors=4, knn__p=1, knn__weights=distance, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.4s
[CV] END knn__n_neighbors=4, knn__p=1, knn__weights=distance, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.4s
[CV] END knn__n_neighbors=4, knn__p=2, knn__weights=uniform, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.3s
[CV] END knn__n_neighbors=4, knn__p=2, knn__weights=uniform, vec__max_df=0.9, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.3s
[CV] END k

[CV] END knn__n_neighbors=4, knn__p=2, knn__weights=distance, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.2s
[CV] END knn__n_neighbors=4, knn__p=2, knn__weights=distance, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.4s
[CV] END knn__n_neighbors=4, knn__p=2, knn__weights=distance, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.4s
[CV] END knn__n_neighbors=4, knn__p=2, knn__weights=distance, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.4s
[CV] END knn__n_neighbors=4, knn__p=2, knn__weights=distance, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.3s
[CV] END knn__n_neighbors=4, knn__p=2, knn__weights=distance, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.3s
[CV] END knn__n_neighbors=4, knn__p=2, knn__weights=distance, vec__max_df=0.99, vec__min_df=0.01, vec__ngram_range=(1, 1); total time=   0.3s
[CV] END knn

[CV] END knn__n_neighbors=6, knn__p=1, knn__weights=uniform, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.3s
[CV] END knn__n_neighbors=6, knn__p=1, knn__weights=uniform, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.3s
[CV] END knn__n_neighbors=6, knn__p=1, knn__weights=uniform, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.4s
[CV] END knn__n_neighbors=6, knn__p=1, knn__weights=uniform, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.4s
[CV] END knn__n_neighbors=6, knn__p=1, knn__weights=uniform, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.5s
[CV] END knn__n_neighbors=6, knn__p=1, knn__weights=uniform, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.9s
[CV] END knn__n_neighbors=6, knn__p=1, knn__weights=uniform, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.7s
[CV] END knn_

[CV] END knn__n_neighbors=6, knn__p=2, knn__weights=uniform, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.3s
[CV] END knn__n_neighbors=6, knn__p=2, knn__weights=uniform, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.3s
[CV] END knn__n_neighbors=6, knn__p=2, knn__weights=uniform, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.3s
[CV] END knn__n_neighbors=6, knn__p=2, knn__weights=uniform, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.4s
[CV] END knn__n_neighbors=6, knn__p=2, knn__weights=uniform, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.4s
[CV] END knn__n_neighbors=6, knn__p=2, knn__weights=uniform, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.4s
[CV] END knn__n_neighbors=6, knn__p=2, knn__weights=uniform, vec__max_df=0.9, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.4s
[CV] END knn__n_neig

[CV] END knn__n_neighbors=6, knn__p=2, knn__weights=distance, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.2s
[CV] END knn__n_neighbors=6, knn__p=2, knn__weights=distance, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.2s
[CV] END knn__n_neighbors=6, knn__p=2, knn__weights=distance, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.2s
[CV] END knn__n_neighbors=6, knn__p=2, knn__weights=distance, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 1); total time=   0.2s
[CV] END knn__n_neighbors=6, knn__p=2, knn__weights=distance, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.4s
[CV] END knn__n_neighbors=6, knn__p=2, knn__weights=distance, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.4s
[CV] END knn__n_neighbors=6, knn__p=2, knn__weights=distance, vec__max_df=0.99, vec__min_df=0.05, vec__ngram_range=(1, 2); total time=   0.4s
[CV] E

In [58]:
# Best hyperparameter combination found by the KNN grid search (read from
# `gs_knn`, the search object fitted in this section).
gs_knn.best_params_

{'knn__n_neighbors': 6,
 'knn__p': 2,
 'knn__weights': 'distance',
 'vec__max_df': 0.9,
 'vec__min_df': 0.01,
 'vec__ngram_range': (1, 2)}

In [59]:
# Mean cross-validated score of the best KNN candidate. `gs_knn` was built
# without `scoring=`, so this is the pipeline's default score (accuracy for a
# classifier).
gs_knn.best_score_

0.6108713029576338