# EP1 de Processamento de Língua Natural - Grid Search

## Leitura e análise dos dados

In [1]:
import pandas as pd
import numpy as np
import time
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.dummy import DummyClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline


In [2]:
treino_dataset = pd.read_excel(io='../../data/ep1_esic2023_clareza_TRAIN.xlsx')
treino_dataset

Unnamed: 0,resp_text,clarity
0,Prezado Sr Jose Taunai Em atenção ao seu pe...,c5
1,"""A pedido do Pró-Reitor de Graduação, informa...",c5
2,"""Prezado (a) Sr. (a), Agradecemos o contato e...",c234
3,"""Prezado (a) Sr. (a), Agradecemos o contato e...",c234
4,"""Prezado Prof. Gilberto Tadeu Reis da Silva ...",c234
...,...,...
5995,Trata-se de solicitação com base na Lei de Ac...,c1
5996,Trata-se de uma solicitação repetida. As info...,c5
5997,Unidade:,c5
5998,Vale dizer que a gestão dos Telefones de Uso ...,c234


## Modelos

In [3]:
X = treino_dataset.resp_text
Y = treino_dataset.clarity

In [4]:
def print_results(grid_search):
    best_params = grid_search.best_params_
    best_estimator = grid_search.best_estimator_

    mean_accuracy = cross_val_score(best_estimator, X, Y, scoring='accuracy', cv=10, n_jobs=2).mean()

    print("Best Parameters:", best_params)
    print("Best Mean Accuracy:", mean_accuracy)

### Regressão Logística

#### Count Vectorizer

In [4]:
pipeline_logistic_count = Pipeline([
    ('vectorizer', CountVectorizer()),
    ('clf', LogisticRegression(max_iter=9999))
])

param_grid_logistic_count = {
    'vectorizer__max_features': [5000, None],
    'vectorizer__lowercase': [True, False],
    'clf__C': [0.001, 0.01, 0.1, 0.5, 1.0, 5.0, 10.0],
    'clf__class_weight': ['balanced', None],
    'clf__penalty': ['l1', 'l2'],
    'clf__solver': ['lbfgs', 'liblinear']
}

grid_search = GridSearchCV(pipeline_logistic_count, param_grid_logistic_count, cv=10, scoring='accuracy', n_jobs=3, verbose=10)

grid_search.fit(X, Y)

print_results(grid_search)

NameError: name 'X' is not defined

#### TF-IDF Vectorizer Padrão, (2,2), (3,3) e (4,4)

In [23]:
pipeline_logistic_tfidf_bigram = Pipeline([
    ('vectorizer', TfidfVectorizer(max_features=None)),
    ('clf', LogisticRegression(max_iter=9999))
])

param_grid_logistic_tfidf_bigram = {
    'vectorizer__ngram_range': [(1,1), (2,2), (3,3), (4,4)],
    'vectorizer__lowercase': [True, False],
    'clf__C': [0.01, 0.05, 0.1, 0.5, 1.0, 5.0],
    'clf__class_weight': ['balanced', None],
    'clf__penalty': ['l1', 'l2'],
    'clf__solver': ['lbfgs', 'liblinear']
}

grid_search_logistic_tfidf_bigram = GridSearchCV(pipeline_logistic_tfidf_bigram, param_grid_logistic_tfidf_bigram, cv=10, scoring='accuracy', n_jobs=2, verbose=10)

grid_search_logistic_tfidf_bigram.fit(X, Y)

print_results(grid_search_logistic_tfidf_bigram)

Fitting 10 folds for each of 384 candidates, totalling 3840 fits
[CV 2/10; 1/384] START clf__C=0.01, clf__class_weight=balanced, clf__penalty=l1, clf__solver=lbfgs, vectorizer__lowercase=True, vectorizer__ngram_range=(1, 1)
[CV 1/10; 1/384] START clf__C=0.01, clf__class_weight=balanced, clf__penalty=l1, clf__solver=lbfgs, vectorizer__lowercase=True, vectorizer__ngram_range=(1, 1)
[CV 2/10; 1/384] END clf__C=0.01, clf__class_weight=balanced, clf__penalty=l1, clf__solver=lbfgs, vectorizer__lowercase=True, vectorizer__ngram_range=(1, 1);, score=nan total time=   0.3s
[CV 1/10; 1/384] END clf__C=0.01, clf__class_weight=balanced, clf__penalty=l1, clf__solver=lbfgs, vectorizer__lowercase=True, vectorizer__ngram_range=(1, 1);, score=nan total time=   0.3s
[CV 3/10; 1/384] START clf__C=0.01, clf__class_weight=balanced, clf__penalty=l1, clf__solver=lbfgs, vectorizer__lowercase=True, vectorizer__ngram_range=(1, 1)
[CV 4/10; 1/384] START clf__C=0.01, clf__class_weight=balanced, clf__penalty=l1, c



[CV 6/10; 18/384] START clf__C=0.01, clf__class_weight=balanced, clf__penalty=l2, clf__solver=lbfgs, vectorizer__lowercase=True, vectorizer__ngram_range=(2, 2)
[CV 5/10; 18/384] END clf__C=0.01, clf__class_weight=balanced, clf__penalty=l2, clf__solver=lbfgs, vectorizer__lowercase=True, vectorizer__ngram_range=(2, 2);, score=0.423 total time=   1.5s
[CV 7/10; 18/384] START clf__C=0.01, clf__class_weight=balanced, clf__penalty=l2, clf__solver=lbfgs, vectorizer__lowercase=True, vectorizer__ngram_range=(2, 2)
[CV 6/10; 18/384] END clf__C=0.01, clf__class_weight=balanced, clf__penalty=l2, clf__solver=lbfgs, vectorizer__lowercase=True, vectorizer__ngram_range=(2, 2);, score=0.430 total time=   1.4s
[CV 8/10; 18/384] START clf__C=0.01, clf__class_weight=balanced, clf__penalty=l2, clf__solver=lbfgs, vectorizer__lowercase=True, vectorizer__ngram_range=(2, 2)
[CV 7/10; 18/384] END clf__C=0.01, clf__class_weight=balanced, clf__penalty=l2, clf__solver=lbfgs, vectorizer__lowercase=True, vectorizer_



[CV 1/10; 300/384] END clf__C=1.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=True, vectorizer__ngram_range=(4, 4);, score=0.433 total time= 4.8min
[CV 3/10; 300/384] START clf__C=1.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=True, vectorizer__ngram_range=(4, 4)
[CV 3/10; 300/384] END clf__C=1.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=True, vectorizer__ngram_range=(4, 4);, score=0.388 total time=   1.8s
[CV 4/10; 300/384] START clf__C=1.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=True, vectorizer__ngram_range=(4, 4)
[CV 4/10; 300/384] END clf__C=1.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=True, vectorizer__ngram_range=(4, 4);, score=0.433 total time=   1.7s
[CV 5/10; 300/384] START clf__C=1.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vector



[CV 2/10; 300/384] END clf__C=1.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=True, vectorizer__ngram_range=(4, 4);, score=0.455 total time= 5.0min
[CV 8/10; 300/384] START clf__C=1.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=True, vectorizer__ngram_range=(4, 4)
[CV 7/10; 300/384] END clf__C=1.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=True, vectorizer__ngram_range=(4, 4);, score=0.392 total time=   1.5s
[CV 9/10; 300/384] START clf__C=1.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=True, vectorizer__ngram_range=(4, 4)
[CV 8/10; 300/384] END clf__C=1.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=True, vectorizer__ngram_range=(4, 4);, score=0.407 total time=   1.6s
[CV 10/10; 300/384] START clf__C=1.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vecto



[CV 7/10; 331/384] END clf__C=5.0, clf__class_weight=balanced, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=True, vectorizer__ngram_range=(3, 3);, score=0.347 total time= 4.4min
[CV 8/10; 332/384] START clf__C=5.0, clf__class_weight=balanced, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=True, vectorizer__ngram_range=(4, 4)
[CV 8/10; 332/384] END clf__C=5.0, clf__class_weight=balanced, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=True, vectorizer__ngram_range=(4, 4);, score=0.422 total time=   2.0s
[CV 9/10; 332/384] START clf__C=5.0, clf__class_weight=balanced, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=True, vectorizer__ngram_range=(4, 4)
[CV 9/10; 332/384] END clf__C=5.0, clf__class_weight=balanced, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=True, vectorizer__ngram_range=(4, 4);, score=0.385 total time=   1.9s
[CV 10/10; 332/384] START clf__C=5.0, clf__class_weight=balanced, clf__penalty=l1, clf_



[CV 7/10; 332/384] END clf__C=5.0, clf__class_weight=balanced, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=True, vectorizer__ngram_range=(4, 4);, score=0.335 total time= 6.5min
[CV 6/10; 335/384] START clf__C=5.0, clf__class_weight=balanced, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(3, 3)




[CV 5/10; 335/384] END clf__C=5.0, clf__class_weight=balanced, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(3, 3);, score=0.432 total time= 4.7min
[CV 7/10; 335/384] START clf__C=5.0, clf__class_weight=balanced, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(3, 3)




[CV 6/10; 335/384] END clf__C=5.0, clf__class_weight=balanced, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(3, 3);, score=0.430 total time= 5.2min
[CV 8/10; 335/384] START clf__C=5.0, clf__class_weight=balanced, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(3, 3)
[CV 8/10; 335/384] END clf__C=5.0, clf__class_weight=balanced, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(3, 3);, score=0.437 total time=   1.8s
[CV 9/10; 335/384] START clf__C=5.0, clf__class_weight=balanced, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(3, 3)
[CV 9/10; 335/384] END clf__C=5.0, clf__class_weight=balanced, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(3, 3);, score=0.405 total time=   1.8s
[CV 10/10; 335/384] START clf__C=5.0, clf__class_weight=balanced, clf__penalty=l1,



[CV 7/10; 335/384] END clf__C=5.0, clf__class_weight=balanced, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(3, 3);, score=0.335 total time= 5.1min
[CV 2/10; 336/384] START clf__C=5.0, clf__class_weight=balanced, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(4, 4)
[CV 2/10; 336/384] END clf__C=5.0, clf__class_weight=balanced, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(4, 4);, score=0.523 total time=   2.0s
[CV 3/10; 336/384] START clf__C=5.0, clf__class_weight=balanced, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(4, 4)
[CV 3/10; 336/384] END clf__C=5.0, clf__class_weight=balanced, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(4, 4);, score=0.453 total time=   2.0s
[CV 4/10; 336/384] START clf__C=5.0, clf__class_weight=balanced, clf__penalty=l1, 



[CV 1/10; 336/384] END clf__C=5.0, clf__class_weight=balanced, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(4, 4);, score=0.420 total time= 5.3min
[CV 5/10; 336/384] START clf__C=5.0, clf__class_weight=balanced, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(4, 4)
[CV 5/10; 336/384] END clf__C=5.0, clf__class_weight=balanced, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(4, 4);, score=0.420 total time=   2.0s
[CV 6/10; 336/384] START clf__C=5.0, clf__class_weight=balanced, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(4, 4)




[CV 4/10; 336/384] END clf__C=5.0, clf__class_weight=balanced, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(4, 4);, score=0.397 total time= 5.7min
[CV 7/10; 336/384] START clf__C=5.0, clf__class_weight=balanced, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(4, 4)
[CV 7/10; 336/384] END clf__C=5.0, clf__class_weight=balanced, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(4, 4);, score=0.330 total time=   1.9s
[CV 8/10; 336/384] START clf__C=5.0, clf__class_weight=balanced, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(4, 4)




[CV 6/10; 336/384] END clf__C=5.0, clf__class_weight=balanced, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(4, 4);, score=0.432 total time= 6.9min
[CV 9/10; 336/384] START clf__C=5.0, clf__class_weight=balanced, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(4, 4)




[CV 8/10; 336/384] END clf__C=5.0, clf__class_weight=balanced, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(4, 4);, score=0.428 total time= 5.0min
[CV 10/10; 336/384] START clf__C=5.0, clf__class_weight=balanced, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(4, 4)




[CV 10/10; 336/384] END clf__C=5.0, clf__class_weight=balanced, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(4, 4);, score=0.472 total time= 5.6min
[CV 1/10; 337/384] START clf__C=5.0, clf__class_weight=balanced, clf__penalty=l2, clf__solver=lbfgs, vectorizer__lowercase=True, vectorizer__ngram_range=(1, 1)
[CV 1/10; 337/384] END clf__C=5.0, clf__class_weight=balanced, clf__penalty=l2, clf__solver=lbfgs, vectorizer__lowercase=True, vectorizer__ngram_range=(1, 1);, score=0.450 total time=   2.0s
[CV 2/10; 337/384] START clf__C=5.0, clf__class_weight=balanced, clf__penalty=l2, clf__solver=lbfgs, vectorizer__lowercase=True, vectorizer__ngram_range=(1, 1)
[CV 2/10; 337/384] END clf__C=5.0, clf__class_weight=balanced, clf__penalty=l2, clf__solver=lbfgs, vectorizer__lowercase=True, vectorizer__ngram_range=(1, 1);, score=0.507 total time=   2.3s
[CV 3/10; 337/384] START clf__C=5.0, clf__class_weight=balanced, clf__penalty=l2, clf__solver=lbfgs, 



[CV 9/10; 336/384] END clf__C=5.0, clf__class_weight=balanced, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(4, 4);, score=0.385 total time=10.6min
[CV 10/10; 339/384] START clf__C=5.0, clf__class_weight=balanced, clf__penalty=l2, clf__solver=lbfgs, vectorizer__lowercase=True, vectorizer__ngram_range=(3, 3)
[CV 9/10; 339/384] END clf__C=5.0, clf__class_weight=balanced, clf__penalty=l2, clf__solver=lbfgs, vectorizer__lowercase=True, vectorizer__ngram_range=(3, 3);, score=0.433 total time=  15.6s
[CV 1/10; 340/384] START clf__C=5.0, clf__class_weight=balanced, clf__penalty=l2, clf__solver=lbfgs, vectorizer__lowercase=True, vectorizer__ngram_range=(4, 4)
[CV 10/10; 339/384] END clf__C=5.0, clf__class_weight=balanced, clf__penalty=l2, clf__solver=lbfgs, vectorizer__lowercase=True, vectorizer__ngram_range=(3, 3);, score=0.475 total time=  16.9s
[CV 2/10; 340/384] START clf__C=5.0, clf__class_weight=balanced, clf__penalty=l2, clf__solver=lbfgs,



[CV 1/10; 363/384] END clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=True, vectorizer__ngram_range=(3, 3);, score=0.450 total time= 4.3min
[CV 7/10; 363/384] START clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=True, vectorizer__ngram_range=(3, 3)
[CV 7/10; 363/384] END clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=True, vectorizer__ngram_range=(3, 3);, score=0.347 total time=   1.7s
[CV 8/10; 363/384] START clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=True, vectorizer__ngram_range=(3, 3)
[CV 8/10; 363/384] END clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=True, vectorizer__ngram_range=(3, 3);, score=0.455 total time=   1.7s
[CV 9/10; 363/384] START clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vector



[CV 6/10; 363/384] END clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=True, vectorizer__ngram_range=(3, 3);, score=0.430 total time= 4.7min
[CV 10/10; 363/384] START clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=True, vectorizer__ngram_range=(3, 3)
[CV 10/10; 363/384] END clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=True, vectorizer__ngram_range=(3, 3);, score=0.472 total time=   1.7s
[CV 1/10; 364/384] START clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=True, vectorizer__ngram_range=(4, 4)
[CV 1/10; 364/384] END clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=True, vectorizer__ngram_range=(4, 4);, score=0.467 total time=   2.0s
[CV 2/10; 364/384] START clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vect



[CV 9/10; 363/384] END clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=True, vectorizer__ngram_range=(3, 3);, score=0.398 total time= 4.6min
[CV 5/10; 364/384] START clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=True, vectorizer__ngram_range=(4, 4)
[CV 5/10; 364/384] END clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=True, vectorizer__ngram_range=(4, 4);, score=0.398 total time=   1.9s
[CV 6/10; 364/384] START clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=True, vectorizer__ngram_range=(4, 4)
[CV 6/10; 364/384] END clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=True, vectorizer__ngram_range=(4, 4);, score=0.422 total time=   1.9s
[CV 7/10; 364/384] START clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vector



[CV 4/10; 364/384] END clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=True, vectorizer__ngram_range=(4, 4);, score=0.438 total time= 5.0min
[CV 9/10; 364/384] START clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=True, vectorizer__ngram_range=(4, 4)
[CV 9/10; 364/384] END clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=True, vectorizer__ngram_range=(4, 4);, score=0.387 total time=   2.0s
[CV 10/10; 364/384] START clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=True, vectorizer__ngram_range=(4, 4)
[CV 10/10; 364/384] END clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=True, vectorizer__ngram_range=(4, 4);, score=0.477 total time=   1.9s
[CV 1/10; 365/384] START clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vect



[CV 8/10; 364/384] END clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=True, vectorizer__ngram_range=(4, 4);, score=0.422 total time= 5.0min
[CV 5/10; 367/384] START clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(3, 3)
[CV 5/10; 367/384] END clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(3, 3);, score=0.432 total time=   1.7s
[CV 6/10; 367/384] START clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(3, 3)




[CV 4/10; 367/384] END clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(3, 3);, score=0.422 total time= 5.1min
[CV 7/10; 367/384] START clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(3, 3)
[CV 7/10; 367/384] END clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(3, 3);, score=0.337 total time=   1.7s
[CV 8/10; 367/384] START clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(3, 3)




[CV 6/10; 367/384] END clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(3, 3);, score=0.430 total time= 4.9min
[CV 9/10; 367/384] START clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(3, 3)
[CV 9/10; 367/384] END clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(3, 3);, score=0.407 total time=   1.8s
[CV 10/10; 367/384] START clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(3, 3)
[CV 10/10; 367/384] END clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(3, 3);, score=0.457 total time=   1.7s
[CV 1/10; 368/384] START clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear,



[CV 8/10; 367/384] END clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(3, 3);, score=0.437 total time= 5.1min
[CV 4/10; 368/384] START clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(4, 4)




[CV 3/10; 368/384] END clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(4, 4);, score=0.453 total time= 5.3min
[CV 5/10; 368/384] START clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(4, 4)




[CV 4/10; 368/384] END clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(4, 4);, score=0.395 total time= 5.6min
[CV 6/10; 368/384] START clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(4, 4)




[CV 5/10; 368/384] END clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(4, 4);, score=0.420 total time= 5.1min
[CV 7/10; 368/384] START clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(4, 4)




[CV 7/10; 368/384] END clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(4, 4);, score=0.330 total time= 5.4min
[CV 8/10; 368/384] START clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(4, 4)
[CV 8/10; 368/384] END clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(4, 4);, score=0.432 total time=   2.0s
[CV 9/10; 368/384] START clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(4, 4)
[CV 9/10; 368/384] END clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(4, 4);, score=0.387 total time=   2.0s
[CV 10/10; 368/384] START clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, 



[CV 6/10; 368/384] END clf__C=5.0, clf__class_weight=None, clf__penalty=l1, clf__solver=liblinear, vectorizer__lowercase=False, vectorizer__ngram_range=(4, 4);, score=0.432 total time=10.8min
[CV 2/10; 371/384] START clf__C=5.0, clf__class_weight=None, clf__penalty=l2, clf__solver=lbfgs, vectorizer__lowercase=True, vectorizer__ngram_range=(3, 3)
[CV 1/10; 371/384] END clf__C=5.0, clf__class_weight=None, clf__penalty=l2, clf__solver=lbfgs, vectorizer__lowercase=True, vectorizer__ngram_range=(3, 3);, score=0.457 total time=  16.3s
[CV 3/10; 371/384] START clf__C=5.0, clf__class_weight=None, clf__penalty=l2, clf__solver=lbfgs, vectorizer__lowercase=True, vectorizer__ngram_range=(3, 3)
[CV 2/10; 371/384] END clf__C=5.0, clf__class_weight=None, clf__penalty=l2, clf__solver=lbfgs, vectorizer__lowercase=True, vectorizer__ngram_range=(3, 3);, score=0.518 total time=  14.8s
[CV 4/10; 371/384] START clf__C=5.0, clf__class_weight=None, clf__penalty=l2, clf__solver=lbfgs, vectorizer__lowercase=Tru

960 fits failed out of a total of 3840.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
960 fits failed with the following error:
Traceback (most recent call last):
  File "/opt/homebrew/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/opt/homebrew/lib/python3.11/site-packages/sklearn/base.py", line 1152, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/lib/python3.11/site-packages/sklearn/pipeline.py", line 427, in fit
    self._final_estimator.fit(Xt, y, **fit_params_last_step)
  File "/opt/homebrew/lib/python3.11/site-packages/sklearn/base.py", l

Best Parameters: {'clf__C': 1.0, 'clf__class_weight': 'balanced', 'clf__penalty': 'l2', 'clf__solver': 'lbfgs', 'vectorizer__lowercase': False, 'vectorizer__ngram_range': (1, 1)}
Best Mean Accuracy: 0.4655


#### TF-IDF Vectorizer 2-Char, 3-Char, 4-Char e 5-Char

In [None]:
pipeline_logistic_tfidf_3char = Pipeline([
    ('vectorizer', TfidfVectorizer(analyzer='char')),
    ('clf', LogisticRegression(max_iter=9999))
])

param_grid_logistic_tfidf_3char = {
    'vectorizer__lowercase': [True, False],
    'vectorizer__ngram_range': [(2,2), (3,3), (4,4), (5,5)],
    'clf__C': [0.01, 0.05, 0.1, 0.5, 1.0, 5.0],
    'clf__class_weight': ['balanced', None],
    'clf__penalty': ['l1', 'l2'],
    'clf__solver': ['lbfgs', 'liblinear']
}

grid_search_logistic_tfidf_3char = GridSearchCV(pipeline_logistic_tfidf_3char, param_grid_logistic_tfidf_3char, cv=10, scoring='accuracy', n_jobs=2, verbose=10)

grid_search_logistic_tfidf_3char.fit(X, Y)

print_results(grid_search_logistic_tfidf_3char)

### Random Forest

#### Count Vectorizer

In [10]:
pipeline_random_forest_count = Pipeline([
    ('vectorizer', CountVectorizer()),
    ('clf', RandomForestClassifier(random_state=100, class_weight='balanced'))
])

param_grid_random_forest_count = {
    'vectorizer__lowercase': [True, False],
    'clf__n_estimators': [5, 10, 50, 100, 500, 1000],
    'clf__criterion': ['gini', 'entropy', 'log_loss'],
    'clf__max_features': ['sqrt', 'log2', None]
}

grid_search_random_forest_count = GridSearchCV(pipeline_random_forest_count, param_grid_random_forest_count, cv=10, scoring='accuracy', n_jobs=2, verbose=10)

grid_search_random_forest_count.fit(X, Y)

print_results(grid_search_random_forest_count)

Fitting 10 folds for each of 16 candidates, totalling 160 fits
[CV 2/10; 1/16] START clf__class_weight=balanced, clf__random_state=100, vectorizer__lowercase=True, vectorizer__max_features=5000
[CV 1/10; 1/16] START clf__class_weight=balanced, clf__random_state=100, vectorizer__lowercase=True, vectorizer__max_features=5000
[CV 2/10; 1/16] END clf__class_weight=balanced, clf__random_state=100, vectorizer__lowercase=True, vectorizer__max_features=5000;, score=0.472 total time=   4.5s
[CV 3/10; 1/16] START clf__class_weight=balanced, clf__random_state=100, vectorizer__lowercase=True, vectorizer__max_features=5000
[CV 1/10; 1/16] END clf__class_weight=balanced, clf__random_state=100, vectorizer__lowercase=True, vectorizer__max_features=5000;, score=0.472 total time=   4.5s
[CV 4/10; 1/16] START clf__class_weight=balanced, clf__random_state=100, vectorizer__lowercase=True, vectorizer__max_features=5000
[CV 3/10; 1/16] END clf__class_weight=balanced, clf__random_state=100, vectorizer__lowerc

#### TF-IDF Vectorizer Padrão, (2,2), (3,3) e (4,4)

In [None]:
pipeline_random_forest_count = Pipeline([
    ('vectorizer', CountVectorizer(max_features=None)),
    ('clf', RandomForestClassifier(random_state=100, class_weight='balanced'))
])

param_grid_random_forest_count = {
    'vectorizer__lowercase': [True, False],
    'vectorizer__ngram_range': [(1,1), (2,2), (3,3), (4,4)],
    'clf__n_estimators': [5, 10, 50, 100, 500, 1000],
    'clf__criterion': ['gini', 'entropy', 'log_loss'],
    'clf__max_features': ['sqrt', 'log2', None]
}

grid_search_random_forest_count = GridSearchCV(pipeline_random_forest_count, param_grid_random_forest_count, cv=10, scoring='accuracy', n_jobs=2, verbose=10)

grid_search_random_forest_count.fit(X, Y)

print_results(grid_search_random_forest_count)

#### TF-IDF Vectorizer 2-Char, 3-Char, 4-Char e 5-Char

In [None]:
pipeline_random_forest_count = Pipeline([
    ('vectorizer', CountVectorizer(max_features=None, analyzer='char')),
    ('clf', RandomForestClassifier(random_state=100, class_weight='balanced'))
])

param_grid_random_forest_count = {
    'vectorizer__lowercase': [True, False],
    'vectorizer__ngram_range': [(2,2), (3,3), (4,4), (5,5)],
    'clf__n_estimators': [5, 10, 50, 100, 500, 1000],
    'clf__criterion': ['gini', 'entropy', 'log_loss'],
    'clf__max_features': ['sqrt', 'log2', None]
}

grid_search_random_forest_count = GridSearchCV(pipeline_random_forest_count, param_grid_random_forest_count, cv=10, scoring='accuracy', n_jobs=2, verbose=10)

grid_search_random_forest_count.fit(X, Y)

print_results(grid_search_random_forest_count)

### SVC

In [4]:
import spacy

nlp = spacy.load('pt_core_news_lg')

i = 0
for text in treino_dataset['resp_text']:
    doc = nlp(text)
    lemma = [token.lemma_ for token in doc if not token.is_punct and not token.is_space]
    treino_dataset['resp_text'].iloc[i] = " ".join(x for x in lemma)
    i = i + 1

print(treino_dataset['resp_text'])

X = treino_dataset.resp_text
Y = treino_dataset.clarity

0       Prezado Sr Jose Taunai em atenção a o seu pedi...
1       a pedido de o Pró-Reitor de Graduação informar...
2       Prezado o sr. ele Agradecemos o contato e info...
3       Prezado o sr. ele Agradecemos o contato e info...
4       Prezado Prof Gilberto Tadeu Reis de o Silva em...
                              ...                        
5995    tratar se de solicitação com base em o Lei de ...
5996    tratar se de um solicitação repetir o informaç...
5997                                              Unidade
5998    Vale dizer que o gestão de o Telefones de Uso ...
5999    WILHAN DONIZETE GONçALVES NUNES em este NUP 23...
Name: resp_text, Length: 6000, dtype: object


In [6]:
import numpy as np

kernel_list = ['linear']
gamma_list = ['scale']
degree_list = [3]
C_list = np.geomspace(1.0, 6.0, num=10)
ngram_range_list = [(3, 3)]
analyzer_list = ['char']
random_state = 100

best_accuracy = 0
best_hyperparameters = {}

for C in C_list:
  for kernel in kernel_list:
      for ngram_range in ngram_range_list:
          for gamma in gamma_list:
                for degree in degree_list:
                    for analyzer in analyzer_list:
                      vectorizer = TfidfVectorizer(max_features=None, ngram_range=ngram_range, analyzer=analyzer)
                      X_tfidf = vectorizer.fit_transform(X)
                      clf = SVC(C = C, gamma = gamma, kernel = kernel, degree = degree ,random_state=random_state)
                      accuracy = cross_val_score(clf, X_tfidf, Y, scoring='accuracy', cv=10, n_jobs=2).mean()

                      print(f"Hyperparameters:  C={C}, ngram_range={ngram_range}, gamma={gamma}, kernel={kernel}"
                            f", degree={degree}, analyzer={analyzer}")
                      print(f"Mean Accuracy: {accuracy}\n")

                      if accuracy > best_accuracy:
                          best_accuracy = accuracy
                          best_hyperparameters = {
                              'C': C,
                              'ngram_range': ngram_range,
                              'kernel': kernel,
                              'gamma': gamma,
                              'degree': degree,
                              'analyzer': analyzer,
                      }

print("Best Hyperparameters:", best_hyperparameters)
print("Best Mean Accuracy:", best_accuracy)

Hyperparameters:  C=1.0, ngram_range=(3, 3), gamma=scale, kernel=linear, degree=3, analyzer=char
Mean Accuracy: 0.46316666666666667

Hyperparameters:  C=1.2202849358728105, ngram_range=(3, 3), gamma=scale, kernel=linear, degree=3, analyzer=char
Mean Accuracy: 0.46316666666666667

Hyperparameters:  C=1.4890953247181091, ngram_range=(3, 3), gamma=scale, kernel=linear, degree=3, analyzer=char
Mean Accuracy: 0.46316666666666667

Hyperparameters:  C=1.8171205928321397, ngram_range=(3, 3), gamma=scale, kernel=linear, degree=3, analyzer=char
Mean Accuracy: 0.4573333333333333

Hyperparameters:  C=2.2174048860973308, ngram_range=(3, 3), gamma=scale, kernel=linear, degree=3, analyzer=char
Mean Accuracy: 0.46049999999999996

Hyperparameters:  C=2.7058657792353378, ngram_range=(3, 3), gamma=scale, kernel=linear, degree=3, analyzer=char
Mean Accuracy: 0.4618333333333333

Hyperparameters:  C=3.301927248894627, ngram_range=(3, 3), gamma=scale, kernel=linear, degree=3, analyzer=char
Mean Accuracy: 0.4

# Rascunho

In [None]:
c_list = np.geomspace(0.01, 1.0, 20)

pipeline = Pipeline([
    ('vectorizer', CountVectorizer(max_features=None)),
    ('clf', LogisticRegression(max_iter=9999))
])

param_grid = {
    'clf__C': c_list,
}

grid_search = GridSearchCV(pipeline, param_grid, cv=10, scoring='accuracy', n_jobs=4, verbose=10)

grid_search.fit(X, Y)

best_params = grid_search.best_params_
best_estimator = grid_search.best_estimator_

# Calculate the mean accuracy using cross-validation
mean_accuracy = cross_val_score(best_estimator, X, Y, scoring='accuracy', cv=10).mean()

print("Best Parameters:", best_params)
print("Best Mean Accuracy:", mean_accuracy)

Fitting 10 folds for each of 20 candidates, totalling 200 fits
[CV 3/10; 1/20] START clf__C=0.01...............................................
[CV 4/10; 1/20] START clf__C=0.01...............................................
[CV 2/10; 1/20] START clf__C=0.01...............................................
[CV 1/10; 1/20] START clf__C=0.01...............................................
[CV 4/10; 1/20] END ................clf__C=0.01;, score=0.453 total time=   2.0s
[CV 5/10; 1/20] START clf__C=0.01...............................................
[CV 2/10; 1/20] END ................clf__C=0.01;, score=0.518 total time=   2.1s
[CV 6/10; 1/20] START clf__C=0.01...............................................
[CV 3/10; 1/20] END ................clf__C=0.01;, score=0.427 total time=   2.1s
[CV 7/10; 1/20] START clf__C=0.01...............................................
[CV 1/10; 1/20] END ................clf__C=0.01;, score=0.450 total time=   2.3s
[CV 8/10; 1/20] START clf__C=0.01.............

In [None]:
pipeline = Pipeline([
    ('vectorizer', CountVectorizer()),
    ('clf', LogisticRegression(max_iter=9999))
])

param_grid = {
    'vectorizer__max_features': [5000, 6000, None],
    'clf__C': [0.01, 0.1, 1.0, 10.0, 100.0],
    'clf__penalty': ['l1', 'l2'],
    'clf__solver': ['lbfgs', 'liblinear', 'saga']
}

grid_search = GridSearchCV(pipeline, param_grid, cv=10, scoring='accuracy', n_jobs=4, verbose=10)

grid_search.fit(X, Y)

best_params = grid_search.best_params_
best_estimator = grid_search.best_estimator_

# Calculate the mean accuracy using cross-validation
mean_accuracy = cross_val_score(best_estimator, X, Y, scoring='accuracy', cv=10).mean()

print("Best Parameters:", best_params)
print("Best Mean Accuracy:", mean_accuracy)

Fitting 10 folds for each of 180 candidates, totalling 1800 fits
[CV 2/10; 1/180] START clf__C=0.01, clf__penalty=l1, clf__solver=lbfgs, vectorizer__max_features=500
[CV 3/10; 1/180] START clf__C=0.01, clf__penalty=l1, clf__solver=lbfgs, vectorizer__max_features=500
[CV 1/10; 1/180] START clf__C=0.01, clf__penalty=l1, clf__solver=lbfgs, vectorizer__max_features=500
[CV 4/10; 1/180] START clf__C=0.01, clf__penalty=l1, clf__solver=lbfgs, vectorizer__max_features=500
[CV 2/10; 1/180] END clf__C=0.01, clf__penalty=l1, clf__solver=lbfgs, vectorizer__max_features=500;, score=nan total time=   0.3s
[CV 3/10; 1/180] END clf__C=0.01, clf__penalty=l1, clf__solver=lbfgs, vectorizer__max_features=500;, score=nan total time=   0.3s
[CV 4/10; 1/180] END clf__C=0.01, clf__penalty=l1, clf__solver=lbfgs, vectorizer__max_features=500;, score=nan total time=   0.3s
[CV 1/10; 1/180] END clf__C=0.01, clf__penalty=l1, clf__solver=lbfgs, vectorizer__max_features=500;, score=nan total time=   0.3s
[CV 5/10; 1

KeyboardInterrupt: 

In [None]:
pipeline = Pipeline([
    ('vectorizer', CountVectorizer()),
    ('clf', LogisticRegression(max_iter=9999))
])

param_grid = {
    'vectorizer__max_features': [5000, None],
    'vectorizer__lowercase': [True, False],
    'clf__C': [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0],
    'clf__class_weight': ['balanced', None],
    'clf__penalty': ['l1', 'l2'],
    'clf__solver': ['lbfgs', 'liblinear', 'saga']
}

grid_search = GridSearchCV(pipeline, param_grid, cv=10, scoring='accuracy', n_jobs=1, verbose=10)

grid_search.fit(X, Y)

best_params = grid_search.best_params_
best_estimator = grid_search.best_estimator_

# Calculate the mean accuracy using cross-validation
mean_accuracy = cross_val_score(best_estimator, X, Y, scoring='accuracy', cv=10).mean()

print("Best Parameters:", best_params)
print("Best Mean Accuracy:", mean_accuracy)

Fitting 10 folds for each of 60 candidates, totalling 600 fits
[CV 1/10; 1/60] START clf__C=0.001, clf__penalty=l1, clf__solver=lbfgs, vectorizer__max_features=5000
[CV 2/10; 1/60] START clf__C=0.001, clf__penalty=l1, clf__solver=lbfgs, vectorizer__max_features=5000
[CV 2/10; 1/60] END clf__C=0.001, clf__penalty=l1, clf__solver=lbfgs, vectorizer__max_features=5000;, score=nan total time=   0.3s
[CV 1/10; 1/60] END clf__C=0.001, clf__penalty=l1, clf__solver=lbfgs, vectorizer__max_features=5000;, score=nan total time=   0.3s
[CV 3/10; 1/60] START clf__C=0.001, clf__penalty=l1, clf__solver=lbfgs, vectorizer__max_features=5000
[CV 4/10; 1/60] START clf__C=0.001, clf__penalty=l1, clf__solver=lbfgs, vectorizer__max_features=5000
[CV 4/10; 1/60] END clf__C=0.001, clf__penalty=l1, clf__solver=lbfgs, vectorizer__max_features=5000;, score=nan total time=   0.3s
[CV 3/10; 1/60] END clf__C=0.001, clf__penalty=l1, clf__solver=lbfgs, vectorizer__max_features=5000;, score=nan total time=   0.3s
[CV 5

100 fits failed out of a total of 600.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
100 fits failed with the following error:
Traceback (most recent call last):
  File "/opt/homebrew/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/opt/homebrew/lib/python3.11/site-packages/sklearn/base.py", line 1152, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/lib/python3.11/site-packages/sklearn/pipeline.py", line 427, in fit
    self._final_estimator.fit(Xt, y, **fit_params_last_step)
  File "/opt/homebrew/lib/python3.11/site-packages/sklearn/base.py", li

Best Parameters: {'clf__C': 0.05, 'clf__penalty': 'l2', 'clf__solver': 'liblinear', 'vectorizer__max_features': None}
Best Mean Accuracy: 0.4505
