# Tuning dos parâmetros com GridSearch

- avaliações anteriores dos algoritmos
- Naive Bayes: 93,80
- árvore de decisão: 98,20
- Random Forest: 98,4
- regras: 97,4
- KNN: 98,6
- regressão logística: 94,6
- SVM: 98,80
- redes neurais: 99,6

## Preparação dos dados

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier

In [None]:
import pickle

# Load the pre-split credit dataset. The pickle holds four objects, in order:
# train features, test features, train labels, test labels.
# NOTE: pickle.load can execute arbitrary code; only open files you trust.
with open('/content/drive/MyDrive/Machine Learning e Data Science com Python de A a Z/01. Classificação /credit.pkl', mode='rb') as f:
    credit_splits = pickle.load(f)
X_credit_train, X_credit_test, y_credit_train, y_credit_test = credit_splits

In [None]:
# Display the shapes of the train and test feature matrices as a sanity check.
X_credit_train.shape, X_credit_test.shape

((1500, 3), (500, 3))

In [None]:
# Display the shapes of the train and test label vectors as a sanity check.
y_credit_train.shape, y_credit_test.shape

((1500,), (500,))

In [None]:
# Merge the train and test splits back into a single dataset: the grid
# searches below are fitted on the full data rather than on the train split.
import numpy as np

# axis=0 is np.concatenate's default, so rows of the test split are appended
# after the rows of the train split.
X_credit = np.concatenate([X_credit_train, X_credit_test])
y_credit = np.concatenate([y_credit_train, y_credit_test])

In [None]:
# Display the combined feature matrix.
X_credit

array([[-1.3754462 ,  0.50631087,  0.10980934],
       [ 1.45826409, -1.6489393 , -1.21501497],
       [-0.79356829,  0.22531191, -0.43370226],
       ...,
       [ 1.37445674, -1.05746281, -1.12564819],
       [-1.57087737, -0.63488173, -0.36981671],
       [-1.03572293, -0.93978122,  0.04244312]])

In [None]:
# Display the combined label vector.
y_credit

array([0, 0, 0, ..., 0, 1, 1])

## Árvore de decisão

In [None]:
# Candidate hyperparameters for DecisionTreeClassifier
# (2 x 2 x 3 x 3 = 36 combinations).
parametros = dict(
    criterion=['gini', 'entropy'],
    splitter=['best', 'random'],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 5, 10],
)

In [None]:
# Exhaustively evaluate every combination in `parametros` for a decision tree.
# No `cv` or `scoring` is passed, so GridSearchCV's defaults apply.
grid_search = GridSearchCV(DecisionTreeClassifier(), parametros)
grid_search.fit(X_credit, y_credit)

# Keep the winning combination and its mean cross-validated score.
melhores_parametros, melhor_resultado = grid_search.best_params_, grid_search.best_score_
print(melhores_parametros, melhor_resultado, sep='\n')

{'criterion': 'entropy', 'min_samples_leaf': 1, 'min_samples_split': 5, 'splitter': 'best'}
0.983


## Random forest

In [None]:
# Candidate hyperparameters for RandomForestClassifier
# (4 x 2 x 3 x 3 = 72 combinations).
parametros = dict(
    n_estimators=[10, 40, 100, 150],
    criterion=['gini', 'entropy'],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 5, 10],
)

In [None]:
# Exhaustively evaluate every combination in `parametros` for a random forest.
# No `cv` or `scoring` is passed, so GridSearchCV's defaults apply.
grid_search = GridSearchCV(RandomForestClassifier(), parametros)
grid_search.fit(X_credit, y_credit)

# Keep the winning combination and its mean cross-validated score.
melhores_parametros, melhor_resultado = grid_search.best_params_, grid_search.best_score_
print(melhores_parametros, melhor_resultado, sep='\n')

{'criterion': 'entropy', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
0.9875


## KNN

In [None]:
# Candidate hyperparameters for KNeighborsClassifier (4 x 2 = 8 combinations).
# `p` is the power of the Minkowski metric: 1 = Manhattan, 2 = Euclidean.
parametros = dict(
    n_neighbors=[3, 5, 10, 20],
    p=[1, 2],
)

In [None]:
# Exhaustively evaluate every combination in `parametros` for k-nearest neighbours.
# No `cv` or `scoring` is passed, so GridSearchCV's defaults apply.
grid_search = GridSearchCV(KNeighborsClassifier(), parametros)
grid_search.fit(X_credit, y_credit)

# Keep the winning combination and its mean cross-validated score.
melhores_parametros, melhor_resultado = grid_search.best_params_, grid_search.best_score_
print(melhores_parametros, melhor_resultado, sep='\n')

{'n_neighbors': 20, 'p': 1}
0.9800000000000001


## Regressão logística


In [None]:
# Candidate hyperparameters for LogisticRegression (3 x 3 x 3 = 27 combinations).
parametros = dict(
    tol=[1e-4, 1e-5, 1e-6],
    C=[1.0, 1.5, 2.0],
    solver=['lbfgs', 'sag', 'saga'],
)

In [None]:
# Exhaustively evaluate every combination in `parametros` for logistic regression.
# No `cv` or `scoring` is passed, so GridSearchCV's defaults apply.
grid_search = GridSearchCV(LogisticRegression(), parametros)
grid_search.fit(X_credit, y_credit)

# Keep the winning combination and its mean cross-validated score.
melhores_parametros, melhor_resultado = grid_search.best_params_, grid_search.best_score_
print(melhores_parametros, melhor_resultado, sep='\n')

{'C': 1.0, 'solver': 'lbfgs', 'tol': 0.0001}
0.9484999999999999


## SVM

In [None]:
# Candidate hyperparameters for SVC (3 x 3 x 3 = 27 combinations).
parametros = dict(
    tol=[1e-3, 1e-4, 1e-5],
    C=[1.0, 1.5, 2.0],
    kernel=['rbf', 'linear', 'sigmoid'],
)

In [None]:
# Exhaustively evaluate every combination in `parametros` for a support vector classifier.
# No `cv` or `scoring` is passed, so GridSearchCV's defaults apply.
grid_search = GridSearchCV(SVC(), parametros)
grid_search.fit(X_credit, y_credit)

# Keep the winning combination and its mean cross-validated score.
melhores_parametros, melhor_resultado = grid_search.best_params_, grid_search.best_score_
print(melhores_parametros, melhor_resultado, sep='\n')

{'C': 1.5, 'kernel': 'rbf', 'tol': 0.001}
0.9829999999999999


## Redes neurais

In [None]:
# Candidate hyperparameters for MLPClassifier (3 x 2 x 2 = 12 combinations).
# The hyperbolic-tangent activation is spelled 'tanh'. The misspelling 'tahn'
# is rejected by scikit-learn's parameter validation, so every fit that used
# it failed and its score was set to NaN.
parametros = {
    'activation': ['relu', 'logistic', 'tanh'],
    'solver': ['adam', 'sgd'],
    'batch_size': [10, 56],
}

In [None]:
# Exhaustively evaluate every combination in `parametros` for a multi-layer perceptron.
# No `cv` or `scoring` is passed, so GridSearchCV's defaults apply.
grid_search = GridSearchCV(MLPClassifier(), parametros)
grid_search.fit(X_credit, y_credit)

# Keep the winning combination and its mean cross-validated score;
# they are printed in a separate cell.
melhores_parametros, melhor_resultado = grid_search.best_params_, grid_search.best_score_

20 fits failed out of a total of 60.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
20 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/neural_network/_multilayer_perceptron.py", line 747, in fit
    self._validate_params()
  File "/usr/local/lib/python3.10/dist-packages/sklearn/base.py", line 600, in _validate_params
    validate_parameter_constraints(
  File "/usr/local/lib/python3.10/dist-packages/sklearn/utils/_param_validation.py", line 97, in validate_parameter_constraints
    raise Inval

In [None]:
# Report the best hyperparameter combination and its score, one per line.
print(melhores_parametros, melhor_resultado, sep='\n')

{'activation': 'relu', 'batch_size': 56, 'solver': 'adam'}
0.9970000000000001
