# Especialização em Inteligência Artificial

**Aprendizado de Máquina - Aula 5.3: Tuning**

Código de exemplo desenvolvido pelo docente [Adriano Rivolli](mailto:rivolli@utfpr.edu.br)

*O código apresenta o uso de Random e Grid Search*

In [None]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_iris
from sklearn.datasets import load_breast_cancer
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np

## Busca aleatória

In [None]:
# Load the Iris dataset and hold out 20% of the samples for the final evaluation.
iris = load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Hyperparameter search space: candidate values the random search draws from.
param_dist = dict(
    C=[0.1, 1, 10, 100, 1000],
    kernel=['linear', 'rbf', 'poly'],
    gamma=[0.01, 0.1, 1, 10, 100],
)

# Randomized search: 10 sampled configurations, each scored by 5-fold
# cross-validated accuracy on the training split; the seed fixes the sampling.
svm_classifier = SVC()
random_search = RandomizedSearchCV(
    estimator=svm_classifier,
    param_distributions=param_dist,
    n_iter=10,
    cv=5,
    scoring='accuracy',
    random_state=42,
)
random_search.fit(X_train, y_train)

# Report the best hyperparameter combination found.
print("Best Hyperparameters:", random_search.best_params_)

# Take the selected model and predict the held-out test split.
best_model = random_search.best_estimator_
y_pred = best_model.predict(X_test)

# Evaluate the selected model on data not used during the search.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy on Test Set:", accuracy)

Best Hyperparameters: {'kernel': 'linear', 'gamma': 0.1, 'C': 1000}
Accuracy on Test Set: 1.0


## Busca em grade (simples)

In [None]:
# Load the breast cancer dataset and hold out 20% of the samples as a test set.
cancer = load_breast_cancer()
X, y = cancer.data, cancer.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Parameter grid to search exhaustively: 2 * 8 * 9 = 144 combinations.
param_grid = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [3, 4, 5, 6, 7, 8, 9, 10],
    'min_samples_split': [2, 3, 4, 5, 6, 7, 8, 9, 10]
}

# Seed the tree: scikit-learn permutes the features at every split even with
# the default splitter, so without a fixed random_state the selected
# hyperparameters and the test accuracy can change from one run to the next.
dt_classifier = DecisionTreeClassifier(random_state=42)

# Exhaustive grid search scored by 5-fold cross-validated accuracy on the training split.
grid_search = GridSearchCV(estimator=dt_classifier, param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Report the best hyperparameter combination found.
print("Best Hyperparameters:", grid_search.best_params_)

# Take the selected model and predict the held-out test split.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

# Evaluate the selected model on data not used during the search.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy on Test Set:", accuracy)

Best Hyperparameters: {'criterion': 'entropy', 'max_depth': 5, 'min_samples_split': 3}
Accuracy on Test Set: 0.956140350877193


## Múltiplas grades

In [None]:
# Load the breast cancer dataset and hold out 20% of the samples as a test set.
cancer = load_breast_cancer()
X = cancer.data
y = cancer.target
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Search space given as a list of independent grids, so each kernel is only
# combined with the parameters listed alongside it:
#   - polynomial kernel: 3 * 3 * 3 = 27 combinations
#   - RBF kernel:        3 * 2     =  6 combinations
params_dist = [
    dict(
        kernel=['poly'],
        C=[0.1, 1, 10],
        degree=[2, 5, 10],
        coef0=[0.01, 0.1, 0.5],
    ),
    dict(
        kernel=['rbf'],
        C=[0.1, 1, 10],
        gamma=['auto', 'scale'],
    ),
]

# Grid search over both grids, scored by 5-fold cross-validated accuracy.
svm_classifier = SVC()
grid_search = GridSearchCV(
    estimator=svm_classifier,
    param_grid=params_dist,
    cv=5,
    scoring='accuracy',
    refit=True,
)
grid_search.fit(X_train, y_train)

# Report the best hyperparameter combination found.
print("Best Hyperparameters:", grid_search.best_params_)

# Take the selected model and predict the held-out test split.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

# Evaluate the selected model on data not used during the search.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy on Test Set:", accuracy)

Best Hyperparameters: {'C': 10, 'coef0': 0.5, 'degree': 10, 'kernel': 'poly'}
Accuracy on Test Set: 0.9473684210526315


## Validação Cruzada aninhada

In [None]:
# Load the digits dataset; the whole set is used because evaluation is done
# through cross-validation rather than a single train/test split.
digts = load_digits()
X = digts.data
y = digts.target

# Two shuffled 3-fold splitters: the inner one is for hyperparameter selection,
# the outer one for performance estimation.
inner_cv = KFold(n_splits=3, shuffle=True, random_state=10)
outer_cv = KFold(n_splits=3, shuffle=True, random_state=10)

# Grid of SVM hyperparameters: 3 * 3 * 2 = 18 candidates.
p_grid = dict(
    C=[0.1, 1, 10],
    kernel=['linear', 'rbf', 'poly'],
    gamma=['scale', 'auto'],
)

svm = SVC()

# Non-nested search and scoring: the outer_cv folds are used both to pick the
# hyperparameters and to report best_score_.
clf = GridSearchCV(
    estimator=svm,
    param_grid=p_grid,
    cv=outer_cv,
    verbose=3,
)
clf.fit(X, y)
print("Performance sem aninhamento: ", clf.best_score_)
print("Best Hyperparameters:", clf.best_params_)

Fitting 3 folds for each of 18 candidates, totalling 54 fits
[CV 1/3] END .C=0.1, gamma=scale, kernel=linear;, score=0.978 total time=   0.1s
[CV 2/3] END .C=0.1, gamma=scale, kernel=linear;, score=0.972 total time=   0.1s
[CV 3/3] END .C=0.1, gamma=scale, kernel=linear;, score=0.985 total time=   0.1s
[CV 1/3] END ....C=0.1, gamma=scale, kernel=rbf;, score=0.948 total time=   0.3s
[CV 2/3] END ....C=0.1, gamma=scale, kernel=rbf;, score=0.938 total time=   0.3s
[CV 3/3] END ....C=0.1, gamma=scale, kernel=rbf;, score=0.965 total time=   0.3s
[CV 1/3] END ...C=0.1, gamma=scale, kernel=poly;, score=0.975 total time=   0.1s
[CV 2/3] END ...C=0.1, gamma=scale, kernel=poly;, score=0.970 total time=   0.1s
[CV 3/3] END ...C=0.1, gamma=scale, kernel=poly;, score=0.982 total time=   0.1s
[CV 1/3] END ..C=0.1, gamma=auto, kernel=linear;, score=0.978 total time=   0.1s
[CV 2/3] END ..C=0.1, gamma=auto, kernel=linear;, score=0.972 total time=   0.1s
[CV 3/3] END ..C=0.1, gamma=auto, kernel=linear;

In [None]:
# Nested CV with parameter optimization: the grid search (tuned on inner_cv)
# is itself the estimator evaluated by cross_val_score on the outer_cv folds,
# so each outer test fold is scored by a model whose search never saw it.
clf = GridSearchCV(estimator=svm, param_grid=p_grid, cv=inner_cv, verbose=3)
nested_score = cross_val_score(clf, X=X, y=y, cv=outer_cv)
# Label typo fixed ("comew  aninhamento" -> "com aninhamento").
print("Performance com aninhamento: ", nested_score.mean())

Fitting 3 folds for each of 18 candidates, totalling 54 fits
[CV 1/3] END .C=0.1, gamma=scale, kernel=linear;, score=0.978 total time=   0.0s
[CV 2/3] END .C=0.1, gamma=scale, kernel=linear;, score=0.977 total time=   0.0s
[CV 3/3] END .C=0.1, gamma=scale, kernel=linear;, score=0.985 total time=   0.0s
[CV 1/3] END ....C=0.1, gamma=scale, kernel=rbf;, score=0.930 total time=   0.1s
[CV 2/3] END ....C=0.1, gamma=scale, kernel=rbf;, score=0.927 total time=   0.2s
[CV 3/3] END ....C=0.1, gamma=scale, kernel=rbf;, score=0.935 total time=   0.1s
[CV 1/3] END ...C=0.1, gamma=scale, kernel=poly;, score=0.975 total time=   0.0s
[CV 2/3] END ...C=0.1, gamma=scale, kernel=poly;, score=0.975 total time=   0.1s
[CV 3/3] END ...C=0.1, gamma=scale, kernel=poly;, score=0.967 total time=   0.1s
[CV 1/3] END ..C=0.1, gamma=auto, kernel=linear;, score=0.978 total time=   0.1s
[CV 2/3] END ..C=0.1, gamma=auto, kernel=linear;, score=0.977 total time=   0.0s
[CV 3/3] END ..C=0.1, gamma=auto, kernel=linear;