In [1]:
from si.io.csv_file import read_csv
from si.model_selection.randomized_search import randomized_search_cv
from si.models.logistic_regression import LogisticRegression
import numpy as np 

In [2]:
#1. Carregar dataset
data=read_csv ('../datasets/breast_bin/breast-bin.csv', sep=',', features=True, label=True)

In [3]:
#2. Criar modelo
model=LogisticRegression()

In [4]:
#3. Definir distribuições
param_distributions = {
    'l2_penalty': [1,10,10],
    'alpha': [0.001, 0.0001, 100],
    'max_iter': np.linspace(1000, 2000, 200, dtype=int)
}

In [6]:
#4. Executar com n_iter=10, cv=3
results=randomized_search_cv(
    model, data, param_distributions, 
    n_iter=10, cv=3, seed=42)

Randomized Search com Cross Validation

Modelo: LogisticRegression
Dataset: (698, 9)
CV Folds: 3
Iterações (combinações aleatorias): 10
Random seed: 42

Distribuições de hiperparâmetros:
 l2_penalty: 3 valores[1.0000 - 10.0000]
 alpha: 3 valores[0.0001 - 100.0000]
 max_iter: 200 valores[1000.0000 - 2000.0000]

Total de combinações possíveis: 1800
Testando 10 combinações aleatórias(0.56% do espaço)
Total de treinos: 30

--------------------------------------------------------------------------------

Iteração 1/10:
Params: {'l2_penalty': np.int64(10), 'alpha': np.float64(0.001), 'max_iter': np.int64(1070)}
Score: 0.9670 +/- 0.0113

Iteração 2/10:
Params: {'l2_penalty': np.int64(10), 'alpha': np.float64(0.001), 'max_iter': np.int64(1020)}
Score: 0.9670 +/- 0.0113

Iteração 3/10:
Params: {'l2_penalty': np.int64(10), 'alpha': np.float64(0.001), 'max_iter': np.int64(1587)}
Score: 0.9670 +/- 0.0113

Iteração 4/10:
Params: {'l2_penalty': np.int64(1), 'alpha': np.float64(0.0001), 'max_iter': n

  cost = (dataset.y * np.log(predictions)) + (1 - dataset.y) * np.log(1 - predictions)
  cost = (dataset.y * np.log(predictions)) + (1 - dataset.y) * np.log(1 - predictions)
  return 1 / (1 + np.exp(-X))


Score: 0.0316 +/- 0.0133

Iteração 8/10:
Params: {'l2_penalty': np.int64(1), 'alpha': np.float64(0.001), 'max_iter': np.int64(1487)}
Score: 0.9670 +/- 0.0113

Iteração 9/10:
Params: {'l2_penalty': np.int64(1), 'alpha': np.float64(100.0), 'max_iter': np.int64(1643)}
Score: 0.9741 +/- 0.0106

Iteração 10/10:
Params: {'l2_penalty': np.int64(1), 'alpha': np.float64(100.0), 'max_iter': np.int64(1025)}
Score: 0.9052 +/- 0.0945

Resultados do Randomized Search

#     Score        Hyperparameters
--------------------------------------------------------------------------------
1     0.9741       {'l2_penalty': np.int64(1), 'alpha': np.float64(100.0), 'max_iter': np.int64(1643)} Best
2     0.9670       {'l2_penalty': np.int64(1), 'alpha': np.float64(0.001), 'max_iter': np.int64(1487)} 
3     0.9670       {'l2_penalty': np.int64(10), 'alpha': np.float64(0.001), 'max_iter': np.int64(1020)} 
4     0.9670       {'l2_penalty': np.int64(10), 'alpha': np.float64(0.001), 'max_iter': np.int64(1427)} 
5  

In [7]:
print(f"Best score: {results['best_score']: .4f}")
print(f"Best hyperparameters: {results['best_hyperparameters']}")

Best score:  0.9741
Best hyperparameters: {'l2_penalty': np.int64(1), 'alpha': np.float64(100.0), 'max_iter': np.int64(1643)}
