In [2]:
!pip install optuna
!wget https://github.com/oreilly-japan/ml-security-jp/raw/master/ch02/dataset.csv

Collecting optuna
  Downloading optuna-2.10.0-py3-none-any.whl (308 kB)
[K     |████████████████████████████████| 308 kB 7.0 MB/s 
[?25hCollecting alembic
  Downloading alembic-1.7.5-py3-none-any.whl (209 kB)
[K     |████████████████████████████████| 209 kB 15.4 MB/s 
Collecting cmaes>=0.8.2
  Downloading cmaes-0.8.2-py3-none-any.whl (15 kB)
Collecting cliff
  Downloading cliff-3.10.0-py3-none-any.whl (80 kB)
[K     |████████████████████████████████| 80 kB 5.9 MB/s 
[?25hCollecting colorlog
  Downloading colorlog-6.6.0-py2.py3-none-any.whl (11 kB)
Collecting Mako
  Downloading Mako-1.1.6-py2.py3-none-any.whl (75 kB)
[K     |████████████████████████████████| 75 kB 3.5 MB/s 
[?25hCollecting pbr!=2.1.0,>=2.0.0
  Downloading pbr-5.8.0-py2.py3-none-any.whl (112 kB)
[K     |████████████████████████████████| 112 kB 14.0 MB/s 
[?25hCollecting stevedore>=2.0.1
  Downloading stevedore-3.5.0-py3-none-any.whl (49 kB)
[K     |████████████████████████████████| 49 kB 5.6 MB/s 
[?25hCollect

In [9]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import numpy as np
import optuna
from sklearn.model_selection import cross_validate

class Objective:
    """Optuna objective: cross-validated accuracy of a LogisticRegression.

    An instance is callable and is meant to be handed to ``study.optimize``.
    Each call samples one hyperparameter configuration from the trial,
    builds a ``LogisticRegression`` with it and returns the mean
    cross-validation accuracy on the data stored at construction time.
    """

    def __init__(self, X, y):
        """Remember the data every trial is evaluated on.

        Args:
            X: Feature matrix, forwarded unchanged to ``cross_validate``.
            y: Target labels, forwarded unchanged to ``cross_validate``.
        """
        self.X = X
        self.y = y

    def __call__(self, trial):
        """Evaluate one sampled configuration.

        Args:
            trial: Optuna trial used to sample ``solver``, ``C`` and
                ``max_iter``.

        Returns:
            Mean of the ``test_score`` entries reported by ``cross_validate``
            (scoring is ``'accuracy'``, so higher is better).
        """
        params = {
            'solver': trial.suggest_categorical(
                'solver', ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']
            ),
            # suggest_float(..., log=True) replaces the deprecated
            # suggest_loguniform: same parameter name and bounds, still
            # sampled on a log scale.
            'C': trial.suggest_float('C', 0.0001, 10, log=True),
            'max_iter': trial.suggest_int('max_iter', 100, 100000),
        }
        model = LogisticRegression(**params)
        # n_jobs=-1 lets cross_validate evaluate the folds in parallel.
        scores = cross_validate(
            model,
            X=self.X,
            y=self.y,
            scoring='accuracy',
            n_jobs=-1,
        )
        return scores['test_score'].mean()

# Read the CSV as int32; every column but the last is a feature, the last
# column is the label.
training_data = np.genfromtxt(fname='dataset.csv', delimiter=',', dtype=np.int32)
X, y = training_data[:, :-1], training_data[:, -1]

# Hold out 20% of the rows; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    random_state=101,
)

# Search hyperparameters on the training portion only, for at most 60 seconds,
# maximizing the objective's return value.
objective = Objective(X=X_train, y=y_train)
study = optuna.create_study(direction='maximize')
study.optimize(objective, timeout=60)
print('params:', study.best_params)

[32m[I 2021-11-29 22:52:41,284][0m A new study created in memory with name: no-name-b3497a3f-4f78-4bd6-b37f-92a4ec19a378[0m
[32m[I 2021-11-29 22:52:42,534][0m Trial 0 finished with value: 0.9198330856454422 and parameters: {'solver': 'liblinear', 'C': 0.002340087617843422, 'max_iter': 60772}. Best is trial 0 with value: 0.9198330856454422.[0m
[32m[I 2021-11-29 22:52:42,895][0m Trial 1 finished with value: 0.9214159647422043 and parameters: {'solver': 'newton-cg', 'C': 0.0028523671057923985, 'max_iter': 26131}. Best is trial 1 with value: 0.9214159647422043.[0m
[32m[I 2021-11-29 22:52:43,195][0m Trial 2 finished with value: 0.9225467388329424 and parameters: {'solver': 'sag', 'C': 0.0033688169952285733, 'max_iter': 7683}. Best is trial 2 with value: 0.9225467388329424.[0m
[32m[I 2021-11-29 22:52:43,535][0m Trial 3 finished with value: 0.9289924005432934 and parameters: {'solver': 'liblinear', 'C': 0.2619828846525913, 'max_iter': 44422}. Best is trial 3 with value: 0.928992

params: {'solver': 'newton-cg', 'C': 0.09462162204832922, 'max_iter': 73096}
