In [2]:
!pip install optuna
!wget https://github.com/oreilly-japan/ml-security-jp/raw/master/ch02/dataset.csv

Collecting optuna
  Downloading optuna-2.10.0-py3-none-any.whl (308 kB)
[K     |████████████████████████████████| 308 kB 5.2 MB/s 
[?25hCollecting colorlog
  Downloading colorlog-6.6.0-py2.py3-none-any.whl (11 kB)
Collecting cmaes>=0.8.2
  Downloading cmaes-0.8.2-py3-none-any.whl (15 kB)
Collecting cliff
  Downloading cliff-3.10.0-py3-none-any.whl (80 kB)
[K     |████████████████████████████████| 80 kB 9.0 MB/s 
Collecting alembic
  Downloading alembic-1.7.5-py3-none-any.whl (209 kB)
[K     |████████████████████████████████| 209 kB 53.5 MB/s 
Collecting Mako
  Downloading Mako-1.1.6-py2.py3-none-any.whl (75 kB)
[K     |████████████████████████████████| 75 kB 4.8 MB/s 
Collecting autopage>=0.4.0
  Downloading autopage-0.4.0-py3-none-any.whl (20 kB)
Collecting stevedore>=2.0.1
  Downloading stevedore-3.5.0-py3-none-any.whl (49 kB)
[K     |████████████████████████████████| 49 kB 6.4 MB/s 
[?25hCollecting pbr!=2.1.0,>=2.0.0
  Downloading pbr-5.8.0-py2.py3-none-any.whl (112 kB)
[K  

In [7]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import numpy as np
import optuna
from sklearn.model_selection import cross_validate

class Objective:
    """Optuna objective: cross-validated accuracy of a LogisticRegression.

    An instance is callable with an Optuna trial. Each call samples a solver,
    a regularization strength ``C`` and an iteration cap ``max_iter`` from the
    trial, builds a ``LogisticRegression`` with them and returns the mean
    cross-validated accuracy on the data given at construction time.
    """

    def __init__(self, X, y, random_state=None):
        """Store the data every trial is evaluated on.

        Args:
            X: Feature matrix passed to ``cross_validate``.
            y: Target labels passed to ``cross_validate``.
            random_state: Optional seed forwarded to ``LogisticRegression``.
                The default ``None`` keeps the estimator's own default
                behaviour; pass an int to make repeated evaluations of the
                same hyperparameters use the same estimator seed.
        """
        self.X = X
        self.y = y
        self.random_state = random_state

    def __call__(self, trial):
        """Evaluate one hyperparameter configuration.

        Args:
            trial: The Optuna trial used to sample the hyperparameters.

        Returns:
            Mean of the per-fold test accuracies reported by ``cross_validate``.
        """
        params = {
            'solver': trial.suggest_categorical(
                'solver', ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']
            ),
            # Log-scale search over C. suggest_float(..., log=True) is the
            # supported replacement for the deprecated suggest_loguniform.
            'C': trial.suggest_float('C', 0.0001, 10, log=True),
            'max_iter': trial.suggest_int('max_iter', 100, 100000),
        }
        model = LogisticRegression(random_state=self.random_state, **params)
        # n_jobs=-1 lets scikit-learn run the folds in parallel on all cores.
        scores = cross_validate(
            model,
            X=self.X,
            y=self.y,
            scoring='accuracy',
            n_jobs=-1,
        )
        return scores['test_score'].mean()

# One seed shared by the train/test split and the Optuna sampler.
SEED = 101

# Load the CSV as integers: every column except the last is used as a feature,
# the last column is used as the label.
training_data = np.genfromtxt('dataset.csv', delimiter=',', dtype=np.int32)
X = training_data[:, :-1]
y = training_data[:, -1]

# Hold out 20% of the rows for the final evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=True, random_state=SEED
)

# Search hyperparameters on the training split only, for at most 60 seconds.
# The sampler is seeded so the search no longer starts from a fresh random
# state on every run.
# NOTE(review): runs can still differ, because the number of trials is bounded
# by wall-clock time and the objective's estimator is not seeded here.
objective = Objective(X_train, y_train)
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=SEED),
)
study.optimize(objective, timeout=60)
print('params:', study.best_params)

from sklearn.metrics import confusion_matrix, accuracy_score
# Refit a classifier with the best hyperparameters found by the study. The
# study's parameter names ('solver', 'C', 'max_iter') are exactly the
# LogisticRegression keyword arguments, so the dict is unpacked directly.
model = LogisticRegression(**study.best_params)
model.fit(X_train, y_train)

# Score the refit model on the held-out test split: accuracy as a percentage,
# followed by the confusion matrix.
pred = model.predict(X_test)
print("Accuracy: {:.5f} %".format(100 * accuracy_score(y_test, pred)))
print(confusion_matrix(y_test, pred))

[32m[I 2021-12-05 00:52:19,080][0m A new study created in memory with name: no-name-4bfe05dd-c8ef-4731-b300-43bfb4c33db3[0m
[32m[I 2021-12-05 00:52:19,489][0m Trial 0 finished with value: 0.9227730471237937 and parameters: {'solver': 'newton-cg', 'C': 0.0061441606942428595, 'max_iter': 27668}. Best is trial 0 with value: 0.9227730471237937.[0m
[32m[I 2021-12-05 00:52:19,914][0m Trial 1 finished with value: 0.9280877429025269 and parameters: {'solver': 'liblinear', 'C': 4.643225371316981, 'max_iter': 18639}. Best is trial 1 with value: 0.9280877429025269.[0m
[32m[I 2021-12-05 00:52:20,366][0m Trial 2 finished with value: 0.9282008650744726 and parameters: {'solver': 'lbfgs', 'C': 3.2766233080001776, 'max_iter': 62645}. Best is trial 2 with value: 0.9282008650744726.[0m
[32m[I 2021-12-05 00:52:20,635][0m Trial 3 finished with value: 0.9276352542147441 and parameters: {'solver': 'liblinear', 'C': 0.020386283935396727, 'max_iter': 4305}. Best is trial 2 with value: 0.92820086

params: {'solver': 'saga', 'C': 0.0931240487312583, 'max_iter': 83112}
Accuracy: 92.35640 %
[[ 874   97]
 [  72 1168]]
