### Import dependencies

In [3]:
import numpy as np 
import pandas as pd

from sklearn.datasets import make_blobs
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

### Read dataset 

In [4]:
data = pd.read_csv('../data.csv')
data.drop('Index', axis=1, inplace=True)
data.head()

x = data.iloc[:, :-1]
y = data.iloc[:, -1]

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state=1)
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.25, random_state=1) 

### Models

In [5]:
model = LogisticRegression()
solvers = ['newton-cg', 'lbfgs', 'liblinear']
penalty = ['l2']
c_values = [100, 10, 1.0, 0.1, 0.01]

### Grid Search

In [7]:
grid = dict(solver=solvers,penalty=penalty,C=c_values)
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
grid_search = GridSearchCV(estimator=model, param_grid=grid, n_jobs=-1, cv=cv, scoring='accuracy',error_score=0)
grid_result = grid_search.fit(x_train, y_train)

### Summary

In [8]:
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.540095 using {'C': 100, 'penalty': 'l2', 'solver': 'newton-cg'}
0.540095 (0.007360) with: {'C': 100, 'penalty': 'l2', 'solver': 'newton-cg'}
0.462675 (0.008304) with: {'C': 100, 'penalty': 'l2', 'solver': 'lbfgs'}
0.539736 (0.006016) with: {'C': 100, 'penalty': 'l2', 'solver': 'liblinear'}
0.530905 (0.007273) with: {'C': 10, 'penalty': 'l2', 'solver': 'newton-cg'}
0.463024 (0.007117) with: {'C': 10, 'penalty': 'l2', 'solver': 'lbfgs'}
0.529069 (0.006869) with: {'C': 10, 'penalty': 'l2', 'solver': 'liblinear'}
0.508545 (0.007668) with: {'C': 1.0, 'penalty': 'l2', 'solver': 'newton-cg'}
0.462685 (0.008140) with: {'C': 1.0, 'penalty': 'l2', 'solver': 'lbfgs'}
0.503457 (0.007364) with: {'C': 1.0, 'penalty': 'l2', 'solver': 'liblinear'}
0.465157 (0.006482) with: {'C': 0.1, 'penalty': 'l2', 'solver': 'newton-cg'}
0.456736 (0.005872) with: {'C': 0.1, 'penalty': 'l2', 'solver': 'lbfgs'}
0.457782 (0.005412) with: {'C': 0.1, 'penalty': 'l2', 'solver': 'liblinear'}
0.373213 (0.005584) wit

In [11]:
import pickle
with open('./models/logistic.pickle', 'wb') as f:
    pickle.dump(grid_result, f)