In [32]:
from pandas import read_csv
from sklearn.model_selection import RepeatedStratifiedKFold, GridSearchCV
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

In [33]:
def load_dataset(filname):
    """Read a headerless CSV file and split it into inputs and a target.

    Parameters
    ----------
    filname
        Path of the CSV file; handed unchanged to ``read_csv``.

    Returns
    -------
    tuple
        ``(X, y)``: ``X`` is every column except the last, ``y`` is the last
        column. Both are slices of the frame's ``.values`` array.
    """
    # header=None: the first row of the file is data, not column names.
    values = read_csv(filname, header=None).values
    # The final column is treated as the target; the rest are inputs.
    return values[:, :-1], values[:, -1]

In [34]:
# Load the CSV: X receives every column but the last, y receives the last column.
X, y = load_dataset('pima-indians-diabetes.csv')

In [35]:
# Stratified 10-fold cross-validation, repeated 3 times; random_state is fixed
# so the same splits are produced on every run.
cv = RepeatedStratifiedKFold(
    n_splits=10,
    n_repeats=3,
    random_state=1,
)

In [36]:
# Classifier placed at the end of the pipeline: logistic regression fitted with
# the 'liblinear' solver.
model = LogisticRegression(solver='liblinear')

In [37]:
# Univariate feature selector scored with f_classif. k is not set here; the
# candidate values are supplied later through the 'anova__k' grid entry.
fs = SelectKBest(score_func=f_classif)

In [38]:
# Chain the selector and the classifier into one estimator so that both are
# fitted together on whatever data the pipeline is given.
pipeline = Pipeline(steps=[
    ('anova', fs),
    ('lr', model),
])

In [39]:
# Parameter grid for the search; starts out empty.
grid = {}

In [40]:
# Candidate k values: 1 up to the number of columns in X. The 'anova__' prefix
# addresses the pipeline step named 'anova'.
grid['anova__k'] = list(range(1, X.shape[1] + 1))

In [41]:
# Exhaustive search over the grid, scored by accuracy under the repeated
# stratified CV splitter; n_jobs=-1 asks for all available processors.
search = GridSearchCV(
    pipeline,
    grid,
    scoring='accuracy',
    n_jobs=-1,
    cv=cv,
)

In [42]:
# Run the grid search on the full dataset; the fitted search is kept as `results`.
results = search.fit(X, y)

In [27]:
# Report the best mean cross-validated score and the parameters that gave it.
best_accuracy = results.best_score_
print('Best Mean Accuracy: %.3f' % best_accuracy)
best_config = results.best_params_
print('Best Config: %s' % (best_config,))

Best Mean Accuracy: 0.770
Best Config: {'anova__k': 5}
