In [154]:
import sklearn.metrics
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

#### создание my scorer:

In [155]:
def my_recall(target, pred, alpha = 0.5):
    """Recall of the positive class after thresholding predicted probabilities.

    The decision threshold is the ``alpha * 100``-th percentile of the values
    in ``pred``. A sample counts as a predicted positive when its
    positive-class probability is strictly greater than that threshold.

    Parameters
    ----------
    target : array-like
        True labels, one per sample; positives are the entries equal to 1.
    pred : array-like
        Predicted probabilities. Either a 2-D array whose column 1 holds the
        positive-class probability, or a 1-D array of positive-class
        probabilities (predict_proba-based scorers may pass either form for
        binary targets, depending on the scikit-learn version).
    alpha : float, default 0.5
        Fraction in [0, 1] selecting the percentile used as the threshold.

    Returns
    -------
    float
        Share of samples with ``target == 1`` that are predicted positive.
        The result is NaN when ``target`` contains no positives (0 / 0).

    Notes
    -----
    ``pred`` is never modified; the thresholded labels live in a new array.
    """
    # Accept plain lists as well as arrays so `target == 1` is element-wise.
    target = np.asarray(target)
    proba = np.asarray(pred)

    # NOTE(review): the percentile is taken over every entry of `pred` as
    # passed in. For a 2-D input this pools both class columns; that matches
    # the previous behaviour and is kept so existing scores do not change.
    threshold = np.percentile(proba, alpha * 100)

    # Pick the positive-class probabilities regardless of the input rank.
    positive_proba = proba[:, 1] if proba.ndim == 2 else proba

    # Boolean mask instead of writing 0/1 back into the caller's array.
    predicted_positive = positive_proba > threshold
    return np.sum(predicted_positive * target) / np.sum(target == 1)

#### обучение модели

In [156]:
# Load the breast-cancer dataset bundled with scikit-learn.
cancer = load_breast_cancer()

# Fit a default logistic regression on the full dataset and predict the same
# samples it was trained on (in-sample class predictions).
# NOTE(review): `pred` is not referenced by any later cell visible here.
pred = (
    LogisticRegression()
    .fit(cancer.data, cancer.target)
    .predict(cancer.data)
)

#### валидация модели

In [157]:
# Mean of the custom probability-based recall over 3 cross-validation folds
# for a default logistic regression. Kept as one trailing expression so the
# cell displays the value.
cross_val_score(
    LogisticRegression(),
    cancer.data,
    cancer.target,
    cv=3,
    scoring=sklearn.metrics.make_scorer(
        my_recall,
        greater_is_better=True,
        needs_proba=True,
    ),
).mean()

0.96638655462184886

In [161]:
#### использование GridSearchCV и RandomizedSearchCV для поиска наилучших параметров

In [158]:
# Hyper-parameter space explored by both searches below.
params = {
    'penalty': ['l2', 'l1'],
    'C': [0.1, 0.5, 1, 5, 10],
    'fit_intercept': [True, False],
    'max_iter': [50, 100, 120, 150]
}

# Build the probability-based recall scorer once and share it between the
# two searches instead of constructing an identical scorer twice.
proba_recall_scorer = sklearn.metrics.make_scorer(my_recall, greater_is_better=True, needs_proba=True)

# The solver is pinned to liblinear because the grid contains both 'l1' and
# 'l2' penalties. liblinear supports both and was the default solver before
# scikit-learn 0.22; the later default (lbfgs) rejects 'l1'.
model_1 = GridSearchCV(
    LogisticRegression(solver='liblinear'),
    params,
    scoring=proba_recall_scorer,
)

# random_state is fixed so the randomly sampled candidates, and therefore the
# reported scores, are reproducible across Restart & Run All.
model_2 = RandomizedSearchCV(
    LogisticRegression(solver='liblinear'),
    params,
    scoring=proba_recall_scorer,
    random_state=0,
)

#### кросс-валидация модели

In [159]:
# Nested cross-validation: each search object is itself cross-validated on
# 3 outer folds, scored with the custom probability-based recall.
# The grid search is printed first, then the randomized search.
for search in (model_1, model_2):
    outer_scorer = sklearn.metrics.make_scorer(my_recall, greater_is_better=True, needs_proba=True)
    fold_scores = cross_val_score(search, cancer.data, cancer.target, cv=3, scoring=outer_scorer)
    print(fold_scores.mean())

0.966386554622
0.966386554622


#### кросс-валидация модели с метрикой accuracy

In [160]:
# Same nested cross-validation (3 outer folds), but the outer folds are
# scored with plain accuracy. Each search object still selects its
# hyper-parameters with whatever scorer it was constructed with.
# The grid search is printed first, then the randomized search.
for search in (model_1, model_2):
    fold_scores = cross_val_score(search, cancer.data, cancer.target, cv=3, scoring='accuracy')
    print(fold_scores.mean())

0.952529471828
0.952538754293
