In [2]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
# Show NumPy floats with two digits of precision so printed score arrays stay compact.
np.set_printoptions(precision=2)

## Using built-in scoring functions

In [3]:
from sklearn.datasets import make_classification

# `sklearn.cross_validation` is a legacy module that newer scikit-learn releases
# no longer ship; prefer its replacement `sklearn.model_selection` and fall back
# to the legacy path only on old installations.
try:
    from sklearn.model_selection import train_test_split
except ImportError:  # older scikit-learn without `model_selection`
    from sklearn.cross_validation import train_test_split

# Synthetic classification dataset; the fixed seeds make both the generated data
# and the train/test split reproducible across runs.
X, y = make_classification(random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [4]:
from sklearn.linear_model import LogisticRegression

# Logistic regression with default hyper-parameters, trained on the training split.
lr = LogisticRegression()
# Kept as the cell's last expression so the fitted estimator's repr is displayed.
lr.fit(X_train, y_train)


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [7]:
# Default estimator score on the held-out split (accuracy for this classifier).
lr.score(X_test, y_test)

0.88

In [8]:
# Hard class predictions for the test split; reused by the label-based metrics below.
pred = lr.predict(X_test)

In [9]:
from sklearn.metrics import confusion_matrix
# Arguments are (true labels, predicted labels); rows of the result correspond to
# the true class and columns to the predicted class.
confusion_matrix(y_test, pred)

array([[12,  1],
       [ 2, 10]])

In [11]:
from sklearn.metrics import classification_report
# The report is a pre-formatted text table (per-class precision, recall, f1-score
# and support), so it is printed rather than shown as a repr.
print(classification_report(y_test, pred))

             precision    recall  f1-score   support

          0       0.86      0.92      0.89        13
          1       0.91      0.83      0.87        12

avg / total       0.88      0.88      0.88        25



In [13]:
from sklearn.metrics import precision_score, f1_score

# Both metrics take (true labels, predicted labels); evaluate them in order and
# feed the pair straight into the format string.
print("Precision score: %f f1_score: %f" % tuple(
    metric(y_test, pred) for metric in (precision_score, f1_score)
))

Precision score: 0.909091 f1_score: 0.869565


In [14]:
from sklearn.metrics import roc_auc_score, average_precision_score, log_loss

# Keep only the second probability column, i.e. the score for class 1.
probs = lr.predict_proba(X_test)[:, 1]

# These metrics are computed from probability scores rather than hard labels;
# each entry pairs the report template with the metric that fills it.
for template, metric in (
    ("area under the roc curve: %f", roc_auc_score),
    ("average precision: %f", average_precision_score),
    ("log loss: %f", log_loss),
):
    print(template % metric(y_test, probs))

area under the roc curve: 0.935897
average precision: 0.950575
log loss: 0.308483


## Scorers for cross-validation and grid-search

In [15]:
# Import from the public `sklearn.metrics` namespace; the `sklearn.metrics.scorer`
# submodule path is legacy and is kept here only as a fallback for old installs.
# NOTE(review): recent scikit-learn releases replace `SCORERS` with
# `sklearn.metrics.get_scorer_names()` - confirm against the installed version.
try:
    from sklearn.metrics import SCORERS
except ImportError:
    from sklearn.metrics.scorer import SCORERS

# The keys are the string names accepted by the `scoring=` argument of the
# cross-validation and grid-search helpers used below.
print(SCORERS.keys())

dict_keys(['r2', 'accuracy', 'mean_squared_error', 'precision', 'f1_macro', 'f1_samples', 'f1_micro', 'recall_samples', 'precision_weighted', 'f1', 'adjusted_rand_score', 'f1_weighted', 'roc_auc', 'precision_macro', 'average_precision', 'recall_micro', 'mean_absolute_error', 'recall', 'precision_micro', 'recall_weighted', 'precision_samples', 'median_absolute_error', 'recall_macro', 'log_loss'])


In [16]:
# `sklearn.cross_validation` is a legacy module that newer scikit-learn releases
# no longer ship; prefer `sklearn.model_selection` and fall back only on old installs.
try:
    from sklearn.model_selection import cross_val_score
except ImportError:  # older scikit-learn without `model_selection`
    from sklearn.cross_validation import cross_val_score

# No `scoring` argument: each fold is evaluated with the estimator's own default
# score. Kept as the last expression so the per-fold array is displayed.
cross_val_score(LogisticRegression(), X, y)

array([ 0.82,  0.79,  0.81])

In [18]:
# Cross-validate the same model once per built-in scorer name. The label is what
# gets printed; the second item is the string passed as `scoring=`.
# The log-loss scorer reports negated values because scorers follow a
# "greater is better" convention.
# NOTE(review): newer scikit-learn releases name this scorer "neg_log_loss" -
# confirm against the installed version.
for label, scoring in (
    ("Accuracy", "accuracy"),
    ("F1", "f1"),  # label previously misspelled as "F!"
    ("AUC", "roc_auc"),
    ("Log loss", "log_loss"),
):
    scores = cross_val_score(LogisticRegression(), X, y, scoring=scoring)
    print("%s scoring: %s" % (label, scores))

Accuracy scoring: [ 0.82  0.79  0.81]
F! scoring: [ 0.83  0.79  0.79]
AUC scoring: [ 0.94  0.88  0.88]
Log loss scoring: [-0.3  -0.58 -0.56]


In [21]:
# `sklearn.grid_search` is a legacy module that newer scikit-learn releases no
# longer ship; prefer `sklearn.model_selection` and fall back only on old installs.
try:
    from sklearn.model_selection import GridSearchCV
except ImportError:  # older scikit-learn without `model_selection`
    from sklearn.grid_search import GridSearchCV

# Ten candidate values for C, logarithmically spaced from 1e-3 to 1e3.
param_grid = {'C': np.logspace(start=-3, stop=3, num=10)}
# Candidates are ranked by the (negated) log-loss scorer instead of the default score.
grid_search = GridSearchCV(LogisticRegression(), param_grid, scoring="log_loss")
# Kept as the last expression so the fitted search object's repr is displayed.
grid_search.fit(X, y)

GridSearchCV(cv=None, error_score='raise',
       estimator=LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'C': array([  1.00000e-03,   4.64159e-03,   2.15443e-02,   1.00000e-01,
         4.64159e-01,   2.15443e+00,   1.00000e+01,   4.64159e+01,
         2.15443e+02,   1.00000e+03])},
       pre_dispatch='2*n_jobs', refit=True, scoring='log_loss', verbose=0)

In [22]:
# Per-candidate cross-validation summary: mean score, standard deviation and the
# parameter setting that produced it.
# NOTE(review): newer scikit-learn releases expose these results through
# `cv_results_` instead - confirm against the installed version.
grid_search.grid_scores_

[mean: -0.66258, std: 0.00106, params: {'C': 0.001},
 mean: -0.58738, std: 0.00455, params: {'C': 0.0046415888336127772},
 mean: -0.46489, std: 0.01213, params: {'C': 0.021544346900318832},
 mean: -0.38646, std: 0.03217, params: {'C': 0.10000000000000001},
 mean: -0.41391, std: 0.08809, params: {'C': 0.46415888336127775},
 mean: -0.58592, std: 0.16415, params: {'C': 2.154434690031882},
 mean: -0.94970, std: 0.25540, params: {'C': 10.0},
 mean: -1.59448, std: 0.45646, params: {'C': 46.415888336127729},
 mean: -2.39817, std: 0.74119, params: {'C': 215.44346900318823},
 mean: -3.16412, std: 0.91376, params: {'C': 1000.0}]

In [23]:
# Parameter setting with the best mean cross-validated score (here the least
# negative log-loss value).
grid_search.best_params_

{'C': 0.10000000000000001}

## Defining your own scoring callable

### From scratch

In [25]:
def my_accuracy_score(est, X, y):
    """Scoring callable with the ``(estimator, X, y)`` signature.

    Returns the fraction of samples in ``X`` for which ``est.predict`` equals
    the corresponding entry of ``y``.
    """
    predictions = est.predict(X)
    is_correct = predictions == y
    return np.mean(is_correct)

# First pass uses the default scoring (scoring=None), second pass the hand-written
# callable; the two printed arrays are expected to match.
for scorer in (None, my_accuracy_score):
    print(cross_val_score(LogisticRegression(), X, y, scoring=scorer))

[ 0.82  0.79  0.81]
[ 0.82  0.79  0.81]


### From a score function

In [27]:
from sklearn.metrics import fbeta_score
# F-beta on the hard predictions; beta=10 weights recall far more heavily than precision.
fbeta_score(y_test, pred, beta=10)

0.83402146985962022

In [28]:
# `make_scorer` is part of the public `sklearn.metrics` namespace; importing it
# from there avoids the legacy `sklearn.metrics.scorer` module path.
from sklearn.metrics import make_scorer

# Wrap the plain metric into a scorer callable usable as `scoring=`; extra keyword
# arguments (beta=10) are forwarded to `fbeta_score` on every call.
my_fbeta_scorer = make_scorer(fbeta_score, beta=10)

In [31]:
# Cross-validate with the custom F-beta scorer built via make_scorer.
print(
    cross_val_score(
        LogisticRegression(),
        X,
        y,
        scoring=my_fbeta_scorer,
    )
)

[ 0.88  0.77  0.69]
