In [1]:
import numpy as np
from sklearn.datasets import load_breast_cancer
# Load the breast-cancer dataset and pull out the feature matrix and the labels.
data = load_breast_cancer()
X, y = data.data, data.target

In [2]:
from sklearn.model_selection import ShuffleSplit
# One random 80/20 hold-out split; the fixed random_state makes it repeatable.
ss = ShuffleSplit(n_splits=1, train_size=0.8, test_size=0.2, random_state=0)

# n_splits=1, so the first (train, test) index pair is the only one.
train_index, test_index = next(ss.split(X, y))

# Slice features and labels with the same indices so they stay aligned.
X_train, y_train = X[train_index], y[train_index]
X_test, y_test = X[test_index], y[test_index]

In [3]:
from sklearn import linear_model
# Logistic-regression classifier with every hyper-parameter left at its default.
# NOTE(review): the repr printed after fitting reports solver='warn' and
# multi_class='warn', i.e. neither was chosen explicitly. The effective solver
# presumably depends on the installed scikit-learn version — consider pinning it
# so the numbers shown in this notebook stay reproducible.
clf = linear_model.LogisticRegression()

In [4]:
# Train on the training split. As the cell's last expression, the fitted
# estimator's repr (its full parameter list) is displayed.
clf.fit(X_train, y_train)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)

In [5]:
# Accuracy on the held-out test split; the same value is reproduced with
# accuracy_score(y_test, y_pred) further down.
clf.score(X_test, y_test)

0.956140350877193

In [6]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

In [7]:

# Predicted class labels for the test split; the metric cells that follow all
# compare these against y_test.
y_pred = clf.predict(X_test)

In [8]:
# Fraction of test samples whose predicted label equals the true label.
accuracy_score(y_test, y_pred)

0.956140350877193

In [9]:
# Confusion matrix: row i = true label i, column j = predicted label j,
# so the diagonal holds the correctly classified counts.
cm = confusion_matrix(y_test, y_pred)

In [10]:
# Accuracy by hand: correct predictions sit on the diagonal of the matrix.
n_total = cm.sum()
n_correct = np.trace(cm)
n_total, n_correct, n_correct / n_total

(114, 109, 0.956140350877193)

In [11]:
# Name the four cells of the 2x2 confusion matrix, treating class 0 as the
# "positive" class (rows = true label, columns = predicted label):
#
#               pred 0   pred 1
#     true 0      TP       FN
#     true 1      FP       TN
(TP, FN), (FP, TN) = cm
TP, TN, FP, FN

(46, 63, 4, 1)

In [19]:
# Show the raw matrix so the TP/TN/FP/FN values can be checked against it.
# NOTE(review): this cell's execution count (19) is out of sequence with the
# cells around it — restart the kernel and run all cells before sharing.
cm

array([[46,  1],
       [ 4, 63]])

In [12]:
from sklearn.metrics import classification_report

In [13]:
# Per-class precision / recall / F1 / support plus the averaged rows, shown to
# 4 decimals. The report is printed so the text table keeps its line breaks.
print(classification_report(y_test, y_pred, digits=4))

              precision    recall  f1-score   support

           0     0.9200    0.9787    0.9485        47
           1     0.9844    0.9403    0.9618        67

   micro avg     0.9561    0.9561    0.9561       114
   macro avg     0.9522    0.9595    0.9551       114
weighted avg     0.9578    0.9561    0.9563       114



In [15]:
# Recall for class 0: of all samples that truly are class 0, the share
# predicted as class 0.
actual_0 = TP + FN
recall_0 = TP / actual_0
recall_0

0.9787234042553191

In [16]:
# Precision for class 0: of all samples predicted as class 0, the share that
# truly are class 0.
predicted_0 = TP + FP
precision_0 = TP / predicted_0
precision_0

0.92

In [18]:
# Recall for class 1: of all samples that truly are class 1, the share
# predicted as class 1.
actual_1 = FP + TN
recall_1 = TN / actual_1
recall_1

0.9402985074626866

In [20]:
# False-positive rate with class 0 as positive: the share of true class-1
# samples that were predicted as class 0 (the complement of recall_1).
actual_1 = FP + TN
FP / actual_1

0.05970149253731343

In [22]:
# Precision for class 1: of all samples predicted as class 1, the share that
# truly are class 1.
predicted_1 = TN + FN
precision_1 = TN / predicted_1
precision_1

0.984375

## F1-score

In [23]:
# F1 for class 0: harmonic mean of its precision and recall.
pr_sum_0 = precision_0 + recall_0
f1_0 = 2 * precision_0 * recall_0 / pr_sum_0
f1_0

0.9484536082474226

In [24]:
# F1 for class 1: harmonic mean of its precision and recall.
pr_sum_1 = precision_1 + recall_1
f1_1 = 2 * precision_1 * recall_1 / pr_sum_1
f1_1

0.9618320610687023

In [25]:
from sklearn.metrics import f1_score


In [26]:
# Library F1 with each label in turn as the positive class; the pair should
# match the hand-computed f1_0 and f1_1.
tuple(f1_score(y_test, y_pred, pos_label=label) for label in (0, 1))

(0.9484536082474226, 0.9618320610687023)

In [27]:
from sklearn.metrics import fbeta_score

In [28]:
# F-beta with beta=1 weights precision and recall equally, so it should
# reproduce the F1 values for both labels.
tuple(fbeta_score(y_test, y_pred, beta=1, pos_label=label) for label in (0, 1))

(0.9484536082474226, 0.9618320610687023)

In [29]:
from sklearn.metrics import precision_recall_fscore_support

In [30]:
# The same per-class numbers as arrays: a 4-tuple of
# (precision, recall, F-beta, support), each with one entry per class in
# label order (class 0 first).
precision_recall_fscore_support(y_test, y_pred,beta=1)

(array([0.92    , 0.984375]),
 array([0.9787234 , 0.94029851]),
 array([0.94845361, 0.96183206]),
 array([47, 67]))

## 10-class classification (digits dataset)

In [31]:
from sklearn.datasets import load_digits
# Switch to the 10-class digits dataset.
# NOTE(review): this rebinds `data`, `X` and `y`, replacing the breast-cancer
# arrays loaded at the top of the notebook; earlier cells re-run after this
# point will see the digits data instead.
data = load_digits()
X, y = data.data, data.target

In [32]:
from sklearn.model_selection import ShuffleSplit
# Same 80/20 seeded hold-out scheme as before, now applied to the digits data.
ss = ShuffleSplit(n_splits=1, train_size=0.8, test_size=0.2, random_state=0)

# n_splits=1, so the first (train, test) index pair is the only one.
train_index, test_index = next(ss.split(X, y))

# Slice features and labels with the same indices so they stay aligned.
X_train, y_train = X[train_index], y[train_index]
X_test, y_test = X[test_index], y[test_index]

In [33]:
# Refit the same estimator object on the digits training split.
# NOTE(review): this overwrites the breast-cancer model held in `clf`, so
# re-running the earlier scoring cells afterwards would use the digits model.
clf.fit(X_train, y_train)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)

In [34]:
# Predicted digit labels for the test split (rebinds the earlier `y_pred`).
y_pred = clf.predict(X_test)

In [35]:
# Overall accuracy across all ten digit classes.
accuracy_score(y_test, y_pred)

0.95

In [36]:
# 10x10 confusion matrix (rows = true digit, columns = predicted digit);
# off-diagonal entries show which digits get mistaken for which.
confusion_matrix(y_test, y_pred)

array([[27,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0, 31,  0,  0,  0,  0,  1,  0,  3,  0],
       [ 0,  0, 34,  2,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0, 29,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0, 30,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0, 39,  0,  0,  0,  1],
       [ 0,  1,  0,  0,  0,  0, 43,  0,  0,  0],
       [ 0,  1,  0,  0,  1,  0,  0, 37,  0,  0],
       [ 0,  2,  1,  0,  0,  0,  0,  0, 35,  1],
       [ 0,  0,  0,  1,  0,  1,  0,  0,  2, 37]])

In [37]:
# Per-digit precision / recall / F1 / support plus the averaged rows, shown to
# 4 decimals.
print(classification_report(y_test, y_pred, digits=4))

              precision    recall  f1-score   support

           0     1.0000    1.0000    1.0000        27
           1     0.8857    0.8857    0.8857        35
           2     0.9714    0.9444    0.9577        36
           3     0.9062    1.0000    0.9508        29
           4     0.9677    1.0000    0.9836        30
           5     0.9750    0.9750    0.9750        40
           6     0.9773    0.9773    0.9773        44
           7     1.0000    0.9487    0.9737        39
           8     0.8750    0.8974    0.8861        39
           9     0.9487    0.9024    0.9250        41

   micro avg     0.9500    0.9500    0.9500       360
   macro avg     0.9507    0.9531    0.9515       360
weighted avg     0.9509    0.9500    0.9500       360

