# Import bibliotek.

In [1]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.figure_factory as ff

# Metryki.

## Klasyfikacja binarna.

### Dokładność.

Wzór:

$$Accuracy = \frac{correct\ predictions}{total\ predictions} $$

In [2]:
y_true = np.array([1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1])
y_pred = np.array([0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1])

In [3]:
def accuracy(y_true, y_pred):
    correct = 0
    for idx, _ in enumerate(y_true):
        if y_true[idx] == y_pred[idx]:
            correct += 1
    return correct / len(y_true)

In [4]:
accuracy(y_true, y_pred)

0.6428571428571429

In [5]:
results = pd.DataFrame({'y_true': y_true, 'y_pred': y_pred})
results = results.sort_values(by='y_true')
results['sample'] = range(1, len(y_true) + 1)
results

Unnamed: 0,y_true,y_pred,sample
1,0,0,1
3,0,1,2
4,0,0,3
6,0,0,4
7,0,0,5
9,0,1,6
12,0,0,7
0,1,0,8
2,1,1,9
5,1,0,10


In [6]:
fig = make_subplots(rows=2, cols=1)
fig.add_trace(go.Scatter(x=results['sample'], y=results['y_true'], mode='markers', name='y_true'), row=1, col=1)
fig.add_trace(go.Scatter(x=results['sample'], y=results['y_pred'], mode='markers', name='y_pred'), row=2, col=1)
fig.update_layout(width=1000, height=800, title='Binary classifier predictions')
fig.show()

### Macierz pomyłek.

In [7]:
def confusion_matrix(y_true, y_pred):
    tp = 0
    tn = 0
    fp = 0
    fn = 0
    for idx, _ in enumerate(y_true):
        if y_true[idx] == 1 and y_pred[idx] == 1:
            tp += 1
        elif y_true[idx] == 1 and y_pred[idx] == 0:
            fn += 1
        elif y_true[idx] == 0 and y_pred[idx] == 1:
            fp += 1
        elif y_true[idx] == 0 and y_pred[idx] == 0:
            tn += 1
    return np.array([[tp, fn], [fp, tn]])

In [8]:
cm = confusion_matrix(y_true, y_pred)
cm

array([[4, 3],
       [2, 5]])

In [9]:
import plotly.figure_factory as ff

def plot_confusion_matrix(cm):
    cm = cm[::-1]
    cm = pd.DataFrame(cm, columns=['pred_0', 'pred_1'], index=['true_1', 'true_0'])

    fig = ff.create_annotated_heatmap(z=cm.values, x=list(cm.columns), y=list(cm.index), colorscale='ice', showscale=True, reversescale=True)
    fig.update_layout(width=500, height=500, title='Confusion Matrix', font_size=16)
    fig.show()

plot_confusion_matrix(cm)

In [10]:
cm = confusion_matrix(y_true, y_pred)
cm_df = pd.DataFrame(cm, columns=['pred_0', 'pred_1'], index=['true_0', 'true_1'])
cm_df

Unnamed: 0,pred_0,pred_1
true_0,4,3
true_1,2,5


In [11]:
tn, fp, fn, tp = cm.ravel()
print(f'TN - True Negative: {tn}')
print(f'FP - False Positive: {fp}')
print(f'FN - False Negative: {fn}')
print(f'TP - True Positive: {tp}')

TN - True Negative: 4
FP - False Positive: 3
FN - False Negative: 2
TP - True Positive: 5


In [12]:
fpr = fp / (fp + tn)
fnr = fn / (fn + tp)
print(f"False positive rate: {fpr}")
print(f"False negative rate: {fnr}")

False positive rate: 0.42857142857142855
False negative rate: 0.2857142857142857


In [13]:
precision = tp / (tp + fp)
recall = tp / (tp + fn)
print(f"Precision: {precision}")
print(f"Recall: {recall}")

Precision: 0.625
Recall: 0.7142857142857143


### Krzywa ROC.

In [14]:
from sklearn.metrics import roc_curve

fpr, tpr, tresh = roc_curve(y_true, y_pred, pos_label=1)
roc = pd.DataFrame({'fpr': fpr, 'tpr': tpr})
roc

Unnamed: 0,fpr,tpr
0,0.0,0.0
1,0.285714,0.571429
2,1.0,1.0


In [15]:
def plot_roc_curve(y_true, y_pred):
    fig = go.Figure(data=[go.Scatter(x=roc['fpr'], y=roc['tpr'], line_color='red', name='ROC Curve'),
                        go.Scatter(x=[0, 1], y=[0, 1], mode='lines', line_dash='dash', line_color='navy')],
                    layout=go.Layout(xaxis_title='False Positive Rate',
                                    yaxis_title='True Positive Rate',
                                    title='ROC Curve',
                                    showlegend=False,
                                    width=800))
    fig.show()
plot_roc_curve(y_true, y_pred)

In [16]:
from sklearn.metrics import roc_auc_score

roc_auc = roc_auc_score(y_true, y_pred)
roc_auc

0.6428571428571428

In [17]:
gini = roc_auc * 2 - 1
print(gini)

0.2857142857142856


## Klasyfikacja wieloklasowa.

### Dokładność.

In [18]:
y_true = np.array([1, 0, 1, 2, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 2, 1, 1, 2, 2])
y_pred = np.array([0, 0, 1, 2, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 2, 1, 2, 1, 2])

from sklearn.metrics import accuracy_score

accuracy_score(y_true, y_pred)

0.6842105263157895

### Macierz pomyłek.

In [19]:
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_true, y_pred)
cm

array([[4, 1, 0],
       [3, 6, 1],
       [0, 1, 3]])

In [20]:
def plot_confusion_matrix(cm):
    cm = cm[::-1]
    cm = pd.DataFrame(cm, columns=['pred_0', 'pred_1', 'pred_2'], index=['true_2','true_1', 'true_0'])

    fig = ff.create_annotated_heatmap(z=cm.values, x=list(cm.columns), y=list(cm.index), colorscale='ice', showscale=True, reversescale=True)
    fig.update_layout(width=500, height=500, title='Confusion Matrix', font_size=16)
    fig.show()

plot_confusion_matrix(cm)

### Raport klasyfikacji.

In [21]:
from sklearn.metrics import classification_report

print(classification_report(y_true, y_pred))

              precision    recall  f1-score   support

           0       0.57      0.80      0.67         5
           1       0.75      0.60      0.67        10
           2       0.75      0.75      0.75         4

    accuracy                           0.68        19
   macro avg       0.69      0.72      0.69        19
weighted avg       0.70      0.68      0.68        19



## Regresja.

In [22]:
np.random.seed(42)
y_true = 100 + 20 * np.random.randn(100)
y_true

array([109.93428306,  97.23471398, 112.95377076, 130.46059713,
        95.31693251,  95.31726086, 131.58425631, 115.34869458,
        90.61051228, 110.85120087,  90.73164614,  90.68540493,
       104.83924543,  61.73439511,  65.50164335,  88.75424942,
        79.74337759, 106.28494665,  81.83951849,  71.75392597,
       129.31297538,  95.48447399, 101.35056409,  71.50503628,
        89.11234551, 102.21845179,  76.98012845, 107.51396037,
        87.9872262 ,  94.166125  ,  87.96586776, 137.04556369,
        99.73005551,  78.84578142, 116.45089824,  75.583127  ,
       104.1772719 ,  60.80659752,  73.43627902, 103.93722472,
       114.7693316 , 103.42736562,  97.68703435,  93.97792609,
        70.42956019,  85.60311583,  90.78722458, 121.14244452,
       106.87236579,  64.73919689, 106.48167939,  92.29835439,
        86.46155999, 112.23352578, 120.61999045, 118.62560238,
        83.21564954,  93.81575248, 106.62526863, 119.51090254,
        90.41651524,  96.28682047,  77.87330052,  76.07

In [23]:
np.random.seed(24)
y_pred = 100 + 20 * np.random.randn(100)
y_pred

array([126.58424345,  84.59933096,  93.67439281,  80.18379227,
        78.58367489,  71.2257344 , 111.28833703, 105.91443775,
        67.47191533, 104.39130397, 113.57609598, 137.78545463,
       119.23076797, 102.08022391,  90.37669365, 117.00457062,
       129.06849333, 121.15474871, 103.31123214, 110.30036756,
        73.26128628, 111.25722273, 127.8570965 ,  98.73344033,
       102.43336723, 124.15205076,  99.9591957 , 132.55591489,
       107.08985571, 120.75055265,  92.28632974, 110.39636002,
       133.73165775,  73.48073708, 128.579674  ,  58.21291445,
        97.40360125, 112.63045899,  88.26923871, 105.81440162,
       125.28206731, 105.80069564,  60.59423005, 116.07811778,
       120.61100668, 102.36195872,  99.56293345, 100.93681428,
        67.42494282,  92.15278819, 134.01945411, 121.2265951 ,
       113.91607148,  91.2802286 ,  93.34116759, 112.04269127,
       102.17577918, 100.73533857,  89.22073187, 109.98355775,
        85.76096486,  95.25998061, 117.14238471,  62.35

In [24]:
results = pd.DataFrame({'y_true': y_true, 'y_pred': y_pred})
results.head()

Unnamed: 0,y_true,y_pred
0,109.934283,126.584243
1,97.234714,84.599331
2,112.953771,93.674393
3,130.460597,80.183792
4,95.316933,78.583675


In [25]:
results['error'] = results['y_true'] - results['y_pred']
results.head()

Unnamed: 0,y_true,y_pred,error
0,109.934283,126.584243,-16.64996
1,97.234714,84.599331,12.635383
2,112.953771,93.674393,19.279378
3,130.460597,80.183792,50.276805
4,95.316933,78.583675,16.733258


In [26]:
def plot_regression_results(y_true, y_pred):
    min = results[['y_true', 'y_pred']].min().min()
    max = results[['y_true', 'y_pred']].max().max()

    fig = go.Figure(data=[go.Scatter(x=results['y_true'], y=results['y_pred'], mode='markers'),
                    go.Scatter(x=[min, max], y=[min, max])],
                    layout=go.Layout(showlegend=False, width=800,
                                     xaxis_title='y_true',
                                     yaxis_title='y_pred',
                                     title='Regression results'))
    fig.show()
plot_regression_results(y_true, y_pred)

In [27]:
y_true = 100 + 20 * np.random.randn(1000)
y_pred = y_true + 10 * np.random.randn(1000)
results = pd.DataFrame({'y_true': y_true, 'y_pred': y_pred})
results['error'] = results['y_true'] - results['y_pred']

px.histogram(results, x='error', nbins=50, width=800)

### Średni błąd absolutny.

Wzór:

$$MAE = \frac{1}{n}\sum_{i=1}^{N}|y_{true} - y_{pred}|$$

In [28]:
def mae(y_true, y_pred):
    return abs(y_true - y_pred).sum() / len(y_true)

mae(y_true, y_pred)

7.631535630829797

In [29]:
from sklearn.metrics import mean_absolute_error

mean_absolute_error(y_true, y_pred)

7.631535630829797

In [30]:
from tensorflow.keras.losses import MeanAbsoluteError

mae = MeanAbsoluteError()
mae(y_true, y_pred).numpy()

7.631536

### Średni błąd kwadratowy.

Wzór:

$$MSE = \frac{1}{n}\sum_{i=1}^{N}(y_{true} - y_{pred})^{2}$$

In [31]:
def mean_squared_error(y_true, y_pred):
    return ((y_true - y_pred) ** 2).sum() / len(y_true)

mean_squared_error(y_true, y_pred)

93.01557865376334

In [32]:
from tensorflow.keras.losses import MeanSquaredError

mse = MeanSquaredError()
mse(y_true, y_pred).numpy()

93.01558

In [33]:
from sklearn.metrics import mean_squared_error

mean_squared_error(y_true, y_pred)

93.01557865376334

### Root Mean Squared Error.

Wzór:

$$RMSE = \sqrt{MSE}$$

In [34]:
def root_mean_squared_error(y_true, y_pred):
    return np.sqrt(((y_true - y_pred) ** 2).sum() / len(y_true))

root_mean_squared_error(y_true, y_pred)

9.64445844274127

In [35]:
np.sqrt(mean_squared_error(y_true, y_pred))

9.64445844274127

### Maksymalny błąd.

In [36]:
def max_error(y_true, y_pred):
    return abs(y_true - y_pred).max()

max_error(y_true, y_pred)

34.04861031063015

In [37]:
from sklearn.metrics import max_error

max_error(y_true, y_pred)

34.04861031063015

### Współczynnik determinacji.

Wzór:

$$R2\_score = 1 - \frac{\sum_{i=1}^{N}(y_{true} - y_{pred})^{2}}{\sum_{i=1}^{N}(y_{true} - \overline{y_{true}})^{2}}$$

In [38]:
def r2_score(y_true, y_pred):
    numerator = ((y_true - y_pred) ** 2).sum()
    denominator = ((y_true - y_true.mean()) ** 2).sum()
    try:
        r2 = 1 - numerator / denominator
    except ZeroDivisionError:
        print("Dzielenie przez zero!")
    return r2

r2_score(y_true, y_pred)

0.7752507159514859

In [39]:
from sklearn.metrics import r2_score

r2_score(y_true, y_pred)

0.7752507159514859

In [40]:
from tensorflow.keras.metrics import R2Score

r2 = R2Score()
r2(y_true.reshape(-1, 1), y_pred.reshape(-1, 1)).numpy()

0.7752514