<a href="https://colab.research.google.com/github/dekoska/neural-network-course/blob/main/01_basics/04_classification_metrics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Klasyfikacja binarna

In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [2]:
# Toy binary-classification data: ground-truth labels and the classifier's
# hard 0/1 predictions for the same 14 samples (aligned position by position).
y_true = np.array([1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1])
y_pred = np.array([0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1])

### Accuracy

$$\text{Accuracy} = \frac{\text{liczba poprawnych predykcji}}{\text{liczba wszystkich predykcji}} \cdot 100\%$$


In [3]:
def accuracy(y_true, y_pred):
  """Return the percentage of predictions that match the true labels.

  Args:
    y_true: Sequence (list, tuple or 1-D array) of ground-truth labels.
    y_pred: Sequence of predicted labels, aligned element-wise with y_true.

  Returns:
    Accuracy as a float between 0 and 100.

  Raises:
    ValueError: If the inputs differ in length or are empty.
  """
  total = len(y_true)
  # Fail loudly on misaligned inputs instead of silently ignoring surplus
  # predictions or hitting an IndexError halfway through the comparison.
  if total != len(y_pred):
    raise ValueError(
        f'y_true and y_pred must have the same length, got {total} and {len(y_pred)}')
  # Accuracy is undefined without samples; avoid a bare ZeroDivisionError.
  if total == 0:
    raise ValueError('accuracy is undefined for empty input')
  correct = sum(1 for truth, pred in zip(y_true, y_pred) if truth == pred)
  return correct / total * 100

In [4]:
accuracy(y_true, y_pred)

71.42857142857143

In [5]:
# Pair every true label with its prediction, order the rows by true class and
# number them 1..N in that order (the `sample` column).
results = (
    pd.DataFrame({'y_true': y_true, 'y_pred': y_pred})
    .sort_values(by=['y_true'])
    .assign(sample=lambda frame: range(1, len(frame) + 1))
)
results

Unnamed: 0,y_true,y_pred,sample
1,0,0,1
4,0,0,2
7,0,0,3
9,0,1,4
12,0,0,5
2,1,1,6
3,1,1,7
0,1,0,8
6,1,0,9
5,1,1,10


In [6]:
# One subplot row per series: true labels in row 1, predictions in row 2,
# both plotted against the sample number.
fig = make_subplots(rows=2, cols=1)
for row_idx, column in enumerate(('y_true', 'y_pred'), start=1):
    fig.add_trace(
        go.Scatter(x=results['sample'], y=results[column], mode='markers', name=column),
        row=row_idx,
        col=1,
    )
fig.update_layout(width=1000, height=800, title='Binary classifier predictions')
fig.show()

### Macierz konfuzji

In [7]:
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_true, y_pred)
cm

array([[4, 1],
       [3, 6]])

In [10]:
import plotly.figure_factory as ff

def plot_confufion_matric(cm):
  """Render a square confusion matrix as an annotated Plotly heatmap.

  Axis labels are derived from the matrix size, so the function works for any
  number of classes and not only for the binary 2x2 case.

  Args:
    cm: Square confusion matrix (rows = true classes, columns = predicted
      classes). Classes are assumed to be labelled 0..n-1 in matrix order.
  """
  n_classes = len(cm)
  pred_labels = [f'pred_{i}' for i in range(n_classes)]
  # The rows are reversed below, so their labels must run from the last class
  # down to class 0 to stay attached to the right row.
  true_labels = [f'true_{i}' for i in reversed(range(n_classes))]

  # Presumably reversed so that class 0 ends up in the top row of the rendered
  # heatmap, matching the printed array layout - TODO confirm against plotly.
  cm = cm[::-1]
  cm = pd.DataFrame(cm, columns=pred_labels, index=true_labels)

  fig = ff.create_annotated_heatmap(z=cm.values, x=list(cm.columns), y=list(cm.index), colorscale='ice', showscale=True, reversescale=True)
  fig.update_layout(width=500, height=500, title='Confusion matrix', font_size=16)
  fig.show()

plot_confufion_matric(cm)

In [11]:
cm = confusion_matrix(y_true, y_pred)
cm_df = pd.DataFrame(cm, columns=['pred_0', 'pred_1'], index=['true_0', 'true_1'])
cm_df

Unnamed: 0,pred_0,pred_1
true_0,4,1
true_1,3,6


In [12]:
# Flatten the 2x2 confusion matrix row by row: the first row holds the
# true-class-0 counts (TN, FP), the second the true-class-1 counts (FN, TP).
tn, fp, fn, tp = cm.ravel()
print(f'TN - True Negative: {tn}')
print(f'FP - False Positive: {fp}')
print(f'FN - False Negative: {fn}')
print(f'TP - True Positive: {tp}')

TN - True Negative: 4
FP - False Positive: 1
FN - False Negative: 3
TP - True Positive: 6


In [13]:
# Type I error rate (false positive rate): share of actual negatives (FP + TN)
# that were wrongly predicted as positive.
fpr = fp / (fp+tn)
print(f'False Positive Rate: {fpr}')

False Positive Rate: 0.2


In [15]:
# Type II error rate (false negative rate): share of actual positives (FN + TP)
# that were wrongly predicted as negative.
fnr = fn / (fn+tp)
print(f'False Negative Rate: {fnr}')

False Negative Rate: 0.3333333333333333


In [18]:
# Precision: share of the samples predicted as positive (TP + FP) that are
# actually positive. (The share of actual positives that were found is recall.)
precision = tp / (tp+fp)
print(f'Precision: {precision}')

Precision: 0.8571428571428571


In [17]:
# Specificity (true negative rate): share of actual negatives (TN + FP) that
# were correctly predicted as negative. This is not an error rate; it is the
# complement of the false positive rate, TN / (TN + FP) = 1 - FP / (FP + TN).
tnr = tn / (tn+fp)
print(f'True Negative Rate: {tnr}')

True Negative Rate: 0.8


### Krzywa ROC

In [19]:
from sklearn.metrics import roc_curve

# NOTE: this rebinds `fpr` from the scalar false positive rate computed in an
# earlier cell to the array of ROC x-coordinates. y_pred holds hard 0/1 labels
# here rather than scores, which is why the resulting curve has only 3 points.
fpr, tpr, thresh = roc_curve(y_true, y_pred, pos_label=1)

In [20]:
roc = pd.DataFrame({'fpr' : fpr, 'tpr' : tpr})
roc

Unnamed: 0,fpr,tpr
0,0.0,0.0
1,0.2,0.666667
2,1.0,1.0


In [21]:
def plot_roc_curve(y_true, y_pred):
    """Plot the ROC curve of a binary classifier against the chance diagonal.

    Args:
        y_true: Ground-truth binary labels; class 1 is treated as positive.
        y_pred: Predictions for the positive class, passed straight to
            sklearn's roc_curve.
    """
    # binary classification
    # Only the curve coordinates are plotted, so the thresholds are discarded.
    fpr, tpr, _ = roc_curve(y_true, y_pred, pos_label=1)

    # Plot the values computed from the arguments. Reading a notebook-level
    # `roc` frame here would ignore the inputs and break on a fresh kernel.
    fig = go.Figure(data=[go.Scatter(x=fpr, y=tpr, line_color='red', name='ROC Curve'),
                        go.Scatter(x=[0, 1], y=[0, 1], mode='lines', line_dash='dash', line_color='navy')],
                    layout=go.Layout(xaxis_title='False Positive Rate',
                                    yaxis_title='True Positive Rate',
                                    title='ROC Curve',
                                    showlegend=False,
                                    width=800))
    fig.show()
plot_roc_curve(y_true, y_pred)

In [22]:
#AUC score
from sklearn.metrics import roc_auc_score

roc_auc = roc_auc_score(y_true, y_pred)
roc_auc

np.float64(0.7333333333333333)

### Metryki wieloklasowe

In [23]:
y_true = np.array([1, 0, 1, 2, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 2, 1, 1, 2, 2])
y_pred = np.array([0, 0, 1, 2, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 2, 1, 2, 1, 2])

from sklearn.metrics import accuracy_score

accuracy_score(y_true, y_pred)

0.6842105263157895

In [24]:
cm = confusion_matrix(y_true, y_pred)
cm

array([[4, 1, 0],
       [3, 6, 1],
       [0, 1, 3]])

In [25]:
def plot_confusion_matrix(cm):
    """Render a square confusion matrix as an annotated Plotly heatmap.

    Axis labels are derived from the matrix size, so the function works for
    any number of classes and not only for the 3-class case.

    Args:
        cm: Square confusion matrix (rows = true classes, columns = predicted
            classes). Classes are assumed to be labelled 0..n-1 in matrix order.
    """
    n_classes = len(cm)
    pred_labels = [f'pred_{i}' for i in range(n_classes)]
    # The rows are reversed below, so their labels must run from the last
    # class down to class 0 to stay attached to the right row.
    true_labels = [f'true_{i}' for i in reversed(range(n_classes))]

    # Presumably reversed so that class 0 ends up in the top row of the
    # rendered heatmap, matching the printed array - TODO confirm against plotly.
    cm = cm[::-1]
    cm = pd.DataFrame(cm, columns=pred_labels, index=true_labels)

    fig = ff.create_annotated_heatmap(z=cm.values, x=list(cm.columns), y=list(cm.index), colorscale='ice', showscale=True, reversescale=True)
    fig.update_layout(width=500, height=500, title='Confusion Matrix', font_size=16)
    fig.show()

plot_confusion_matrix(cm)

In [26]:
from sklearn.metrics import classification_report

print(classification_report(y_true, y_pred))

              precision    recall  f1-score   support

           0       0.57      0.80      0.67         5
           1       0.75      0.60      0.67        10
           2       0.75      0.75      0.75         4

    accuracy                           0.68        19
   macro avg       0.69      0.72      0.69        19
weighted avg       0.70      0.68      0.68        19

