# Evaluation

<span  style="font-size: 20px; line-height: 30px;">
Calculate metrics:
    
<ol>
    <li> F1 </li>
    <li> Mean IoU </li>
    <li> Recall </li>
    <li> Precision </li>
    <li> Accuracy </li>
</ol>

</span>

## Import

In [12]:
import os
import numpy as np
import cv2
from glob import glob
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score, jaccard_score, precision_score, recall_score

## Load predict mask and ground truth Mask

In [11]:
pred_mask = sorted(glob(os.path.join("prediction", "*")))
true_mask = sorted(glob(os.path.join("dataset", "test", "masks", "*")))

## Calculate metrics

In [16]:
score = []

for pred_y, true_y in tqdm(zip(pred_mask, true_mask), total=len(pred_mask)):
    name = pred_y.split("/")[-1]
    
    pred_y = cv2.imread(pred_y, cv2.IMREAD_GRAYSCALE)
    pred_y = pred_y/255.0
    pred_y = pred_y > 0.5
    pred_y = pred_y.astype(np.int32)
    pred_y = pred_y.flatten()
    
    true_y = cv2.imread(true_y, cv2.IMREAD_GRAYSCALE)
    true_y = true_y/255.0
    true_y = true_y > 0.5
    true_y = true_y.astype(np.int32)
    true_y = true_y.flatten()
    
    acc_value = accuracy_score(pred_y, true_y)
    f1_value = f1_score(pred_y, true_y, labels=[0, 1], average="binary")
    jac_value = jaccard_score(pred_y, true_y, labels=[0, 1], average="binary")
    recall_value = recall_score(pred_y, true_y, labels=[0, 1], average="binary")
    precision_value = precision_score(pred_y, true_y, labels=[0, 1], average="binary")
    score.append([name, acc_value, f1_value, jac_value, recall_value, precision_value])

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|████████████████████████████████████████████████████████████████████████████████| 306/306 [01:16<00:00,  4.02it/s]


In [17]:
score = [s[1:]for s in score]
score = np.mean(score, axis=0)
print(f"Accuracy: {score[0]:0.5f}")
print(f"F1: {score[1]:0.5f}")
print(f"Jaccard: {score[2]:0.5f}")
print(f"Recall: {score[3]:0.5f}")
print(f"Precision: {score[4]:0.5f}")

Accuracy: 0.99427
F1: 0.79447
Jaccard: 0.70679
Recall: 0.85322
Precision: 0.78579
