# 4.4.1. Foreground-Background imbalance in detection metrics

Simulation of four imbalance scenarios (three derived from the MITOS12 class distribution and one from the MITOS-ATYPIA-14 distribution) for a two-stage detector with fixed performance:
* Candidate selection step: 99.99% specificity, 100% sensitivity
* Candidate classifier: 99% specificity, 75% sensitivity

In [1]:
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline

In [2]:
from metrics.detection import precision, recall, f1
from datasets.mitos12 import get_file_names, get_mitosis_per_file, get_total_candidate_area, PX_SIZE

**Compute characteristics of the dataset**

In [3]:
def summarize_split(title, **split_kwargs):
    """Print and return the mitosis / candidate-area statistics of one split.

    Parameters
    ----------
    title : str
        Header line printed before the statistics (e.g. "TRAINING SET:").
    **split_kwargs
        Forwarded unchanged to both `get_file_names` and
        `get_mitosis_per_file`. Pass nothing for the training split and
        `train=False` for the test split, which reproduces the calls this
        cell made before the refactoring.

    Returns
    -------
    dict
        Keys `files`, `mitosis_per_file`, `n_mitosis`, `mitosis_area` and
        `total_area` for the requested split.
    """
    print(title)
    files = get_file_names(**split_kwargs)
    mitosis_per_file = get_mitosis_per_file(files, **split_kwargs)

    n_mitosis = sum(len(mitosis) for mitosis in mitosis_per_file.values())
    print(f"Number of mitosis: {n_mitosis}")

    # Sum of the `.area` attribute over every annotated mitosis of the split.
    mitosis_area = sum(m.area
                       for mitosis in mitosis_per_file.values()
                       for m in mitosis)
    # Areas are labelled px² everywhere so both splits print the same units;
    # the µm² figure applies PX_SIZE once per dimension.
    print(f"Mitosis area: {mitosis_area} (px²), {mitosis_area*PX_SIZE*PX_SIZE:.2f} (µm²)")

    total_area = get_total_candidate_area(files)
    print(f"Total area: {total_area} (px²), {total_area*PX_SIZE*PX_SIZE:.2f} (µm²)")

    print(f"Mitosis ratio: {100*mitosis_area/total_area:.4f}%")

    print(f"Avg mitosis area: {mitosis_area/n_mitosis:.2f} (px²)")

    return {
        "files": files,
        "mitosis_per_file": mitosis_per_file,
        "n_mitosis": n_mitosis,
        "mitosis_area": mitosis_area,
        "total_area": total_area,
    }


train_stats = summarize_split("TRAINING SET:")
test_stats = summarize_split("TEST SET:", train=False)

# The scenario cell further down reads `n_mitosis`, `mitosis_area` and
# `total_area`. As before, these names end up bound to the TEST-set values;
# the binding is now explicit instead of a side effect of statement order.
files = test_stats["files"]
mitosis_per_file = test_stats["mitosis_per_file"]
n_mitosis = test_stats["n_mitosis"]
mitosis_area = test_stats["mitosis_area"]
total_area = test_stats["total_area"]

TRAINING SET:
Number of mitosis: 226
Mitosis area: 135366 (px), 8165.19 (µm²)
Total area: 146800640 (px), 8854920.65 (µm²)
Mitosis ratio: 0.0922%
Avg mitosis area: 598.96 (px)
TEST SET:
Number of mitosis: 101
Mitosis area: 57193 (px²), 3449.85 (µm²)
Total area: 62914560 (px²), 3794965.99 (µm²)
Mitosis ratio: 0.0909%
Avg mitosis area: 566.27 (px)


**Scenarios**

In [4]:
# Detector performance simulation: the same two-stage detector is applied to
# every scenario; only the mitosis / non-mitosis counts change.
from datasets.mitos12 import detector_simulator
# precision, recall and f1 are already imported from metrics.detection in the
# import cell near the top of the notebook, so they are not re-imported here.

SEN = 0.75             # sensitivity of the candidate classifier
SPE = 0.99             # specificity of the candidate classifier
selector_SPE = 0.9999  # specificity of the candidate selection step

# MITOS-ATYPIA-14 figures.
# NOTE(review): by analogy with the MITOS12 entries below these look like the
# number of mitoses, the total area and the mitosis area. Confirm the source.
ATYPIA14_N_MITOSIS = 749
ATYPIA14_TOTAL_AREA = 2541196800
ATYPIA14_MITOSIS_AREA = 441910

# `n_mitosis`, `total_area` and `mitosis_area` come from the dataset
# statistics cell above; when the notebook is run top to bottom they hold the
# values of the split processed last there (the test set).
SCENARIOS = {
    "MITOS12 distribution": {
        'n_mitosis': n_mitosis,
        'n_nonmitosis': total_area-mitosis_area,
    },
    "Smaller region": {
        'n_mitosis': 0.75*n_mitosis,
        'n_nonmitosis': 0.5*total_area - 0.75*mitosis_area,
    },
    "Larger region": {
        'n_mitosis': 1.5*n_mitosis,
        'n_nonmitosis': 2*total_area-1.5*mitosis_area,
    },
    "MITOS-ATYPIA-14 distribution": {
        'n_mitosis': ATYPIA14_N_MITOSIS,
        'n_nonmitosis': ATYPIA14_TOTAL_AREA-ATYPIA14_MITOSIS_AREA,
    },
}

for scenario, values in SCENARIOS.items():
    print(scenario)
    # `tn` is returned by the simulator but is not used by the metrics below.
    tp, fp, fn, tn = detector_simulator(
        values["n_mitosis"],
        values["n_nonmitosis"],
        sensitivity=SEN,
        specificity=SPE,
        pre_detector_specificity=selector_SPE,
    )
    print(f"Precision: {precision(tp, fp, fn):.2f}")
    print(f"Recall: {recall(tp, fp, fn):.2f}")
    print(f"F1: {f1(tp, fp, fn):.2f}")
    print("===============")

MITOS12 distribution
Precision: 0.55
Recall: 0.75
F1: 0.63
Smaller region
Precision: 0.64
Recall: 0.75
F1: 0.69
Larger region
Precision: 0.47
Recall: 0.75
F1: 0.58
MITOS-ATYPIA-14 distribution
Precision: 0.18
Recall: 0.75
F1: 0.29
