In [1]:
# improve speed of SVC in sklearn
# Swaps scikit-learn's SVC for the Intel(R) Extension for Scikit-learn version.
# NOTE(review): this lives in the first cell so the patch is applied before the
# detector (and whatever sklearn imports it triggers) is loaded — confirm the
# ordering requirement against the sklearnex documentation.
from sklearnex import patch_sklearn
patch_sklearn("SVC")


Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


In [2]:
import sys
# Put the parent directory first on the import path; presumably this is what lets
# the project packages imported below (machine_learning, metrics, database) resolve.
# The path is relative, so it depends on the notebook's working directory.
sys.path.insert(0, '..')

In [3]:
from machine_learning.ml_detector import DetectorML
from metrics.metrics_utils import get_tp_fp_fn_center_patch_criteria

In [4]:
import pandas as pd
import numpy as np

from database.dataset import *

# set the seed for reproducible execution
# np.random.seed() returns None, so binding its result would leave `rseed` as None.
# Keep the seed value in `rseed` and seed NumPy's global generator with it.
rseed = 42
np.random.seed(rseed)

In [5]:
# Build the ML detector from the saved cascade models file.
# NOTE(review): this is an absolute, user-specific path — it has to be adjusted
# before the notebook can run on another machine.
cascade_models_path = '/home/vzalevskyi/projects/data/cascade/cascade_models.pkl'
det_ml = DetectorML(cascade_models_path)

Loading model...
Selected sensitivity threshold to keep 0.98
Confidence threshold to keep given sensitivity is 0.00380
Filtering out all candidates with confidence <=0.00380 is estimated to reduce FP by 49.97 %


# Validation

In [6]:
# Dataset options for the validation partition, collected in one place and
# unpacked into the constructor.
validation_db_options = {
    'return_lesions_mask': True,
    'level': 'image',
    'max_lesion_diam_mm': None,
    'extract_patches': False,
    'partitions': ['validation'],
    'lesion_types': ['calcification', 'cluster'],
    'cropped_imgs': True,
    'keep_just_images_of_lesion_type': False,
    'use_muscle_mask': False,
    'ignore_diameter_px': 15,
}
db = INBreast_Dataset(**validation_db_options)


Circular metric

In [7]:
# Run the detector on every image of `db` and score the detected candidates
# against the lesion mask. Per-image results are accumulated in the lists below.
all_features = []
all_tps = []
all_fps = []
all_fns = []
all_ignored_candidates = []
save_path = Path.cwd().parent.parent/'data/test_features/val/circular'
# `save_path` is later used as a file-name prefix (str(save_path) + '<name>.f'),
# so the files end up in its parent directory. Create that directory up front:
# otherwise a missing directory is only discovered at save time, after the whole
# detection loop has already been paid for.
save_path.parent.mkdir(parents=True, exist_ok=True)

# Positional arguments forwarded to get_tp_fp_fn_center_patch_criteria.
# NOTE(review): their meaning is defined by that function — the last flag is True
# in the "Circular metric" cells and False in the "Square" cell; confirm there.
criteria_args = (None, 14, True)

for idx in tqdm(range(len(db))):
    # extracting data
    db_sample = db[idx]
    image = db_sample['img']
    image_id = db.df.iloc[idx].img_id
    image_mask = db_sample['lesion_mask']

    # candidate detection for this image, tagged with the image id
    cand_features_probas = det_ml.detect(image)
    cand_features_probas['img_id'] = image_id

    # stack the per-candidate coordinates into one integer array for the metric
    candidate_coordinates = np.stack(
        cand_features_probas.candidate_coordinates.values).astype(int)
    tp, fp, fn, ignored_candidates = get_tp_fp_fn_center_patch_criteria(
        candidate_coordinates, image_mask, *criteria_args)
    tp['img_id'] = image_id
    fp['img_id'] = image_id
    fn['img_id'] = image_id
    ignored_candidates['img_id'] = image_id

    all_features.append(cand_features_probas)
    all_tps.append(tp)
    all_fps.append(fp)
    all_fns.append(fn)
    all_ignored_candidates.append(ignored_candidates)

# NOTE(review): the lists are wrapped with pd.DataFrame here rather than
# concatenated; the following cell reads column 0 back and concatenates its
# values, so this wrapping must stay as long as that cell is unchanged.
all_features = pd.DataFrame(all_features)
all_tps = pd.DataFrame(all_tps)
all_fps = pd.DataFrame(all_fps)
all_fns = pd.DataFrame(all_fns)
all_ignored_candidates = pd.DataFrame(all_ignored_candidates)


  0%|          | 0/62 [00:00<?, ?it/s]INFO:root:sklearn.svm.SVC._predict_proba: running accelerated version on CPU
INFO:root:sklearn.svm.SVC.decision_function: running accelerated version on CPU
  2%|▏         | 1/62 [00:40<41:22, 40.69s/it]INFO:root:sklearn.svm.SVC._predict_proba: running accelerated version on CPU
INFO:root:sklearn.svm.SVC.decision_function: running accelerated version on CPU
  3%|▎         | 2/62 [01:15<37:25, 37.42s/it]INFO:root:sklearn.svm.SVC._predict_proba: running accelerated version on CPU
INFO:root:sklearn.svm.SVC.decision_function: running accelerated version on CPU
  5%|▍         | 3/62 [02:01<40:28, 41.16s/it]INFO:root:sklearn.svm.SVC._predict_proba: running accelerated version on CPU
INFO:root:sklearn.svm.SVC.decision_function: running accelerated version on CPU
  6%|▋         | 4/62 [02:57<45:22, 46.94s/it]INFO:root:sklearn.svm.SVC._predict_proba: running accelerated version on CPU
INFO:root:sklearn.svm.SVC.decision_function: running accelerated version 

In [8]:
def _concat_wrapped(wrapped):
    """Concatenate the objects stored in column 0 of `wrapped` into one frame."""
    return pd.concat(wrapped[0].values)


# Flatten each wrapper built by the previous cell into a single frame.
# NOTE(review): this cell rebinds its own inputs, so it cannot be re-run without
# re-running the loop cell first.
all_features = _concat_wrapped(all_features)
all_tps = _concat_wrapped(all_tps)
all_fps = _concat_wrapped(all_fps)
all_fns = _concat_wrapped(all_fns)
all_ignored_candidates = _concat_wrapped(all_ignored_candidates)


In [9]:

# Write every result frame to a feather file. The file name is `save_path` with the
# suffix appended directly (no path separator is inserted between them).
outputs_to_save = (
    (all_features, 'features.f'),
    (all_tps, 'tps.f'),
    (all_fps, 'fps.f'),
    (all_fns, 'fns.f'),
    (all_ignored_candidates, 'ignored_candidates.f'),
)
for frame, suffix in outputs_to_save:
    # the index is reset right before writing, as in each original call
    frame.reset_index().to_feather(str(save_path) + suffix)


Square metric

In [7]:
# Run the detector on every image of `db`, score the detected candidates against
# the lesion mask, then concatenate the per-image results and save them.
all_features = []
all_tps = []
all_fps = []
all_fns = []
all_ignored_candidates = []
save_path = Path.cwd().parent.parent/'data/test_features/val/square'
# `save_path` is used below as a file-name prefix (str(save_path) + '<name>.f'),
# so the files end up in its parent directory. Create that directory up front:
# otherwise a missing directory is only discovered at save time, after the whole
# detection loop has already been paid for.
save_path.parent.mkdir(parents=True, exist_ok=True)

# Positional arguments forwarded to get_tp_fp_fn_center_patch_criteria.
# NOTE(review): their meaning is defined by that function — the last flag is False
# in this "Square" cell and True in the "Circular metric" cells; confirm there.
criteria_args = (None, 14, False)

for idx in tqdm(range(len(db))):
    # extracting data
    db_sample = db[idx]
    image = db_sample['img']
    image_id = db.df.iloc[idx].img_id
    image_mask = db_sample['lesion_mask']

    # candidate detection for this image, tagged with the image id
    cand_features_probas = det_ml.detect(image)
    cand_features_probas['img_id'] = image_id

    # stack the per-candidate coordinates into one integer array for the metric
    candidate_coordinates = np.stack(
        cand_features_probas.candidate_coordinates.values).astype(int)
    tp, fp, fn, ignored_candidates = get_tp_fp_fn_center_patch_criteria(
        candidate_coordinates, image_mask, *criteria_args)
    tp['img_id'] = image_id
    fp['img_id'] = image_id
    fn['img_id'] = image_id
    ignored_candidates['img_id'] = image_id

    all_features.append(cand_features_probas)
    all_tps.append(tp)
    all_fps.append(fp)
    all_fns.append(fn)
    all_ignored_candidates.append(ignored_candidates)

# one frame per result type; the index is reset before writing to feather
all_features = pd.concat(all_features).reset_index()
all_tps = pd.concat(all_tps).reset_index()
all_fps = pd.concat(all_fps).reset_index()
all_fns = pd.concat(all_fns).reset_index()
all_ignored_candidates = pd.concat(all_ignored_candidates).reset_index()

# file name = save_path with the suffix appended directly (no separator inserted)
all_features.to_feather(str(save_path) + 'features.f')
all_tps.to_feather(str(save_path) + 'tps.f')
all_fps.to_feather(str(save_path) + 'fps.f')
all_fns.to_feather(str(save_path) + 'fns.f')
all_ignored_candidates.to_feather(str(save_path) + 'ignored_candidates.f')


  0%|          | 0/62 [00:00<?, ?it/s]INFO:root:sklearn.svm.SVC._predict_proba: running accelerated version on CPU
INFO:root:sklearn.svm.SVC.decision_function: running accelerated version on CPU
  2%|▏         | 1/62 [00:42<43:24, 42.69s/it]INFO:root:sklearn.svm.SVC._predict_proba: running accelerated version on CPU
INFO:root:sklearn.svm.SVC.decision_function: running accelerated version on CPU
  3%|▎         | 2/62 [01:20<39:46, 39.78s/it]INFO:root:sklearn.svm.SVC._predict_proba: running accelerated version on CPU
INFO:root:sklearn.svm.SVC.decision_function: running accelerated version on CPU
  5%|▍         | 3/62 [02:03<40:29, 41.17s/it]INFO:root:sklearn.svm.SVC._predict_proba: running accelerated version on CPU
INFO:root:sklearn.svm.SVC.decision_function: running accelerated version on CPU
  6%|▋         | 4/62 [02:56<44:31, 46.06s/it]INFO:root:sklearn.svm.SVC._predict_proba: running accelerated version on CPU
INFO:root:sklearn.svm.SVC.decision_function: running accelerated version 

# Test

In [8]:
# Dataset options for the test partition, collected in one place and unpacked
# into the constructor.
test_db_options = {
    'return_lesions_mask': True,
    'level': 'image',
    'max_lesion_diam_mm': None,
    'extract_patches': False,
    'partitions': ['test'],
    'lesion_types': ['calcification', 'cluster'],
    'cropped_imgs': True,
    'keep_just_images_of_lesion_type': False,
    'use_muscle_mask': False,
    'ignore_diameter_px': 15,
}
db = INBreast_Dataset(**test_db_options)


Circular metric

In [9]:
# Run the detector on every image of `db`, score the detected candidates against
# the lesion mask, then concatenate the per-image results and save them.
all_features = []
all_tps = []
all_fps = []
all_fns = []
all_ignored_candidates = []
save_path = Path.cwd().parent.parent/'data/test_features/test/circular'
# `save_path` is used below as a file-name prefix (str(save_path) + '<name>.f'),
# so the files end up in its parent directory. Create that directory up front:
# otherwise a missing directory is only discovered at save time, after the whole
# detection loop (hours, per the recorded progress output) has already been paid for.
save_path.parent.mkdir(parents=True, exist_ok=True)

# Positional arguments forwarded to get_tp_fp_fn_center_patch_criteria.
# NOTE(review): their meaning is defined by that function — the last flag is True
# in the "Circular metric" cells and False in the "Square" cell; confirm there.
criteria_args = (None, 14, True)

for idx in tqdm(range(len(db))):
    # extracting data
    db_sample = db[idx]
    image = db_sample['img']
    image_id = db.df.iloc[idx].img_id
    image_mask = db_sample['lesion_mask']

    # candidate detection for this image, tagged with the image id
    cand_features_probas = det_ml.detect(image)
    cand_features_probas['img_id'] = image_id

    # stack the per-candidate coordinates into one integer array for the metric
    candidate_coordinates = np.stack(
        cand_features_probas.candidate_coordinates.values).astype(int)
    tp, fp, fn, ignored_candidates = get_tp_fp_fn_center_patch_criteria(
        candidate_coordinates, image_mask, *criteria_args)
    tp['img_id'] = image_id
    fp['img_id'] = image_id
    fn['img_id'] = image_id
    ignored_candidates['img_id'] = image_id

    all_features.append(cand_features_probas)
    all_tps.append(tp)
    all_fps.append(fp)
    all_fns.append(fn)
    all_ignored_candidates.append(ignored_candidates)

# one frame per result type; the index is reset before writing to feather
all_features = pd.concat(all_features).reset_index()
all_tps = pd.concat(all_tps).reset_index()
all_fps = pd.concat(all_fps).reset_index()
all_fns = pd.concat(all_fns).reset_index()
all_ignored_candidates = pd.concat(all_ignored_candidates).reset_index()

# file name = save_path with the suffix appended directly (no separator inserted)
all_features.to_feather(str(save_path) + 'features.f')
all_tps.to_feather(str(save_path) + 'tps.f')
all_fps.to_feather(str(save_path) + 'fps.f')
all_fns.to_feather(str(save_path) + 'fns.f')
all_ignored_candidates.to_feather(str(save_path) + 'ignored_candidates.f')


  0%|          | 0/200 [00:00<?, ?it/s]INFO:root:sklearn.svm.SVC._predict_proba: running accelerated version on CPU
INFO:root:sklearn.svm.SVC.decision_function: running accelerated version on CPU
  0%|          | 1/200 [01:11<3:57:20, 71.56s/it]INFO:root:sklearn.svm.SVC._predict_proba: running accelerated version on CPU
INFO:root:sklearn.svm.SVC.decision_function: running accelerated version on CPU
  1%|          | 2/200 [01:51<2:54:30, 52.88s/it]INFO:root:sklearn.svm.SVC._predict_proba: running accelerated version on CPU
INFO:root:sklearn.svm.SVC.decision_function: running accelerated version on CPU
  2%|▏         | 3/200 [02:55<3:10:20, 57.97s/it]INFO:root:sklearn.svm.SVC._predict_proba: running accelerated version on CPU
INFO:root:sklearn.svm.SVC.decision_function: running accelerated version on CPU
  2%|▏         | 4/200 [03:42<2:55:29, 53.72s/it]INFO:root:sklearn.svm.SVC._predict_proba: running accelerated version on CPU
INFO:root:sklearn.svm.SVC.decision_function: running acceler