## Feature Extraction Test

In [1]:
import pickle
import sys
from pathlib import Path

# Make the project packages importable when running from the notebook folder.
sys.path.insert(0, '..')

from metrics.metrics_utils import get_tp_fp_fn_center_patch_criteria
from general_utils.plots import plot_bboxes_over_image, simple_im_show, simple_im_show2
from mc_candidate_proposal.hough_mc import HoughCalcificationDetection
from mc_candidate_proposal.morphology_mc import MorphologyCalcificationDetection
from feature_extraction.feature_extraction import CandidatesFeatureExtraction, CandidatesFeatureExtraction_MP
from database.dataset import *
import feature_extraction.haar_features.haar_modules as hm

### Database initialization

In [2]:
# Dataset handle shared by every extraction cell below. Those cells read the
# 'img', 'lesion_mask' and 'muscle_mask' entries of each sample and the
# `img_id` column of `db.df`.
# NOTE(review): the exact meaning of each option is defined by
# INBreast_Dataset; the grouping below is by apparent purpose only.
db = INBreast_Dataset(
    # image selection
    partitions=['train', 'validation'],
    lesion_types=['calcification', 'cluster'],
    keep_just_images_of_lesion_type=False,
    cropped_imgs=True,
    # sample content
    level='image',
    extract_patches=False,
    return_lesions_mask=True,
    use_muscle_mask=True,
    # lesion size options
    max_lesion_diam_mm=None,
    ignore_diameter_px=15,
)

### Detectors Parameters and initialization

In [3]:
# Hough-based candidate detector, built with its default arguments.
hd = HoughCalcificationDetection()

# Patch sizes. Only PATCH_SIZE is consumed in this cell; CENTER_CROP_PATCH is
# not referenced by the visible cells.
CENTER_CROP_PATCH = 14
PATCH_SIZE = 14

# Parameter sets handed to the feature extractor below.
GABOR_PARAMS = dict(
    scale=2,
    orientation=3,
    max_freq=0.2,
    ksize=(20, 20),
    sigma=3,
)
WAVELET_PARAMS = dict(angles=[0, np.pi/4, np.pi/2])

# Haar features are disabled in this notebook (haar_params=None below).
# The two configurations that were tried are kept for reference only.
#
# Variant A: one explicit horizontal feature type.
# HAAR_PARAMS = {
#     'skimage': {
#         'feature_type': [], 'feature_coord': []
#     },
#     'ours': {
#         'horizontal_feature_types': [(hm.Feature3h3v, 3, 3)],
#         'rotated_feature_types': None,
#         'horizontal_feature_selection': None,
#         'rotated_feature_selection': None,
#     },
#     'patch_size': PATCH_SIZE
# }
#
# Variant B: feature selection loaded from a pickle.
# # IS THIS FINAL HAAR? MAYBE
# path = (Path.cwd().parent / f'feature_extraction/haar_features/')
# with open(path/'final_feat_selection.p', 'rb') as f:
#     selection = pickle.load(f)
#
# HAAR_PARAMS = {
#     'skimage': {
#         'feature_type': selection['skimage_haar_feature_types_sel'],
#         'feature_coord': selection['skimage_haar_feature_coords_sel']
#     },
#     'ours': {
#         'horizontal_feature_selection': selection['hor_feats_selection'].tolist(),
#         'rotated_feature_selection': selection['rot_feats_selection'].tolist(),
#         'rotated_feature_types': None,
#         'horizontal_feature_types': None
#     },
#     'patch_size': 14
# }

# Multi-process feature extractor used by every extraction loop.
cfe = CandidatesFeatureExtraction_MP(
    patch_size=PATCH_SIZE,
    fos=True,
    gabor_params=GABOR_PARAMS,
    wavelet_params=WAVELET_PARAMS,
    haar_params=None,
    n_jobs=4,
)

### Dataset indices to process

In [4]:
# Positional indices of every dataset sample; the extraction cells slice this
# list to split the work into batches.
db_range = [*range(len(db))]

### Feature extraction

### Hough

In [5]:
# Hough candidates, batch 1: for the first 110 dataset samples, detect the
# candidates, label them, extract their features and store the results as
# feather files.
data_path = Path.cwd().parent.parent/'data/features/hough'
# Create the output folder before the long-running loop, so the results are
# not lost to a missing directory when the feather files are written.
data_path.mkdir(parents=True, exist_ok=True)

fdf = []
fns_df = []
ingnored_df = []

batch = 1

for idx in tqdm(db_range[:110]):
    # extracting data
    db_sample = db[idx]
    image = db_sample['img']
    image_id = db.df.iloc[idx].img_id
    image_mask = db_sample['lesion_mask']
    muscle_mask = db_sample['muscle_mask']

    # candidate selection (the detector returns a sequence; its first element
    # holds the candidates)
    candidates = hd.detect(image, image_id, load_processed_images=True, hough2=False, muscle_mask=muscle_mask)[0]

    # labeling of candidates
    # NOTE(review): the trailing 14 is presumably the patch size (PATCH_SIZE);
    # confirm against the signature of get_tp_fp_fn_center_patch_criteria.
    tp, fp, fn, ignored_candidates = get_tp_fp_fn_center_patch_criteria(
        candidates, image_mask, None, 14)

    candidates = pd.concat([tp, fp], axis=0, ignore_index=True)

    # Boolean target: True for candidates labelled 'TP'.
    labels = (candidates.label == 'TP').values

    # Extract features once per distinct `repeted_idxs` value.
    unique_candidates = candidates.drop_duplicates(subset='repeted_idxs')
    X = cfe.extract_features(
        unique_candidates.loc[:, ['x', 'y', 'radius']].values.astype(int), image)

    # Get features dfs
    X = pd.DataFrame(data=X, columns=cfe.feature_names)
    X.index = unique_candidates.index

    # Replicate the features for every candidate. Rows are looked up by label,
    # so this assumes each `repeted_idxs` value equals the index label of the
    # de-duplicated row it refers to.
    # NOTE(review): confirm against get_tp_fp_fn_center_patch_criteria.
    res = X.loc[candidates.repeted_idxs.tolist(), :]
    res['img_id'] = image_id
    res['repeted_idxs'] = candidates.repeted_idxs.tolist()
    res['matching_gt'] = candidates.matching_gt.tolist()
    res['label'] = labels

    # Generate a fn dataframe to compute frocs (assign returns a new frame, so
    # the frame handed back by the labeling function is not modified in place)
    fn = fn.assign(img_id=image_id)
    fns_df.append(fn)
    fdf.append(res)
    ingnored_df.append(ignored_candidates)

all_data_df = pd.concat(fdf, ignore_index=True)
fns_df = pd.concat(fns_df, ignore_index=True)
ingnored_df = pd.concat(ingnored_df, ignore_index=True)

all_data_df.to_feather(str(data_path/f'all_data_df_{batch}.f'))
fns_df.to_feather(str(data_path/f'fns_df_{batch}.f'))
ingnored_df.to_feather(str(data_path/f'ingnored_df_{batch}.f'))

100%|██████████| 110/110 [1:55:41<00:00, 63.11s/it] 


In [6]:
# Drop the names holding the previous batch's results before the next batch
# is accumulated.
del all_data_df, fns_df, fdf, ingnored_df

In [8]:
# Hough candidates, batch 2: for the dataset samples from position 110
# onwards, detect the candidates, label them, extract their features and
# store the results as feather files.
data_path = Path.cwd().parent.parent/'data/features/hough'
# Create the output folder before the long-running loop, so the results are
# not lost to a missing directory when the feather files are written.
data_path.mkdir(parents=True, exist_ok=True)

fdf = []
fns_df = []
ingnored_df = []

batch = 2

for idx in tqdm(db_range[110:]):
    # extracting data
    db_sample = db[idx]
    image = db_sample['img']
    image_id = db.df.iloc[idx].img_id
    image_mask = db_sample['lesion_mask']
    muscle_mask = db_sample['muscle_mask']

    # candidate selection (the detector returns a sequence; its first element
    # holds the candidates)
    candidates = hd.detect(image, image_id, load_processed_images=True, hough2=False, muscle_mask=muscle_mask)[0]

    # labeling of candidates
    # NOTE(review): the trailing 14 is presumably the patch size (PATCH_SIZE);
    # confirm against the signature of get_tp_fp_fn_center_patch_criteria.
    tp, fp, fn, ignored_candidates = get_tp_fp_fn_center_patch_criteria(
        candidates, image_mask, None, 14)

    candidates = pd.concat([tp, fp], axis=0, ignore_index=True)

    # Boolean target: True for candidates labelled 'TP'.
    labels = (candidates.label == 'TP').values

    # Extract features once per distinct `repeted_idxs` value.
    unique_candidates = candidates.drop_duplicates(subset='repeted_idxs')
    X = cfe.extract_features(
        unique_candidates.loc[:, ['x', 'y', 'radius']].values.astype(int), image)

    # Get features dfs
    X = pd.DataFrame(data=X, columns=cfe.feature_names)
    X.index = unique_candidates.index

    # Replicate the features for every candidate. Rows are looked up by label,
    # so this assumes each `repeted_idxs` value equals the index label of the
    # de-duplicated row it refers to.
    # NOTE(review): confirm against get_tp_fp_fn_center_patch_criteria.
    res = X.loc[candidates.repeted_idxs.tolist(), :]
    res['img_id'] = image_id
    res['repeted_idxs'] = candidates.repeted_idxs.tolist()
    res['matching_gt'] = candidates.matching_gt.tolist()
    res['label'] = labels

    # Generate a fn dataframe to compute frocs (assign returns a new frame, so
    # the frame handed back by the labeling function is not modified in place)
    fn = fn.assign(img_id=image_id)
    fns_df.append(fn)
    fdf.append(res)
    ingnored_df.append(ignored_candidates)

all_data_df = pd.concat(fdf, ignore_index=True)
fns_df = pd.concat(fns_df, ignore_index=True)
ingnored_df = pd.concat(ingnored_df, ignore_index=True)

all_data_df.to_feather(str(data_path/f'all_data_df_{batch}.f'))
fns_df.to_feather(str(data_path/f'fns_df_{batch}.f'))
ingnored_df.to_feather(str(data_path/f'ingnored_df_{batch}.f'))

100%|██████████| 100/100 [1:50:09<00:00, 66.10s/it]


### Morphology

In [5]:
# Morphology-based candidate detector configuration.
threshold = 0.95
min_distance = 6
area = 14
# Resolve the image folder from the project root (two levels above the
# notebook's working directory, the same convention `data_path` uses in the
# extraction cells) instead of a machine-specific absolute path. The folder
# name is kept exactly as it is spelled on disk.
rbd_p = str(Path.cwd().parent.parent/'data/recounstructed_by_dialation_img')
# The detector receives the squared `area` value as its fourth argument.
md = MorphologyCalcificationDetection(rbd_p, threshold, min_distance, area*area, filter_muscle_region=True, store_intermediate=False)

In [6]:
# Morphology candidates, batch 1: for the first 110 dataset samples, detect
# the candidates, label them, extract their features and store the results
# as feather files.
data_path = Path.cwd().parent.parent/'data/features/morph'
# Create the output folder before the long-running loop, so the results are
# not lost to a missing directory when the feather files are written.
data_path.mkdir(parents=True, exist_ok=True)

fdf = []
fns_df = []
ingnored_df = []

batch = 1

for idx in tqdm(db_range[:110]):
    # extracting data
    db_sample = db[idx]
    image = db_sample['img']
    image_id = db.df.iloc[idx].img_id
    image_mask = db_sample['lesion_mask']
    muscle_mask = db_sample['muscle_mask']

    # candidate selection
    candidates = md.detect(image, image_id, muscle_mask=muscle_mask)

    # labeling of candidates
    # NOTE(review): the trailing 14 is presumably the patch size (PATCH_SIZE);
    # confirm against the signature of get_tp_fp_fn_center_patch_criteria.
    tp, fp, fn, ignored_candidates = get_tp_fp_fn_center_patch_criteria(
        candidates, image_mask, None, 14)

    candidates = pd.concat([tp, fp], axis=0, ignore_index=True)

    # Boolean target: True for candidates labelled 'TP'.
    labels = (candidates.label == 'TP').values

    # Extract features once per distinct `repeted_idxs` value.
    unique_candidates = candidates.drop_duplicates(subset='repeted_idxs')
    X = cfe.extract_features(
        unique_candidates.loc[:, ['x', 'y', 'radius']].values.astype(int), image)

    # Get features dfs
    X = pd.DataFrame(data=X, columns=cfe.feature_names)
    X.index = unique_candidates.index

    # Replicate the features for every candidate. Rows are looked up by label,
    # so this assumes each `repeted_idxs` value equals the index label of the
    # de-duplicated row it refers to.
    # NOTE(review): confirm against get_tp_fp_fn_center_patch_criteria.
    res = X.loc[candidates.repeted_idxs.tolist(), :]
    res['img_id'] = image_id
    res['repeted_idxs'] = candidates.repeted_idxs.tolist()
    res['matching_gt'] = candidates.matching_gt.tolist()
    res['label'] = labels

    # Generate a fn dataframe to compute frocs (assign returns a new frame, so
    # the frame handed back by the labeling function is not modified in place)
    fn = fn.assign(img_id=image_id)
    fns_df.append(fn)
    fdf.append(res)
    ingnored_df.append(ignored_candidates)

all_data_df = pd.concat(fdf, ignore_index=True)
fns_df = pd.concat(fns_df, ignore_index=True)
ingnored_df = pd.concat(ingnored_df, ignore_index=True)

all_data_df.to_feather(str(data_path/f'all_data_df_{batch}.f'))
fns_df.to_feather(str(data_path/f'fns_df_{batch}.f'))
ingnored_df.to_feather(str(data_path/f'ingnored_df_{batch}.f'))

100%|██████████| 110/110 [47:58<00:00, 26.17s/it]


In [7]:
# Drop the names holding the previous batch's results before the next batch
# is accumulated.
del all_data_df, fns_df, fdf, ingnored_df

In [8]:
# Morphology candidates, batch 2: for the dataset samples from position 110
# onwards, detect the candidates, label them, extract their features and
# store the results as feather files.
data_path = Path.cwd().parent.parent/'data/features/morph'
# Create the output folder before the long-running loop, so the results are
# not lost to a missing directory when the feather files are written.
data_path.mkdir(parents=True, exist_ok=True)

fdf = []
fns_df = []
ingnored_df = []

batch = 2

for idx in tqdm(db_range[110:]):
    # extracting data
    db_sample = db[idx]
    image = db_sample['img']
    image_id = db.df.iloc[idx].img_id
    image_mask = db_sample['lesion_mask']
    muscle_mask = db_sample['muscle_mask']

    # candidate selection
    candidates = md.detect(image, image_id, muscle_mask=muscle_mask)

    # labeling of candidates
    # NOTE(review): the trailing 14 is presumably the patch size (PATCH_SIZE);
    # confirm against the signature of get_tp_fp_fn_center_patch_criteria.
    tp, fp, fn, ignored_candidates = get_tp_fp_fn_center_patch_criteria(
        candidates, image_mask, None, 14)

    candidates = pd.concat([tp, fp], axis=0, ignore_index=True)

    # Boolean target: True for candidates labelled 'TP'.
    labels = (candidates.label == 'TP').values

    # Extract features once per distinct `repeted_idxs` value.
    unique_candidates = candidates.drop_duplicates(subset='repeted_idxs')
    X = cfe.extract_features(
        unique_candidates.loc[:, ['x', 'y', 'radius']].values.astype(int), image)

    # Get features dfs
    X = pd.DataFrame(data=X, columns=cfe.feature_names)
    X.index = unique_candidates.index

    # Replicate the features for every candidate. Rows are looked up by label,
    # so this assumes each `repeted_idxs` value equals the index label of the
    # de-duplicated row it refers to.
    # NOTE(review): confirm against get_tp_fp_fn_center_patch_criteria.
    res = X.loc[candidates.repeted_idxs.tolist(), :]
    res['img_id'] = image_id
    res['repeted_idxs'] = candidates.repeted_idxs.tolist()
    res['matching_gt'] = candidates.matching_gt.tolist()
    res['label'] = labels

    # Generate a fn dataframe to compute frocs (assign returns a new frame, so
    # the frame handed back by the labeling function is not modified in place)
    fn = fn.assign(img_id=image_id)
    fns_df.append(fn)
    fdf.append(res)
    ingnored_df.append(ignored_candidates)

all_data_df = pd.concat(fdf, ignore_index=True)
fns_df = pd.concat(fns_df, ignore_index=True)
ingnored_df = pd.concat(ingnored_df, ignore_index=True)

all_data_df.to_feather(str(data_path/f'all_data_df_{batch}.f'))
fns_df.to_feather(str(data_path/f'fns_df_{batch}.f'))
ingnored_df.to_feather(str(data_path/f'ingnored_df_{batch}.f'))

100%|██████████| 100/100 [47:24<00:00, 28.44s/it]
