In [None]:
!pip install xlrd
!pip install --upgrade pandas
!pip install openpyxl
!pip install nibabel

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import nibabel as nib
import pickle
import glob
import torchvision.ops as ops
import torch
pd.__version__

In [None]:
location = ''  # <specified location of SPIE data and metadata>

# Training-split metadata tables shipped with the challenge data:
# file paths per volume, ground-truth boxes, and per-volume labels.
train_paths = pd.read_csv(os.path.join(location, 'BCS-DBT file-paths-train.csv'))
train_boxes = pd.read_csv(os.path.join(location, 'BCS-DBT boxes-train.csv'))
train_labels = pd.read_csv(os.path.join(location, 'BCS-DBT labels-train.csv'))

In [None]:
# Example of a bounding box prediction for a single slice in a DBT volume
# output by the Faster R-CNN detector
#
# {'boxes': array([[ 626.5633, 1347.9958,  729.663 , 1417.5177]], dtype=float32), 'labels': array([1]), 'scores': array([0.08185726], dtype=float32)}

# ---------------
# Functions
# ---------------

### 3D Candidate Generation

In [None]:
def sls_to_vol(sls,
               continuity=0.03,
               score_threshold=0.25,
               depth_threshold=0.02,
               intersection_thresholds=[0,1],
               intersection_mode='IoSIB',
               box_scores_function=np.mean,
               top_n_scores=1,
               weighted_boxes_fusion=False):
    '''
    Group the per-slice (2D) detections of one volume into 3D candidate boxes.

    sls: per-slice predictions indexed 0..len(sls)-1; every entry provides
        'boxes' (rows of [x0, y0, x1, y1]) and 'scores' (one score per box,
        or one array of scores per box)
    continuity: fraction of slices without boxes to be tolerated as gaps
    score_threshold: 2D boxes whose score is not above this value are ignored
    depth_threshold: 3D boxes spanning this many slices or fewer are dropped;
        a value below 1 is read as a fraction of the number of slices
    intersection_thresholds: [overlap threshold handed to intersection2d,
        IoU threshold of the per-slice NMS]; NMS is skipped when the second
        entry is >= 1 (the default list is only read, never mutated)
    intersection_mode: overlap measure handed to intersection2d
    box_scores_function: reduces the array of scores of one box to one score
    top_n_scores: number of highest 2D scores averaged into the 3D score
    weighted_boxes_fusion: fuse coordinates by score-weighted averaging
        instead of taking the enclosing box

    returns:
        slice_groups, boxes3D, scores, box2D_scores
        four dicts keyed 0..n-1 by surviving 3D candidate:
        slice indices of the grouped 2D boxes, {'x','y','z'} -> [min, max],
        the 3D score, and the list of grouped 2D scores

    assumes:
        that all the slices in sls are ordered
    handles:
        situation where slice groups overlap later
    '''
    n_slices = len(sls)
    continuity = int(np.ceil(continuity * n_slices))

    if depth_threshold < 1:
        depth_threshold = int(np.ceil(depth_threshold * n_slices))

    group_thr = intersection_thresholds[0]
    nms_thr = intersection_thresholds[1]

    def _z_compatible(z_a, z_b):
        # slice ranges overlap, or they are separated by less than the tolerated gap
        return (z_a[1] > z_b[0] and z_b[1] > z_a[0]) or \
               abs(min(z_a) - max(z_b)) < continuity or \
               abs(min(z_b) - max(z_a)) < continuity

    # ------------------------------------------------------------------
    # Stage 1: walk through the slices and attach every 2D box to the
    # first existing group it overlaps, or open a new group.
    # ------------------------------------------------------------------
    slice_groups = {}
    boxes3D = {}
    fusion_weights = {}  # accumulated score mass per group (weighted fusion only)
    box2D_scores = {}
    for i in range(n_slices):
        sl_boxes = sls[i]['boxes']
        sl_scores = sls[i]['scores']

        if len(sl_scores) == 0:
            sl_scores = np.array([])
        elif type(sl_scores[0]) == np.ndarray:
            # keep an ndarray (not a list) so that torch.from_numpy and the
            # fancy indexing of the NMS step below keep working
            sl_scores = np.array([box_scores_function(_) for _ in sl_scores])

        # non-maximum suppression (NMS); nothing to suppress on empty slices
        if nms_thr < 1 and len(sl_scores) > 0:
            idx = ops.nms(torch.from_numpy(sl_boxes),
                          torch.from_numpy(sl_scores),
                          iou_threshold=nms_thr)
            sl_boxes = sl_boxes[idx.numpy()]
            sl_scores = sl_scores[idx.numpy()]

        for bi in range(len(sl_boxes)):
            score = sl_scores[bi]
            # low-scoring boxes neither join nor start a group
            if not score > score_threshold:
                continue

            box_x = sl_boxes[bi][[0,2]]
            box_y = sl_boxes[bi][[1,3]]

            target = None
            for j in slice_groups:
                if (i - continuity) <= max(slice_groups[j]) and \
                   intersection2d(box_x,
                                  boxes3D[j]['x'],
                                  box_y,
                                  boxes3D[j]['y'],
                                  b_intersection_thr=group_thr,
                                  intersection_mode=intersection_mode):
                    target = j
                    break

            if target is None:
                new_id = len(slice_groups)
                if weighted_boxes_fusion:
                    fusion_weights[new_id] = score
                boxes3D[new_id] = {'x': list(box_x),
                                   'y': list(box_y),
                                   'z': [i, i]}
                box2D_scores[new_id] = [score]
                slice_groups[new_id] = [i]
                continue

            box = boxes3D[target]
            if weighted_boxes_fusion:
                w_old = fusion_weights[target]
                w_sum = w_old + score
                box['x'][0] = (w_old*box['x'][0] + score*sl_boxes[bi][0]) / w_sum
                box['x'][1] = (w_old*box['x'][1] + score*sl_boxes[bi][2]) / w_sum
                box['y'][0] = (w_old*box['y'][0] + score*sl_boxes[bi][1]) / w_sum
                box['y'][1] = (w_old*box['y'][1] + score*sl_boxes[bi][3]) / w_sum
                fusion_weights[target] = w_sum
            else:
                # grow the group box to enclose the new 2D box
                box['x'][0] = min(box['x'][0], sl_boxes[bi][0])
                box['x'][1] = max(box['x'][1], sl_boxes[bi][2])
                box['y'][0] = min(box['y'][0], sl_boxes[bi][1])
                box['y'][1] = max(box['y'][1], sl_boxes[bi][3])

            slice_groups[target].append(i)
            box['z'][0] = min(slice_groups[target])
            box['z'][1] = max(slice_groups[target])
            box2D_scores[target].append(score)

    # ------------------------------------------------------------------
    # Stage 2: groups may have grown into each other; merge overlapping
    # 3D boxes repeatedly until a full pass performs no merge.  Every
    # pass starts from fresh accumulators holding copies, so the boxes of
    # the previous pass are never mutated and the box count strictly
    # decreases until the loop terminates.
    # ------------------------------------------------------------------
    while True:
        merged_groups = {}
        merged_boxes = {}
        merged_weights = {}
        merged_scores = {}
        for i in range(len(boxes3D)):
            target = None
            for j in range(len(merged_boxes)):
                if intersection2d(boxes3D[i]['x'],
                                  merged_boxes[j]['x'],
                                  boxes3D[i]['y'],
                                  merged_boxes[j]['y'],
                                  b_intersection_thr=group_thr,
                                  intersection_mode=intersection_mode) and \
                   _z_compatible(boxes3D[i]['z'], merged_boxes[j]['z']):
                    target = j
                    break

            if target is None:
                new_id = len(merged_boxes)
                merged_groups[new_id] = list(slice_groups[i])
                merged_boxes[new_id] = {coord: list(boxes3D[i][coord])
                                        for coord in 'xyz'}
                if weighted_boxes_fusion:
                    merged_weights[new_id] = fusion_weights[i]
                merged_scores[new_id] = list(box2D_scores[i])
                continue

            src = boxes3D[i]
            dst = merged_boxes[target]
            if weighted_boxes_fusion:
                w_dst = merged_weights[target]
                w_src = fusion_weights[i]
                # NOTE(review): z is score-averaged here as well, whereas
                # stage 1 keeps z as the min/max slice - confirm intent.
                for coord in 'xyz':
                    for boundary in range(2):
                        dst[coord][boundary] = \
                            (w_dst*dst[coord][boundary] +
                             w_src*src[coord][boundary]) / (w_dst + w_src)
                # the weight is accumulated once per merge, after all
                # coordinates used the same pair of weights
                merged_weights[target] = w_dst + w_src
            else:
                for coord in 'xyz':
                    dst[coord][0] = min(dst[coord][0], src[coord][0])
                    dst[coord][1] = max(dst[coord][1], src[coord][1])

            merged_groups[target] = merged_groups[target] + slice_groups[i]
            merged_scores[target] = merged_scores[target] + box2D_scores[i]

        converged = len(merged_boxes) == len(boxes3D)
        slice_groups = merged_groups
        boxes3D = merged_boxes
        fusion_weights = merged_weights
        box2D_scores = merged_scores
        if converged:
            break

    # ------------------------------------------------------------------
    # Stage 3: drop shallow boxes and score the survivors.
    # ------------------------------------------------------------------
    kept_groups = {}
    kept_boxes = {}
    scores = {}
    kept_box2D_scores = {}
    for i in range(len(boxes3D)):
        z_range = boxes3D[i]['z']
        depth = int(z_range[1] - z_range[0]) + 1
        if depth <= depth_threshold:
            continue
        new_id = len(scores)
        kept_groups[new_id] = slice_groups[i]
        kept_boxes[new_id] = boxes3D[i]
        kept_box2D_scores[new_id] = box2D_scores[i]
        # 3D score: mean of the top_n_scores highest 2D scores of the group
        scores[new_id] = np.mean(sorted(box2D_scores[i])[-top_n_scores:])

    return kept_groups, kept_boxes, scores, kept_box2D_scores


def intersection2d(b1_x,b2_x,b1_y,b2_y,b_intersection_thr=0,intersection_mode='IoSIB'):
    '''
    Decide whether two axis-aligned boxes overlap by more than a threshold.

    b1_x, b2_x: [min, max] extent of box 1 / box 2 along x
    b1_y, b2_y: [min, max] extent of box 1 / box 2 along y
    b_intersection_thr: the overlap ratio has to be strictly above this value
    intersection_mode:
        'IoU'   - intersection area over union area
        'IoSIB' - intersection area over the area of the smaller box

    returns:
        True when the boxes share a positive area and the selected ratio
        exceeds b_intersection_thr, False otherwise
    raises:
        ValueError for an unknown intersection_mode
    '''
    if intersection_mode not in ('IoU', 'IoSIB'):
        raise ValueError("intersection_mode must be 'IoU' or 'IoSIB', got {!r}"
                         .format(intersection_mode))

    # Extent of the shared interval on each axis.  Using min(upper bounds) -
    # max(lower bounds) stays correct when one box is contained in the other.
    overlap_x = min(b1_x[1], b2_x[1]) - max(b1_x[0], b2_x[0])
    overlap_y = min(b1_y[1], b2_y[1]) - max(b1_y[0], b2_y[0])
    if not (overlap_x > 0 and overlap_y > 0):
        return False

    intersection = overlap_x * overlap_y
    area_1 = (b1_x[1] - b1_x[0]) * (b1_y[1] - b1_y[0])
    area_2 = (b2_x[1] - b2_x[0]) * (b2_y[1] - b2_y[0])

    # A positive overlap implies both areas are positive, so the
    # denominators below cannot be zero.
    if intersection_mode == 'IoU':
        denominator = area_1 + area_2 - intersection
    else:
        denominator = min(area_1, area_2)

    return bool(intersection / denominator > b_intersection_thr)

### 3D Candidate Generation Evaluation

In [None]:
def find_tps(df_pred: pd.DataFrame,
             df_true: pd.DataFrame,
             num_slices_info: dict
            ) -> pd.DataFrame:
    """Flag predictions that hit a ground-truth box.

    Both frames are re-indexed by (StudyUID, View); their previous row labels
    are kept in an 'index' column.  The returned prediction frame carries two
    extra columns: 'TP' (1 for a hit, else 0) and 'GTID' (the 'index' value
    of the matched ground-truth row, else -1).

    num_slices_info maps PatientID -> StudyUID -> View -> number of slices;
    a quarter of that number is the slice tolerance passed to is_tp.
    """
    volume_key = ["StudyUID", "View"]
    df_pred = df_pred.reset_index().set_index(volume_key).sort_index()
    df_true = df_true.reset_index().set_index(volume_key).sort_index()

    df_pred['TP'] = 0
    df_pred['GTID'] = -1

    for pred in df_pred.itertuples():
        volume = pred.Index
        # volumes without any ground-truth box cannot contain a true positive
        if volume not in df_true.index:
            continue

        slice_tolerance = num_slices_info[pred.PatientID][volume[0]][volume[1]] / 4
        hits = []
        for gt in df_true.loc[[volume]].itertuples():
            if is_tp(pred, gt, slice_offset=slice_tolerance):
                hits.append(gt)

        if len(hits) == 0:
            continue

        if len(hits) > 1:
            # several boxes qualify: keep the nearest one
            gaps = [distance(pred, gt) for gt in hits]
            matched = hits[np.argmin(gaps)]
        else:
            matched = hits[0]

        same_row = df_pred["index"] == pred.index
        df_pred.loc[same_row, ['TP', 'GTID']] = (1, matched.index)

    return df_pred

def evaluate(df_labels: pd.DataFrame,
             df_pred: pd.DataFrame,
             df_true: pd.DataFrame,
             num_slices_info: dict,
             fps_per_vol: tuple = (1.0, 2.0, 3.0, 4.0,)
            ) -> tuple:
    """Compute FROC sensitivities of a list of 3D candidates.

    df_labels: one row per evaluated volume (needs StudyUID and View)
    df_pred: predicted boxes with a Score column
    df_true: ground-truth boxes
    num_slices_info: PatientID -> StudyUID -> View -> number of slices
    fps_per_vol: false positives per volume at which sensitivity is reported

    returns:
        (tpr_positive, tpr_all) - two lists with one sensitivity per entry of
        fps_per_vol; the first uses only the volumes that have ground-truth
        boxes, the second uses every volume in df_labels
    """
    # true-positive matching is shared with find_tps instead of being
    # duplicated here; it returns df_pred indexed by (StudyUID, View) with
    # the 'TP' and 'GTID' columns filled in
    df_pred = find_tps(df_pred=df_pred,
                       df_true=df_true,
                       num_slices_info=num_slices_info)

    volume_key = ["StudyUID", "View"]
    df_labels = df_labels.reset_index().set_index(volume_key).sort_index()
    df_true = df_true.reset_index().set_index(volume_key).sort_index()

    # operating points: the score of every true positive, framed by one
    # threshold above all scores and one below all scores
    thresholds = [df_pred['Score'].max() + 1.0]
    thresholds.extend(df_pred.loc[df_pred['TP'] == 1, 'Score'])
    thresholds.append(df_pred['Score'].min() - 1.0)

    # compute sensitivity at specified FPs/volume on all cases
    tpr_all = froc(df_pred=df_pred,
                   thresholds=thresholds,
                   n_volumes=len(df_labels),
                   n_boxes=len(df_true),
                   evaluation_fps=fps_per_vol
                  )

    # compute sensitivity at specified FPs/volume on positive cases
    df_pred = df_pred[df_pred.index.isin(df_true.index)]
    df_labels = df_labels[df_labels.index.isin(df_true.index)]
    tpr_positive = froc(df_pred=df_pred,
                        thresholds=thresholds,
                        n_volumes=len(df_labels),
                        n_boxes=len(df_true),
                        evaluation_fps=fps_per_vol
                       )

    return tpr_positive, tpr_all


def froc(df_pred: pd.DataFrame,
         thresholds: list,
         n_volumes: int,
         n_boxes: int,
         evaluation_fps: tuple
        ) -> list:
    """Sensitivity at the requested numbers of false positives per volume.

    The thresholds are visited from highest to lowest.  At each one the
    predictions scoring at least the threshold are kept; sensitivity is the
    number of distinct (StudyUID, View, TP, GTID) hits over n_boxes and the
    false-positive rate is the number of kept non-hits over n_volumes.  The
    sweep stops once the rate exceeds the largest requested value, and the
    curve is linearly interpolated at every entry of evaluation_fps.
    """
    sensitivities = []
    fp_rates = []
    for cutoff in sorted(thresholds, reverse=True):
        kept = df_pred.loc[df_pred["Score"] >= cutoff]

        # a ground-truth box hit by several predictions counts only once
        distinct = kept.reset_index().drop_duplicates(
            subset=["StudyUID", "View", "TP", "GTID"]
        )
        hit_count = float(sum(distinct["TP"]))
        sensitivity = hit_count / n_boxes

        miss_count = float(len(kept[kept["TP"] == 0]))
        fp_rate = miss_count / n_volumes

        sensitivities.append(sensitivity)
        fp_rates.append(fp_rate)
        if fp_rate > max(evaluation_fps):
            break

    return [np.interp(target, fp_rates, sensitivities) for target in evaluation_fps]


def distance(pred, true):
    """Euclidean distance between the centre of a predicted 3D box and a
    ground-truth box (centred in-plane, located at slice ``true.Slice``)."""
    offset = (
        (pred.X + pred.Width / 2) - (true.X + true.Width / 2),
        (pred.Y + pred.Height / 2) - (true.Y + true.Height / 2),
        (pred.Z + pred.Depth / 2) - true.Slice,
    )
    return np.linalg.norm(offset)

def is_tp(pred, true, slice_offset, min_dist=100):
    """Tell whether a predicted 3D box hits a ground-truth box.

    The prediction counts as a hit when its in-plane centre lies within a
    radius of the ground-truth centre (half the ground-truth diagonal, but
    at least ``min_dist``) and its central slice is at most ``slice_offset``
    slices away from ``true.Slice``.
    """
    dx = (pred.X + pred.Width / 2) - (true.X + true.Width / 2)
    dy = (pred.Y + pred.Height / 2) - (true.Y + true.Height / 2)

    # 2D distance between true and predicted center points
    planar_gap = np.linalg.norm((dx, dy))

    # radius derived from the size of the true box, floored at min_dist
    radius = max(np.sqrt(true.Width ** 2 + true.Height ** 2) / 2.0, min_dist)

    slice_gap = np.abs((pred.Z + pred.Depth / 2) - true.Slice)

    return planar_gap <= radius and slice_gap <= slice_offset

### 3D Candidate Generation Non-Maximum Suppression

In [None]:
def nms_dbtex(df_pred: pd.DataFrame, num_slices_info: dict) -> pd.DataFrame:
    """Non-maximum suppression of the 3D candidates of every volume.

    Within each (StudyUID, View) volume the candidates are visited in order
    of increasing Score.  A candidate that matches an already kept one
    (see is_match; the slice tolerance is a quarter of the volume's slice
    count taken from num_slices_info[PatientID][StudyUID][View]) replaces
    it, so the higher-scoring box of each matching pair survives.
    Candidates without a match are kept as they are.

    returns:
        the surviving rows, ordered by their original row label, with the
        columns PatientID, StudyUID, View, X, Width, Y, Height, Z, Depth,
        Score and a fresh 0..n-1 index
    """
    out_columns = ['PatientID','StudyUID','View',
                   'X','Width','Y','Height','Z','Depth',
                   'Score']
    work_columns = out_columns + ['index']

    # reset_index() stores the original row label in an 'index' column; it
    # is used at the end to restore the input order
    df_pred = df_pred.reset_index()[work_columns]

    survivors_all = []
    # sort=False visits the volumes in order of first appearance
    for _, df_vol in df_pred.groupby(['StudyUID', 'View'], sort=False):
        survivors = []
        for pred in df_vol.sort_values(['Score']).itertuples(index=False):
            for k, temp in enumerate(survivors):
                if is_match(temp, pred,
                            num_slices_info[temp.PatientID][temp.StudyUID][temp.View] / 4):
                    # pred scores at least as high as temp: it takes its place
                    survivors[k] = pred
                    break
            else:
                survivors.append(pred)
        survivors_all.extend(survivors)

    # build the frame once instead of appending row by row
    # (DataFrame.append no longer exists in pandas >= 2.0)
    df_pred_new = pd.DataFrame([tuple(row) for row in survivors_all],
                               columns=work_columns)
    df_pred_new = df_pred_new.sort_values(['index']).reset_index(drop=True).drop(columns=['index'])

    return df_pred_new

def is_match(lower_prob_pred, higher_prob_pred, slice_offset, min_dist=100):
    """Tell whether two predicted 3D boxes describe the same finding.

    The boxes match when their in-plane centres are within a radius (half
    the diagonal of the higher-probability box, but at least ``min_dist``)
    and their central slices differ by at most ``slice_offset``.
    """
    low, high = lower_prob_pred, higher_prob_pred

    # offsets between the two box centres along each axis
    dx = (low.X + low.Width / 2) - (high.X + high.Width / 2)
    dy = (low.Y + low.Height / 2) - (high.Y + high.Height / 2)
    dz = (low.Z + low.Depth / 2) - (high.Z + high.Depth / 2)

    # 2D distance between lower and higher probability prediction center points
    planar_gap = np.linalg.norm((dx, dy))

    # radius derived from the higher probability box, floored at min_dist
    radius = max(np.sqrt(high.Width ** 2 + high.Height ** 2) / 2.0, min_dist)

    slice_gap = np.abs(dz)

    return planar_gap <= radius and slice_gap <= slice_offset

# ---------------
# Training Set   
# ---------------

### Loading Slice (2D) Predictions and Metadata

In [None]:
data = 'training'

# Slice-level (2D) predictions: sls[volume index][slice index] -> prediction dict
sls = {}
vol_ids_temp = []
for location in [
#                 '<location 1 of predictions for DBT volume slices>'
#                 '<location 2 of predictions for DBT volume slices>'
#                 .
#                 .
#                 .
                ]:
    vol_ids = np.unique([_.split('.')[0] for _ in os.listdir(location)])
    print('total number of slices:', len(os.listdir(location)))
    print('total number of DBT scans', len(vol_ids))
    print('')
    for i, vol_id in enumerate(vol_ids):
        # continue the numbering after the volumes of the previous locations
        i += len(vol_ids_temp)
        if i not in sls:
            sls[i] = {}
        for path in glob.glob(os.path.join(location,vol_id+'.*')):
            # NOTE: pickle.load can execute arbitrary code - only point
            # `location` at prediction files you produced yourself.
            with open(path, 'rb') as f:
                sl = pickle.load(f)
            # the slice number is the last '_'-separated token before the extension
            sl_idx = int(path.rsplit('.',1)[0].rsplit('_')[-1])
            if sl_idx in sls[i]:
                # several files for one slice: stack their predictions
                for key in sls[i][sl_idx].keys():
                    sls[i][sl_idx][key] =\
                        np.concatenate((sls[i][sl_idx][key],sl[key]),axis=0)
            else:
                sls[i][sl_idx] = sl
    vol_ids_temp = np.concatenate((vol_ids_temp, vol_ids), axis=0)

location = '' # <specified location of SPIE data and metadata>

train_paths = pd.read_csv(os.path.join(location,'BCS-DBT file-paths-train.csv'))
train_boxes = pd.read_csv(os.path.join(location,'BCS-DBT boxes-train.csv'))
train_labels = pd.read_csv(os.path.join(location,'BCS-DBT labels-train.csv'))
print('total number of ground truth boxes:', len(train_boxes))
print('total number of training set DBT scans:', len(train_labels))
prepare_submission = train_paths[['PatientID','StudyUID','View']].copy()
prepare_submission['VolID'] =\
    train_paths['descriptive_path'].apply(lambda x: x.rsplit('/',2)[1].split('.')[0])

vol_ids = vol_ids_temp

# index of the last volume to process; lower it to work on a subset
i_ = len(vol_ids)

# Labels of the loaded volumes, in loading order.
# NOTE(review): the mask is built from prepare_submission (file-paths CSV)
# and applied to train_labels, which presumably relies on both CSVs listing
# the volumes in the same row order - confirm.
# The frames are collected and concatenated once because DataFrame.append
# no longer exists in pandas >= 2.0.
label_frames = [train_labels[prepare_submission['VolID'] == vol_id]
                for vol_id in vol_ids[:i_ + 1]]
if label_frames:
    subset_labels = pd.concat(label_frames, ignore_index=True)
else:
    subset_labels = train_labels.iloc[:0].copy()

# Ground-truth boxes that belong to the selected volumes.
box_frames = [train_boxes[(train_boxes['StudyUID'] == ID.StudyUID) &
                          (train_boxes['View'] == ID.View)]
              for ID in subset_labels[['StudyUID', 'View']].itertuples()]
if box_frames:
    subset_boxes = pd.concat(box_frames, ignore_index=True)
else:
    subset_boxes = train_boxes.iloc[:0].copy()

print('number of ground truth boxes (in split(s)):', len(subset_boxes))
print('number of training set dbt scans (in split(s)):', len(subset_labels))

### 3D Candidate Generation Optimization

In [None]:
# Best hyper-parameter combination found so far and the metrics it reached:
# [mean sensitivity on positive volumes, sensitivity on all volumes at 2 FPs/volume]
parameters = {'continuity': np.nan,
              'score_threshold': np.nan,
              'depth_threshold': np.nan,
              'intersection_thresholds': [np.nan, np.nan],
              'metrics': [-np.inf, -np.inf]}

apply_nms_3D = False

intersection_mode = 'IoSIB'
box_scores_function = np.mean
top_n_scores = 10
weighted_boxes_fusion = False

submission_columns = ['PatientID',
                      'StudyUID',
                      'View',
                      'X',
                      'Width',
                      'Y',
                      'Height',
                      'Z',
                      'Depth',
                      'Score']

# grid search over the candidate-generation hyper-parameters
for continuity in [0.5]:
    for score_threshold in [0.85]:
        for depth_threshold in [1]:
            for intersection_thresholds in [[0.75, 1]]:
                # ---- 3D candidates for every loaded volume ----
                slice_groups = {}
                boxes3D = {}
                scores = {}
                box2D_scores = {}
                num_slices = {}
                for vol_idx in range(len(sls)):
                    (slice_groups[vol_idx], boxes3D[vol_idx],
                     scores[vol_idx], box2D_scores[vol_idx]) =\
                        sls_to_vol(sls[vol_idx],
                                   continuity=continuity,
                                   score_threshold=score_threshold,
                                   depth_threshold=depth_threshold,
                                   intersection_thresholds=intersection_thresholds,
                                   intersection_mode=intersection_mode,
                                   box_scores_function=box_scores_function,
                                   top_n_scores=top_n_scores,
                                   weighted_boxes_fusion=weighted_boxes_fusion)
                    num_slices[vol_idx] = len(sls[vol_idx])
                    if vol_idx == i_:
                        break
                # the snapshot shows the last processed volume
                print('*****************************')
                print('* Snapshot of sample volume *')
                print('*****************************')
                print('volume index:', vol_idx)
                print('number of slices:', len(sls[vol_idx]))
                print('size of slice groups:')
                for group_id in slice_groups[vol_idx]:
                    print(len(slice_groups[vol_idx][group_id]))
                print('boxes:', boxes3D[vol_idx])
                print('scores:', scores[vol_idx])

                # ---- submission table and slice counts per volume ----
                # rows are collected first and the frame is built once
                # (DataFrame.append no longer exists in pandas >= 2.0)
                submission_rows = []
                PatientID_dict = {}
                for i, vol_id in enumerate(vol_ids):
                    prepare_submission_row =\
                        prepare_submission[prepare_submission['VolID'] == vol_id]

                    PatientID = list(prepare_submission_row['PatientID'])[0]
                    StudyUID = list(prepare_submission_row['StudyUID'])[0]
                    View = list(prepare_submission_row['View'])[0]

                    PatientID_dict.setdefault(PatientID, {})\
                                  .setdefault(StudyUID, {})[View] = num_slices[i]
                    for j in boxes3D[i]:
                        box = boxes3D[i][j]
                        submission_rows.append([PatientID,
                                                StudyUID,
                                                View,
                                                int(box['x'][0]),
                                                int(box['x'][1] - box['x'][0]),
                                                int(box['y'][0]),
                                                int(box['y'][1] - box['y'][0]),
                                                int(box['z'][0]),
                                                int(box['z'][1] - box['z'][0]) + 1,
                                                scores[i][j]])
                    if i == i_:
                        break
                submission_file = pd.DataFrame(submission_rows,
                                               columns=submission_columns)

                if apply_nms_3D:
                    print('************************************************')
                    print('* Non-Maximum Suppression of 3D Candidate List *')
                    print('************************************************')
                    submission_file = nms_dbtex(submission_file, PatientID_dict)

                # ---- spot check on one hard-coded example volume ----
                print('************************************************')
                print('* Snapshot of submission file and ground truth *')
                print('************************************************')
                print(submission_file.loc[submission_file['PatientID'] == 'DBT-P00060'])
                print(train_boxes[train_boxes['PatientID'] == 'DBT-P00060'])
                for PatientID in PatientID_dict:
                    for StudyUID in PatientID_dict[PatientID]:
                        for View in PatientID_dict[PatientID][StudyUID]:
                            if not (PatientID == 'DBT-P00060' and\
                                    StudyUID == 'DBT-S00787' and\
                                    View == 'rcc'):
                                continue
                            volume_preds = submission_file[(submission_file['PatientID'] == PatientID) &\
                                                           (submission_file['StudyUID'] == StudyUID) &\
                                                           (submission_file['View'] == View)]
                            volume_truth = train_boxes[(train_boxes['PatientID'] == PatientID) &\
                                                       (train_boxes['StudyUID'] == StudyUID) &\
                                                       (train_boxes['View'] == View)]
                            for i, df_row_i in volume_preds.iterrows():
                                for j, df_row_j in volume_truth.iterrows():
                                    print(i, j,
                                          PatientID,
                                          View,
                                          is_tp(df_row_i, df_row_j,
                                                slice_offset=PatientID_dict[PatientID][StudyUID][View] / 4))

                # ---- FROC metrics ----
                print('**********************')
                print('* Evaluation Metrics *')
                print('**********************')
                tpr_positive, tpr_all =\
                    evaluate(df_labels=subset_labels,
                             df_pred=submission_file,
                             df_true=subset_boxes,
                             num_slices_info=PatientID_dict,
                             fps_per_vol=(1.0, 2.0, 3.0, 4.0))
                print('tpr_positive:', tpr_positive)
                print('tpr_all:', tpr_all)
                print('avg_tpr_positive:', np.mean(tpr_positive[:4]))
                print('avg_tpr_all:', np.mean(tpr_all[:4]))
                tpr_positive_2, tpr_all_2 =\
                    evaluate(df_labels=subset_labels,
                             df_pred=submission_file,
                             df_true=subset_boxes,
                             num_slices_info=PatientID_dict,
                             fps_per_vol=(2.0,))
                print('tpr_positive:', tpr_positive_2)
                print('tpr_all:', tpr_all_2)

                # keep the combination with the best mean sensitivity on
                # positive volumes; ties are broken by tpr_all_2
                if np.mean(tpr_positive) > parameters['metrics'][0] or \
                   (np.mean(tpr_positive) == parameters['metrics'][0] and \
                    tpr_all_2 > parameters['metrics'][1]):
                    parameters.update({'continuity': continuity,
                                       'score_threshold': score_threshold,
                                       'depth_threshold': depth_threshold,
                                       'intersection_thresholds': intersection_thresholds,
                                       'metrics': [np.mean(tpr_positive),
                                                   tpr_all_2]})

In [None]:
# Mark every training-set 3D candidate of the last evaluated parameter
# combination as true positive or not, then display the resulting table.
training_set_predictions = find_tps(submission_file, subset_boxes, PatientID_dict)
training_set_predictions

# ------------------------
# Validation (Test) Set   
# ------------------------

### Loading Slice (2D) Predictions and Metadata

In [None]:
data = 'test' # 'validation' 'test'

# Directories holding the pickled per-slice (2D) predictions. Every directory
# must contain predictions for the same set of volumes; boxes found for the
# same slice in several directories are concatenated.
# NOTE: keep the trailing commas when uncommenting entries -- without them
# Python silently joins adjacent string literals into a single path.
prediction_dirs = [
#     '<location 1 of predictions for DBT volume slices>',
#     '<location 2 of predictions for DBT volume slices>',
#     .
#     .
#     .
]


def load_slice_predictions(prediction_dirs):
    '''
    Load pickled slice predictions from one or more directories.

    File names are expected to look like '<vol_id>.<anything>_<slice index>.<ext>':
    the volume ID is the text before the first '.', and the slice index is the
    integer after the last '_' once the extension is removed.

    prediction_dirs: iterable of directory paths

    returns:
        sls: dict, volume index -> slice index -> prediction dict; values of
             slices present in several directories are concatenated on axis 0
        vol_ids: sorted array of volume IDs (position == volume index in sls);
                 empty when prediction_dirs is empty
    raises:
        ValueError if the directories do not hold the same set of volume IDs
        (the volume indices would otherwise refer to different volumes)
    '''
    sls = {}
    vol_ids = None
    for pred_dir in prediction_dirs:
        file_names = os.listdir(pred_dir)
        dir_vol_ids = np.unique([name.split('.')[0] for name in file_names])
        print('total number of slices:', len(file_names))
        print('total number of DBT scans', len(dir_vol_ids))
        print('')

        if vol_ids is None:
            vol_ids = dir_vol_ids
        elif not np.array_equal(vol_ids, dir_vol_ids):
            raise ValueError(
                'volume IDs in {!r} differ from those of the first '
                'prediction directory'.format(pred_dir))

        for i, vol_id in enumerate(vol_ids):
            vol_slices = sls.setdefault(i, {})
            # escape glob metacharacters so IDs/paths containing '[', '*' or
            # '?' are matched literally
            pattern = os.path.join(glob.escape(pred_dir),
                                   glob.escape(vol_id) + '.*')
            for path in glob.glob(pattern):
                # SECURITY: pickle.load can execute arbitrary code -- only
                # point prediction_dirs at files you produced yourself.
                with open(path, 'rb') as f:
                    sl = pickle.load(f)
                sl_idx = int(path.rsplit('.', 1)[0].rsplit('_')[-1])
                if sl_idx in vol_slices:
                    merged = vol_slices[sl_idx]
                    for key in merged:
                        merged[key] = np.concatenate((merged[key], sl[key]),
                                                     axis=0)
                else:
                    vol_slices[sl_idx] = sl

    if vol_ids is None:
        # no directories configured: return an empty ID list instead of
        # leaving a stale vol_ids from an earlier section in scope
        vol_ids = np.array([], dtype=str)
    return sls, vol_ids


sls, vol_ids = load_slice_predictions(prediction_dirs)

# Metadata of the selected split ('data'); the directory is resolved relative
# to the notebook's working directory.
location = '../{}/metadata'.format(data)
paths_csv = os.path.join(location, 'BCS-DBT file-paths-{}.csv'.format(data))
paths = pd.read_csv(paths_csv)
print('total number of {} set DBT scans:'.format(data), len(paths))

# One row per volume with the identifiers needed for the submission table.
# VolID is the second-to-last component of 'descriptive_path', cut at its
# first '.', so it can be compared with the IDs parsed from the file names.
prepare_submission = paths[['PatientID','StudyUID','View']].assign(
    VolID=paths['descriptive_path'].apply(
        lambda x: x.rsplit('/',2)[1].split('.')[0]))

### 3D Candidate Generation

In [None]:
# Index at which the loops in this cell stop (they break once their index
# equals i_). With i_ = len(vol_ids) the break is normally never reached;
# presumably i_ is lowered by hand to process only a prefix of the volumes.
i_ = len(vol_ids)

# Toggles the 3D non-maximum suppression step further down in this cell.
apply_nms_3D = False

# Arguments forwarded unchanged to sls_to_vol.
continuity = 0.5                      # fraction of box-free slices tolerated as gaps
score_threshold = 0.85
depth_threshold = 1                   # values < 1 are scaled by the slice count inside sls_to_vol
intersection_thresholds = [0.75, 1]
intersection_mode = 'IoSIB'
box_scores_function = np.mean
top_n_scores = 10
weighted_boxes_fusion = False

# Per-volume results, keyed by volume index.
slice_groups, boxes3D, scores, box2D_scores, num_slices = {}, {}, {}, {}, {}

# NOTE(review): the loop variable is `_`, which shadows IPython's
# "last output" name. It is kept because the snapshot code below reads `_`
# after the loop to report on the last processed volume.
for _ in range(len(sls)):
    (slice_groups[_],
     boxes3D[_],
     scores[_],
     box2D_scores[_]) = sls_to_vol(
        sls[_],
        continuity=continuity,
        score_threshold=score_threshold,
        depth_threshold=depth_threshold,
        intersection_thresholds=intersection_thresholds,
        intersection_mode=intersection_mode,
        box_scores_function=box_scores_function,
        top_n_scores=top_n_scores,
        weighted_boxes_fusion=weighted_boxes_fusion,
    )
    num_slices[_] = len(sls[_])
    if _ == i_:
        break

# Quick sanity check on the last volume handled by the loop above
# (`_` still holds its index).
print('*****************************',
      '* Snapshot of sample volume *',
      '*****************************',
      sep='\n')
print('volume index:', _)
print('number of slices:', len(sls[_]))
print('size of slice groups:')
# one line per slice group: the number of entries in that group
for __ in slice_groups[_]:
    print(len(slice_groups[_][__]))
print('boxes:', boxes3D[_])
print('scores:', scores[_])

# Column order of the submission table.
submission_columns = ['PatientID',
                      'StudyUID',
                      'View',
                      'X',
                      'Width',
                      'Y',
                      'Height',
                      'Z',
                      'Depth',
                      'Score']

# Rows are gathered in a plain list and converted to a DataFrame once at the
# end: DataFrame.append was removed in pandas 2.0 (this notebook upgrades
# pandas in its first cell) and copied the whole frame on every call.
submission_rows = []

# PatientID -> StudyUID -> View -> number of slices in that volume
PatientID_dict = {}
for i, vol_id in enumerate(vol_ids):
    prepare_submission_row = prepare_submission[prepare_submission['VolID'] == vol_id]
    if prepare_submission_row.empty:
        raise IndexError(
            'no metadata row found for volume {!r}'.format(vol_id))

    PatientID = prepare_submission_row['PatientID'].iloc[0]
    StudyUID = prepare_submission_row['StudyUID'].iloc[0]
    View = prepare_submission_row['View'].iloc[0]

    PatientID_dict.setdefault(PatientID, {}).setdefault(StudyUID, {})[View] =\
        num_slices[i]

    for j in boxes3D[i]:
        box3D = boxes3D[i][j]
        # Each axis entry holds a (start, end) pair. np.diff yields a
        # one-element array; .item() extracts the scalar explicitly because
        # int() on a 1-element array is deprecated since NumPy 1.25.
        X = int(box3D['x'][0])
        Width = int(np.diff(box3D['x']).item())
        Y = int(box3D['y'][0])
        Height = int(np.diff(box3D['y']).item())
        Z = int(box3D['z'][0])
        # +1 because the z range is inclusive of both end slices
        Depth = int(np.diff(box3D['z']).item()) + 1
        Score = scores[i][j]
        submission_rows.append([PatientID,
                                StudyUID,
                                View,
                                X,
                                Width,
                                Y,
                                Height,
                                Z,
                                Depth,
                                Score])
    if i == i_:
        break

submission_file = pd.DataFrame(submission_rows, columns=submission_columns)
        
# Optional post-processing: hand the candidate list to nms_dbtex together
# with the per-view slice counts and keep whatever it returns.
if apply_nms_3D:
    print('************************************************',
          '* Non-Maximum Suppression of 3D Candidate List *',
          '************************************************',
          sep='\n')
    submission_file = nms_dbtex(submission_file, PatientID_dict)

print('*******************************',
      '* Snapshot of submission file *',
      '*******************************',
      sep='\n')
# Patient whose rows are printed as a spot check, chosen per split.
# For any other value of `data`, PID is left untouched.
snapshot_patient_by_split = {'validation': 'DBT-P01293',
                             'test': 'DBT-P02609'}
if data in snapshot_patient_by_split:
    PID = snapshot_patient_by_split[data]
print(submission_file.loc[submission_file['PatientID'] == PID])