In [50]:
import os
from os.path import join
from matplotlib import pyplot as plt
import pickle
import numpy as np

In [51]:
# Root folder of the preprocessed PARTNER data; the 'images' and 'labels'
# sub-folders are listed below.
partner_dir = r'/local/scratch-2/gp518/preprocessed_data/PARTNER'
images_dir = join(partner_dir, 'images')
labels_dir = join(partner_dir, 'labels')

# Bare file names (no directory component) found in each sub-folder.
imgs_names = os.listdir(images_dir)
lbls_names = os.listdir(labels_dir)

# A subject id is whatever precedes the first '-' in an image file name;
# deduplicate and sort so later cells iterate in a stable order.
subject_ids = sorted({name.split('-')[0] for name in imgs_names})
subject_ids[:10]


['1002',
 '1003',
 '1004',
 '1006',
 '1008',
 '1011',
 '1015',
 '1016',
 '1019',
 '1020']

In [52]:

# Build one exam record per subject, keyed by subject id.
#
# Every record starts with 'horizontal_flip': 'NO' and all 'cancer_label'
# counters at 0. For each laterality (L/R) and view (CC/MLO) whose image file
# exists, the record gets a '<lat>-<view>' entry holding the file stem; if a
# label file with the same name exists and contains any non-zero pixel, a
# '<lat>-<view>_malignant_seg' entry is added as well.
#
# NOTE(review): every non-empty label mask is recorded as *malignant*; the
# benign counters are never touched here -- confirm that matches the dataset.
# NOTE(review): 'malignant' is incremented once per affected side, so a subject
# with positive masks on both sides ends up with 'malignant': 2 -- confirm the
# downstream consumer expects a count rather than a 0/1 flag.

# The directory listings are plain lists; sets make the membership tests in
# the nested loops constant-time instead of a linear scan per file.
_img_name_set = set(imgs_names)
_lbl_name_set = set(lbls_names)

_dict = {}
for subject_id in subject_ids:
    exam = {
        'horizontal_flip': 'NO',
        'cancer_label': {
            'benign': 0,
            'right_benign': 0,
            'malignant': 0,
            'left_benign': 0,
            'unknown': 0,
            'right_malignant': 0,
            'left_malignant': 0
            }
        }
    _dict[subject_id] = exam

    for laterality, laterality_long in {'L': 'left', 'R': 'right'}.items():
        # True once any view of this side has a non-empty label mask.
        side_has_mask = False
        for view in ['CC', 'MLO']:
            filename = f'{subject_id}-{laterality}-{view}.png'
            if filename not in _img_name_set:
                continue

            stem = filename[:-len('.png')]
            exam[f'{laterality}-{view}'] = [stem]

            if filename in _lbl_name_set:
                mask = np.asarray(plt.imread(join(partner_dir, 'labels', filename)))
                # A mask counts as positive when it has at least one non-zero pixel.
                if mask.sum() > 0:
                    exam[f'{laterality}-{view}_malignant_seg'] = [stem]
                    side_has_mask = True

        # Count each side at most once, however many of its views carry a mask.
        if side_has_mask:
            exam['cancer_label']['malignant'] += 1
            exam['cancer_label'][f'{laterality_long}_malignant'] += 1
_dict



{'1002': {'horizontal_flip': 'NO',
  'cancer_label': {'benign': 0,
   'right_benign': 0,
   'malignant': 1,
   'left_benign': 0,
   'unknown': 0,
   'right_malignant': 1,
   'left_malignant': 0},
  'L-CC': ['1002-L-CC'],
  'L-MLO': ['1002-L-MLO'],
  'R-CC': ['1002-R-CC'],
  'R-CC_malignant_seg': ['1002-R-CC'],
  'R-MLO': ['1002-R-MLO'],
  'R-MLO_malignant_seg': ['1002-R-MLO']},
 '1003': {'horizontal_flip': 'NO',
  'cancer_label': {'benign': 0,
   'right_benign': 0,
   'malignant': 1,
   'left_benign': 0,
   'unknown': 0,
   'right_malignant': 0,
   'left_malignant': 1},
  'L-CC': ['1003-L-CC'],
  'L-MLO': ['1003-L-MLO'],
  'L-MLO_malignant_seg': ['1003-L-MLO'],
  'R-CC': ['1003-R-CC'],
  'R-MLO': ['1003-R-MLO']},
 '1004': {'horizontal_flip': 'NO',
  'cancer_label': {'benign': 0,
   'right_benign': 0,
   'malignant': 1,
   'left_benign': 0,
   'unknown': 0,
   'right_malignant': 0,
   'left_malignant': 1},
  'L-CC': ['1004-L-CC'],
  'L-CC_malignant_seg': ['1004-L-CC'],
  'L-MLO': ['1004

In [53]:
# The pickle stores only the per-exam records (subject-id keys are dropped),
# in the insertion order of _dict.
_list = list(_dict.values())

# Create the output folder if needed, then serialise the exam list into it.
glam_dir = '/local/scratch-2/gp518/preprocessed_data/PARTNER_GLAM'
exam_list_path = '/local/scratch-2/gp518/preprocessed_data/PARTNER_GLAM/exam_list_before_cropping.pkl'
os.makedirs(glam_dir, exist_ok=True)
with open(exam_list_path, 'wb') as pkl_file:
    pickle.dump(_list, pkl_file, protocol=pickle.HIGHEST_PROTOCOL)

In [9]:
# Load and display an exam list stored under PARTNER/.
# NOTE(review): the export cell in this notebook writes to
# PARTNER_GLAM/exam_list_before_cropping.pkl, whereas this reads from PARTNER/ --
# confirm this is the intended file.
# pickle.load can execute arbitrary code; only open files you produced yourself.
exam_list_path = '/local/scratch-2/gp518/preprocessed_data/PARTNER/exam_list_before_cropping.pkl'
with open(exam_list_path, 'rb') as pkl_file:
    data = pickle.load(pkl_file)
data

[{'horizontal_flip': 'NO',
  'cancer_label': {'benign': 0,
   'right_benign': 0,
   'malignant': 1,
   'left_benign': 0,
   'unknown': 0,
   'right_malignant': 0,
   'left_malignant': 1},
  'L-CC': ['1179-L-CC'],
  'L-CC_malignant_seg': ['1179-L-CC'],
  'L-MLO': ['1179-L-MLO'],
  'L-MLO_malignant_seg': ['1179-L-MLO'],
  'R-CC': ['1179-R-CC'],
  'R-MLO': ['1179-R-MLO']},
 {'horizontal_flip': 'NO',
  'cancer_label': {'benign': 0,
   'right_benign': 0,
   'malignant': 1,
   'left_benign': 0,
   'unknown': 0,
   'right_malignant': 1,
   'left_malignant': 0},
  'L-CC': ['1150-L-CC'],
  'L-MLO': ['1150-L-MLO'],
  'R-CC': ['1150-R-CC'],
  'R-MLO': ['1150-R-MLO'],
  'R-MLO_malignant_seg': ['1150-R-MLO']},
 {'horizontal_flip': 'NO',
  'cancer_label': {'benign': 0,
   'right_benign': 0,
   'malignant': 1,
   'left_benign': 0,
   'unknown': 0,
   'right_malignant': 1,
   'left_malignant': 0},
  'L-CC': ['PBCP_0604_T-L-CC'],
  'L-MLO': ['PBCP_0604_T-L-MLO'],
  'R-CC': ['PBCP_0604_T-R-CC'],
  'R-CC

In [4]:
# Load and display the exam list shipped in the GLAM project's sample_data folder
# (presumably as a reference for the expected record layout -- confirm).
sample_list_path = '/home/gp518/projects/GLAM/sample_data/exam_list_before_cropping.pkl'
with open(sample_list_path, 'rb') as pkl_file:
    data = pickle.load(pkl_file)
data

[{'horizontal_flip': 'NO',
  'L-CC': ['0_L-CC'],
  'L-MLO': ['0_L-MLO'],
  'R-MLO': ['0_R-MLO'],
  'R-CC': ['0_R-CC'],
  'cancer_label': {'benign': 1,
   'right_benign': 1,
   'malignant': 0,
   'left_benign': 0,
   'unknown': 0,
   'right_malignant': 0,
   'left_malignant': 0},
  'L-CC_benign_seg': ['0_L-CC_benign'],
  'L-CC_malignant_seg': ['0_L-CC_malignant'],
  'L-MLO_benign_seg': ['0_L-MLO_benign'],
  'L-MLO_malignant_seg': ['0_L-MLO_malignant'],
  'R-MLO_benign_seg': ['0_R-MLO_benign'],
  'R-MLO_malignant_seg': ['0_R-MLO_malignant'],
  'R-CC_benign_seg': ['0_R-CC_benign'],
  'R-CC_malignant_seg': ['0_R-CC_malignant']},
 {'horizontal_flip': 'NO',
  'L-CC': ['1_L-CC'],
  'L-MLO': ['1_L-MLO'],
  'R-MLO': ['1_R-MLO'],
  'R-CC': ['1_R-CC'],
  'cancer_label': {'benign': 0,
   'right_benign': 0,
   'malignant': 1,
   'left_benign': 0,
   'unknown': 0,
   'right_malignant': 1,
   'left_malignant': 0},
  'L-CC_benign_seg': ['1_L-CC_benign'],
  'L-CC_malignant_seg': ['1_L-CC_malignant'],
 

In [5]:
# Load and display the exam list stored as exam_list_after_cropping.pkl
# in the PARTNER_cropped folder.
cropped_list_path = '/local/scratch-2/gp518/preprocessed_data/PARTNER_cropped/exam_list_after_cropping.pkl'
with open(cropped_list_path, 'rb') as pkl_file:
    data = pickle.load(pkl_file)
data

[{'horizontal_flip': 'NO',
  'cancer_label': {'benign': 0,
   'right_benign': 0,
   'malignant': 1,
   'left_benign': 0,
   'unknown': 0,
   'right_malignant': 0,
   'left_malignant': 1},
  'L-CC': ['1179-L-CC'],
  'L-CC_malignant_seg': ['1179-L-CC'],
  'L-MLO': ['1179-L-MLO'],
  'L-MLO_malignant_seg': ['1179-L-MLO'],
  'R-CC': ['1179-R-CC'],
  'R-MLO': ['1179-R-MLO'],
  'window_location': {'L-CC': [(637, 4901, 0, 2850)],
   'R-CC': [(361, 4651, 2090, 4915)],
   'L-MLO': [(0, 4208, 0, 3338)],
   'R-MLO': [(0, 3964, 1638, 4915)]},
  'rightmost_points': {'L-CC': [((2302, 2345), 2799)],
   'R-CC': [((1909, 2513), 2775)],
   'L-MLO': [((2325, 2378), 3287)],
   'R-MLO': [((2134, 2166), 3227)]},
  'bottommost_points': {'L-CC': [(4213, (100, 102))],
   'R-CC': [(4239, (346, 468))],
   'L-MLO': [(4157, (100, 100))],
   'R-MLO': [(3913, (1611, 1650))]},
  'distance_from_starting_side': {'L-CC': [0],
   'R-CC': [0],
   'L-MLO': [0],
   'R-MLO': [0]}},
 {'horizontal_flip': 'NO',
  'cancer_label':

In [6]:
# Load and display the exam list stored as exam_list_final.pkl
# in the PARTNER_cropped folder.
final_list_path = '/local/scratch-2/gp518/preprocessed_data/PARTNER_cropped/exam_list_final.pkl'
with open(final_list_path, 'rb') as pkl_file:
    data = pickle.load(pkl_file)
data

[{'horizontal_flip': 'NO',
  'cancer_label': {'benign': 0,
   'right_benign': 0,
   'malignant': 1,
   'left_benign': 0,
   'unknown': 0,
   'right_malignant': 0,
   'left_malignant': 1},
  'L-CC': ['1179-L-CC'],
  'L-CC_malignant_seg': ['1179-L-CC'],
  'L-MLO': ['1179-L-MLO'],
  'L-MLO_malignant_seg': ['1179-L-MLO'],
  'R-CC': ['1179-R-CC'],
  'R-MLO': ['1179-R-MLO'],
  'window_location': {'L-CC': [(637, 4901, 0, 2850)],
   'R-CC': [(361, 4651, 2090, 4915)],
   'L-MLO': [(0, 4208, 0, 3338)],
   'R-MLO': [(0, 3964, 1638, 4915)]},
  'rightmost_points': {'L-CC': [((2302, 2345), 2799)],
   'R-CC': [((1909, 2513), 2775)],
   'L-MLO': [((2325, 2378), 3287)],
   'R-MLO': [((2134, 2166), 3227)]},
  'bottommost_points': {'L-CC': [(4213, (100, 102))],
   'R-CC': [(4239, (346, 468))],
   'L-MLO': [(4157, (100, 100))],
   'R-MLO': [(3913, (1611, 1650))]},
  'distance_from_starting_side': {'L-CC': [0],
   'R-CC': [0],
   'L-MLO': [0],
   'R-MLO': [0]},
  'best_center': {'L-CC': [(2342, 1838)],
   