In [1]:
import pandas as pd, numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import cv2
from tqdm import tqdm

In [None]:
metadata_csv = "./data/DDSM_marked/metadata.csv"

# Read the raw metadata table; its first CSV column becomes the index.
metadata = pd.read_csv(metadata_csv, index_col=0)

# Add a `name` column of the form
# <patient_id>_<left or right breast>_<image view>_<abnormality type>.jpg
# (later cells use it as the file name of both the scan and its mask).
df = metadata.assign(
    name=lambda meta: meta["patient_id"] + "_" + meta["left or right breast"] + "_" + meta["image view"] + "_" + meta["abnormality type"] + ".jpg"
)

# Keep only the rows describing a mass.
is_mass = df["abnormality type"] == "mass"
df = df[is_mass]


In [2]:
# Load the pre-processed table; its first CSV column becomes the index.
df = pd.read_csv("data/df.csv", index_col=0)

# Later cells look files up through df["name"]. Rebuild that column when the
# CSV does not provide it, so the notebook still runs top-to-bottom on a fresh
# kernel instead of relying on a column created in an earlier session.
# NOTE(review): the pattern mirrors the one applied to metadata.csv
# (<patient_id>_<side>_<view>_<abnormality type>.jpg), mapped onto this file's
# underscore-style column names - confirm it matches the files on disk.
if "name" not in df.columns:
    df["name"] = (
        df["patient_id"]
        + "_" + df["left_or_right_breast"]
        + "_" + df["image_view"]
        + "_" + df["abnormality_type"]
        + ".jpg"
    )

# Only the last expression of a cell is displayed: list the abnormality types present.
df["abnormality_type"].unique()

array(['mass'], dtype=object)

In [8]:
# Show every row recorded for patient P_01682.
df[df["patient_id"] == "P_01682"]

Unnamed: 0,patient_id,breast_density,left_or_right_breast,image_view,abnormality_id,abnormality_type,mass_shape,mass_margins,assessment,pathology,subtlety,calc_type,calc_distribution
1173,P_01682,3,LEFT,CC,1,mass,IRREGULAR,ILL_DEFINED,5,MALIGNANT,3,,
1174,P_01682,3,LEFT,MLO,1,mass,LOBULATED,MICROLOBULATED,5,MALIGNANT,5,,


In [6]:
# Number of rows per assessment value, most frequent first.
assessment_counts = df["assessment"].value_counts()
assessment_counts

assessment
4    702
5    372
3    364
0    162
2     91
1      3
Name: count, dtype: int64

In [24]:
# Read the scan and the mask that belong to the first row of df.
# Both files share the same file name and differ only in their directory.
first_name = df["name"].iloc[0]
scan_file = f"./data/DDSM_marked/resized/{first_name}"
mask_file = f"./data/DDSM_marked/masks/{first_name}"

scan = cv2.imread(scan_file)
# The mask is loaded as a single-channel image and kept as read
# (it is not rescaled to 0/1 here).
mask = cv2.imread(mask_file, cv2.IMREAD_GRAYSCALE)

In [91]:
def create_masked_image(scan_file, mask_file, output_dir="./data/masked_images", mask_threshold=127):
    """Write a copy of a scan in which everything outside the marked region is black.

    The mask is binarised, its largest external contour is located, and a flood
    fill is started from the centre of that contour's bounding box. Pixels that
    are background in the mask *and* were reached by the fill are copied from
    the scan; every other pixel of the output is zero.

    Parameters
    ----------
    scan_file : str or pathlib.Path
        Path of the scan image.
    mask_file : str or pathlib.Path
        Path of the mask image; it is read as a single-channel image.
    output_dir : str or pathlib.Path, optional
        Directory the result is written to. It is created when missing. The
        output keeps the file name of ``scan_file``.
    mask_threshold : int, optional
        Mask pixels strictly brighter than this value count as foreground.

    Returns
    -------
    pathlib.Path
        Location of the written image.

    Raises
    ------
    FileNotFoundError
        If the scan or the mask cannot be read.
    ValueError
        If scan and mask differ in height/width, or the mask has no foreground.
    OSError
        If the result cannot be written.
    """
    # cv2.imread signals failure by returning None instead of raising.
    image = cv2.imread(str(scan_file))
    if image is None:
        raise FileNotFoundError(f"Could not read scan image: {scan_file}")
    mask = cv2.imread(str(mask_file), cv2.IMREAD_GRAYSCALE)
    if mask is None:
        raise FileNotFoundError(f"Could not read mask image: {mask_file}")
    if mask.shape[:2] != image.shape[:2]:
        raise ValueError(
            f"Scan {image.shape[:2]} and mask {mask.shape[:2]} sizes differ: {scan_file}"
        )

    # Binarise to 0/1 with a threshold rather than `// 255`: floor division keeps
    # only pixels that are exactly 255, which lossy image formats do not guarantee.
    _, binary_mask = cv2.threshold(mask, mask_threshold, 1, cv2.THRESH_BINARY)

    # [-2] selects the contour list under both the 2-value and 3-value return
    # conventions of cv2.findContours.
    contours = cv2.findContours(binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    if len(contours) == 0:
        raise ValueError(f"Mask contains no foreground pixels: {mask_file}")
    largest_contour = max(contours, key=cv2.contourArea)

    # Seed the fill at the centre of the largest contour's bounding box.
    # NOTE(review): this assumes that point lies inside the marked outline; if
    # it falls on a foreground pixel or outside the outline the fill will not
    # cover the intended region - confirm against the mask format.
    box_x, box_y, box_w, box_h = cv2.boundingRect(largest_contour)
    seed_point = (box_x + box_w // 2, box_y + box_h // 2)

    # cv2.floodFill requires a scratch mask two pixels larger than the image.
    mask_h, mask_w = binary_mask.shape[:2]
    fill_scratch = np.zeros((mask_h + 2, mask_w + 2), np.uint8)
    filled = binary_mask.copy()
    cv2.floodFill(filled, fill_scratch, seed_point, 1)

    # Keep the pixels the fill turned from 0 to 1: reached by the fill but not
    # part of the mask's own foreground.
    keep = (filled == 1) & (binary_mask == 0)

    masked_image = np.zeros_like(image)
    masked_image[keep] = image[keep]

    # cv2.imwrite returns False (without raising) when it cannot write, e.g.
    # when the target directory is missing - create it and check the result.
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    masked_image_file = output_dir / Path(scan_file).name
    if not cv2.imwrite(str(masked_image_file), masked_image):
        raise OSError(f"Could not write masked image: {masked_image_file}")
    return masked_image_file


In [92]:
# Produce a masked image for every row of df. A scan and its mask share the
# same file name and live in sibling directories.
for image_name in tqdm(df["name"], total=len(df)):
    scan_file = f"./data/DDSM_marked/resized/{image_name}"
    mask_file = f"./data/DDSM_marked/masks/{image_name}"
    create_masked_image(scan_file, mask_file)

100%|██████████| 1696/1696 [00:35<00:00, 47.42it/s]
