In [1]:
from PIL import Image
from scipy.ndimage import gaussian_filter
from skimage.measure import label, regionprops
from skimage.segmentation import expand_labels
from typing import Tuple
from IPython.display import display
import plotly.graph_objects as go
import tqdm
import os
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Directory whose entries are listed and fed to `redact_image` one by one.
ECG_FOLDER = '../../data/ecg_ahus_phone'
# Directory the redacted copies are written to (same file names as the inputs).
SAVE_FOLDER = '../../data/redacted_ecg_ahus_phone'
# Forwarded as `display=` to `redact_image`; True turns on its matplotlib
# diagnostic plots for every processed image.
DISPLAY = False

In [7]:
def redact_image(
        image_path: str,
        redacted_treshold_grayscale: int = 60,
        min_redacted_area_proportion: float = 3e-5,
        expand_redact_pixels: int = 10,
        display: bool = False
    ) -> Tuple[np.ndarray, np.ndarray]:
    """Paint the dark regions of an image pure green and return the result.

    Pipeline:
      1. Load the image and derive a grayscale copy.
      2. Blur the grayscale copy (Gaussian, sigma=5) and mark every pixel whose
         blurred value is below ``redacted_treshold_grayscale`` as a candidate.
      3. Split the candidates into connected components and keep only those
         whose area, as a fraction of the whole image, exceeds
         ``min_redacted_area_proportion``.
      4. Grow the kept components by ``expand_redact_pixels`` pixels.
      5. Overwrite the resulting mask with ``(0, 255, 0)`` in an RGB copy of
         the image.

    Args:
        image_path: Path of the image file to process.
        redacted_treshold_grayscale: Blurred grayscale values strictly below
            this are treated as candidate pixels. (The misspelling in the
            parameter name is kept for backward compatibility.)
        min_redacted_area_proportion: Minimum component area divided by the
            total pixel count for a component to be kept.
        expand_redact_pixels: Distance, in pixels, by which the kept components
            are expanded before painting.
        display: When True, show matplotlib diagnostics (grayscale input,
            thresholded mask, area-vs-perimeter scatter, final mask).

    Returns:
        A tuple ``(img_redacted, mask)`` where ``img_redacted`` is the RGB
        image array with masked pixels set to green, and ``mask`` is the
        expanded mask array (truthy where pixels were painted).
    """
    # Decode inside a context manager so the underlying file handle is released
    # promptly; this function is called for hundreds of files in a row.
    with Image.open(image_path) as img:
        # Force three channels: the fill colour below is an RGB triple, which
        # cannot be broadcast into grayscale, palette or RGBA arrays.
        img_redacted = np.array(img.convert('RGB'))
        img_np = np.array(img.convert('L'))

    if display:
        plt.imshow(img_np)

    img_blurred = gaussian_filter(img_np, sigma=5)
    redacted = img_blurred < redacted_treshold_grayscale

    if display:
        plt.figure()
        plt.imshow(redacted, cmap='gray')
        plt.show()

    labeled = label(redacted)

    # Drop small specks: only components covering a large enough share of the
    # image are kept.
    regions_filtered = [
        region for region in regionprops(labeled)
        if region.area / img_np.size > min_redacted_area_proportion
    ]
    ids = [region.label for region in regions_filtered]

    if display:
        # Region perimeters are only needed for this diagnostic, so they are
        # computed here rather than on every call.
        areas = [region.area for region in regions_filtered]
        perimeters = [region.perimeter for region in regions_filtered]

        plt.figure()
        plt.plot(areas, perimeters, 'o')
        plt.xlabel('Area')
        plt.ylabel('Perimeter')
        plt.title('Area vs Perimeter of Regions')
        plt.grid()

    regions_after_filtering = np.isin(labeled, ids)
    if ids:
        # Only expand when something was kept; with an all-empty mask there is
        # nothing to grow and the distance transform would have no foreground.
        regions_after_filtering = expand_labels(
            regions_after_filtering, distance=expand_redact_pixels
        )

    if display:
        plt.figure()
        plt.imshow(regions_after_filtering, cmap='gray')
        # Flush the pending figures now so they do not pile up when this
        # function is called repeatedly from a loop.
        plt.show()

    img_redacted[regions_after_filtering > 0] = np.array([0, 255, 0])

    return img_redacted, regions_after_filtering


# Per-image bookkeeping for the live plot: file name and the fraction of the
# image that ended up redacted.
names = []
sizes = []


scatter = go.Scatter(
    x=[], y=[], mode='markers+text',
    text=[], textposition='top center',
    marker=dict(size=10)
)

# Live-updating widget: one labelled marker per processed image, all at x=0,
# with the redacted fraction on the y axis.
fig = go.FigureWidget(data=[scatter])
display(fig)

# The output directory may not exist yet (e.g. on a fresh checkout); without
# this the first save would fail.
os.makedirs(SAVE_FOLDER, exist_ok=True)

# Sort for a reproducible processing order (os.listdir order is arbitrary) and
# skip sub-directories, which cannot be opened as images.
image_files = sorted(
    entry for entry in os.listdir(ECG_FOLDER)
    if os.path.isfile(os.path.join(ECG_FOLDER, entry))
)
image_names = tqdm.tqdm(image_files, desc='Redacting images', unit='image')

for image_name in image_names:
    path = os.path.join(ECG_FOLDER, image_name)

    redacted_image, regions_after_filtering = redact_image(path, display=DISPLAY)
    names.append(image_name)
    sizes.append(np.sum(regions_after_filtering > 0)/regions_after_filtering.size)

    Image.fromarray(redacted_image).save(os.path.join(SAVE_FOLDER, image_name))

    # Push all three trace attributes in one message so the widget never
    # renders with x/y/text of different lengths.
    with fig.batch_update():
        fig.data[0].x = np.zeros_like(sizes)
        fig.data[0].y = sizes
        fig.data[0].text = names



FigureWidget({
    'data': [{'marker': {'size': 10},
              'mode': 'markers+text',
              'text': [],
              'textposition': 'top center',
              'type': 'scatter',
              'uid': 'af0807f9-2614-4c8a-8f38-37a09fbba081',
              'x': [],
              'y': []}],
    'layout': {'template': '...'}
})

Redacting images:   0%|          | 0/325 [00:00<?, ?image/s]

Redacting images:  25%|██▍       | 80/325 [01:47<05:29,  1.35s/image]


KeyboardInterrupt: 