# Preprocess

In [1]:
import os
from uuid import uuid4

from prefect import flow, task
import numpy as np
from skimage.filters import try_all_threshold
import matplotlib.pyplot as plt

from bcd.container import BCDContainer
from bcd.preprocess.image.convert import ImageConverter, ImageConverterParams
from bcd.core.orchestration.task import Task
from bcd.dal.repo.task import TaskRepo
from bcd.config import Config


In [2]:
# Configure the project before the container and its resources are created below.
# NOTE(review): 'dev' presumably selects the development environment/dataset — confirm in bcd.config.
Config.set_log_level('INFO')
Config.set_mode('dev')

In [3]:
# Directory that images_converted() inspects to decide whether conversion has already run.
IMAGE_FILEPATH = "data/image/1_dev"

In [4]:
# Create the application container, initialise its resources, then wire it
# into the image preprocessing package.
container = BCDContainer()
container.init_resources()
container.wire(packages=["bcd.preprocess.image"])

# Repositories obtained from the container and used by the cells below.
task_repo = container.dal.task_repo()
image_repo = container.dal.image_repo()

## Stage 0: Convert Images

In [5]:
def images_converted() -> bool:
    """Report whether the image directory already contains any entries.

    Returns:
        bool: True when IMAGE_FILEPATH exists and holds at least one entry;
        False when it is empty or has not been created yet.
    """
    try:
        return bool(os.listdir(IMAGE_FILEPATH))
    except FileNotFoundError:
        # A directory that does not exist yet cannot hold converted images,
        # so report "not converted" instead of crashing on a fresh environment.
        return False

#@task
def convert_images(frac: float):
    """Run one image-conversion task and record it in the task repository.

    Args:
        frac: Forwarded unchanged to ImageConverterParams(frac=...).
            NOTE(review): presumably the fraction of the dataset to
            convert — confirm against ImageConverterParams.
    """
    conversion = Task.create(
        method=ImageConverter,
        params=ImageConverterParams(frac=frac),
    )
    # Every invocation is tagged with a freshly generated job identifier.
    conversion.job_id = str(uuid4())
    conversion.run()
    # The task is stored only after run() has returned.
    task_repo.add(task=conversion)
    

#@flow(name="Create Images", log_prints=True)
def create_images(frac: float = 0.1, force: bool = False):
    """Create the converted images for the current environment when needed.

    Conversion runs when ``force`` is True or when images_converted()
    reports that no images are present; otherwise the call is a no-op.

    Args:
        frac: Passed through to convert_images.
        force: When True, skip the existing-images check and convert again.
    """
    if not force and images_converted():
        # Images are already present and no rebuild was requested.
        return

    # NOTE(review): delete_by_mode() presumably removes the images stored
    # for the active mode before they are regenerated — confirm in the repo.
    image_repo.delete_by_mode()
    convert_images(frac=frac)

# force=True bypasses the images_converted() check, so conversion runs every time this cell executes.
create_images(force=True)

100%|██████████| 354/354 [04:46<00:00,  1.23it/s]


## Stage 1: Artifact Removal

### Stage 1.0: Binary Thresholding

In [None]:
def plot_image_thresholds(image: np.ndarray):
    """Display the try_all_threshold comparison figure for one image.

    Args:
        image: Pixel array handed directly to skimage's try_all_threshold.
    """
    # The returned figure/axes handles are not needed here; pyplot shows
    # the figure that try_all_threshold created.
    try_all_threshold(image, figsize=(10, 8), verbose=False)
    plt.show()


In [None]:
def plot_thresholds():
    """Plot threshold comparisons for a grouped sample of repository images.

    Samples images from image_repo grouped by abnormality type, cancer
    flag and image view, then plots every sampled image's pixel data
    converted to float.
    NOTE(review): n=2 is presumably the sample size per group — confirm
    against image_repo.sample.
    """
    strata = ['abnormality_type', 'cancer', 'image_view']
    # Only the second element of the returned pair (the images) is used.
    _unused, sampled_images = image_repo.sample(n=2, groupby=strata)
    for _key, sample in sampled_images.items():
        pixels = sample.pixel_data.astype('float')
        plot_image_thresholds(pixels)

# Render the threshold comparison figures for the sampled images.
plot_thresholds()