In [None]:
from pathlib import Path

import utils
import dataset

# All inputs/outputs are addressed relative to the parent of the working directory.
ROOT_DIR = Path('../')
utils.set_log_lvl('WARN')

labels_path = ROOT_DIR / 'dataset' / 'Thyroid_AI_data_20220103.json'
mass_csv_path = ROOT_DIR / 'output' / 'mass.csv'

# Load the annotation JSON, turn it into the mass table and persist it as CSV
# (the CSV keeps the DataFrame index as its first column).
labels = utils.load_json(labels_path)
mass_df = dataset.get_mass_info(labels)
mass_df.to_csv(mass_csv_path)

# Quick sanity check of column dtypes and non-null counts.
mass_df.info()

In [None]:
from pathlib import Path

import utils
import dataset

# All inputs/outputs are addressed relative to the parent of the working directory.
ROOT_DIR = Path('../')
utils.set_log_lvl('WARN')

ndpi_dir = ROOT_DIR / 'dataset' / 'ndpi'
slide_csv_path = ROOT_DIR / 'output' / 'slide_info.csv'

# Collect every *.ndpi file directly inside the slide directory (non-recursive),
# build the slide table from those paths and persist it as CSV.
path_list = list(ndpi_dir.glob('*.ndpi'))
slide_df = dataset.get_slide_info(path_list)
slide_df.to_csv(slide_csv_path)

# Quick sanity check of column dtypes and non-null counts.
slide_df.info()

In [None]:
import pandas
import concurrent.futures
from pathlib import Path
from tqdm.auto import tqdm

import utils
from find_cell import find_cell

# All inputs/outputs are addressed relative to the parent of the working directory.
ROOT_DIR = Path('../')
utils.set_log_lvl('WARN')

# Upper bound on how many masses are processed by this cell.
SAMPLE_SIZE = 32

# Both tables are read back with their first CSV column as the index.
slide_df = pandas.read_csv(
    ROOT_DIR / 'output' / 'slide_info.csv', index_col=0)
mass_df = pandas.read_csv(ROOT_DIR / 'output' / 'mass.csv', index_col=0)
# no source: drop masses whose slide is absent from slide_df's index
mass_df = mass_df[mass_df['slide'].isin(slide_df.index)]
# blocklist: slides excluded by hand
mass_df = mass_df[~mass_df['slide'].isin(['PTC_20_088_P'])]
# filter tag: keep only the PTC and BFC tags
mass_df = mass_df[mass_df['tag'].isin(['PTC', 'BFC'])]
# sample: DataFrame.sample raises ValueError when asked for more rows than
# exist (replace=False), so cap the request at the number of rows that
# survived the filters. With >= SAMPLE_SIZE rows the draw is unchanged.
mass_df = mass_df.sample(
    min(SAMPLE_SIZE, len(mass_df)), random_state=0xdeadbeef)
reader = utils.SlideReader(ROOT_DIR / 'dataset' / 'ndpi')


def job(data):
    """Run ``find_cell`` for a single mass.

    Args:
        data: An ``(index, row)`` pair as produced by ``DataFrame.iterrows()``
            on the mass table; ``row["slide"]`` must be a label present in
            the module-level ``slide_df`` index.

    Returns:
        Whatever ``find_cell`` returns for this mass. The caller iterates
        over it to flatten the per-mass results, so it is presumably an
        iterable of cell records -- confirm against ``find_cell``.
    """
    mass_index, mass_row = data
    # Look up the slide metadata row that belongs to this mass.
    slide_row = slide_df.loc[mass_row["slide"]]
    return find_cell(mass_index, mass_row, reader, slide_row)


# Fan the per-mass work out over a thread pool; executor.map yields results in
# input order, and tqdm shows progress as they are consumed.
with concurrent.futures.ThreadPoolExecutor() as executor:
    per_mass_results = tqdm(
        executor.map(job, mass_df.iterrows()), total=len(mass_df))
    # Flatten: each job returns a sequence of records, one record per row.
    result = []
    for mass_cells in per_mass_results:
        result.extend(mass_cells)

# Every result has been collected by now, so the table can be assembled
# after the pool has shut down.
cell_columns = ['mass', 'cx', 'cy', 'width', 'height', 'angle']
cell_df = pandas.DataFrame(result, columns=cell_columns)
cell_df.to_csv(ROOT_DIR / 'output' / 'cell.csv')
cell_df.info()

In [None]:
import cv2
import numpy
import pandas
from pathlib import Path
from PIL import Image

import utils

# All inputs are addressed relative to the parent of the working directory.
ROOT_DIR = Path('../')
utils.set_log_lvl('WARN')

# `pandas` is imported by this cell itself (see above) so the cell no longer
# depends on an earlier cell having left the name in the kernel namespace.
cell_df = pandas.read_csv(ROOT_DIR / 'output' / 'cell.csv', index_col=0)
mass_df = pandas.read_csv(ROOT_DIR / 'output' / 'mass.csv', index_col=0)
reader = utils.SlideReader(ROOT_DIR / 'dataset' / 'ndpi')
# NOTE(review): presumably a 4x4 grid of 400x400 tiles -- confirm against
# utils.BufferedDisplay.
buf_display = utils.BufferedDisplay(4, 4, 400, 400)

# One overlay image per mass: read the mass region, then draw every cell
# recorded for that mass on top of it.
for mass, group in cell_df.groupby('mass'):
    src = mass_df.loc[mass]
    image = reader.read_bbox(src["slide"], utils.get_bbox(src))  # type: ignore
    # OpenCV drawing works on BGR arrays; the conversion code implies the
    # reader returns an RGBA image.
    image = cv2.cvtColor(numpy.array(image), cv2.COLOR_RGBA2BGR)

    for _, cell in group.iterrows():
        # box = ((cx, cy), (width, height), angle); (0, 255, 255) is yellow
        # in BGR channel order.
        image = cv2.ellipse(image, box=((cell['cx'], cell['cy']), (
            cell['width'], cell['height']), cell['angle']), color=(0, 255, 255))  # type: ignore

    # Convert back to RGB for PIL before handing the image to the display.
    image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    buf_display.display(image)

# Emit whatever images are still buffered.
buf_display.flush()