In [None]:
# Parse Labels (output mass.csv)

from pathlib import Path

import utils
import dataset

# Project root, relative to the notebook's working directory.
ROOT_DIR = Path('../')
utils.set_log_lvl('WARN')

# `DataFrame.to_csv` does not create missing parent directories, so make sure
# the output folder exists; otherwise a fresh checkout fails at the write below.
output_dir = ROOT_DIR / 'output'
output_dir.mkdir(parents=True, exist_ok=True)

# Load the raw annotation JSON and derive the per-mass table from it.
labels = utils.load_json(ROOT_DIR / 'dataset' /
                         'Thyroid_AI_data_20220103.json')
mass_df = dataset.get_mass_info(labels)
mass_df.to_csv(output_dir / 'mass.csv')

mass_df.info()

In [None]:
# Slide Info (output slide_info.csv)

from pathlib import Path

import utils
import dataset

# Project root, relative to the notebook's working directory.
ROOT_DIR = Path('../')
utils.set_log_lvl('WARN')

# Create the output folder before any work is done: `DataFrame.to_csv` does not
# create missing parent directories, and failing only at the final write would
# throw away the slide scan performed below.
output_dir = ROOT_DIR / 'output'
output_dir.mkdir(parents=True, exist_ok=True)

# Collect every .ndpi slide in the dataset folder and tabulate its metadata.
path_list = list((ROOT_DIR / 'dataset' / 'ndpi').glob('*.ndpi'))
slide_df = dataset.get_slide_info(path_list)
slide_df.to_csv(output_dir / 'slide_info.csv')

slide_df.info()

In [None]:
# Find Cells (output cell.csv)

import pandas
import concurrent.futures
from pathlib import Path
from tqdm.auto import tqdm

import utils
from find_cell import find_cell

ROOT_DIR = Path('../')
utils.set_log_lvl('WARN')

# Tables written by the "Slide Info" and "Parse Labels" cells.
slide_df = pandas.read_csv(ROOT_DIR / 'output' / 'slide_info.csv', index_col=0)
mass_df = pandas.read_csv(ROOT_DIR / 'output' / 'mass.csv', index_col=0)

# Row selection, expressed as one combined mask:
#   - the mass's slide must be present in slide_df (i.e. its source is available),
#   - the slide must not be on the blocklist,
#   - only masses tagged PTC or BFC are kept.
has_slide = mass_df['slide'].isin(slide_df.index)
is_blocked = mass_df['slide'].isin(['PTC_20_088_P'])
has_wanted_tag = mass_df['tag'].isin(['PTC', 'BFC'])
mass_df = mass_df[has_slide & ~is_blocked & has_wanted_tag]
# Uncomment to work on a fixed random subset instead of every mass:
# mass_df = mass_df.sample(64, random_state=0xdeadbeef)


# Module-level slot for the slide reader; `proc_init` fills it in.
reader = None


def proc_init(folder):
    """Set up the module-level state used by `proxy`.

    Passed as the ``initializer`` of the process pool, so it runs once in each
    worker process.

    Args:
        folder: Location handed to ``utils.SlideReader``.
    """
    global reader
    # Log level is applied before the reader is built, same as in the notebook cell.
    utils.set_log_lvl('WARN')
    reader = utils.SlideReader(folder)


def transfom_args(row):
    """Turn one `mass_df.itertuples()` row into the argument tuple for `proxy`.

    Args:
        row: Exactly seven fields: the index value (used as the mass uid), the
            slide name, one ignored field (presumably the ``tag`` column -
            confirm against mass.csv), then x_min, y_min, width and height.

    Returns:
        ``(uid, slide, (x_min, y_min, width, height), info)`` where ``info`` is
        the ``slide_df`` row for that slide converted to a dict.
    """
    (mass_uid, slide_name, _ignored, left, top, box_w, box_h) = row
    bbox = (left, top, box_w, box_h)
    slide_info = slide_df.loc[slide_name].to_dict()
    return (mass_uid, slide_name, bbox, slide_info)


def proxy(args):
    """Run `find_cell` for one mass and tag every detection with the mass uid.

    Args:
        args: ``(uid, slide, bbox, info)`` as produced by `transfom_args`.

    Returns:
        A list with one tuple per item returned by `find_cell`, each prefixed
        with ``uid``.
    """
    (uid, slide, bbox, info) = args
    # `reader` is the module-level reader assigned by `proc_init`.
    detections = find_cell(slide, bbox, reader, info)  # type: ignore
    tagged = []
    for detection in detections:
        tagged.append((uid,) + detection)
    return tagged


# Every worker process runs `proc_init` once, which opens its own slide reader.
pool = concurrent.futures.ProcessPoolExecutor(
    initializer=proc_init,
    initargs=(ROOT_DIR / 'dataset' / 'ndpi',),
)
with pool as executor:
    jobs = map(transfom_args, mass_df.itertuples())
    per_mass_results = tqdm(
        executor.map(proxy, jobs, chunksize=16), total=len(mass_df))

    # Flatten the per-mass lists into a single list of detection rows.
    result = []
    for detections in per_mass_results:
        result.extend(detections)

    columns = ['mass_uid', 'cx', 'cy', 'width', 'height', 'angle', 'score']
    cell_df = pandas.DataFrame(result, columns=columns)
    cell_df.to_csv(ROOT_DIR / 'output' / 'cell.csv')
    cell_df.info()

In [None]:
# Visualize Mass

import cv2
import numpy
import pandas
import concurrent.futures
from tqdm.auto import tqdm
from pathlib import Path
from PIL import Image

import utils

ROOT_DIR = Path('../')
utils.set_log_lvl('WARN')

# Detection and mass tables written by the earlier cells of this notebook.
output_dir = ROOT_DIR / 'output'
cell_df = pandas.read_csv(output_dir / 'cell.csv', index_col=0)
mass_df = pandas.read_csv(output_dir / 'mass.csv', index_col=0)

# Single reader instance, used by `read_mass` from the worker threads.
reader = utils.SlideReader(ROOT_DIR / 'dataset' / 'ndpi')

def read_mass(data):
    """Render one mass region with an ellipse drawn for every detected cell.

    Args:
        data: ``(mass_uid, cells)`` pair, as yielded when iterating
            ``cell_df.groupby('mass_uid')``; ``cells`` holds that mass's rows.

    Returns:
        A PIL image (RGB channel order) of the mass region with the cell
        ellipses drawn on it.
    """
    mass_uid, cells = data
    mass_row = mass_df.loc[mass_uid]
    region = reader.read_bbox(
        mass_row["slide"], utils.get_bbox(mass_row))  # type: ignore
    # The conversion code assumes the reader hands back an RGBA image.
    canvas = cv2.cvtColor(numpy.array(region), cv2.COLOR_RGBA2BGR)

    for detection in cells.itertuples(index=False):
        box = (
            (detection.cx, detection.cy),
            (detection.width, detection.height),
            detection.angle,
        )
        # The canvas is BGR here: green rises and red falls as the score grows.
        hue = round(200 * detection.score)  # type: ignore
        canvas = cv2.ellipse(canvas, box=box, color=(  # type: ignore
            40, 55 + hue, 255 - hue))

    return Image.fromarray(cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB))


# One group per mass, so each task renders a mass together with all its cells.
mass_iter = cell_df.groupby('mass_uid')
with concurrent.futures.ThreadPoolExecutor() as executor:
    # `executor.map` yields results in submission order.
    rendered = executor.map(read_mass, mass_iter)
    for mass in tqdm(rendered, total=len(mass_iter)):
        display(mass)

In [None]:
# Visualize Cell

import numpy
import math
import pandas
import concurrent.futures
from tqdm.auto import tqdm
from pathlib import Path
from PIL import Image

import utils

ROOT_DIR = Path('../')
utils.set_log_lvl('WARN')

output_dir = ROOT_DIR / 'output'

# Keep only detections scoring above 0.75, ordered from lowest to highest score.
cell_df = pandas.read_csv(output_dir / 'cell.csv', index_col=0)
cell_df = cell_df[cell_df['score'] > 0.75].sort_values('score')

mass_df = pandas.read_csv(output_dir / 'mass.csv', index_col=0)
reader = utils.SlideReader(ROOT_DIR / 'dataset' / 'ndpi')
# NOTE(review): the meaning of the four BufferedDisplay arguments is not visible
# in this notebook - confirm against utils.BufferedDisplay.
buf_display = utils.BufferedDisplay(40, 40, 50, 50)


def read_cell(cell):
    """Cut a single detected cell out of its slide.

    Reads a 120x120 window positioned around the cell centre, then crops the
    rotated cell rectangle (enlarged by a factor of 1.4) out of that window.

    Args:
        cell: Row of ``cell_df`` exposing ``mass_uid``, ``cx``, ``cy``,
            ``width``, ``height`` and ``angle``; ``cx``/``cy`` are offsets from
            the mass's ``x_min``/``y_min``.

    Returns:
        A PIL image built from the cropped array.
    """
    half_window = 60
    window = 2 * half_window

    mass_row = mass_df.loc[cell.mass_uid]

    # Window origin relative to the mass, snapped down to whole pixels.
    origin_x = math.floor(cell.cx - half_window)  # type: ignore
    origin_y = math.floor(cell.cy - half_window)  # type: ignore
    # Cell centre inside the window; keeps the fractional part of the position.
    center = (cell.cx - origin_x, cell.cy - origin_y)
    # Crop size: the detected box scaled by 1.4 and rounded up.
    size = (
        math.ceil(cell.width * 1.4),  # type: ignore
        math.ceil(cell.height * 1.4),  # type: ignore
    )

    window_bbox = (
        mass_row["x_min"] + origin_x,
        mass_row["y_min"] + origin_y,
        window,
        window,
    )
    region = reader.read_bbox(mass_row["slide"], window_bbox)  # type: ignore
    pixels = numpy.array(region.convert("RGB"))

    cropped = utils.crop_rotated_rectangle(
        pixels, center, size, cell.angle)  # type: ignore

    return Image.fromarray(cropped)


with concurrent.futures.ThreadPoolExecutor() as executor:
    cell_rows = cell_df.itertuples(index=False)
    # `executor.map` yields crops in the same order as the rows of `cell_df`.
    crops = executor.map(read_cell, cell_rows)
    for cell in tqdm(crops, total=len(cell_df)):
        buf_display.display(cell)

# Final flush once every crop has been handed to the buffered display.
buf_display.flush()

In [None]:
# Describe Score

import pandas
from pathlib import Path

ROOT_DIR = Path('../')
cell_csv = ROOT_DIR / 'output' / 'cell.csv'
cell_df = pandas.read_csv(cell_csv, index_col=0)
# Last expression of the cell, so the notebook displays the summary statistics.
cell_df['score'].describe()