In [13]:
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.config import get_cfg
from detectron2.engine import default_argument_parser, default_setup, launch
from typing import Dict, Set, List, Tuple, Iterator
import os
import logging
import json
import copy

# hacky way to register
from ubteacher.modeling import *
from ubteacher.engine import *
from ubteacher.engine.trainer import UBTeacherTrainer, UBRCNNTeacherTrainer, BaselineTrainer
from ubteacher import add_ubteacher_config
from ubteacher.utils.utils import (TrainHelper, 
                    get_categorical_map, 
                    get_annotypes_for_dataset)

In [16]:
train_sample = '/mnt/RSX/Datasets_pathology/SRI_OSCC_lymph_train'

In [53]:
def find_anno_dir(parent_dir: str) -> List[str]:
    """
    Find qupath exported annotations directory
    """
    
    if os.path.exists(os.path.join(parent_dir, 'xupath_annotations_latest')):
        return os.path.join(parent_dir, 'qupath_annotations_latest')
    else:
        anno_dirs = []
        for root, dirs, files in os.walk(parent_dir):
            for d in dirs:
                if 'annotations' in d:
                    anno_dirs.append(os.path.join(root, d))
        # user chooses if there are multiple annotation folders
        print('Found multiple annotation folders:')
        for i, anno_dir in enumerate(anno_dirs):
            print(f'{i}: {os.path.relpath(anno_dir, parent_dir)}')
        choice = input('Choose annotation folder index')
        if choice.isdigit() and int(choice) < len(anno_dirs):
            return anno_dirs[int(choice)]    
        else:
            raise ValueError('Annotation folder not found')

In [110]:
def search_recursive(d: Dict, key: str) -> Iterator:
        """Helper function for finding which level of json annotations has
        the matching key.
        """
        for k, v in d.items():
            if isinstance(v, Dict):
                for match in search_recursive(v, key):
                    yield match
            if k == key:
                # generator function - saves in memory until called
                # (use for loop to call)
                yield v

In [89]:
find_anno_dir(train_sample)

Found multiple annotation folders:
0: qupath_annotations_latest
1: qupath_annotations_V4
2: qupath_annotations_V3
3: qupath_annotations_V2
4: qupath_annotations_V1


ValueError: Annotation folder not found

In [90]:
def parse_json_by_task(json_file):
    """
    Parse json file by task
    """
    with open(json_file, 'r') as f:
        data = json.load(f)
        return data

In [125]:
json_file = '/mnt/RSX/Datasets_pathology/SRI_OSCC_lymph_train/qupath_annotations_latest/Case 1 G7.json'
data = parse_json_by_task(json_file)
tissue_types = ['lymph', 'non-lymph']
tissue_data = []
for i in data:
    if any(tissue in list(search_recursive(i, 'name')) for tissue in tissue_types):
        tissue_data.append(i)

In [169]:
# get coordinates for all relevant tissues and create boxes

def get_box_coords(json_file, tissue_types):
    
    with open(json_file, 'r') as f:
        data = json.load(f)
    tissue_data = []
    for i in data:
        if any(tissue in list(search_recursive(i, 'name')) for tissue in tissue_types):
            tissue_data.append(i)
    coords = []
    for k in tissue_data:
        ## add names to k 
        k['geometry']['name'] = list(search_recursive(k, 'name'))[0]
        del k['geometry']['type']
        coords.append(next(search_recursive(k, 'geometry')))
        
    return coords

In [168]:
get_box_coords(json_file, tissue_types)

[{'coordinates': [[[14701, 1589],
    [39083, 1589],
    [39083, 27357],
    [14701, 27357],
    [14701, 1589]]],
  'name': 'non-lymph'},
 {'coordinates': [[[1629, 10865],
    [21905, 10865],
    [21905, 32764],
    [1629, 32764],
    [1629, 10865]]],
  'name': 'non-lymph'}]

In [None]:
## get to detectron2 coco format