# Data Preparation

In [1]:
import numpy as np
import os
from read_roi import read_roi_file
import shutil
from tqdm.notebook import tqdm

In [None]:
def mkdir(path):
    """
    Create the directory `path` unless it is already there.

    Missing parent directories are created too, and calling this again for an
    existing directory does nothing.
    raises: FileExistsError if `path` exists but is not a directory
    """
    # exist_ok=True replaces the separate os.path.exists test, which could race
    # with another process creating the directory between the check and the mkdir.
    os.makedirs(path, exist_ok=True)

In [2]:
def parse_roi(name, img_size=512, root="practice", class_id=0):
    """
    Read the ROI file of sample `name` and format its bounding box as one label line.

    name: sample identifier; the ROI is read from <root>/<name>/Device_<name>_0000.roi
    img_size: pixel size used to normalise the box; the same value divides both
        the horizontal and the vertical coordinates
    root: directory holding one sub-directory per sample
    class_id: class index written as the first field of the line
    return: "<class_id> <center x> <center y> <width> <height>", the four box
        values being fractions of img_size
    raises: KeyError if the parsed file has no "Device_<name>_0000" entry
    """
    roi_key = f"Device_{name}_0000"
    path = os.path.join(root, name, f"{roi_key}.roi")
    roi = read_roi_file(path)[roi_key]
    # pixel-space box: top-left corner followed by its extent
    box = np.array([roi['left'], roi['top'], roi['width'], roi['height']])
    rx, ry, rw, rh = box / img_size
    # the label line stores the box centre, not its top-left corner
    rcx = rx + rw / 2
    rcy = ry + rh / 2
    return f"{class_id} {rcx} {rcy} {rw} {rh}"

In [3]:
def write_label(name, label_dir="labels"):
    """
    Save the label line of sample `name` as <label_dir>/<name>.txt.

    The line comes from parse_roi(name) and is written as-is, replacing any
    previous content of the file.
    """
    annotation = parse_roi(name)
    target = os.path.join(label_dir, f"{name}.txt")
    with open(target, "w") as handle:
        handle.write(annotation)

In [6]:
def write_meta(names, fname, meta_dir="meta", image_dir="images"):
    """
    Write <meta_dir>/<fname>.txt listing one image path per line.

    Each entry of `names` becomes the line <image_dir>/<name>.png; tqdm
    reports progress while the names are consumed.
    """
    listing_path = os.path.join(meta_dir, f"{fname}.txt")
    with open(listing_path, "w") as listing:
        listing.writelines(
            os.path.join(image_dir, f"{name}.png") + "\n" for name in tqdm(names)
        )

In [10]:
# Working directories used by the rest of the notebook.
image_dir = "images"
label_dir = "labels"
meta_dir = "meta"
cfg_dir = "cfg"
backup_dir = "backup"
dirs = (image_dir, label_dir, meta_dir, cfg_dir, backup_dir)

# Make sure each of them exists before anything is written into it.
for directory in dirs:
    mkdir(directory)

## 1. Image, Label

In [5]:
# Every sub-directory of "practice" is one sample. os.listdir returns entries in
# arbitrary order, so sort them: the seeded shuffle behind the train/valid split
# only reproduces the same split when it starts from the same ordering.
names = sorted(x for x in os.listdir("practice") if os.path.isdir(os.path.join("practice", x)))

In [11]:
# Turn each sample's ROI annotation into a label txt file inside label_dir.
for name in tqdm(names):
    write_label(name, label_dir=label_dir)

HBox(children=(FloatProgress(value=0.0, max=2000.0), HTML(value='')))




In [13]:
# Gather each sample's PNG from its own folder into the flat image directory.
for name in tqdm(names):
    png = f"{name}.png"
    src, dst = os.path.join("practice", name, png), os.path.join(image_dir, png)
    shutil.copyfile(src, dst)

HBox(children=(FloatProgress(value=0.0, max=2000.0), HTML(value='')))




## 2. Meta

In [7]:
# Hold out a fixed-size validation set; seeding makes the shuffle repeatable.
np.random.seed(0)
sample = names[:]  # shuffle a copy so `names` keeps its order
np.random.shuffle(sample)
valid_size = 200
# Split by explicit index: the slice pair sample[:-valid_size] / sample[-valid_size:]
# would put every sample into `valid` (and none into `train`) if valid_size were 0.
split_at = max(len(sample) - valid_size, 0)
train, valid = sample[:split_at], sample[split_at:]
# exist_ok makes this a no-op when the directory is already there
os.makedirs(meta_dir, exist_ok=True)

In [8]:
# One image-list file per split: <meta_dir>/train.txt, then <meta_dir>/valid.txt.
for split_name, split_members in (("train", train), ("valid", valid)):
    write_meta(split_members, split_name, meta_dir, image_dir)

HBox(children=(FloatProgress(value=0.0, max=1800.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))




## 3. Config

In [12]:
# Write the dataset description file: class count, the two image lists,
# the class-name file and the backup directory, one entry per line.
data_path = os.path.join(cfg_dir, "cxr.data")
# The closing quotes sit at column 0 so that no indentation-only trailing line
# ends up in the written file.
content = f"""classes=1
train = {os.path.join(meta_dir, "train.txt")}
valid = {os.path.join(meta_dir, "valid.txt")}
names = {os.path.join(cfg_dir, "cxr.names")}
backup = {backup_dir}
"""
with open(data_path, "w") as datafile:
    datafile.write(content)

In [13]:
# Class-name file: holds the single class label "L".
names_path = os.path.join(cfg_dir, "cxr.names")
with open(names_path, "w") as namefile:
    namefile.write("L")