In [23]:
import os
import json
import random
import xml.etree.ElementTree as ET

import torch
import torchvision.transforms.functional as FT

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# label_map
voc_labels = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable',
              'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor')

label_map = {k: v + 1 for v, k in enumerate(voc_labels)}
label_map['background'] = 0
rev_label_map = {v: k for k, v in label_map.items()}

# Color map for bounding boxes of detected objects from https://sashat.me/2017/01/11/list-of-20-simple-distinct-colors/
distinct_colors = ['#e6194b', '#3cb44b', '#ffe119', '#0082c8', '#f58231', '#911eb4', '#46f0f0', '#f032e6',
                   '#d2f53c', '#fabebe', '#008080', '#000080', '#aa6e28', '#fffac8', '#800000', '#aaffc3', '#808000',
                   '#ffd8b1', '#e6beff', '#808080', '#FFFFFF']

label_color_map = {k: distinct_colors[i] for i, k in enumerate(label_map.keys())}


def parse_annotation(annotation_path):
    """Parse Annotation

    Parameter
    ---------
    

    Return: dict
    ------
    boxes: a list of list
    labels: a list
    difficulties: a list

    """

    tree = ET.parse(annotation_path)
    root = tree.getroot()

    boxes = list()
    labels = list()
    difficulties = list()
    for object in root.iter('object'):

        difficult = int(object.find('difficult').text == '1')

        label = object.find('name').text.lower().strip()
        if label not in label_map:
            continue

        bbox = object.find('bndbox')
        xmin = int(bbox.find('xmin').text) - 1
        ymin = int(bbox.find('ymin').text) - 1
        xmax = int(bbox.find('xmax').text) - 1
        ymax = int(bbox.find('ymax').text) - 1

        boxes.append([xmin, ymin, xmax, ymax])
        labels.append(label_map[label])
        difficulties.append(difficult)

    return {'boxes': boxes, 'labels': labels, 'difficulties': difficulties}


def create_data_lists(voc07_path, output_folder):
    """
    Create lists of images, the bounding boxes and labels of the objects in these images, and save these to file.

    :param voc07_path: path to the 'VOC2007' folder
    ex) voc07_path = /data/yjkim/data/pascal/VOCdevkit/VOC2007
    
    :param voc12_path: path to the 'VOC2012' folder
    ex) voc12_path = /data/yjkim/data/pascal/VOCdevkit/VOC2012

    :param output_folder: folder where the JSONs must be saved
    ex) /data/yjkim/data/pascal/VOCdevkit/VOC2007/jsonlist
    """
    voc07_path = os.path.abspath('/disk1/jtku/dev/od/data/VOCdevkit/VOC2007')
   # output_folder = os.path.abspath('/home/jtku/dev/od/data/VOCdevkit/VOC2007/jsonlist')
    train_images = list()
    train_objects = list()
    n_objects = 0

    # Training data
    for path in [voc07_path]:

        # Find IDs of images in training data
        with open(os.path.join(path, 'ImageSets/Main/trainval.txt')) as f:
            ids = f.read().splitlines()

        for id in ids:
            # Parse annotation's XML file
            objects = parse_annotation(os.path.join(path, 'Annotations', id + '.xml'))
            if len(objects) == 0:
                continue
            n_objects += len(objects)
            train_objects.append(objects)
            train_images.append(os.path.join(path, 'JPEGImages', id + '.jpg'))

    assert len(train_objects) == len(train_images)

    # Save to file
    with open(os.path.join(output_folder, 'TRAIN_images.json'), 'w') as j:
        json.dump(train_images, j)
    with open(os.path.join(output_folder, 'TRAIN_objects.json'), 'w') as j:
        json.dump(train_objects, j)
    with open(os.path.join(output_folder, 'label_map.json'), 'w') as j:
        json.dump(label_map, j)  # save label map too

    print('\nThere are %d training images containing a total of %d objects. Files have been saved to %s.' % (
        len(train_images), n_objects, os.path.abspath(output_folder)))

    # Validation data
    test_images = list()
    test_objects = list()
    n_objects = 0

    # Find IDs of images in validation data
    with open(os.path.join(voc07_path, '/disk1/jtku/dev/od/data/VOCdevkit/VOC2007/ImageSets/Main/test.txt')) as f:
        ids = f.read().splitlines()

    for id in ids:
        # Parse annotation's XML file
        objects = parse_annotation(os.path.join(voc07_path, '/disk1/jtku/dev/od/data/VOCdevkit/VOC2007/Annotations', id + '.xml'))
        if len(objects) == 0:
            continue
        test_objects.append(objects)
        n_objects += len(objects)
        test_images.append(os.path.join(voc07_path, '/disk1/jtku/dev/od/data/VOCdevkit/VOC2007/JPEGImages', id + '.jpg'))

    assert len(test_objects) == len(test_images)

    # Save to file
    with open(os.path.join(output_folder, 'TEST_images.json'), 'w') as j:
        json.dump(test_images, j)
    with open(os.path.join(output_folder, 'TEST_objects.json'), 'w') as j:
        json.dump(test_objects, j)

    print('\nThere are %d validation images containing a total of %d objects. Files have been saved to %s.' % (
        len(test_images), n_objects, os.path.abspath(output_folder)))

In [24]:
create_data_lists('/disk1/jtku/dev/od/data/VOCdevkit/VOC2007', '/disk1/jtku/dev/od/data/VOCdevkit/VOC2007/jsonlist')


There are 5011 training images containing a total of 15033 objects. Files have been saved to /disk1/jtku/dev/od/data/VOCdevkit/VOC2007/jsonlist.

There are 4952 validation images containing a total of 14856 objects. Files have been saved to /disk1/jtku/dev/od/data/VOCdevkit/VOC2007/jsonlist.


# LITS MAKE_JSON


In [1]:
import os
import json
import random
import xml.etree.ElementTree as ET

import torch
import torchvision.transforms.functional as FT

#cup 지정해주기?
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')


# label_map
voc_labels = ('background', 'tumor')

voc_labels = ('tumor','background')


label_map = {k: v + 1 for v, k in enumerate(voc_labels)}
print(label_map)
# 배경 키 값 0으로 지정
label_map['background'] = 0
print(label_map)
rev_label_map = {v: k for k, v in label_map.items()}

# Color map for bounding boxes of detected objects from https://sashat.me/2017/01/11/list-of-20-simple-distinct-colors/
distinct_colors = ['#e6194b', '#FFFFFF']

# Class & box color match
label_color_map = {k: distinct_colors[i] for i, k in enumerate(label_map.keys())}


def parse_annotation(annotation_path): 
    """Parse Annotation

    Parameter
    ---------
    

    Return: dict
    ------
    boxes: a list of list
    labels: a list
    difficulties: a list

    """

    tree = ET.parse(annotation_path)
    root = tree.getroot()

    boxes = list()
    labels = list()
    difficulties = list()
    
    for object in root.iter('object'):

        difficult = int(object.find('difficult').text == '1')

        label = object.find('name').text.lower().strip()
        if label not in label_map:
            continue

        bbox = object.find('bndbox')
        xmin = int(bbox.find('xmin').text) - 1
        ymin = int(bbox.find('ymin').text) - 1
        xmax = int(bbox.find('xmax').text) - 1
        ymax = int(bbox.find('ymax').text) - 1

        boxes.append([xmin, ymin, xmax, ymax])
        labels.append(label_map[label])
        difficulties.append(difficult)

    return {'boxes': boxes, 'labels': labels, 'difficulties': difficulties}


def create_data_lists(mask_path, output_folder):
    """
    Create lists of images, the bounding boxes and labels of the objects in these images, and save these to file.

    :param voc07_path: path to the 'VOC2007' folder
    ex) voc07_path = /data/yjkim/data/pascal/VOCdevkit/VOC2007
    
    :param voc12_path: path to the 'VOC2012' folder
    ex) voc12_path = /data/yjkim/data/pascal/VOCdevkit/VOC2012

    :param output_folder: folder where the JSONs must be saved
    ex) /data/yjkim/data/pascal/VOCdevkit/VOC2007/jsonlist
    """
    mask_path = os.path.abspath('/disk1/jtku/dev/mask/xml')
   # output_folder = os.path.abspath('/home/jtku/dev/od/data/VOCdevkit/VOC2007/jsonlist')
    train_images = list()
    train_objects = list()
    n_objects = 0

    # Training data
    for path in [mask_path]:

        # Find IDs of images in training data
        with open(os.path.join(path, 'ImageSets/Main/trainval.txt')) as f:
            ids = f.read().splitlines()

        for id in ids:
            # Parse annotation's XML file
            objects = parse_annotation(os.path.join(path, 'Annotations', id + '.xml'))
            if len(objects) == 0:
                continue
            n_objects += len(objects)
            train_objects.append(objects)
            train_images.append(os.path.join(path, 'JPEGImages', id + '.jpg'))

    assert len(train_objects) == len(train_images)

    # Save to file
    with open(os.path.join(output_folder, 'TRAIN_images.json'), 'w') as j:
        json.dump(train_images, j)
    with open(os.path.join(output_folder, 'TRAIN_objects.json'), 'w') as j:
        json.dump(train_objects, j)
    with open(os.path.join(output_folder, 'label_map.json'), 'w') as j:
        json.dump(label_map, j)  # save label map too

    print('\nThere are %d training images containing a total of %d objects. Files have been saved to %s.' % (
        len(train_images), n_objects, os.path.abspath(output_folder)))

    # Validation data
    test_images = list()
    test_objects = list()
    n_objects = 0

    # Find IDs of images in validation data
    with open(os.path.join(voc07_path, '/disk1/jtku/dev/od/data/VOCdevkit/VOC2007/ImageSets/Main/test.txt')) as f:
        ids = f.read().splitlines()

    for id in ids:
        # Parse annotation's XML file
        objects = parse_annotation(os.path.join(voc07_path, '/disk1/jtku/dev/od/data/VOCdevkit/VOC2007/Annotations', id + '.xml'))
        if len(objects) == 0:
            continue
        test_objects.append(objects)
        n_objects += len(objects)
        test_images.append(os.path.join(voc07_path, '/disk1/jtku/dev/od/data/VOCdevkit/VOC2007/JPEGImages', id + '.jpg'))

    assert len(test_objects) == len(test_images)

    # Save to file
    with open(os.path.join(output_folder, 'TEST_images.json'), 'w') as j:
        json.dump(test_images, j)
    with open(os.path.join(output_folder, 'TEST_objects.json'), 'w') as j:
        json.dump(test_objects, j)

    print('\nThere are %d validation images containing a total of %d objects. Files have been saved to %s.' % (
        len(test_images), n_objects, os.path.abspath(output_folder)))

IndexError: list index out of range

In [29]:
for object in root.iter('object'):
    print(object)

NameError: name 'root' is not defined

{'tumor': 1, 'background': 2}
{'tumor': 1, 'background': 0}


In [23]:
print("voc_lavels :", voc_labels)

print("\n\nlabel_map :",label_map)

print("\n\nrev_label_map :",rev_label_map)

print("\n\ndistinct_colors :",distinct_colors)

print("\n\nlabel_color_map :",label_color_map)

voc_lavels : ('tumor', 'background')


label_map : {'tumor': 1, 'background': 0}


rev_label_map : {1: 'tumor', 0: 'background'}


distinct_colors : ['#e6194b', '#FFFFFF']


label_color_map : {'tumor': '#e6194b', 'background': '#FFFFFF'}


In [2]:
voc_labels = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable',
              'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor')

label_map = {k: v + 1 for v, k in enumerate(voc_labels)}
label_map['background'] = 0
rev_label_map = {v: k for k, v in label_map.items()}

# Color map for bounding boxes of detected objects from https://sashat.me/2017/01/11/list-of-20-simple-distinct-colors/
distinct_colors = ['#e6194b', '#3cb44b', '#ffe119', '#0082c8', '#f58231', '#911eb4', '#46f0f0', '#f032e6',
                   '#d2f53c', '#fabebe', '#008080', '#000080', '#aa6e28', '#fffac8', '#800000', '#aaffc3', '#808000',
                   '#ffd8b1', '#e6beff', '#808080', '#FFFFFF']

label_color_map = {k: distinct_colors[i] for i, k in enumerate(label_map.keys())}

In [11]:
import nibabel as nib
from medpy.io import load, save
import numpy as np
import xml.etree.ElementTree as ET
from xml.etree.ElementTree import parse

xmlp = ET.XMLParser(encoding="EUC-KR")

tree = ET.parse('/disk1/jtku/dev/mask/xml/000.xml', parser=xmlp)

root = tree.getroot()

SegmentationBoxList = root.findall("SegmentationBoxList")




ValueError: multi-byte encodings are not supported

In [20]:
import os
import numpy as np
import nibabel as nib

img = nib.load("/disk1/jtku/dev/mask/convert/segmentation-0.nii")

img.shape



# img_data = img.get_data()

# img_data_arr = np.asarray(img_data)

# print(img_data_arr)

(512, 512, 75)

In [32]:
from xml.etree.ElementTree import parse



tree = parse('/disk1/jtku/dev/mask/xml/000.xml')

s = b'\xc4\xda\xbd\xba\xc7\xc7\xc1\xf6\xbc\xf6'
type(s), s 

u = s.decode('euc-kr')
type(u), u

ValueError: multi-byte encodings are not supported

# Final_make_json

In [39]:
import os
import glob
import json

# Making a json file
from od.utils import create_data_lists
import xml.etree.ElementTree as ET

HCC_PATH = "/disk1/jtku/dev/final"
OUTPUT_PATH = os.path.join(HCC_PATH, "jsonpath")


# paths // 폴더 안의 파일들 path list 불러오기
dcm_paths = glob.glob(os.path.join(HCC_PATH,"dcmImages","*.dcm"))
label_map = {"background":0,"hcc":1}

def hcc_parse_annotation(annotation_path):
    tree = ET.parse(annotation_path)
    root = tree.getroot()
    
    boxes = list()
    labels = list()
    
    for object in root.iter('object'):
        label = "hcc"
        if label not in label_map:
            continue
        bbox = object.find('bndbox')
        xmin = int(bbox.find('xmin').text) - 1
        ymin = int(bbox.find('ymin').text) - 1
        xmax = int(bbox.find('xmax').text) - 1
        ymax = int(bbox.find('ymax').text) - 1
        boxes.append([xmin, ymin, xmax, ymax])
        labels.append(label_map[label])
    
    return {'boxes': boxes, 'labels': labels}
def create_hcc_data_lists(paths,  output_folder):
    paths = os.path.abspath(paths)
    #voc12_path = os.path.abspath(voc12_path)
    train_images = list()
    train_objects = list()
    # Training data
    for path in dcm_paths[:-148]:
        file = path.split(sep="/")[-1] # 맨마지막
        idx = file.replace(".dcm","") # 확장자 제거
        objects = hcc_parse_annotation(os.path.join(HCC_PATH, 'Annotations', idx + '.xml'))
        train_objects.append(objects)
        train_images.append(os.path.join(HCC_PATH, 'dcmImages', idx + '.dcm'))
            
    assert len(train_objects) == len(train_images), 'train_objects == train_images가 아니랸다 친구야'
    
    
    # Save to file
    with open(os.path.join(output_folder, 'TRAIN_images.json'), 'w') as j:
        json.dump(train_images, j)
    with open(os.path.join(output_folder, 'TRAIN_objects.json'), 'w') as j:
        json.dump(train_objects, j)
    with open(os.path.join(output_folder, 'label_map.json'), 'w') as j:
        json.dump(label_map, j)  # save label map too
    print('\nThere are %d training images. Files have been saved to %s.' % (
        len(train_images), os.path.abspath(output_folder)))
    # Validation data
    test_images = list()
    test_objects = list()
    
    for path in dcm_paths[-148:]:
        file = path.split(sep="/")[-1] # 맨마지막
        idx = file.replace(".dcm","") # 확장자 제거
        objects = hcc_parse_annotation(os.path.join(HCC_PATH, 'Annotations', idx + '.xml'))
        test_objects.append(objects)
        test_images.append(os.path.join(HCC_PATH, 'dcmImages', idx + '.dcm'))
            
    assert len(test_objects) == len(test_images)
    # Save to file
    with open(os.path.join(output_folder, 'TEST_images.json'), 'w') as j:
        json.dump(test_images, j)
    with open(os.path.join(output_folder, 'TEST_objects.json'), 'w') as j:
        json.dump(test_objects, j)
    print('\nThere are %d validation images. Files have been saved to %s.' % (
        len(test_images),os.path.abspath(output_folder)))

In [40]:
create_hcc_data_lists('/disk1/jtku/dev/final/Annotations/', '/disk1/jtku/dev/final/jsonpath/')


There are 592 training images. Files have been saved to /disk1/jtku/dev/final/jsonpath.

There are 148 validation images. Files have been saved to /disk1/jtku/dev/final/jsonpath.


In [None]:
model.train()
train_loss = []
train_accu = []
i = 0
for epoch in range(15):
    for data, target in train_loader:
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()    # calc gradients
        train_loss.append(loss.data[0])
        optimizer.step()   # update gradients
        prediction = output.data.max(1)[1]   # first column has actual prob.
        accuracy = prediction.eq(target.data).sum()/batch_size*100
        train_accu.append(accuracy)
        if i % 1000 == 0:
            print('Train Step: {}\tLoss: {:.3f}\tAccuracy: {:.3f}'.format(i, loss.data[0], accuracy))
        i += 1