In [None]:
import numpy as np 
import scipy as sp
import scipy.misc, scipy.ndimage.interpolation
import pandas as pd
import os
import nibabel as nib
import cv2
import matplotlib.pyplot as plt 
from skimage import data, exposure
from glob import glob
import json
import warnings
warnings.filterwarnings('ignore', '.*output shape of zoom.*')

In [None]:
# Find bounding box of 3d data
def bounding_box(image, expand_voxels = 10):
    """Return the padded bounding box of the non-zero region of a 3-D array.

    ``image`` is indexed as ``image[depth, row, col]``: rows are looked up
    along axis 1, columns along axis 2 and depths along axis 0.

    The row and column extents are widened by ``expand_voxels`` on each side
    and clipped to the array bounds; the depth extent is not widened.

    Returns:
        ``(rmin, rmax, cmin, cmax, dmin, dmax)`` where every ``*max`` value is
        an exclusive end index, or six ``-1`` values when no element of
        ``image`` is truthy along one of the axes.
    """
    row_hits = np.nonzero(np.any(image, axis=(0, 2)))[0]
    col_hits = np.nonzero(np.any(image, axis=(0, 1)))[0]
    depth_hits = np.nonzero(np.any(image, axis=(1, 2)))[0]

    # Nothing to box: signal it with the all -1 sentinel.
    if 0 in (row_hits.size, col_hits.size, depth_hits.size):
        return (-1,) * 6

    def clip_low(first):
        # Move the start outwards, but never below index 0.
        shifted = first - expand_voxels
        return shifted if shifted >= 0 else 0

    def clip_high(last, limit):
        # Convert the inclusive last index into an exclusive, padded end
        # that never runs past the axis length.
        if last + expand_voxels < limit:
            return last + expand_voxels + 1
        return limit

    n_rows, n_cols = image.shape[1], image.shape[2]

    return (
        clip_low(row_hits[0]),
        clip_high(row_hits[-1], n_rows),
        clip_low(col_hits[0]),
        clip_high(col_hits[-1], n_cols),
        depth_hits[0],
        depth_hits[-1] + 1,
    )

In [None]:
def _build_annotation_json(image_paths, json_paths, json_save_path, dataset_dir):
    """Write (or extend) a COCO-style annotation file for a list of images.

    Shared implementation behind ``create_train_json``, ``create_test_json``
    and ``create_private_test_json``; the three differ only in ``dataset_dir``.

    Args:
        image_paths: Paths of the images to register. Each image is read with
            ``cv2.imread`` to obtain its width and height.
        json_paths: Per-image annotation files, paired with ``image_paths`` by
            position, or ``None`` to register images without annotations.
            Each file must hold a top-level ``'shapes'`` list whose items have
            ``'points'`` (a list of ``[x, y]`` pairs) and ``'group_id'``.
        json_save_path: Output file. If it already exists its ``'images'`` and
            ``'annotations'`` lists are loaded and appended to, and ids
            continue from the existing list lengths, so calling this twice
            with the same inputs duplicates the entries.
        dataset_dir: Prefix stored in front of the image file name in the
            ``file_name`` / ``segm_file`` fields, e.g. ``'TrainDataset/img/'``.

    Raises:
        ValueError: If ``json_paths`` has fewer entries than ``image_paths``.
        OSError: If ``cv2.imread`` cannot read one of the images.
    """
    # A finer six-class scheme (zh_str, zh_ch, en+num_str, zh+en+num_str,
    # zh_str+ch, other) was sketched here earlier; every shape is currently
    # mapped onto the single 'text' category below.
    categories = [{'id': 0, 'name': 'text'}]

    # Fail before doing any work instead of hitting an IndexError half-way.
    if json_paths is not None and len(json_paths) < len(image_paths):
        raise ValueError(
            'json_paths has %d entries but image_paths has %d'
            % (len(json_paths), len(image_paths))
        )

    if os.path.isfile(json_save_path):
        with open(json_save_path, 'r') as f:
            json_file = json.load(f)
        annotations = json_file['annotations']
        image_infos = json_file['images']
    else:
        json_file = {}
        annotations = []
        image_infos = []

    annotation_id = len(annotations)
    image_id = len(image_infos)
    for index, image_path in enumerate(image_paths):
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise OSError('cv2.imread could not read image: %s' % image_path)

        file_name = os.path.basename(image_path)
        # splitext keeps this correct for extensions of any length (.jpeg, ...).
        stem = os.path.splitext(file_name)[0]
        image_infos.append({
            "id": image_id,
            "file_name": dataset_dir + file_name,
            "segm_file": dataset_dir + 'gt_' + stem + '.txt',
            "width": img.shape[1],
            "height": img.shape[0]
        })

        if json_paths is not None:
            with open(json_paths[index], 'r') as f:
                anns = json.load(f)

            for ann in anns['shapes']:
                # Coordinates are truncated to integers before area/bbox are derived.
                points = np.array(ann['points']).astype('int32')
                xs, ys = points[:, 0], points[:, 1]
                x_min, y_min = int(xs.min()), int(ys.min())

                annotations.append({
                    "id": annotation_id,
                    "image_id": image_id,
                    # group_id 255 is passed through unchanged (presumably an
                    # "ignore" label -- confirm); everything else becomes 'text'.
                    "category_id": 0 if ann['group_id'] != 255 else 255,
                    "iscrowd": 0,
                    "area": cv2.contourArea(points),
                    # [x, y, width, height] of the polygon's axis-aligned box.
                    "bbox": [x_min, y_min, int(xs.max()) - x_min, int(ys.max()) - y_min],
                    "segmentation": [points.flatten().tolist()]
                })
                annotation_id += 1
        image_id += 1

    json_file.update({'images': image_infos, 'annotations': annotations, 'categories': categories})
    with open(json_save_path, 'w') as outfile:
        json.dump(json_file, outfile)


def create_train_json(image_paths, json_paths, json_save_path):
    """Build the annotation file for the training set.

    Entries are recorded under ``'TrainDataset/img/'``. Appends to
    ``json_save_path`` when that file already exists.
    """
    _build_annotation_json(image_paths, json_paths, json_save_path, 'TrainDataset/img/')


def create_test_json(image_paths, json_paths, json_save_path):
    """Build the annotation file for the public test set.

    Entries are recorded under ``'PublicTestDataset/img/'``. Appends to
    ``json_save_path`` when that file already exists.
    """
    _build_annotation_json(image_paths, json_paths, json_save_path, 'PublicTestDataset/img/')


def create_private_test_json(image_paths, json_paths, json_save_path):
    """Build the annotation file for the private test set.

    Entries are recorded under ``'PrivateTestDataset/img/'``. Appends to
    ``json_save_path`` when that file already exists.
    """
    _build_annotation_json(image_paths, json_paths, json_save_path, 'PrivateTestDataset/img/')

In [None]:
path_for_train_data = './TrainDataset/img/'
path_for_train_json = './TrainDataset/json/'
path_for_test_data = './PublicTestDataset/img/'
path_for_private_test_data = './PrivateTestDataset/img/'


def list_indexed_files(directory, prefix='img_'):
    """Return the files ``<directory>/<prefix>*`` sorted by their numeric index.

    The index is the integer found between ``prefix`` and the file extension
    (``12`` for ``img_12.jpg``). The extension is stripped with
    ``os.path.splitext`` so its length does not matter.

    Raises:
        ValueError: If a matching file name has a non-numeric index.
    """
    def file_index(path):
        stem = os.path.splitext(os.path.basename(path))[0]
        return int(stem[len(prefix):])

    return sorted(glob(os.path.join(directory, prefix + '*')), key=file_index)


# NOTE(review): none of the *_prefix_len values is read anywhere in the visible
# cells, and the test variants count the '*' of 'img_*' while the train ones do
# not. They are kept with their original values in case a later cell uses them.
train_prefix_len = len(path_for_train_data) + len('img_')
train_images = list_indexed_files(path_for_train_data)
print(len(train_images),' matching training image files found.')

train_json_prefix_len = len(path_for_train_json) + len('img_')
train_jsons = list_indexed_files(path_for_train_json)
print(len(train_jsons),' matching training json files found.')

# Images and annotation files are paired purely by list position downstream,
# so a missing or extra file would silently attach annotations to the wrong image.
if ([os.path.splitext(os.path.basename(p))[0] for p in train_images]
        != [os.path.splitext(os.path.basename(p))[0] for p in train_jsons]):
    warnings.warn('training images and json files do not line up one-to-one')

test_prefix_len = len(path_for_test_data) + len('img_*')
test_images = list_indexed_files(path_for_test_data)
print(len(test_images),' matching testing image files found.')

private_test_prefix_len = len(path_for_private_test_data) + len('img_*')
private_test_images = list_indexed_files(path_for_private_test_data)
print(len(private_test_images),' matching private testing image files found.')

4000  matching training image files found.
4000  matching training json files found.
1000  matching testing image files found.
2500  matching private testing image files found.


In [None]:
# Write the training annotation file from the sorted image list and the
# per-image annotation JSON files, which are paired by list position.
# Caution: create_train_json appends when the output file already exists, so
# re-running this cell duplicates every entry -- delete the file first for a
# clean rebuild.
json_save_path = os.path.join('TrainDataset/', 'instances_training.json')
create_train_json(train_images, train_jsons, json_save_path)

In [None]:
# Write the public test image list; json_paths is None, so only image
# records are added and no annotations are read.
# Caution: create_test_json appends when the output file already exists, so
# re-running this cell duplicates every entry.
json_save_path = os.path.join('PublicTestDataset/', 'instances_public_test.json')
create_test_json(test_images, None, json_save_path)

In [None]:
# Write the private test image list; json_paths is None, so only image
# records are added and no annotations are read.
# Caution: create_private_test_json appends when the output file already
# exists, so re-running this cell duplicates every entry.
json_save_path = os.path.join('PrivateTestDataset/', 'instances_private_test.json')
create_private_test_json(private_test_images, None, json_save_path)