# Intro
This notebook provides functions and scripts to convert DOTA annotations to the TensorFlow Object Detection format. If you want to explore the dataset, please see the DOTA devkit (`DOTA_devkit`).

In [2]:
%matplotlib inline
import numpy as np
import os
from DOTA_devkit.DOTA import DOTA
from DOTA_devkit import dota_utils as util
import pylab
# Default matplotlib figure size (width, height) in inches for plots rendered in this notebook.
pylab.rcParams['figure.figsize'] = (10.0, 10.0)


## Split images
Split the images and their annotations into smaller tiles. The default tile size is 1024x1024.

In [4]:
from DOTA_devkit.ImgSplit_multi_process import splitbase

In [4]:
# Resolve paths against the notebook's working directory in pure Python.
# The previous `!pwd` / `!mkdir $var` shell-outs broke on paths containing
# spaces, were not portable, and failed when a parent directory was missing.
# `cur_dir` stays list-shaped so any code indexing `cur_dir[0]` keeps working.
cur_dir = [os.getcwd()]

# Source (high-resolution) and destination (split) directories, relative to
# the working directory. Uncomment one of the pairs below to process the
# train or val set instead of the bundled example.
hi_res_dir = 'DOTA_devkit/example'
split_dir = 'DOTA_devkit/examplesplit'
# hi_res_dir = 'dataset/high_res_train/'
# split_dir = 'dataset/train/'
# hi_res_dir = 'dataset/high_res_val/'
# split_dir = 'dataset/val/'

hi_res_dir = os.path.join(cur_dir[0], hi_res_dir)
split_dir = os.path.join(cur_dir[0], split_dir)

# Create the output tree (<split_dir>/images and <split_dir>/labelTxt).
# exist_ok=True makes the cell safe to re-run.
split_im_dir = os.path.join(split_dir, 'images')
split_txt_dir = os.path.join(split_dir, 'labelTxt')
for out_dir in (split_dir, split_im_dir, split_txt_dir):
    os.makedirs(out_dir, exist_ok=True)

split = splitbase(hi_res_dir, split_dir, choosebestpoint=True)
# NOTE(review): the argument is presumably the resize rate applied before
# splitting (1 = keep original scale) -- confirm against DOTA_devkit.
split.splitdata(1)

padding: True


## Remove junk images from training set
There are many images in the training set that contain no objects at all, which slows down the training process.
Alternatively, set shuffle to true when training on the dataset.

## Split train/val set
DOTA provides a val set, but it is relatively big to use for evaluation during training. Instead, we randomly choose 300 images from the full set to use as the val set (or you can choose from the training dataset as well). The script below takes the full data directory, randomly chooses 300 images, and copies these images (and their annotations) to a new directory.

In [29]:
import random
import shutil
import sys  # needed for sys.exit below; it was previously only imported in a later cell

# Kept list-shaped (like the output of `!pwd`) for code that indexes cur_dir[0].
cur_dir = [os.getcwd()]
num_vals = 300
full_data_dir = 'dataset/val_full'
full_im_dir = os.path.join(full_data_dir, 'images')
full_txt_dir = os.path.join(full_data_dir, 'labelTxt')

# Abort early if the source dataset does not have the expected layout.
if not os.path.exists(full_im_dir):
    sys.exit("images folder not found in " + full_data_dir)
if not os.path.exists(full_txt_dir):
    sys.exit("labelTxt folder not found in " + full_data_dir)

# Destination layout: dataset/val/{images,labelTxt}. makedirs with
# exist_ok=True replaces the shell `mkdir` calls and is safe to re-run.
val_data_dir = 'dataset/val'
val_im_dir = os.path.join(val_data_dir, 'images')
val_txt_dir = os.path.join(val_data_dir, 'labelTxt')
for out_dir in (val_data_dir, val_im_dir, val_txt_dir):
    os.makedirs(out_dir, exist_ok=True)

# Collect regular entries of the image directory. The isdir test must use the
# full path (a bare name would be resolved against the working directory), and
# the listing is sorted because os.listdir order is arbitrary -- without that
# the fixed seed below would not give a reproducible selection.
img_list = sorted(
    name for name in os.listdir(full_im_dir)
    if not os.path.isdir(os.path.join(full_im_dir, name))
)

random.seed(300)
# random.sample draws WITHOUT replacement, so the selection has no duplicates
# (random.choices could pick the same image several times, yielding fewer than
# num_vals distinct files). Cap k so small datasets do not raise ValueError.
selected_list = random.sample(img_list, k=min(num_vals, len(img_list)))
print(len(selected_list))

for img in selected_list:
    shutil.copy(src=os.path.join(full_im_dir, img), dst=val_im_dir)
    # Swap only the extension; str.replace would also rewrite a '.png' that
    # happens to appear in the middle of the file name.
    txt = os.path.splitext(img)[0] + '.txt'
    shutil.copy(src=os.path.join(full_txt_dir, txt), dst=val_txt_dir)



300


## Convert to tensorflow object detection records

In [30]:
import tensorflow as tf
import sys
import io
import PIL.Image
from object_detection.utils import dataset_util
from object_detection.utils import label_map_util

In [31]:
def create_tf_example(data,
                      imagepath,
                      label_map_dict,
                      filename,
                      ignore_difficult_instances=True
                      ):
    """Build a `tf.train.Example` for one PNG image and its annotations.

    Args:
        data: Iterable of annotation dicts. Each dict must provide 'name'
            (class name), 'difficult' (value convertible with int()) and
            'bndbox' (indexable as [xmin, ymin, xmax, ymax], in pixels).
        imagepath: Directory that contains the image file.
        label_map_dict: Mapping from class name to integer class id.
        filename: Image file name without the '.png' extension.
        ignore_difficult_instances: When True, objects flagged as difficult
            are left out of the example.

    Returns:
        A `tf.train.Example` holding the encoded image, its size and the
        per-object boxes (normalized to the image size), class names,
        class ids and difficult flags.

    Raises:
        ValueError: If the file at `imagepath/filename.png` is not a PNG.
    """
    full_path = os.path.join(imagepath, filename + '.png')
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_png = fid.read()
    image = PIL.Image.open(io.BytesIO(encoded_png))
    if image.format != 'PNG':
        raise ValueError('Image format not PNG')

    # Use the real image size instead of assuming 1024x1024, so tiles of any
    # size get correct metadata and correctly normalized boxes.
    width, height = image.size

    xmins = []  # normalized left x coordinate, one per box
    xmaxs = []  # normalized right x coordinate, one per box
    ymins = []  # normalized top y coordinate, one per box
    ymaxs = []  # normalized bottom y coordinate, one per box
    classes_text = []  # class name as bytes, one per box
    classes = []  # integer class id, one per box
    difficult_obj = []  # 0/1 difficult flag, one per box

    for obj in data:
        difficult = bool(int(obj['difficult']))
        if ignore_difficult_instances and difficult:
            continue

        # Resolve the class BEFORE appending anything: skipping an unknown
        # class after the box was already recorded would leave the per-box
        # lists with different lengths and misalign boxes and labels.
        name = obj['name']
        if name not in label_map_dict:
            print('Skipping object with unknown class {!r} in {}'.format(
                name, filename))
            continue

        # Clamp the box to the image bounds.
        xmin = max(obj['bndbox'][0], 0)
        ymin = max(obj['bndbox'][1], 0)
        xmax = min(obj['bndbox'][2], width - 1)
        ymax = min(obj['bndbox'][3], height - 1)

        xmins.append(float(xmin) / width)
        ymins.append(float(ymin) / height)
        xmaxs.append(float(xmax) / width)
        ymaxs.append(float(ymax) / height)

        classes_text.append(name.encode('utf8'))
        classes.append(label_map_dict[name])
        difficult_obj.append(int(difficult))

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_png),
        'image/format': dataset_util.bytes_feature('png'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
        # The difficult flags were collected but never stored before.
        'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
    }))
    return tf_example



In [33]:
# Kept list-shaped (like the output of `!pwd`) for code that indexes cur_dir[0].
cur_dir = [os.getcwd()]

data_dir = 'dataset/val/'
label_map = 'annotations/dota_label_map.pbtxt'
output_name = 'dota_val.record'
index_file = 'val.txt'
data_dir = os.path.join(cur_dir[0], data_dir)
label_map = os.path.join(cur_dir[0], label_map)

data_im_dir = os.path.join(data_dir, 'images')
data_txt_dir = os.path.join(data_dir, 'labelTxt')
imagepath = data_im_dir
index_path = os.path.join(data_dir, index_file)

if not os.path.isdir(data_im_dir):
    raise ValueError('{} not in the path: {}'.format('images', data_dir))

# Build the image index in Python instead of shelling out to
# `find ... -name *.png`: the unquoted glob could be expanded by the shell and
# unquoted paths broke on spaces. Only files directly inside images/ are
# listed, because create_tf_example reads images from that flat directory.
# Sorting makes the record order deterministic.
image_paths = sorted(
    os.path.join(data_im_dir, name)
    for name in os.listdir(data_im_dir)
    if name.endswith('.png') and os.path.isfile(os.path.join(data_im_dir, name))
)
if not image_paths:
    raise ValueError('no .png images found in: {}'.format(data_im_dir))

# The index file is still written (one image path per line) for other tools.
with open(index_path, 'w') as index_fh:
    index_fh.writelines(path + '\n' for path in image_paths)

output_path = os.path.join(data_dir, 'tf_records')
os.makedirs(output_path, exist_ok=True)
record_path = os.path.join(output_path, output_name)

# Derive each label path from the image's base name. Replacing 'images' with
# 'labelTxt' across the whole absolute path would also rewrite any parent
# directory that happens to contain the word 'images'.
txtlist = [
    os.path.join(data_txt_dir,
                 os.path.splitext(os.path.basename(path))[0] + '.txt')
    for path in image_paths
]

# The label map does not change between images, so parse it once.
label_map_dict = label_map_util.get_label_map_dict(label_map)

writer = tf.python_io.TFRecordWriter(record_path)
print('start-------')
try:
    for fullname in txtlist:
        data = util.parse_dota_rec(fullname)
        basename = os.path.basename(os.path.splitext(fullname)[0])
        tf_example = create_tf_example(data,
                                       imagepath,
                                       label_map_dict,
                                       basename)
        writer.write(tf_example.SerializeToString())
finally:
    # Always flush and close the record, even if one image fails to convert.
    writer.close()
print("Done, record is written to " + record_path)

start-------
Done, record is written to 
