In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import glob
import random

In [3]:
from annotation_formatter import AnnotationFormatter

In [4]:
# http://code.activestate.com/recipes/303060-group-a-list-into-sequential-n-tuples/
def group(lst, n):
    """Group a sequence into a list of consecutive n-tuples.

    Elements left over after the last complete tuple are discarded.

    Args:
        lst: A sliceable sequence (list, tuple, range, ...).
        n: Number of elements per tuple.

    Returns:
        A list of n-tuples in the original order. The list is empty when
        ``lst`` holds fewer than ``n`` elements or when ``n`` is not positive.

    >>> group([0, 3, 4, 10, 2, 3], 2)
    [(0, 3), (4, 10), (2, 3)]
    >>> group(range(10), 3)
    [(0, 1, 2), (3, 4, 5), (6, 7, 8)]
    """
    # Slice i holds every n-th element starting at offset i. Zipping the n
    # slices re-assembles consecutive runs, and zip stops at the shortest
    # slice, which is what drops a trailing incomplete tuple.
    strided_slices = [lst[offset::n] for offset in range(n)]
    # zip() is lazy on Python 3; materialise it so the function really
    # returns the list its docstring promises (and can be iterated twice).
    return list(zip(*strided_slices))

In [5]:
# Each label file corresponds to one image. Consecutive entries of the sorted
# listing are treated as a (pre, post) pair, and a pair is never split up, so
# both of its files always end up in the same subset.
# NOTE(review): this relies on the two files of a pair sorting next to each
# other -- confirm against the naming scheme of the label files.
annotation_files = glob.glob("data/train/labels/*")
annotation_files.sort()
annotation_files_as_pairs = list(group(annotation_files, 2))

# A fixed seed keeps the shuffle identical for an identical file listing.
random.Random(4).shuffle(annotation_files_as_pairs)

# Roughly the first 80% of the shuffled pairs are used for training, the
# remainder for validation.
index = int(0.8 * len(annotation_files_as_pairs))

# Flatten the pairs back into plain lists of file names (pre first, then post).
train_files = [path for pair in annotation_files_as_pairs[:index] for path in pair]
val_files = [path for pair in annotation_files_as_pairs[index:] for path in pair]

In [6]:
# Report how many label files ended up in each subset.
print(
    "Num train files: {}\nNum val files: {}".format(
        len(train_files),
        len(val_files),
    )
)

Num train files: 4478
Num val files: 1120


In [7]:
# Create the train and validation datasets for instance segmentation.
def _export_instance_segmentation_split(filenames, output_path):
    """Write one instance-segmentation dataset file.

    Creates an ``AnnotationFormatter`` with ``instance_segmentation=True``,
    adds every file in ``filenames`` to it in order, writes the result to
    ``output_path`` and returns the formatter.
    """
    formatter = AnnotationFormatter(instance_segmentation=True)
    for filename in filenames:
        formatter.add_image_from_filename(filename)
    formatter.write_to_json(output_path)
    return formatter


# The training subset is processed and written before the validation subset.
train_annotation_formatter = _export_instance_segmentation_split(
    train_files, "xview_instance_segmentation_dataset_train.json"
)
val_annotation_formatter = _export_instance_segmentation_split(
    val_files, "xview_instance_segmentation_dataset_val.json"
)

In [8]:
# Create the train and validation datasets for semantic segmentation.
def _export_semantic_segmentation_split(filenames, output_path):
    """Write one semantic-segmentation dataset file.

    Creates an ``AnnotationFormatter`` with ``instance_segmentation=False``,
    adds every file in ``filenames`` to it in order, writes the result to
    ``output_path`` and returns the formatter.
    """
    formatter = AnnotationFormatter(instance_segmentation=False)
    for filename in filenames:
        formatter.add_image_from_filename(filename)
    formatter.write_to_json(output_path)
    return formatter


# The training subset is processed and written before the validation subset.
train_annotation_formatter = _export_semantic_segmentation_split(
    train_files, "xview_semantic_segmentation_dataset_train.json"
)
val_annotation_formatter = _export_semantic_segmentation_split(
    val_files, "xview_semantic_segmentation_dataset_val.json"
)