In [49]:
import json
import os
import numpy as np
import datetime
import skimage

In [55]:
class Dataset(object):
    """Collects GeoJSON polygon features and image metadata into COCO-style lists.

    Typical use is ``load_data(annotation_dir, images_dir)`` followed by
    ``write_json(outputName)``. The accumulated state lives in five
    attributes (``info_info``, ``license_info``, ``categories_info``,
    ``images_info``, ``annotations_info``), each a list of dictionaries.
    """

    def __init__(self):
        now = datetime.datetime.now()
        # Single-entry list describing the dataset; only year/date are filled in.
        self.info_info = [{'year':now.year,'version':'','description':'','contributor':'','url':'','date_created':str(now)}]
        # list of dictionaries
        self.license_info = [{'id': 0, 'name': 'GEOJsonToCOCO', 'url': ''}]
        # Background is always the first class (id 0).
        self.categories_info = [{'id': 0, 'name': 'BG', 'supercategory': ''}]
        # lists of dictionaries, filled by add_image / add_annotation
        self.images_info = []
        self.annotations_info = []

    def load_geojson_file(self, filePath, imagePath):
        """Register one image and every Polygon feature of its GeoJSON file.

        Args:
            filePath: path of the GeoJSON annotation file.
            imagePath: path of the matching image. Its file name must end in
                ``_<3 chars><digits>.<ext>``; the digits become the image id.

        Raises:
            ValueError: if the image id cannot be parsed from the file name.
            KeyError: if a Polygon feature lacks ``properties['OBJECTID_1']``
                or ``type``.
        """
        # Context manager so the handle is closed even if parsing fails.
        with open(filePath, encoding='utf-8') as json_file:
            geo_json = json.load(json_file)

        imageFilename = os.path.basename(imagePath)
        # Take the last '_'-separated token of the stem, drop its first three
        # characters and parse the remaining digits as the image id.
        imageID = int(imageFilename.split('.')[0].split('_')[-1][3:])
        imageWidth, imageHeight = self._read_image_size(imagePath)
        self.add_image(imageFilename, imageID, imageWidth, imageHeight)

        # Add polygons; any other geometry type (or a null geometry) is skipped.
        for feature in geo_json.get('features', []):
            geometry = feature.get('geometry') or {}
            if geometry.get('type') != 'Polygon':
                continue
            polygonList, polyArea = self.build_polygonListAndArea(geometry)
            # NOTE(review): OBJECTID_1 is used verbatim as the annotation id;
            # confirm it is unique across all files loaded into one Dataset.
            objectID = feature['properties']['OBJECTID_1']
            # Annotations must reference a category *id*, so the feature's
            # type string is mapped to the id of the category with that name.
            category_id = self._category_id_for_name(feature['type'])
            self.add_annotation(objectID, imageID, category_id, polygonList, polyArea)

    def _read_image_size(self, imagePath):
        """Return ``(width, height)`` of the image at ``imagePath``."""
        # A bare "import skimage" is not guaranteed to load the io submodule
        # on every scikit-image version, so import it explicitly here.
        import skimage.io
        image = skimage.io.imread(imagePath)
        # Array shape is (rows, columns[, channels]): rows are the height.
        imageHeight, imageWidth = image.shape[:2]
        return imageWidth, imageHeight

    def _category_id_for_name(self, cat_name):
        """Return the id of the category named ``cat_name``, creating it if needed."""
        for category in self.categories_info:
            if category['name'] == cat_name:
                return category['id']
        # Not registered yet: take the smallest positive id that is still free
        # (0 is reserved for the background class).
        used_ids = {category['id'] for category in self.categories_info}
        new_id = 1
        while new_id in used_ids:
            new_id += 1
        self.add_category(new_id, cat_name)
        return new_id

    def build_polygonListAndArea(self, geometry):
        """Flatten a Polygon's first ring and compute its area.

        Only ``geometry['coordinates'][0]`` is used; further rings are ignored,
        as are any coordinate components beyond the first two.

        Returns:
            ``(polygonList, polyArea)`` where ``polygonList`` is
            ``[x0, y0, x1, y1, ...]`` and ``polyArea`` is the ring's area in
            the same units as the coordinates.
        """
        polygonList = []
        x = []
        y = []
        for coordinate in geometry['coordinates'][0]:
            x.append(coordinate[0])
            y.append(coordinate[1])
            polygonList.extend((coordinate[0], coordinate[1]))
        polyArea = self.polygon_area(np.asarray(x), np.asarray(y))
        return polygonList, polyArea

    def polygon_area(self, x, y):
        """Return the unsigned area of the polygon with vertices ``(x[i], y[i])``.

        Uses the shoelace formula; the ring may be open or explicitly closed.
        An empty vertex list has area 0.0.
        """
        if len(x) == 0:
            # x[-1] below would raise IndexError on an empty ring.
            return 0.0
        # Term for the edge that closes the ring (last vertex -> first vertex).
        correction = x[-1] * y[0] - y[-1]* x[0]
        main_area = np.dot(x[:-1], y[1:]) - np.dot(y[:-1], x[1:])
        # float() keeps the value a plain Python float for JSON output.
        return float(0.5*np.abs(main_area + correction))

    def add_annotation(self, objectID, imageID, category_id, polygonList, polyArea):
        """Append one annotation record; ``bbox`` is left empty and ``iscrowd`` is 0."""
        self.annotations_info.append({'id' : objectID,
                                       'image_id' : imageID,
                                       'category_id' : category_id,
                                       'segmentation' : [polygonList],
                                       'area' : polyArea,
                                       'bbox' : [],
                                       'iscrowd' : 0})

    def add_image(self, imageFilename, imageID, imageWidth, imageHeight):
        """Append one image record; licence, URL and date fields are left blank."""
        self.images_info.append({'id':imageID,
                                 'width':imageWidth,
                                 'height':imageHeight,
                                 'file_name': imageFilename,
                                 'license':'',
                                 'flicker_url':'',
                                 'coco_url':'',
                                 'date_captured':''})

    def load_data(self, annotation_dir, images_dir):
        """Load every ``*.geojson`` file in ``annotation_dir`` with its image.

        Annotation files are expected to be named ``<prefix>_<stem>.geojson``;
        the matching image is ``<images_dir>/<folder>_<stem>.tif`` where
        ``<folder>`` is the last path component of ``images_dir`` (PAN, MIL, etc).

        Args:
            annotation_dir: directory containing the GeoJSON files.
            images_dir: directory containing the images, with or without a
                trailing separator.
        """
        # Only here to speed up adding categories (we're only dealing with one
        # category for buildings). Integer id, consistent with the background id 0.
        self.add_category(1, 'Feature')

        # Folder name of the image directory; normpath makes this independent
        # of whether a trailing slash was supplied.
        imageType = os.path.basename(os.path.normpath(images_dir))

        # Sorted so the output order does not depend on the file system.
        for jsonFile in sorted(os.listdir(annotation_dir)):
            if os.path.splitext(jsonFile)[1] != '.geojson':
                continue
            # Everything after the first '_' up to the first '.'.
            jsonFilename = jsonFile.split('_', 1)[1].split('.', 1)[0]

            # Assemble image name that corresponds with the annotation file
            image = imageType + '_' + jsonFilename + '.tif'
            imagePath = os.path.join(images_dir, image)
            filePath = os.path.join(annotation_dir, jsonFile)

            self.load_geojson_file(filePath, imagePath)

    def add_category(self, cat_id, cat_name):
        """Register a category unless one with the same id already exists."""
        if any(item["id"] == cat_id for item in self.categories_info):
            # cat_id already available, skip
            return
        self.categories_info.append({
            "id": cat_id,
            "name": cat_name,
            "supercategory": ''
        })

    def write_json(self, outputName):
        """Write all accumulated records to ``outputName`` as indented UTF-8 JSON."""
        # NOTE(review): the COCO data format names the top-level key
        # 'licenses', uses a single object for 'info' and spells the image
        # field 'flickr_url'. Key names are kept as they were so existing
        # readers of this file keep working; confirm before renaming.
        json_dict = {'license': self.license_info, 'info': self.info_info, 'categories': self.categories_info,
                     'images': self.images_info, 'annotations': self.annotations_info}
        with open(outputName, 'w', encoding='utf-8') as f:
            json.dump(json_dict, f, ensure_ascii=False, indent=4)
        

In [56]:
# Input locations: GeoJSON building annotations and the PAN image folder of
# the AOI_5_Khartoum training set. The trailing slashes are kept as written.
annotation_dir = '/mnt/shared/bryan/spacenet-data/AOI_5_Khartoum_Train/geojson/buildings/'
image_dir = '/mnt/shared/bryan/spacenet-data/AOI_5_Khartoum_Train/PAN/'

# Convert every annotation file in the folder, then write the combined result.
dataset = Dataset()
dataset.load_data(annotation_dir=annotation_dir, images_dir=image_dir)
dataset.write_json(outputName='/mnt/shared/bryan/SpaceNetImageExploration/test.json')

In [46]:
# Echo the Dataset instance; the output below is the default object repr.
dataset

<__main__.Dataset at 0x7f2295770a90>