In [10]:
import os
import json
import geojson
import pandas as pd
from tqdm import tqdm
from osgeo import gdal
from sklearn.model_selection import train_test_split

# custom functions
import sys
sys.path.append('../')
from utils.functions import grab_certain_file
# TODO delete functions.py in data_preperation folder. Use main utils instead.

'''
Read NSO tiles and annotations geojsons, convert lat/lon of tile to pixel coordinates and save pixel coordinates into
.json file If more than on .json file can be saved as one via_regions.json.
Source:  https://github.com/rl02898/detectron2-spacenet. JDP Edits:some and saving json with origins of tile in jsons to allow stiching back
of the tiles.
'''

In [11]:
# Divide dataset and set a random seed for reproducibility of the splits for next script
# TODO: path nso_train and nso_geojson to tiling.py output => Better move output of tiling to those 2 folder in NSO/ 

RANDOM_SEED = 560


# Change name to dataset...
nso_train = "../NSO/train"
nso_test = "../NSO/test"
nso_val = "../NSO/val"
nso_geojson = "../NSO/geojsons"

In [15]:
RANDOM_SEED = 560

nso_small_tiles = "../NSO/NSO_small_tiles"
nso_geojson = "../NSO/geojsons"

# Create JSONs for Detectron2 NO test set
#nso_images = grab_certain_file(".tif", nso_small_tiles)
#train, val = train_test_split(nso_images, test_size=0.2, random_state=RANDOM_SEED)

# Create JSONs for Detectron2 WITH test set
nso_images = grab_certain_file(".tif", nso_small_tiles)
train, test = train_test_split(nso_images, test_size=0.20, random_state=RANDOM_SEED)
train, val = train_test_split(train, test_size=0.25, random_state=RANDOM_SEED)

In [18]:
# Training set

# Create an empty dictionary to store the training set of annotations and their pixel coordinates
train_dict = {}

# Loop over each image in the training set
for file in tqdm(train, desc="Creating JSONs for Detectron2 on train", ncols=150, bar_format="{l_bar}{bar:10}{r_bar}"):
    file_path = os.path.join(nso_small_tiles, file)
    img_id = file.split(".tif")[0]
    geojson_path = os.path.join(nso_geojson, f"{img_id}.geojson")
    
    #Not all tiles have annotations, thus:
    if os.path.exists(geojson_path):
        
        # Load the geojson in gj
        with open(geojson_path) as f:
            gj = geojson.load(f)
        
        # Create a dictionary to store the regions (annotations spatial features) for the image
        regions = {}
        num_buildings = len(gj["features"])
        print (num_buildings) 
        # Open the image with gdal to get pixel size and origin if feature exists
        if num_buildings > 0:
            gdal_image = gdal.Open(file_path)
            
            # Get the pixel width and height(0.5 for nso) and the origin coordinates
            #https://www.gis.usu.edu/~chrisg/python/2009/lectures/ospy_slides4.pdf
            pixel_width, pixel_height = gdal_image.GetGeoTransform()[1], gdal_image.GetGeoTransform()[5]
            originX, originY = gdal_image.GetGeoTransform()[0], gdal_image.GetGeoTransform()[3]
            
            # Loop over each building/assets in the image
            for i in range(num_buildings):
                
                # Get the polygon points for the asset
                #https://stackoverflow.com/questions/23306653/python-accessing-nested-json-data
                points = gj["features"][i]["geometry"]["coordinates"][0]
                
                # If there is only one point, unwarp it=>check
                if len(points) == 1:
                    points = points[0]

                #Empty lists to store pixel coordinates
                all_points_x, all_points_y = [], []
                
                # Convert the lat/long points to pixel coordinates by substacting origin
                for j in range(len(points)):
                    all_points_x.append(int(round((points[j][0] - originX) / pixel_width)))
                    all_points_y.append(int(round((points[j][1] - originY) / pixel_height)))
                    
                # Create a dictionary to store the asset footprint
                regions[str(i)] = {"shape_attributes":
                                       {"name": "polygon",
                                        "all_points_x": all_points_x,
                                        "all_points_y": all_points_y,
                                        "category": 0
                                       },
                                   "region_attributes": {}
                                  }
                print (regions)
        #Should probably save origin x and y here but we still have the og tiles and imgid and allow to stich tiles back together
        #TODO: same for tiles without annot.Eg create json with empty regions in the else
        dictionary = {"file_ref": '',
                      "size": os.path.getsize(file_path),
                      "filename": file.replace(".tif", ".png"),
                      "base64_img_data": '',
                      "file_attributes": {},
                      "regions": regions,
                      "origin_x": originX,
                      "origin_y": originY
                     }
        print (dictionary)
        train_dict[file.replace(".tif", ".png")] = dictionary
    else:
        continue

with open("../NSO/train/nso.json", "w") as f:
    json.dump(train_dict, f)

Creating JSONs for Detectron2 on train:   1%|          | 151/22719 [00:00<00:20, 1086.69it/s]

2
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [380, 380, 331, 317, 332, 117, 106, 93, 103, 0, 0, 380], 'all_points_y': [412, 414, 568, 566, 516, 442, 483, 471, 437, 398, 269, 412], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [380, 380, 331, 317, 332, 117, 106, 93, 103, 0, 0, 380], 'all_points_y': [412, 414, 568, 566, 516, 442, 483, 471, 437, 398, 269, 412], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [380, 380, 331, 317, 332, 117, 106, 93, 103, 0, 0, 380], 'all_points_y': [412, 414, 568, 566, 516, 442, 483, 471, 437, 398, 269, 412], 'category': 0}, 'region_attributes': {}}}
{'file_ref': '', 'size': 3318900, 'filename': '11_20221009_111423_SV1-03_SV_RD_8bit_RGB_50cm_Rhoon_11000_13000.png', 'base64_img_data': '', 'file_attributes': {}, 'regions': {'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [380, 380, 331, 317, 332, 117, 106, 93, 10

Creating JSONs for Detectron2 on train:   6%|▌         | 1407/22719 [00:00<00:04, 4815.97it/s]

2
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [999, 1000, 1000, 999], 'all_points_y': [344, 335, 344, 344], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [999, 1000, 1000, 999], 'all_points_y': [344, 335, 344, 344], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [999, 1000, 1000, 999], 'all_points_y': [344, 335, 344, 344], 'category': 0}, 'region_attributes': {}}}
{'file_ref': '', 'size': 3165795, 'filename': '6_20220717_104955_SV2-01_SV_RD_8bit_RGB_50cm_Venlo_13000_6000.png', 'base64_img_data': '', 'file_attributes': {}, 'regions': {'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [999, 1000, 1000, 999], 'all_points_y': [344, 335, 344, 344], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [999, 1000, 1000, 999], 'all_points_y': [344, 335, 344, 344], 'category': 0}, 'region_attributes'

Creating JSONs for Detectron2 on train:  12%|█▏        | 2766/22719 [00:00<00:03, 5726.14it/s]

2
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [116, 67, 61, 115, 116], 'all_points_y': [4, 20, 0, 0, 4], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [116, 67, 61, 115, 116], 'all_points_y': [4, 20, 0, 0, 4], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [116, 67, 61, 115, 116], 'all_points_y': [4, 20, 0, 0, 4], 'category': 0}, 'region_attributes': {}}}
{'file_ref': '', 'size': 3227064, 'filename': '20_20220729_103928_SV2-01_SV_RD_8bit_RGB_50cm_Nuenen_29000_15000.png', 'base64_img_data': '', 'file_attributes': {}, 'regions': {'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [116, 67, 61, 115, 116], 'all_points_y': [4, 20, 0, 0, 4], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [116, 67, 61, 115, 116], 'all_points_y': [4, 20, 0, 0, 4], 'category': 0}, 'region_attributes': {}}}, 'ori

Creating JSONs for Detectron2 on train:  15%|█▍        | 3403/22719 [00:00<00:03, 5413.12it/s]

2
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [1, 0, 0, 18, 36, 0, 0, 20, 57, 84, 50, 102, 141, 149, 190, 141, 159, 140, 113, 96, 122, 99, 75, 53, 74, 15, 0, 0, 1], 'all_points_y': [166, 165, 119, 148, 133, 87, 84, 69, 126, 108, 47, 8, 66, 60, 129, 173, 208, 221, 187, 198, 230, 252, 217, 237, 263, 310, 286, 166, 166], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [1, 0, 0, 18, 36, 0, 0, 20, 57, 84, 50, 102, 141, 149, 190, 141, 159, 140, 113, 96, 122, 99, 75, 53, 74, 15, 0, 0, 1], 'all_points_y': [166, 165, 119, 148, 133, 87, 84, 69, 126, 108, 47, 8, 66, 60, 129, 173, 208, 221, 187, 198, 230, 252, 217, 237, 263, 310, 286, 166, 166], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [1, 0, 0, 18, 36, 0, 0, 20, 57, 84, 50, 102, 141, 149, 190, 141, 159, 140, 113, 96, 122, 99, 75, 53, 74, 15, 0, 0, 1], 'all_points_y': [166, 165, 119, 148, 133, 87, 84, 69, 126, 

Creating JSONs for Detectron2 on train:  22%|██▏       | 5002/22719 [00:01<00:03, 5457.10it/s]

2
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [1000, 1000, 998, 1000, 1000, 874, 866, 827, 848, 858, 869, 896, 940, 975, 988, 1000], 'all_points_y': [478, 503, 507, 508, 649, 547, 563, 533, 498, 506, 494, 517, 462, 490, 466, 478], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [1000, 1000, 998, 1000, 1000, 874, 866, 827, 848, 858, 869, 896, 940, 975, 988, 1000], 'all_points_y': [478, 503, 507, 508, 649, 547, 563, 533, 498, 506, 494, 517, 462, 490, 466, 478], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [1000, 1000, 998, 1000, 1000, 874, 866, 827, 848, 858, 869, 896, 940, 975, 988, 1000], 'all_points_y': [478, 503, 507, 508, 649, 547, 563, 533, 498, 506, 494, 517, 462, 490, 466, 478], 'category': 0}, 'region_attributes': {}}}
{'file_ref': '', 'size': 3377169, 'filename': '14_20220830_110404_SV1-03_SV_RD_8bit_RGB_50cm_Amsterdam_17000_10000.png', 'base64_

Creating JSONs for Detectron2 on train:  25%|██▍       | 5643/22719 [00:01<00:04, 3901.75it/s]

2
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [75, 58, 54, 84, 73, 70, 78, 76, 67, 65, 72, 69, 41, 47, 31, 33, 27, 22, 0, 0, 6, 0, 0, 20, 20, 8, 16, 79, 75], 'all_points_y': [458, 452, 469, 480, 479, 492, 497, 503, 500, 510, 514, 525, 514, 496, 488, 480, 477, 487, 477, 474, 460, 458, 437, 443, 438, 430, 413, 440, 458], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [75, 58, 54, 84, 73, 70, 78, 76, 67, 65, 72, 69, 41, 47, 31, 33, 27, 22, 0, 0, 6, 0, 0, 20, 20, 8, 16, 79, 75], 'all_points_y': [458, 452, 469, 480, 479, 492, 497, 503, 500, 510, 514, 525, 514, 496, 488, 480, 477, 487, 477, 474, 460, 458, 437, 443, 438, 430, 413, 440, 458], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [75, 58, 54, 84, 73, 70, 78, 76, 67, 65, 72, 69, 41, 47, 31, 33, 27, 22, 0, 0, 6, 0, 0, 20, 20, 8, 16, 79, 75], 'all_points_y': [458, 452, 469, 480, 479, 492, 497, 503, 500, 51

Creating JSONs for Detectron2 on train:  29%|██▊       | 6503/22719 [00:01<00:03, 4825.58it/s]

2
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [852, 852, 803, 789, 804, 589, 578, 565, 575, 429, 414, 404, 418, 377, 362, 316, 322, 303, 291, 248, 258, 242, 229, 197, 213, 198, 219, 400, 404, 225, 238, 852], 'all_points_y': [696, 698, 852, 850, 800, 726, 767, 755, 721, 665, 707, 703, 660, 645, 689, 666, 636, 632, 665, 643, 613, 603, 643, 627, 586, 578, 516, 577, 567, 507, 465, 696], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [852, 852, 803, 789, 804, 589, 578, 565, 575, 429, 414, 404, 418, 377, 362, 316, 322, 303, 291, 248, 258, 242, 229, 197, 213, 198, 219, 400, 404, 225, 238, 852], 'all_points_y': [696, 698, 852, 850, 800, 726, 767, 755, 721, 665, 707, 703, 660, 645, 689, 666, 636, 632, 665, 643, 613, 603, 643, 627, 586, 578, 516, 577, 567, 507, 465, 696], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [852, 852, 803, 789, 804, 589, 578, 565, 575, 

Creating JSONs for Detectron2 on train:  35%|███▍      | 7949/22719 [00:01<00:02, 5294.27it/s]

2
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [5, 0, 0, 5], 'all_points_y': [610, 613, 605, 610], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [5, 0, 0, 5], 'all_points_y': [610, 613, 605, 610], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [5, 0, 0, 5], 'all_points_y': [610, 613, 605, 610], 'category': 0}, 'region_attributes': {}}}
{'file_ref': '', 'size': 3324898, 'filename': '2_20220724_110844_SV1-04_SV_RD_8bit_RGB_50cm_Dongen_14000_9000.png', 'base64_img_data': '', 'file_attributes': {}, 'regions': {'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [5, 0, 0, 5], 'all_points_y': [610, 613, 605, 610], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [5, 0, 0, 5], 'all_points_y': [610, 613, 605, 610], 'category': 0}, 'region_attributes': {}}}, 'origin_x': 123104.0, 'origin_y': 406872.

Creating JSONs for Detectron2 on train:  40%|████      | 9172/22719 [00:01<00:02, 4886.76it/s]

2
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [962, 971, 978, 1000, 1000, 995, 988, 917, 920, 916, 923, 910, 902, 896, 907, 896, 884, 867, 872, 864, 846, 837, 835, 841, 836, 827, 822, 847, 860, 893, 886, 880, 874, 899, 908, 927, 923, 916, 909, 927, 944, 953, 961, 948, 962], 'all_points_y': [622, 651, 646, 682, 693, 695, 684, 731, 741, 743, 761, 770, 756, 744, 771, 780, 756, 765, 773, 778, 744, 746, 740, 737, 726, 727, 711, 699, 726, 706, 697, 697, 684, 668, 696, 685, 675, 677, 662, 648, 638, 660, 654, 630, 622], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [962, 971, 978, 1000, 1000, 995, 988, 917, 920, 916, 923, 910, 902, 896, 907, 896, 884, 867, 872, 864, 846, 837, 835, 841, 836, 827, 822, 847, 860, 893, 886, 880, 874, 899, 908, 927, 923, 916, 909, 927, 944, 953, 961, 948, 962], 'all_points_y': [622, 651, 646, 682, 693, 695, 684, 731, 741, 743, 761, 770, 756, 744, 771, 780, 756, 765, 773, 778, 744, 746, 74

Creating JSONs for Detectron2 on train:  46%|████▌     | 10478/22719 [00:02<00:02, 4861.09it/s]

2
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [12, 27, 0, 0, 12], 'all_points_y': [882, 932, 942, 837, 882], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [12, 27, 0, 0, 12], 'all_points_y': [882, 932, 942, 837, 882], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [12, 27, 0, 0, 12], 'all_points_y': [882, 932, 942, 837, 882], 'category': 0}, 'region_attributes': {}}}
{'file_ref': '', 'size': 66462, 'filename': '23_20220717_103405_SV1-01_SV_RD_8bit_RGB_50cm_Made_3000_24000.png', 'base64_img_data': '', 'file_attributes': {}, 'regions': {'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [12, 27, 0, 0, 12], 'all_points_y': [882, 932, 942, 837, 882], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [12, 27, 0, 0, 12], 'all_points_y': [882, 932, 942, 837, 882], 'category': 0}, 'region_attribut

Creating JSONs for Detectron2 on train:  48%|████▊     | 11006/22719 [00:02<00:02, 3985.15it/s]

{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [1000, 1000, 990, 954, 934, 945, 883, 908, 891, 908, 932, 965, 942, 956, 1000], 'all_points_y': [582, 722, 714, 758, 740, 726, 676, 640, 623, 608, 628, 582, 560, 544, 582], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [1000, 1000, 990, 954, 934, 945, 883, 908, 891, 908, 932, 965, 942, 956, 1000], 'all_points_y': [582, 722, 714, 758, 740, 726, 676, 640, 623, 608, 628, 582, 560, 544, 582], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [1000, 1000, 990, 954, 934, 945, 883, 908, 891, 908, 932, 965, 942, 956, 1000], 'all_points_y': [582, 722, 714, 758, 740, 726, 676, 640, 623, 608, 628, 582, 560, 544, 582], 'category': 0}, 'region_attributes': {}}}
{'file_ref': '', 'size': 2808859, 'filename': '12_20220719_105242_SV1-04_SV_RD_8bit_RGB_50cm_Eemshaven_25000_21000.png', 'base64_img_data': '', 'file_attributes': {}, 

Creating JSONs for Detectron2 on train:  51%|█████▏    | 11671/22719 [00:02<00:02, 4459.56it/s]

2
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [584, 585, 627, 625, 641, 641, 625, 625, 642, 642, 625, 624, 646, 642, 703, 704, 726, 728, 711, 711, 750, 752, 712, 712, 751, 753, 713, 714, 754, 757, 717, 717, 755, 756, 745, 745, 715, 716, 634, 631, 621, 623, 605, 605, 574, 573, 561, 561, 554, 553, 584], 'all_points_y': [486, 600, 601, 576, 577, 564, 564, 543, 544, 534, 535, 509, 508, 468, 468, 506, 503, 534, 534, 537, 533, 564, 563, 573, 571, 595, 595, 602, 599, 627, 625, 638, 634, 668, 668, 659, 657, 669, 672, 606, 605, 669, 669, 610, 610, 599, 600, 516, 516, 486, 486], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [584, 585, 627, 625, 641, 641, 625, 625, 642, 642, 625, 624, 646, 642, 703, 704, 726, 728, 711, 711, 750, 752, 712, 712, 751, 753, 713, 714, 754, 757, 717, 717, 755, 756, 745, 745, 715, 716, 634, 631, 621, 623, 605, 605, 574, 573, 561, 561, 554, 553, 584], 'all_points_y': [486, 600, 601, 576, 577, 5

Creating JSONs for Detectron2 on train:  56%|█████▌    | 12757/22719 [00:02<00:02, 3813.42it/s]

{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [2, 6, 1, 25, 29, 35, 36, 66, 90, 53, 45, 20, 17, 4, 0, 0, 2], 'all_points_y': [841, 840, 800, 796, 834, 830, 823, 821, 989, 993, 905, 909, 901, 901, 901, 822, 841], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [2, 6, 1, 25, 29, 35, 36, 66, 90, 53, 45, 20, 17, 4, 0, 0, 2], 'all_points_y': [841, 840, 800, 796, 834, 830, 823, 821, 989, 993, 905, 909, 901, 901, 901, 822, 841], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [2, 6, 1, 25, 29, 35, 36, 66, 90, 53, 45, 20, 17, 4, 0, 0, 2], 'all_points_y': [841, 840, 800, 796, 834, 830, 823, 821, 989, 993, 905, 909, 901, 901, 901, 822, 841], 'category': 0}, 'region_attributes': {}}}
{'file_ref': '', 'size': 66462, 'filename': '7_20220811_110743_SV1-04_SV_RD_8bit_RGB_50cm_EttenLeur_30000_7000.png', 'base64_img_data': '', 'file_attributes': {}, 'regions': {'0': {'shape_

Creating JSONs for Detectron2 on train:  60%|██████    | 13711/22719 [00:03<00:02, 3954.10it/s]

{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [999, 1000, 1000, 999], 'all_points_y': [476, 474, 477, 476], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [999, 1000, 1000, 999], 'all_points_y': [476, 474, 477, 476], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [999, 1000, 1000, 999], 'all_points_y': [476, 474, 477, 476], 'category': 0}, 'region_attributes': {}}}
{'file_ref': '', 'size': 3403362, 'filename': '10_20220719_105318_SV1-04_SV_RD_8bit_RGB_50cm_Heel_19000_19000.png', 'base64_img_data': '', 'file_attributes': {}, 'regions': {'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [999, 1000, 1000, 999], 'all_points_y': [476, 474, 477, 476], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [999, 1000, 1000, 999], 'all_points_y': [476, 474, 477, 476], 'category': 0}, 'region_attributes':

Creating JSONs for Detectron2 on train:  67%|██████▋   | 15239/22719 [00:03<00:01, 5553.88it/s]

2
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [362, 526, 559, 548, 551, 562, 572, 514, 500, 490, 503, 482, 468, 459, 475, 453, 441, 428, 444, 422, 412, 402, 415, 395, 383, 370, 386, 360, 348, 312, 337, 305, 292, 283, 249, 361, 358, 252, 245, 351, 342, 242, 234, 334, 362], 'all_points_y': [810, 746, 830, 839, 858, 850, 874, 896, 854, 859, 898, 903, 867, 870, 911, 919, 878, 883, 921, 929, 891, 896, 936, 942, 900, 905, 946, 958, 913, 924, 966, 984, 944, 948, 862, 819, 811, 850, 840, 801, 779, 816, 796, 759, 810], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [362, 526, 559, 548, 551, 562, 572, 514, 500, 490, 503, 482, 468, 459, 475, 453, 441, 428, 444, 422, 412, 402, 415, 395, 383, 370, 386, 360, 348, 312, 337, 305, 292, 283, 249, 361, 358, 252, 245, 351, 342, 242, 234, 334, 362], 'all_points_y': [810, 746, 830, 839, 858, 850, 874, 896, 854, 859, 898, 903, 867, 870, 911, 919, 878, 883, 921, 929, 891, 896, 936, 9

Creating JSONs for Detectron2 on train:  72%|███████▏  | 16419/22719 [00:03<00:01, 5484.62it/s]

2
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [95, 63, 101, 112, 111, 95], 'all_points_y': [39, 0, 0, 14, 21, 39], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [95, 63, 101, 112, 111, 95], 'all_points_y': [39, 0, 0, 14, 21, 39], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [95, 63, 101, 112, 111, 95], 'all_points_y': [39, 0, 0, 14, 21, 39], 'category': 0}, 'region_attributes': {}}}
{'file_ref': '', 'size': 3434636, 'filename': '25_20220906_104605_SV1-03_SV_RD_8bit_RGB_50cm_Heerlen_17000_18000.png', 'base64_img_data': '', 'file_attributes': {}, 'regions': {'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [95, 63, 101, 112, 111, 95], 'all_points_y': [39, 0, 0, 14, 21, 39], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [95, 63, 101, 112, 111, 95], 'all_points_y': [39, 0, 0, 14, 21, 3

Creating JSONs for Detectron2 on train:  77%|███████▋  | 17553/22719 [00:03<00:01, 5000.24it/s]

2
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [420, 401, 400, 356, 358, 379, 380, 379, 352, 348, 341, 341, 325, 325, 353, 353, 381, 380, 350, 351, 324, 324, 352, 352, 390, 389, 352, 353, 255, 255, 212, 214, 267, 272, 303, 303, 275, 275, 259, 259, 216, 216, 250, 273, 306, 304, 271, 266, 266, 214, 218, 266, 274, 305, 306, 363, 418, 420], 'all_points_y': [841, 841, 828, 830, 841, 838, 854, 885, 885, 883, 882, 877, 877, 899, 896, 902, 901, 935, 935, 928, 931, 952, 952, 959, 960, 983, 984, 1000, 1000, 990, 966, 947, 966, 948, 957, 939, 926, 914, 913, 937, 916, 895, 909, 904, 911, 885, 878, 867, 877, 872, 809, 828, 817, 828, 791, 789, 807, 841], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [420, 401, 400, 356, 358, 379, 380, 379, 352, 348, 341, 341, 325, 325, 353, 353, 381, 380, 350, 351, 324, 324, 352, 352, 390, 389, 352, 353, 255, 255, 212, 214, 267, 272, 303, 303, 275, 275, 259, 259, 216, 216, 250, 273, 306, 30

Creating JSONs for Detectron2 on train:  83%|████████▎ | 18893/22719 [00:03<00:00, 5897.05it/s]

2
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [543, 543, 562, 568, 583, 576, 583, 590, 606, 598, 560, 545, 549, 550, 566, 569, 610, 607, 567, 469, 465, 472, 543], 'all_points_y': [460, 482, 486, 476, 489, 501, 506, 496, 514, 524, 489, 490, 558, 616, 611, 579, 542, 582, 624, 622, 489, 459, 460], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [543, 543, 562, 568, 583, 576, 583, 590, 606, 598, 560, 545, 549, 550, 566, 569, 610, 607, 567, 469, 465, 472, 543], 'all_points_y': [460, 482, 486, 476, 489, 501, 506, 496, 514, 524, 489, 490, 558, 616, 611, 579, 542, 582, 624, 622, 489, 459, 460], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [543, 543, 562, 568, 583, 576, 583, 590, 606, 598, 560, 545, 549, 550, 566, 569, 610, 607, 567, 469, 465, 472, 543], 'all_points_y': [460, 482, 486, 476, 489, 501, 506, 496, 514, 524, 489, 490, 558, 616, 611, 579, 542, 582, 62

Creating JSONs for Detectron2 on train:  90%|████████▉ | 20369/22719 [00:04<00:00, 6470.74it/s]

2
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [767, 757, 918, 926, 952, 947, 961, 956, 966, 970, 1000, 1000, 998, 994, 946, 907, 862, 844, 759, 742, 753, 736, 715, 683, 743, 767], 'all_points_y': [521, 542, 625, 609, 625, 640, 645, 664, 667, 656, 673, 708, 707, 718, 695, 762, 733, 721, 674, 653, 625, 611, 655, 632, 508, 521], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [767, 757, 918, 926, 952, 947, 961, 956, 966, 970, 1000, 1000, 998, 994, 946, 907, 862, 844, 759, 742, 753, 736, 715, 683, 743, 767], 'all_points_y': [521, 542, 625, 609, 625, 640, 645, 664, 667, 656, 673, 708, 707, 718, 695, 762, 733, 721, 674, 653, 625, 611, 655, 632, 508, 521], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [767, 757, 918, 926, 952, 947, 961, 956, 966, 970, 1000, 1000, 998, 994, 946, 907, 862, 844, 759, 742, 753, 736, 715, 683, 743, 767], 'all_points_y': [521, 542, 6

Creating JSONs for Detectron2 on train: 100%|██████████| 22719/22719 [00:04<00:00, 5071.28it/s]

2
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [503, 487, 476, 506, 492, 535, 542, 569, 566, 585, 591, 615, 634, 646, 625, 622, 551, 578, 499, 488, 504, 503], 'all_points_y': [623, 630, 586, 575, 506, 492, 518, 511, 491, 481, 509, 503, 498, 552, 560, 552, 571, 670, 689, 636, 632, 623], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [503, 487, 476, 506, 492, 535, 542, 569, 566, 585, 591, 615, 634, 646, 625, 622, 551, 578, 499, 488, 504, 503], 'all_points_y': [623, 630, 586, 575, 506, 492, 518, 511, 491, 481, 509, 503, 498, 552, 560, 552, 571, 670, 689, 636, 632, 623], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [503, 487, 476, 506, 492, 535, 542, 569, 566, 585, 591, 615, 634, 646, 625, 622, 551, 578, 499, 488, 504, 503], 'all_points_y': [623, 630, 586, 575, 506, 492, 518, 511, 491, 481, 509, 503, 498, 552, 560, 552, 571, 670, 689, 636, 632, 623], 'categ




In [19]:
#Validation set
val_dict = {}

for file in tqdm(val, desc="Creating JSONs for Detectron2 on val", ncols=150, bar_format="{l_bar}{bar:10}{r_bar}"):
    file_path = os.path.join(nso_small_tiles, file)
    img_id = file.split(".tif")[0]
    geojson_path = os.path.join(nso_geojson, f"{img_id}.geojson")
    if os.path.exists(geojson_path): 
        with open(geojson_path) as f:
            gj = geojson.load(f)

        regions = {}
        num_buildings = len(gj["features"])
        if num_buildings > 0:
            gdal_image = gdal.Open(file_path)
            pixel_width, pixel_height = gdal_image.GetGeoTransform()[1], gdal_image.GetGeoTransform()[5]
            originX, originY = gdal_image.GetGeoTransform()[0], gdal_image.GetGeoTransform()[3]

            for i in range(num_buildings):
                points = gj["features"][i]["geometry"]["coordinates"][0]
                if len(points) == 1:
                    points = points[0]

                all_points_x, all_points_y = [], []
                for j in range(len(points)):
                    all_points_x.append(int(round((points[j][0] - originX) / pixel_width)))
                    all_points_y.append(int(round((points[j][1] - originY) / pixel_height)))

                regions[str(i)] = {"shape_attributes":
                                       {"name": "polygon",
                                        "all_points_x": all_points_x,
                                        "all_points_y": all_points_y,
                                        "category": 0
                                       },
                                   "region_attributes": {}
                                  }

        dictionary = {"file_ref": '',
                      "size": os.path.getsize(file_path),
                      "filename": file.replace(".tif", ".png"),
                      "base64_img_data": '',
                      "file_attributes": {},
                      "regions": regions,
                      "origin_x": originX,
                      "origin_y": originY
                     }

        val_dict[file.replace(".tif", ".png")] = dictionary

with open("../NSO/val/nso.json", "w") as f:
    json.dump(val_dict, f)

Creating JSONs for Detectron2 on val: 100%|██████████| 7573/7573 [00:01<00:00, 5875.29it/s]


In [22]:
#Test set
test_dict = {}

for file in tqdm(test, desc="Creating JSONs for Detectron2 on test", ncols=150, bar_format="{l_bar}{bar:10}{r_bar}"):
    file_path = os.path.join(nso_small_tiles, file)
    img_id = file.split(".tif")[0]
    geojson_path = os.path.join(nso_geojson, f"{img_id}.geojson")
    if os.path.exists(geojson_path): 
        with open(geojson_path) as f:
            gj = geojson.load(f)

        regions = {}
        num_buildings = len(gj["features"])
        if num_buildings > 0:
            gdal_image = gdal.Open(file_path)
            pixel_width, pixel_height = gdal_image.GetGeoTransform()[1], gdal_image.GetGeoTransform()[5]
            originX, originY = gdal_image.GetGeoTransform()[0], gdal_image.GetGeoTransform()[3]

            for i in range(num_buildings):
                points = gj["features"][i]["geometry"]["coordinates"][0]
                if len(points) == 1:
                    points = points[0]

                all_points_x, all_points_y = [], []
                for j in range(len(points)):
                    all_points_x.append(int(round((points[j][0] - originX) / pixel_width)))
                    all_points_y.append(int(round((points[j][1] - originY) / pixel_height)))

                regions[str(i)] = {"shape_attributes":
                                       {"name": "polygon",
                                        "all_points_x": all_points_x,
                                        "all_points_y": all_points_y,
                                        "category": 0
                                       },
                                   "region_attributes": {}
                                  }

        dictionary = {"file_ref": '',
                      "size": os.path.getsize(file_path),
                      "filename": file.replace(".tif", ".png"),
                      "base64_img_data": '',
                      "file_attributes": {},
                      "regions": regions,
                      "origin_x": originX,
                      "origin_y": originY
                     }

        test_dict[file.replace(".tif", ".png")] = dictionary

with open("../NSO/test/nso.json", "w") as f:
    json.dump(test_dict, f)

Creating JSONs for Detectron2 on test: 100%|██████████| 7574/7574 [00:01<00:00, 5931.06it/s]


In [20]:
# Create single via_region_data training dataset => can be skipped if only one .json file.=>But then change file name
jsons = ["../NSO/train/nso.json"]

result = {}
for file in jsons:
    with open(file, "r") as f:
        loaded = json.load(f)
        
    #https://realpython.com/iterate-through-dictionary-python/
    for key, value in loaded.items():
        result[key] = value

with open("../NSO/train/via_region_data.json", "w") as file:
    json.dump(result, file)

In [21]:
# Create via_region JSON for entire validation dataset => can be skipped if only one .json file.=>But then change file name
jsons = ["../NSO/val/nso.json"]

result = {}
for file in jsons:
    with open(file, "r") as f:
        loaded = json.load(f)
    for key, value in loaded.items():
        result[key] = value

with open("../NSO/val/via_region_data.json", "w") as file:
    json.dump(result, file)

print("Done creating JSONs")


Done creating JSONs


In [23]:
# Create via_region JSON for entire test dataset => can be skipped if only one .json file.=>But then change file name
jsons = ["../NSO/test/nso.json"]

result = {}
for file in jsons:
    with open(file, "r") as f:
        loaded = json.load(f)
    for key, value in loaded.items():
        result[key] = value

with open("../NSO/test/via_region_data.json", "w") as file:
    json.dump(result, file)

print("Done creating JSONs")

Done creating JSONs
