In [1]:
import os
import json
import geojson
import pandas as pd
from tqdm import tqdm
from osgeo import gdal
from sklearn.model_selection import train_test_split

# custom functions
import sys
sys.path.append('../')
from utils.functions import grab_certain_file
# TODO delete functions.py in data_preperation folder. Use main utils instead.

'''
Read NSO tiles and annotations geojsons, convert lat/lon of tile to pixel coordinates and save pixel coordinates into
.json file If more than on .json file can be saved as one via_regions.json.
Source:  https://github.com/rl02898/detectron2-spacenet. JDP Edits:some and saving json with origins of tile in jsons to allow stiching back
of the tiles.
'''

In [2]:
### EDIT: Seems to works now but be weary ###

In [3]:
### ARGS function ###

nso_path = "../NSO"
train_path = os.path.join(nso_path, "train")
test_path = os.path.join(nso_path, "test")
val_path = os.path.join(nso_path, "val")
geojson_path = os.path.join(nso_path, "geojsons")
small_tiles_path = os.path.join(nso_path, "NSO_small_tiles")

In [4]:
# Divide dataset and set a random seed for reproducibility of the splits for next script

RANDOM_SEED = 560

# Create JSONs for Detectron2 NO test set
#nso_images = grab_certain_file(".tif", small_tiles_path)
#train, val = train_test_split(nso_images, test_size=0.2, random_state=RANDOM_SEED)

# Create JSONs for Detectron2 WITH test set
nso_images = grab_certain_file(".tif", small_tiles_path)
train, test = train_test_split(nso_images, test_size=0.20, random_state=RANDOM_SEED)
train, val = train_test_split(train, test_size=0.25, random_state=RANDOM_SEED)

In [10]:
def geojson_to_json_pix_coords(dataset_split, small_tiles_path, geojson_path, dataset_path):
    """
    Converts geojson annotations to JSON format with pixel coordinates.

    Args:
        dataset_split (list): List of image files in the dataset split:train,test or val
        small_tiles_path (str): Path to the directory containing the small tiles.tif.
        geojson_path (str): Path to the directory containing the geojson files of the annotations.
        dataset_path (str): Path to the datasets train, val or test.

    Returns:
        None

    Description:
        This function iterates over each image in the dataset split and converts the corresponding geojson
        annotations to JSON format, with pixel coordinates calculated using GDAL. It creates a dictionary
        containing image file information and a regions dictionary storing the asset footprints with their
        respective shape attributes. The resulting JSON file is saved as "nso.json" in the dataset path.
    """
  
    # Create an empty dictionary to store the training/test/val set of annotations and their pixel coordinates
    dataset_dict = {}

    # Loop over each image in the training set
    for file in tqdm(dataset_split, desc=f"Creating JSONs for Detectron2 on {dataset_path}", ncols=150, bar_format="{l_bar}{bar:10}{r_bar}"):
        file_path = os.path.join(small_tiles_path, file)
        img_id = file.split(".tif")[0]
        geojson_image = os.path.join(geojson_path, f"{img_id}.geojson")

        #Not all tiles have annotations, thus:
        if os.path.exists(geojson_image):

            # Load the geojson in gj
            with open(geojson_image) as f:
                gj = geojson.load(f)

            # Create a dictionary to store the regions (annotations spatial features) for the image
            regions = {}
            num_buildings = len(gj["features"])
            #print (num_buildings) 

            # Open the image with gdal to get pixel size and origin if feature exists
            #if num_buildings > 0:
            gdal_image = gdal.Open(file_path)

            # Get the pixel width and height(0.5 for nso) and the origin coordinates
            #https://www.gis.usu.edu/~chrisg/python/2009/lectures/ospy_slides4.pdf
            pixel_width, pixel_height = gdal_image.GetGeoTransform()[1], gdal_image.GetGeoTransform()[5]
            originX, originY = gdal_image.GetGeoTransform()[0], gdal_image.GetGeoTransform()[3]

            # Loop over each building/assets in the image
            for i in range(num_buildings):

                # Get the polygon points for the asset
                #https://stackoverflow.com/questions/23306653/python-accessing-nested-json-data
                points = gj["features"][i]["geometry"]["coordinates"][0]

                # If there is only one point, unwarp it=>check
                if len(points) == 1:
                    points = points[0]

                #Empty lists to store pixel coordinates
                all_points_x, all_points_y = [], []

                # Convert the lat/long points to pixel coordinates by substacting origin
                for j in range(len(points)):
                    all_points_x.append(int(round((points[j][0] - originX) / pixel_width)))
                    all_points_y.append(int(round((points[j][1] - originY) / pixel_height)))

                # Create a dictionary to store the asset footprint
                regions[str(i)] = {"shape_attributes":
                                       {"name": "polygon",
                                        "all_points_x": all_points_x,
                                        "all_points_y": all_points_y,
                                        "category": 0
                                       },
                                   "region_attributes": {}
                                  }
                print (regions)
            #Should probably save origin x and y here but we still have the og tiles and imgid and allow to stich tiles back together
            #TODO: same for tiles without annot.Eg create json with empty regions in the else
            dictionary = {"file_ref": '',
                          "size": os.path.getsize(file_path),
                          "filename": file.replace(".tif", ".png"),
                          "base64_img_data": '',
                          "file_attributes": {},
                          "regions": regions,
                          "origin_x": originX,
                          "origin_y": originY
                         }
            #print (dictionary)
            dataset_dict[file.replace(".tif", ".png")] = dictionary
        else:
            continue
            
    jsons_path = os.path.join(dataset_path,"nso.json")
    with open(jsons_path, "w") as f:
        json.dump(dataset_dict, f)



In [11]:
geojson_to_json_pix_coords(train, small_tiles_path, geojson_path, train_path)
geojson_to_json_pix_coords(test, small_tiles_path, geojson_path, test_path)
geojson_to_json_pix_coords(val, small_tiles_path, geojson_path, val_path)

Creating JSONs for Detectron2 on ../NSO/train:  24%|██▎       | 18581/78324 [00:00<00:00, 92651.19it/s]

{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [27, 50, 76, 46, 73, 88, 78, 85, 93, 100, 124, 115, 143, 150, 174, 159, 107, 93, 120, 102, 98, 82, 63, 78, 54, 32, 15, 36, 9, 0, 0, 27], 'all_points_y': [652, 565, 577, 659, 669, 636, 627, 606, 609, 572, 581, 614, 626, 588, 597, 657, 642, 676, 690, 745, 745, 801, 791, 736, 728, 790, 780, 719, 709, 740, 645, 652], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [27, 50, 76, 46, 73, 88, 78, 85, 93, 100, 124, 115, 143, 150, 174, 159, 107, 93, 120, 102, 98, 82, 63, 78, 54, 32, 15, 36, 9, 0, 0, 27], 'all_points_y': [652, 565, 577, 659, 669, 636, 627, 606, 609, 572, 581, 614, 626, 588, 597, 657, 642, 676, 690, 745, 745, 801, 791, 736, 728, 790, 780, 719, 709, 740, 645, 652], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [27, 50, 76, 46, 73, 88, 78, 85, 93, 100, 124, 115, 143, 150, 174, 159, 107, 93, 120, 102, 98, 82,

Creating JSONs for Detectron2 on ../NSO/train:  47%|████▋     | 37166/78324 [00:00<00:00, 87993.93it/s]

{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [282, 359, 371, 444, 426, 468, 486, 538, 516, 558, 520, 415, 392, 368, 390, 233, 282], 'all_points_y': [649, 714, 698, 764, 786, 822, 804, 852, 884, 922, 969, 877, 907, 885, 852, 711, 649], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [282, 359, 371, 444, 426, 468, 486, 538, 516, 558, 520, 415, 392, 368, 390, 233, 282], 'all_points_y': [649, 714, 698, 764, 786, 822, 804, 852, 884, 922, 969, 877, 907, 885, 852, 711, 649], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [282, 359, 371, 444, 426, 468, 486, 538, 516, 558, 520, 415, 392, 368, 390, 233, 282], 'all_points_y': [649, 714, 698, 764, 786, 822, 804, 852, 884, 922, 969, 877, 907, 885, 852, 711, 649], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [282, 359, 371, 444, 426, 468, 486, 538, 516, 558, 52

Creating JSONs for Detectron2 on ../NSO/train:  59%|█████▉    | 46020/78324 [00:00<00:00, 87942.90it/s]

{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [173, 177, 205, 199, 220, 225, 352, 341, 363, 378, 418, 431, 413, 420, 422, 431, 432, 408, 402, 408, 405, 396, 390, 373, 379, 371, 374, 383, 387, 360, 358, 361, 358, 346, 340, 325, 328, 324, 328, 335, 339, 324, 316, 311, 314, 305, 276, 281, 286, 289, 264, 263, 259, 232, 232, 218, 211, 204, 181, 181, 149, 130, 152, 144, 173], 'all_points_y': [867, 895, 888, 862, 858, 882, 848, 808, 801, 852, 841, 892, 899, 927, 944, 943, 961, 972, 953, 952, 934, 937, 914, 920, 941, 944, 953, 953, 976, 986, 964, 962, 949, 951, 929, 935, 952, 956, 970, 969, 994, 1000, 1000, 979, 978, 950, 961, 985, 983, 1000, 1000, 992, 978, 983, 992, 1000, 1000, 989, 998, 1000, 1000, 915, 909, 876, 867], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [173, 177, 205, 199, 220, 225, 352, 341, 363, 378, 418, 431, 413, 420, 422, 431, 432, 408, 402, 408, 405, 396, 390, 373, 379, 371, 374, 383, 387, 360, 358

Creating JSONs for Detectron2 on ../NSO/train:  81%|████████  | 63249/78324 [00:00<00:00, 82845.20it/s]

{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [75, 58, 54, 84, 73, 70, 78, 76, 67, 65, 72, 69, 41, 47, 31, 33, 27, 22, 0, 0, 6, 0, 0, 20, 20, 8, 16, 79, 75], 'all_points_y': [458, 452, 469, 480, 479, 492, 497, 503, 500, 510, 514, 525, 514, 496, 488, 480, 477, 487, 477, 474, 460, 458, 437, 443, 438, 430, 413, 440, 458], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [75, 58, 54, 84, 73, 70, 78, 76, 67, 65, 72, 69, 41, 47, 31, 33, 27, 22, 0, 0, 6, 0, 0, 20, 20, 8, 16, 79, 75], 'all_points_y': [458, 452, 469, 480, 479, 492, 497, 503, 500, 510, 514, 525, 514, 496, 488, 480, 477, 487, 477, 474, 460, 458, 437, 443, 438, 430, 413, 440, 458], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [75, 58, 54, 84, 73, 70, 78, 76, 67, 65, 72, 69, 41, 47, 31, 33, 27, 22, 0, 0, 6, 0, 0, 20, 20, 8, 16, 79, 75], 'all_points_y': [458, 452, 469, 480, 479, 492, 497, 503, 500, 510,

Creating JSONs for Detectron2 on ../NSO/train: 100%|██████████| 78324/78324 [00:00<00:00, 87005.04it/s]


{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [1000, 1000, 987, 985, 1000], 'all_points_y': [71, 97, 97, 74, 71], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [1000, 1000, 987, 985, 1000], 'all_points_y': [71, 97, 97, 74, 71], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [1000, 1000, 987, 985, 1000], 'all_points_y': [71, 97, 97, 74, 71], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [1000, 1000, 991, 992, 963, 965, 1000], 'all_points_y': [818, 917, 917, 881, 878, 817, 818], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [1000, 1000, 991, 992, 963, 965, 1000], 'all_points_y': [818, 917, 917, 881, 878, 817, 818], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [1000, 1000, 991, 992, 9

Creating JSONs for Detectron2 on ../NSO/test:  38%|███▊      | 9809/26108 [00:00<00:00, 98085.42it/s]

{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [1000, 1000, 998, 1000, 1000, 874, 866, 827, 848, 858, 869, 896, 940, 975, 988, 1000], 'all_points_y': [478, 503, 507, 508, 649, 547, 563, 533, 498, 506, 494, 517, 462, 490, 466, 478], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [1000, 1000, 998, 1000, 1000, 874, 866, 827, 848, 858, 869, 896, 940, 975, 988, 1000], 'all_points_y': [478, 503, 507, 508, 649, 547, 563, 533, 498, 506, 494, 517, 462, 490, 466, 478], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [1000, 1000, 998, 1000, 1000, 874, 866, 827, 848, 858, 869, 896, 940, 975, 988, 1000], 'all_points_y': [478, 503, 507, 508, 649, 547, 563, 533, 498, 506, 494, 517, 462, 490, 466, 478], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [429, 405, 357, 343, 367, 335, 245, 249, 224, 217, 201, 238, 284, 30

{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [420, 401, 400, 356, 358, 379, 380, 379, 352, 348, 341, 341, 325, 325, 353, 353, 381, 380, 350, 351, 324, 324, 352, 352, 390, 389, 352, 353, 255, 255, 212, 214, 267, 272, 303, 303, 275, 275, 259, 259, 216, 216, 250, 273, 306, 304, 271, 266, 266, 214, 218, 266, 274, 305, 306, 363, 418, 420], 'all_points_y': [841, 841, 828, 830, 841, 838, 854, 885, 885, 883, 882, 877, 877, 899, 896, 902, 901, 935, 935, 928, 931, 952, 952, 959, 960, 983, 984, 1000, 1000, 990, 966, 947, 966, 948, 957, 939, 926, 914, 913, 937, 916, 895, 909, 904, 911, 885, 878, 867, 877, 872, 809, 828, 817, 828, 791, 789, 807, 841], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [420, 401, 400, 356, 358, 379, 380, 379, 352, 348, 341, 341, 325, 325, 353, 353, 381, 380, 350, 351, 324, 324, 352, 352, 390, 389, 352, 353, 255, 255, 212, 214, 267, 272, 303, 303, 275, 275, 259, 259, 216, 216, 250, 273, 306, 304,

Creating JSONs for Detectron2 on ../NSO/test: 100%|██████████| 26108/26108 [00:00<00:00, 85876.56it/s]

{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [185, 227, 225, 241, 241, 312, 312, 351, 353, 313, 314, 354, 357, 317, 317, 355, 356, 345, 345, 315, 316, 234, 231, 221, 223, 205, 205, 174, 173, 161, 161, 185, 185], 'all_points_y': [31, 32, 7, 8, 0, 0, 4, 2, 26, 26, 33, 30, 58, 56, 69, 65, 99, 99, 90, 88, 100, 103, 37, 36, 100, 100, 41, 41, 30, 31, 0, 0, 31], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [185, 227, 225, 241, 241, 312, 312, 351, 353, 313, 314, 354, 357, 317, 317, 355, 356, 345, 345, 315, 316, 234, 231, 221, 223, 205, 205, 174, 173, 161, 161, 185, 185], 'all_points_y': [31, 32, 7, 8, 0, 0, 4, 2, 26, 26, 33, 30, 58, 56, 69, 65, 99, 99, 90, 88, 100, 103, 37, 36, 100, 100, 41, 41, 30, 31, 0, 0, 31], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [185, 227, 225, 241, 241, 312, 312, 351, 353, 313, 314, 354, 357, 317, 317, 355, 356, 345, 345, 315, 3


Creating JSONs for Detectron2 on ../NSO/val:   0%|          | 0/26108 [00:00<?, ?it/s]

{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [504, 480, 432, 418, 442, 410, 320, 324, 299, 292, 276, 313, 359, 379, 362, 358, 352, 345, 369, 383, 377, 392, 399, 462, 468, 482, 475, 480, 504], 'all_points_y': [907, 932, 885, 901, 926, 961, 881, 875, 848, 855, 836, 796, 847, 825, 810, 813, 807, 793, 768, 787, 792, 810, 803, 868, 859, 878, 885, 884, 907], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [504, 480, 432, 418, 442, 410, 320, 324, 299, 292, 276, 313, 359, 379, 362, 358, 352, 345, 369, 383, 377, 392, 399, 462, 468, 482, 475, 480, 504], 'all_points_y': [907, 932, 885, 901, 926, 961, 881, 875, 848, 855, 836, 796, 847, 825, 810, 813, 807, 793, 768, 787, 792, 810, 803, 868, 859, 878, 885, 884, 907], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [504, 480, 432, 418, 442, 410, 320, 324, 299, 292, 276, 313, 359, 379, 362, 358, 352, 345, 369, 383, 377, 39

Creating JSONs for Detectron2 on ../NSO/val:  62%|██████▏   | 16117/26108 [00:00<00:00, 81265.73it/s]

{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [915, 896, 905, 889, 949, 915], 'all_points_y': [39, 20, 11, 0, 0, 39], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [915, 896, 905, 889, 949, 915], 'all_points_y': [39, 20, 11, 0, 0, 39], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [915, 896, 905, 889, 949, 915], 'all_points_y': [39, 20, 11, 0, 0, 39], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [221, 194, 181, 214, 220, 253, 246, 465, 552, 562, 512, 515, 477, 473, 371, 377, 487, 519, 519, 569, 577, 531, 533, 497, 494, 385, 389, 226, 221], 'all_points_y': [977, 991, 936, 928, 950, 939, 882, 835, 823, 860, 865, 877, 889, 874, 899, 945, 922, 914, 923, 910, 943, 954, 962, 978, 962, 984, 1000, 1000, 977], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_p

{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [46, 65, 74, 89, 76, 88, 99, 117, 107, 162, 130, 126, 115, 103, 120, 116, 97, 85, 103, 90, 69, 52, 77, 63, 42, 0, 0, 46], 'all_points_y': [556, 577, 565, 578, 593, 604, 591, 609, 624, 678, 715, 711, 728, 713, 692, 688, 712, 697, 673, 662, 691, 672, 644, 631, 662, 619, 605, 556], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [46, 65, 74, 89, 76, 88, 99, 117, 107, 162, 130, 126, 115, 103, 120, 116, 97, 85, 103, 90, 69, 52, 77, 63, 42, 0, 0, 46], 'all_points_y': [556, 577, 565, 578, 593, 604, 591, 609, 624, 678, 715, 711, 728, 713, 692, 688, 712, 697, 673, 662, 691, 672, 644, 631, 662, 619, 605, 556], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [46, 65, 74, 89, 76, 88, 99, 117, 107, 162, 130, 126, 115, 103, 120, 116, 97, 85, 103, 90, 69, 52, 77, 63, 42, 0, 0, 46], 'all_points_y': [556, 577, 565, 578, 593, 604,

Creating JSONs for Detectron2 on ../NSO/val: 100%|██████████| 26108/26108 [00:00<00:00, 83187.01it/s]

{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [7, 26, 44, 23, 75, 0, 0, 7], 'all_points_y': [933, 912, 929, 951, 1000, 1000, 926, 933], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [7, 26, 44, 23, 75, 0, 0, 7], 'all_points_y': [933, 912, 929, 951, 1000, 1000, 926, 933], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [7, 26, 44, 23, 75, 0, 0, 7], 'all_points_y': [933, 912, 929, 951, 1000, 1000, 926, 933], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [877, 863, 879, 877], 'all_points_y': [7, 0, 0, 7], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [877, 863, 879, 877], 'all_points_y': [7, 0, 0, 7], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [877, 863, 879, 877], 'all_points_y': [7




In [12]:
# Create single via_region_data training dataset => can be skipped if only one .json file.=>But then change file name
for d in ["train", "test", "val"]:
    jsons = [os.path.join(nso_path, d, "nso.json")]
    result = {}
    for file in jsons:
        with open(file, "r") as f:
            loaded = json.load(f)
            
        #https://realpython.com/iterate-through-dictionary-python/
        for key, value in loaded.items():
            result[key] = value
    via_region_p = os.path.join(nso_path, d, "via_region_data.json")
    with open(via_region_p, "w") as file:
        json.dump(result, file)
        
    print(f"Done creating JSONs {d}")

Done creating JSONs train
Done creating JSONs test
Done creating JSONs val


In [None]:
### From here on, old version that works ####

In [33]:
# Divide dataset and set a random seed for reproducibility of the splits for next script
# TODO: path nso_train and nso_geojson to tiling.py output => Better move output of tiling to those 2 folder in NSO/ 

RANDOM_SEED = 560


# Change name to dataset...
nso_train = "../NSO/train"
nso_test = "../NSO/test"
nso_val = "../NSO/val"
nso_geojson = "../NSO/geojsons"
nso_small_tiles = "../NSO/NSO_small_tiles"


In [34]:
# Create JSONs for Detectron2 NO test set
#nso_images = grab_certain_file(".tif", nso_small_tiles)
#train, val = train_test_split(nso_images, test_size=0.2, random_state=RANDOM_SEED)

# Create JSONs for Detectron2 WITH test set
nso_images = grab_certain_file(".tif", nso_small_tiles)
train, test = train_test_split(nso_images, test_size=0.20, random_state=RANDOM_SEED)
train, val = train_test_split(train, test_size=0.25, random_state=RANDOM_SEED)

In [35]:
# Training set

# Create an empty dictionary to store the training set of annotations and their pixel coordinates
train_dict = {}

# Loop over each image in the training set
for file in tqdm(train, desc="Creating JSONs for Detectron2 on train", ncols=150, bar_format="{l_bar}{bar:10}{r_bar}"):
    file_path = os.path.join(nso_small_tiles, file)
    img_id = file.split(".tif")[0]
    geojson_path = os.path.join(nso_geojson, f"{img_id}.geojson")
    
    #Not all tiles have annotations, thus:
    if os.path.exists(geojson_path):
        
        # Load the geojson in gj
        with open(geojson_path) as f:
            gj = geojson.load(f)
        
        # Create a dictionary to store the regions (annotations spatial features) for the image
        regions = {}
        num_buildings = len(gj["features"])
        #print (num_buildings) 
        
        # Open the image with gdal to get pixel size and origin if feature exists
        if num_buildings > 0:
            gdal_image = gdal.Open(file_path)
            
            # Get the pixel width and height(0.5 for nso) and the origin coordinates
            #https://www.gis.usu.edu/~chrisg/python/2009/lectures/ospy_slides4.pdf
            pixel_width, pixel_height = gdal_image.GetGeoTransform()[1], gdal_image.GetGeoTransform()[5]
            originX, originY = gdal_image.GetGeoTransform()[0], gdal_image.GetGeoTransform()[3]
            
            # Loop over each building/assets in the image
            for i in range(num_buildings):
                
                # Get the polygon points for the asset
                #https://stackoverflow.com/questions/23306653/python-accessing-nested-json-data
                points = gj["features"][i]["geometry"]["coordinates"][0]
                
                # If there is only one point, unwarp it=>check
                if len(points) == 1:
                    points = points[0]

                #Empty lists to store pixel coordinates
                all_points_x, all_points_y = [], []
                
                # Convert the lat/long points to pixel coordinates by substacting origin
                for j in range(len(points)):
                    all_points_x.append(int(round((points[j][0] - originX) / pixel_width)))
                    all_points_y.append(int(round((points[j][1] - originY) / pixel_height)))
                    
                # Create a dictionary to store the asset footprint
                regions[str(i)] = {"shape_attributes":
                                       {"name": "polygon",
                                        "all_points_x": all_points_x,
                                        "all_points_y": all_points_y,
                                        "category": 0
                                       },
                                   "region_attributes": {}
                                  }
                #print (regions)
        #Should probably save origin x and y here but we still have the og tiles and imgid and allow to stich tiles back together
        #TODO: same for tiles without annot.Eg create json with empty regions in the else
        dictionary = {"file_ref": '',
                      "size": os.path.getsize(file_path),
                      "filename": file.replace(".tif", ".png"),
                      "base64_img_data": '',
                      "file_attributes": {},
                      "regions": regions,
                      "origin_x": originX,
                      "origin_y": originY
                     }
        #print (dictionary)
        train_dict[file.replace(".tif", ".png")] = dictionary
    else:
        continue

with open("../NSO/train/nso.json", "w") as f:
    json.dump(train_dict, f)

Creating JSONs for Detectron2 on train: 100%|██████████| 14846/14846 [00:00<00:00, 80333.15it/s]


In [36]:
#Validation set
val_dict = {}

for file in tqdm(val, desc="Creating JSONs for Detectron2 on val", ncols=150, bar_format="{l_bar}{bar:10}{r_bar}"):
    file_path = os.path.join(nso_small_tiles, file)
    img_id = file.split(".tif")[0]
    geojson_path = os.path.join(nso_geojson, f"{img_id}.geojson")
    if os.path.exists(geojson_path): 
        with open(geojson_path) as f:
            gj = geojson.load(f)

        regions = {}
        num_buildings = len(gj["features"])
        if num_buildings > 0:
            gdal_image = gdal.Open(file_path)
            pixel_width, pixel_height = gdal_image.GetGeoTransform()[1], gdal_image.GetGeoTransform()[5]
            originX, originY = gdal_image.GetGeoTransform()[0], gdal_image.GetGeoTransform()[3]

            for i in range(num_buildings):
                points = gj["features"][i]["geometry"]["coordinates"][0]
                if len(points) == 1:
                    points = points[0]

                all_points_x, all_points_y = [], []
                for j in range(len(points)):
                    all_points_x.append(int(round((points[j][0] - originX) / pixel_width)))
                    all_points_y.append(int(round((points[j][1] - originY) / pixel_height)))

                regions[str(i)] = {"shape_attributes":
                                       {"name": "polygon",
                                        "all_points_x": all_points_x,
                                        "all_points_y": all_points_y,
                                        "category": 0
                                       },
                                   "region_attributes": {}
                                  }

        dictionary = {"file_ref": '',
                      "size": os.path.getsize(file_path),
                      "filename": file.replace(".tif", ".png"),
                      "base64_img_data": '',
                      "file_attributes": {},
                      "regions": regions,
                      "origin_x": originX,
                      "origin_y": originY
                     }

        val_dict[file.replace(".tif", ".png")] = dictionary

with open("../NSO/val/nso.json", "w") as f:
    json.dump(val_dict, f)

Creating JSONs for Detectron2 on val: 100%|██████████| 4949/4949 [00:00<00:00, 67255.09it/s]


In [37]:
#Test set
test_dict = {}

for file in tqdm(test, desc="Creating JSONs for Detectron2 on test", ncols=150, bar_format="{l_bar}{bar:10}{r_bar}"):
    file_path = os.path.join(nso_small_tiles, file)
    img_id = file.split(".tif")[0]
    geojson_path = os.path.join(nso_geojson, f"{img_id}.geojson")
    if os.path.exists(geojson_path): 
        with open(geojson_path) as f:
            gj = geojson.load(f)

        regions = {}
        num_buildings = len(gj["features"])
        if num_buildings > 0:
            gdal_image = gdal.Open(file_path)
            pixel_width, pixel_height = gdal_image.GetGeoTransform()[1], gdal_image.GetGeoTransform()[5]
            originX, originY = gdal_image.GetGeoTransform()[0], gdal_image.GetGeoTransform()[3]

            for i in range(num_buildings):
                points = gj["features"][i]["geometry"]["coordinates"][0]
                if len(points) == 1:
                    points = points[0]

                all_points_x, all_points_y = [], []
                for j in range(len(points)):
                    all_points_x.append(int(round((points[j][0] - originX) / pixel_width)))
                    all_points_y.append(int(round((points[j][1] - originY) / pixel_height)))

                regions[str(i)] = {"shape_attributes":
                                       {"name": "polygon",
                                        "all_points_x": all_points_x,
                                        "all_points_y": all_points_y,
                                        "category": 0
                                       },
                                   "region_attributes": {}
                                  }

        dictionary = {"file_ref": '',
                      "size": os.path.getsize(file_path),
                      "filename": file.replace(".tif", ".png"),
                      "base64_img_data": '',
                      "file_attributes": {},
                      "regions": regions,
                      "origin_x": originX,
                      "origin_y": originY
                     }

        test_dict[file.replace(".tif", ".png")] = dictionary

with open("../NSO/test/nso.json", "w") as f:
    json.dump(test_dict, f)

Creating JSONs for Detectron2 on test: 100%|██████████| 4949/4949 [00:00<00:00, 69476.43it/s]


In [38]:
# Create single via_region_data training dataset => can be skipped if only one .json file.=>But then change file name
jsons = ["../NSO/train/nso.json"]

result = {}
for file in jsons:
    with open(file, "r") as f:
        loaded = json.load(f)
        
    #https://realpython.com/iterate-through-dictionary-python/
    for key, value in loaded.items():
        result[key] = value

with open("../NSO/train/via_region_data.json", "w") as file:
    json.dump(result, file)
    
print("Done creating JSONs train")

Done creating JSONs train


In [39]:
# Create via_region JSON for entire validation dataset => can be skipped if only one .json file.=>But then change file name
jsons = ["../NSO/val/nso.json"]

result = {}
for file in jsons:
    with open(file, "r") as f:
        loaded = json.load(f)
    for key, value in loaded.items():
        result[key] = value

with open("../NSO/val/via_region_data.json", "w") as file:
    json.dump(result, file)

print("Done creating JSONs val")


Done creating JSONs val


In [40]:
# Create via_region JSON for entire test dataset => can be skipped if only one .json file.=>But then change file name
jsons = ["../NSO/test/nso.json"]

result = {}
for file in jsons:
    with open(file, "r") as f:
        loaded = json.load(f)
    for key, value in loaded.items():
        result[key] = value

with open("../NSO/test/via_region_data.json", "w") as file:
    json.dump(result, file)

print("Done creating JSONs test")

Done creating JSONs test
