In [1]:
import os
import json
import geojson
import pandas as pd
from tqdm import tqdm
from osgeo import gdal
from sklearn.model_selection import train_test_split

# custom functions
import sys
sys.path.append('../')
from utils.functions import grab_certain_file
# TODO delete functions.py in data_preperation folder. Use main utils instead.

'''
Read the NSO tiles and their annotation geojsons, convert the lat/lon coordinates of each tile's annotations to pixel
coordinates and save the pixel coordinates into a .json file. If there is more than one .json file, they can be saved as one via_regions.json.
Source: https://github.com/rl02898/detectron2-spacenet. JDP edits: some changes, and saving the origins of each tile in the jsons to allow stitching the tiles back together. An entry with empty regions is added for tiles without annotation to allow reading them in D2 (Detectron2).
'''

In [2]:
### ARGS ###
# Root folder of the NSO data; every other location is a sub-directory of it.
nso_path = "../NSO"

# Split output folders, the annotation geojsons and the small .tif tiles.
train_path, test_path, val_path, geojson_path, small_tiles_path = (
    os.path.join(nso_path, sub_dir)
    for sub_dir in ("train", "test", "val", "geojsons", "NSO_small_tiles")
)

In [3]:
# Split the tiles into train/test/val. The fixed random seed keeps the splits
# reproducible for the next script.

RANDOM_SEED = 560

# Variant WITHOUT a test set (kept for reference):
#nso_images = grab_certain_file(".tif", small_tiles_path)
#train, val = train_test_split(nso_images, test_size=0.2, random_state=RANDOM_SEED)

# Variant WITH a test set: hold out 20% for test, then 25% of the remaining 80%
# for val, i.e. a 60/20/20 train/val/test split.
nso_images = grab_certain_file(".tif", small_tiles_path)
train_val, test = train_test_split(nso_images, random_state=RANDOM_SEED, test_size=0.20)
train, val = train_test_split(train_val, random_state=RANDOM_SEED, test_size=0.25)

In [4]:
def geojson_to_json_pix_coords(dataset_split, small_tiles_path, geojson_path, dataset_path,
                               json_name="nso_with_empty_annotations.json"):
    """
    Converts geojson annotations to JSON format with pixel coordinates.

    Args:
        dataset_split (list): List of image files (.tif) in the dataset split: train, test or val.
        small_tiles_path (str): Path to the directory containing the small .tif tiles.
        geojson_path (str): Path to the directory containing the geojson files of the annotations,
            named "<tile name without .tif>.geojson".
        dataset_path (str): Path to the dataset folder (train, val or test) the JSON is written to.
            It is created if it does not exist yet.
        json_name (str): File name of the JSON written into dataset_path.

    Returns:
        None

    Raises:
        RuntimeError: If GDAL cannot open a tile.

    Description:
        This function iterates over each image in the dataset split and converts the corresponding geojson
        annotations to JSON format, with pixel coordinates calculated from the GDAL geotransform of the tile.
        Every image gets a record with its file information, the tile origin ("origin_x"/"origin_y") and a
        regions dictionary storing the asset footprints with their shape attributes. Images without a
        geojson file get the same record with "regions": {}. The records are keyed by the ".png" name of
        the tile and saved as one JSON file (json_name) in dataset_path.
    """

    # Maps "<tile>.png" -> record of that tile, for the whole train/test/val split
    dataset_dict = {}

    # Loop over each image in the split
    for file in tqdm(dataset_split, desc=f"Creating JSONs for Detectron2 on {dataset_path}", ncols=150, bar_format="{l_bar}{bar:10}{r_bar}"):
        file_path = os.path.join(small_tiles_path, file)
        img_id = file.split(".tif")[0]
        png_name = file.replace(".tif", ".png")
        geojson_image = os.path.join(geojson_path, f"{img_id}.geojson")

        # The geotransform is needed for every tile, annotated or not, because the
        # origin is always stored (it allows stitching the tiles back together).
        gdal_image = gdal.Open(file_path)
        if gdal_image is None:
            # Without this check the failure would surface as an opaque AttributeError
            raise RuntimeError(f"GDAL could not open tile: {file_path}")
        # Read the geotransform once: [0]/[3] are the origin, [1]/[5] the pixel width/height
        # https://www.gis.usu.edu/~chrisg/python/2009/lectures/ospy_slides4.pdf
        geo_transform = gdal_image.GetGeoTransform()
        gdal_image = None  # drop the reference to the dataset as soon as it is no longer needed
        originX, originY = geo_transform[0], geo_transform[3]
        pixel_width, pixel_height = geo_transform[1], geo_transform[5]

        # Regions (annotation footprints) of the image; stays empty for tiles without annotations
        regions = {}

        # Not all tiles have annotations, thus:
        if os.path.exists(geojson_image):
            with open(geojson_image) as f:
                gj = geojson.load(f)

            # Loop over each building/asset in the image
            for i, feature in enumerate(gj["features"]):
                # https://stackoverflow.com/questions/23306653/python-accessing-nested-json-data
                points = feature["geometry"]["coordinates"][0]

                # A single nested element means the point list sits one level deeper; unwrap it
                # (presumably MultiPolygon geometries - TODO confirm)
                if len(points) == 1:
                    points = points[0]

                # Convert the geo coordinates to pixel coordinates by subtracting the origin
                # and dividing by the pixel size
                all_points_x = [int(round((point[0] - originX) / pixel_width)) for point in points]
                all_points_y = [int(round((point[1] - originY) / pixel_height)) for point in points]

                # Store the asset footprint under its feature index
                regions[str(i)] = {"shape_attributes":
                                       {"name": "polygon",
                                        "all_points_x": all_points_x,
                                        "all_points_y": all_points_y,
                                        "category": 0
                                       },
                                   "region_attributes": {}
                                  }

        # Same record layout for annotated and non-annotated tiles
        dataset_dict[png_name] = {"file_ref": '',
                                  "size": os.path.getsize(file_path),
                                  "filename": png_name,
                                  "base64_img_data": '',
                                  "file_attributes": {},
                                  "regions": regions,
                                  "origin_x": originX,
                                  "origin_y": originY
                                 }

    # Make sure the split folder exists so the work of the loop is not lost on a missing directory
    if dataset_path:
        os.makedirs(dataset_path, exist_ok=True)
    jsons_path = os.path.join(dataset_path, json_name)
    with open(jsons_path, "w") as f:
        json.dump(dataset_dict, f)

In [5]:
# Write one annotation JSON per split, in the order train -> test -> val.
for split_files, split_dir in ((train, train_path), (test, test_path), (val, val_path)):
    geojson_to_json_pix_coords(split_files, small_tiles_path, geojson_path, split_dir)

Creating JSONs for Detectron2 on ../NSO/train: 100%|██████████| 78324/78324 [53:46<00:00, 24.27it/s]  
Creating JSONs for Detectron2 on ../NSO/test: 100%|██████████| 26108/26108 [16:50<00:00, 25.84it/s]
Creating JSONs for Detectron2 on ../NSO/val: 100%|██████████| 26108/26108 [15:47<00:00, 27.56it/s]


In [8]:
# Merge the .json files of each split into a single via_region_data file.
# With only one .json per split this step can be skipped, but then the file name
# has to be changed accordingly.
for d in ["train", "test", "val"]:
    split_dir = os.path.join(nso_path, d)
    jsons = [os.path.join(split_dir, "nso_with_empty_annotations.json")]

    # Later files overwrite earlier ones on duplicate keys
    # https://realpython.com/iterate-through-dictionary-python/
    result = {}
    for json_file in jsons:
        with open(json_file, "r") as src:
            result.update(json.load(src))

    via_region_p = os.path.join(split_dir, "via_region_data_with_empty_annotations.json")
    with open(via_region_p, "w") as dst:
        json.dump(result, dst)

    print(f"Done creating JSONs {d}")
    
    

Done creating JSONs train
Done creating JSONs test
Done creating JSONs val


In [15]:
import pandas as pd
import json
# Check that the regions are well written by loading the merged JSON of each split.
#file_path_json = "../NSO/val/nso_with_empty_annotations.json"

# The file name must match the one written by the merge step
# ("via_region_data_with_empty_annotations.json"). The paths are only collected in
# `pths` (order: train, val, test) so that the train/val/test split lists defined
# earlier are not overwritten by strings.
pths = [
    os.path.join(nso_path, split, "via_region_data_with_empty_annotations.json")
    for split in ("train", "val", "test")
]

In [17]:
# Load the JSON of each split into its own DataFrame: one row per image, indexed by
# the image file name (orient='index').
dfs = []
for path in pths:
    # Read the file of the current split. The loop used to read `file_path_json`,
    # which is not defined in this notebook and was the same for every split.
    df = pd.read_json(path, orient='index')
    dfs.append(df)

# `pths` is ordered train, val, test
train_df, val_df, test_df = dfs

In [20]:
# Preview the first rows of train_df to inspect the columns of the image records
train_df.head()

Unnamed: 0,file_ref,size,filename,base64_img_data,file_attributes,regions,origin_x,origin_y
150_20220614_105854_SV1-03_SV_RD_8bit_RGB_50cm_Veghel_11000_5000.png,,3037173,150_20220614_105854_SV1-03_SV_RD_8bit_RGB_50cm...,,{},{},162032.5,408122.0
87_20220718_101820_SV1-01_SV_RD_8bit_RGB_50cm_Oploo_11000_18000.png,,3049207,87_20220718_101820_SV1-01_SV_RD_8bit_RGB_50cm_...,,{},{},190134.5,408473.5
162_20220929_104939_SV1-03_SV_RD_8bit_RGB_50cm_Heino_21000_7000.png,,3006442,162_20220929_104939_SV1-03_SV_RD_8bit_RGB_50cm...,,{},{},206648.5,488978.0
104_20221009_111411_SV1-03_SV_RD_8bit_RGB_50cm_Driehuizen_19000_29000.png,,3342059,104_20221009_111411_SV1-03_SV_RD_8bit_RGB_50cm...,,{},{},122634.0,514784.0
29_20221101_105023_SV1-04_SV_RD_8bit_RGB_50cm_Dalfsen_11000_19000.png,,3225428,29_20221101_105023_SV1-04_SV_RD_8bit_RGB_50cm_...,,{},{},219572.5,505988.0


In [30]:
# Count the tiles of the training frame with and without annotated regions,
# printing the regions of every annotated tile for a visual check.
i = 0  # tiles whose regions dict is empty
e = 0  # tiles with at least one region
for regions in train_df['regions']:
    if regions != {}:
        e += 1
        print (regions)
        continue
    i += 1
print("no regions: ", i,"with regions: ",  e)

{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [504, 480, 432, 418, 442, 410, 320, 324, 299, 292, 276, 313, 359, 379, 362, 358, 352, 345, 369, 383, 377, 392, 399, 462, 468, 482, 475, 480, 504], 'all_points_y': [907, 932, 885, 901, 926, 961, 881, 875, 848, 855, 836, 796, 847, 825, 810, 813, 807, 793, 768, 787, 792, 810, 803, 868, 859, 878, 885, 884, 907], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [504, 480, 432, 418, 442, 410, 320, 324, 299, 292, 276, 313, 359, 379, 362, 358, 352, 345, 369, 383, 377, 392, 399, 462, 468, 482, 475, 480, 504], 'all_points_y': [907, 932, 885, 901, 926, 961, 881, 875, 848, 855, 836, 796, 847, 825, 810, 813, 807, 793, 768, 787, 792, 810, 803, 868, 859, 878, 885, 884, 907], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [7, 17, 23, 15, 28, 60, 51, 73, 65, 56, 40, 23, 35, 38, 26, 17, 0, 0, 0, 7], 'all_points_y': [523, 528, 511,

{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [421, 360, 348, 331, 344, 306, 427, 421], 'all_points_y': [35, 6, 68, 61, 15, 0, 0, 35], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [421, 360, 348, 331, 344, 306, 427, 421], 'all_points_y': [35, 6, 68, 61, 15, 0, 0, 35], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [662, 661, 660, 662, 664, 671, 773, 929, 944, 1000, 1000, 942, 658, 662], 'all_points_y': [390, 372, 356, 360, 365, 371, 153, 223, 206, 240, 450, 565, 405, 390], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [662, 661, 660, 662, 664, 671, 773, 929, 944, 1000, 1000, 942, 658, 662], 'all_points_y': [390, 372, 356, 360, 365, 371, 153, 223, 206, 240, 450, 565, 405, 390], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [686, 685, 684, 6

{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [368, 365, 389, 389, 365, 364, 385, 386, 361, 362, 387, 388, 364, 364, 386, 386, 411, 417, 469, 471, 413, 411, 471, 470, 413, 416, 470, 470, 0, 0, 9, 5, 21, 24, 186, 220, 283, 284, 324, 323, 297, 301, 368], 'all_points_y': [483, 573, 575, 599, 599, 609, 612, 630, 633, 651, 652, 675, 678, 696, 693, 714, 715, 787, 788, 868, 874, 905, 906, 962, 959, 999, 997, 1000, 1000, 984, 984, 860, 866, 564, 571, 550, 557, 586, 587, 539, 539, 485, 483], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [368, 365, 389, 389, 365, 364, 385, 386, 361, 362, 387, 388, 364, 364, 386, 386, 411, 417, 469, 471, 413, 411, 471, 470, 413, 416, 470, 470, 0, 0, 9, 5, 21, 24, 186, 220, 283, 284, 324, 323, 297, 301, 368], 'all_points_y': [483, 573, 575, 599, 599, 609, 612, 630, 633, 651, 652, 675, 678, 696, 693, 714, 715, 787, 788, 868, 874, 905, 906, 962, 959, 999, 997, 1000, 1000, 984, 984, 860, 866, 

{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [827, 813, 830, 823, 803, 789, 811, 819, 829, 834, 839, 846, 833, 840, 895, 881, 875, 827], 'all_points_y': [739, 725, 703, 694, 719, 702, 678, 663, 655, 648, 655, 647, 639, 631, 680, 684, 677, 739], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [827, 813, 830, 823, 803, 789, 811, 819, 829, 834, 839, 846, 833, 840, 895, 881, 875, 827], 'all_points_y': [739, 725, 703, 694, 719, 702, 678, 663, 655, 648, 655, 647, 639, 631, 680, 684, 677, 739], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [10, 25, 14, 25, 34, 46, 45, 38, 34, 50, 56, 53, 55, 62, 58, 68, 75, 104, 95, 109, 116, 169, 193, 189, 206, 194, 178, 173, 183, 174, 162, 157, 167, 158, 146, 142, 155, 144, 131, 128, 138, 128, 116, 111, 121, 118, 120, 126, 134, 121, 99, 64, 72, 63, 53, 49, 57, 46, 35, 10], 'all_points_y': [305, 296, 276, 267, 290, 279, 274, 27

{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [15, 5, 21, 15, 17, 28, 40, 36, 37, 47, 62, 48, 74, 80, 98, 91, 96, 105, 115, 107, 114, 110, 71, 71, 104, 98, 0, 0, 15], 'all_points_y': [452, 475, 480, 515, 516, 479, 481, 517, 521, 475, 479, 529, 533, 504, 509, 538, 540, 510, 513, 546, 549, 572, 561, 566, 584, 614, 584, 448, 452], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [15, 5, 21, 15, 17, 28, 40, 36, 37, 47, 62, 48, 74, 80, 98, 91, 96, 105, 115, 107, 114, 110, 71, 71, 104, 98, 0, 0, 15], 'all_points_y': [452, 475, 480, 515, 516, 479, 481, 517, 521, 475, 479, 529, 533, 504, 509, 538, 540, 510, 513, 546, 549, 572, 561, 566, 584, 614, 584, 448, 452], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [200, 138, 120, 117, 191, 200], 'all_points_y': [44, 71, 6, 0, 0, 44], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', '

{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [49, 62, 87, 113, 105, 115, 137, 109, 86, 18, 12, 2, 5, 0, 0, 12, 88, 92, 49], 'all_points_y': [732, 760, 746, 763, 768, 795, 806, 856, 844, 893, 883, 890, 898, 900, 712, 707, 713, 729, 732], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [49, 62, 87, 113, 105, 115, 137, 109, 86, 18, 12, 2, 5, 0, 0, 12, 88, 92, 49], 'all_points_y': [732, 760, 746, 763, 768, 795, 806, 856, 844, 893, 883, 890, 898, 900, 712, 707, 713, 729, 732], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [570, 546, 564, 609, 596, 603, 616, 630, 628, 644, 646, 680, 677, 688, 690, 717, 704, 687, 680, 643, 628, 580, 583, 574, 571, 545, 570], 'all_points_y': [792, 781, 736, 756, 794, 798, 756, 761, 768, 772, 765, 782, 789, 790, 781, 789, 843, 838, 858, 842, 884, 864, 858, 854, 864, 851, 792], 'category': 0}, 'region_attributes': {}}, '1': {'shape