In [1]:
import os
import json
import geojson
import pandas as pd
from tqdm import tqdm
from osgeo import gdal
from sklearn.model_selection import train_test_split

# custom functions
import sys
sys.path.append('../')
from utils.functions import grab_certain_file
# TODO delete functions.py in data_preperation folder. Use main utils instead.

'''
Read NSO tiles and annotation geojsons, convert the lat/lon coordinates of each tile to pixel coordinates and save the
pixel coordinates into a .json file. If there is more than one .json file, they can be saved as one via_regions.json.
Source: https://github.com/rl02898/detectron2-spacenet. JDP edits: some changes, plus saving the tile origins in the jsons
to allow stitching the tiles back together.
'''

In [3]:
### ARGS function ###

# Root folder of the NSO dataset; every other path below is derived from it.
nso_path = "../NSO_1000"

# One sub-folder per dataset split.
train_path, test_path, val_path = (
    os.path.join(nso_path, split_name) for split_name in ("train", "test", "val")
)

# Annotation geojsons and the small .tif tiles live next to the split folders.
geojson_path = os.path.join(nso_path, "geojsons")
small_tiles_path = os.path.join(nso_path, "NSO_small_tiles")

In [4]:
# Split the tiles into train/test/val. The splitter is seeded so that the next
# script can reproduce the same partition.
# NOTE(review): the seed only reproduces the split if grab_certain_file returns
# the files in a stable order -- confirm against utils.functions.

RANDOM_SEED = 560

# Variant WITHOUT a test set (kept for reference):
#nso_images = grab_certain_file(".tif", small_tiles_path)
#train, val = train_test_split(nso_images, test_size=0.2, random_state=RANDOM_SEED)

# Variant WITH a test set: first hold out 20% of all tiles for testing ...
nso_images = grab_certain_file(".tif", small_tiles_path)
train, test = train_test_split(
    nso_images,
    test_size=0.20,
    random_state=RANDOM_SEED,
)
# ... then take 25% of the remaining 80% for validation, i.e. roughly a
# 60/20/20 train/val/test split overall.
train, val = train_test_split(
    train,
    test_size=0.25,
    random_state=RANDOM_SEED,
)

In [7]:
def _polygon_to_pixel_region(points, origin_x, origin_y, pixel_width, pixel_height):
    """Build one VIA-style polygon region from georeferenced polygon points.

    Args:
        points (list): Sequence of (x, y) coordinate pairs, or a one-element
            list wrapping such a sequence.
        origin_x (float): Georeferenced x of the tile origin (geotransform[0]).
        origin_y (float): Georeferenced y of the tile origin (geotransform[3]).
        pixel_width (float): Pixel size along x (geotransform[1]).
        pixel_height (float): Pixel size along y (geotransform[5]).

    Returns:
        dict: ``{"shape_attributes": {...}, "region_attributes": {}}`` with the
        polygon expressed in integer pixel coordinates.
    """
    # A one-element list means the ring is nested one level deeper
    # (presumably a MultiPolygon geometry -- TODO confirm), so unwrap it.
    if len(points) == 1:
        points = points[0]

    # Pixel coordinate = offset from the tile origin divided by the pixel size.
    all_points_x = [int(round((point[0] - origin_x) / pixel_width)) for point in points]
    all_points_y = [int(round((point[1] - origin_y) / pixel_height)) for point in points]

    return {"shape_attributes":
                {"name": "polygon",
                 "all_points_x": all_points_x,
                 "all_points_y": all_points_y,
                 "category": 0
                },
            "region_attributes": {}
           }


def geojson_to_json_pix_coords(dataset_split, small_tiles_path, geojson_path, dataset_path):
    """
    Converts geojson annotations to JSON format with pixel coordinates.

    Args:
        dataset_split (list): List of image file names in the dataset split: train, test or val.
        small_tiles_path (str): Path to the directory containing the small .tif tiles.
        geojson_path (str): Path to the directory containing the geojson files of the annotations.
        dataset_path (str): Path to the dataset folder (train, val or test) the JSON is written to.
            The folder is created if it does not exist yet.

    Returns:
        None

    Raises:
        RuntimeError: If GDAL returns no dataset for a tile.

    Description:
        For every tile in the split, the tile origin and pixel size are read from its GDAL
        geotransform. If a geojson with the same base name exists in ``geojson_path``, each of its
        features is converted into a polygon region in pixel coordinates; otherwise the tile gets
        ``regions = {}``. Every tile (annotated or not) gets one entry keyed by its ``.png`` name,
        which also stores the tile origin so tiles can be stitched back together. The resulting
        dictionary is saved as "nso_with_no_annotations.json" in ``dataset_path``.
    """
    # Annotations (in pixel coordinates) for every tile of this split, keyed by .png file name
    dataset_dict = {}

    # Loop over each image in the split
    for file in tqdm(dataset_split, desc=f"Creating JSONs for Detectron2 on {dataset_path}", ncols=150, bar_format="{l_bar}{bar:10}{r_bar}"):
        file_path = os.path.join(small_tiles_path, file)
        img_id = file.split(".tif")[0]
        png_name = file.replace(".tif", ".png")
        geojson_image = os.path.join(geojson_path, f"{img_id}.geojson")

        # The origin is stored for every tile, annotated or not, so the raster is always opened.
        gdal_image = gdal.Open(file_path)
        if gdal_image is None:
            # Fail with the offending path instead of an AttributeError on None.
            raise RuntimeError(f"GDAL could not open tile: {file_path}")

        # Read the geotransform once: [0]/[3] are the origin, [1]/[5] the pixel width/height (0.5 for nso)
        #https://www.gis.usu.edu/~chrisg/python/2009/lectures/ospy_slides4.pdf
        geotransform = gdal_image.GetGeoTransform()
        gdal_image = None  # drop the reference so GDAL can release the dataset
        originX, originY = geotransform[0], geotransform[3]
        pixel_width, pixel_height = geotransform[1], geotransform[5]

        # Regions (annotation spatial features) for the image; stays empty for tiles without a geojson
        regions = {}
        if os.path.exists(geojson_image):
            with open(geojson_image) as f:
                gj = geojson.load(f)

            # One region per building/asset, keyed by its index in the feature list
            #https://stackoverflow.com/questions/23306653/python-accessing-nested-json-data
            for i, feature in enumerate(gj["features"]):
                points = feature["geometry"]["coordinates"][0]
                regions[str(i)] = _polygon_to_pixel_region(points, originX, originY, pixel_width, pixel_height)

        dataset_dict[png_name] = {"file_ref": '',
                                  "size": os.path.getsize(file_path),
                                  "filename": png_name,
                                  "base64_img_data": '',
                                  "file_attributes": {},
                                  "regions": regions,
                                  "origin_x": originX,
                                  "origin_y": originY
                                 }

    # Make sure the split folder exists before writing into it
    os.makedirs(dataset_path, exist_ok=True)
    jsons_path = os.path.join(dataset_path, "nso_with_no_annotations.json")
    with open(jsons_path, "w") as f:
        json.dump(dataset_dict, f)

In [30]:
# Write one annotation JSON per split (train, test, val) into its own dataset folder.
for split_files, split_dir in ((train, train_path), (test, test_path), (val, val_path)):
    geojson_to_json_pix_coords(split_files, small_tiles_path, geojson_path, split_dir)

Creating JSONs for Detectron2 on ../NSO_1000/val: 100%|██████████| 4949/4949 [00:06<00:00, 819.62it/s] 


In [32]:
# Create a single via_region_data file per split by merging its annotation JSONs.
# Can be skipped if there is only one .json file per split => but then change the file name.
for d in ["train", "test", "val"]:
    # The name must match the file written by geojson_to_json_pix_coords
    # ("nso_with_no_annotations.json", without a doubled ".json" extension).
    jsons = [os.path.join(nso_path, d, "nso_with_no_annotations.json")]
    result = {}
    for json_file in jsons:
        with open(json_file, "r") as f:
            # Later files overwrite earlier entries that share the same image key
            result.update(json.load(f))

    via_region_p = os.path.join(nso_path, d, "via_region_data__with_no_annotations.json")
    with open(via_region_p, "w") as f:
        json.dump(result, f)

    print(f"Done creating JSONs {d}")
    
    

Done creating JSONs train
Done creating JSONs test
Done creating JSONs val


In [None]:
# Sanity check: print the stored entry of one known tile to verify that its regions were written correctly.
file_path_json = "../NSO_1000/val/nso_with_no_annotations.json"
key = "25_20220730_110228_SV2-01_SV_RD_8bit_RGB_50cm_Ketelmeer_24000_30000.png"

with open(file_path_json) as f:
    data = json.load(f)

# Nothing is printed when the tile is not part of this split.
if key in data:
    print(data[key])