In [46]:
import os
import json
import geojson
import pandas as pd
from tqdm import tqdm
from osgeo import gdal
from sklearn.model_selection import train_test_split

# custom functions
import sys
sys.path.append('../')
from utils.functions import grab_certain_file
# TODO delete functions.py in data_preperation folder. Use main utils instead.

'''
Read NSO tiles and annotations geojsons, convert lat/lon of tile to pixel coordinates and save pixel coordinates into
.json file If more than on .json file can be saved as one via_regions.json.
Source:  https://github.com/rl02898/detectron2-spacenet. JDP Edits:some and saving json with origins of tile in jsons to allow stiching back
of the tiles.
'''

In [41]:
# Divide dataset and set a random seed for reproducibility of the splits for next script
# TODO: path nso_train and nso_geojson to tiling.py output => Better move output of tiling to those 2 folder in NSO/ 

RANDOM_SEED = 560


# Change name to dataset...
nso_train = "../NSO/train"
nso_val = "../NSO/val"
nso_geojson = "../NSO/geojsons"
#nso_images = grab_certain_file(".tif", nso_train)
#train, val = train_test_split(nso_images, test_size=0.2, random_state=RANDOM_SEED)
#train = nso_images

In [48]:
RANDOM_SEED = 560

nso_train = "../NSO/NSO_small_tiles"
nso_geojson = "../NSO/geojsons"
# Create JSONs for Detectron2
nso_images = grab_certain_file(".tif", nso_train)
train, val = train_test_split(nso_images, test_size=0.2, random_state=RANDOM_SEED)

In [49]:
# Training set

# Create an empty dictionary to store the training set of annotations and their pixel coordinates
train_dict = {}

# Loop over each image in the training set
for file in tqdm(train, desc="Creating JSONs for Detectron2 on train", ncols=150, bar_format="{l_bar}{bar:10}{r_bar}"):
    file_path = os.path.join(nso_train, file)
    img_id = file.split(".tif")[0]
    geojson_path = os.path.join(nso_geojson, f"{img_id}.geojson")
    
    #Not all tiles have annotations, thus:
    if os.path.exists(geojson_path):
        
        # Load the geojson in gj
        with open(geojson_path) as f:
            gj = geojson.load(f)
        
        # Create a dictionary to store the regions (annotations spatial features) for the image
        regions = {}
        num_buildings = len(gj["features"])
        print (num_buildings) 
        # Open the image with gdal to get pixel size and origin if feature exists
        if num_buildings > 0:
            gdal_image = gdal.Open(file_path)
            
            # Get the pixel width and height(0.5 for nso) and the origin coordinates
            #https://www.gis.usu.edu/~chrisg/python/2009/lectures/ospy_slides4.pdf
            pixel_width, pixel_height = gdal_image.GetGeoTransform()[1], gdal_image.GetGeoTransform()[5]
            originX, originY = gdal_image.GetGeoTransform()[0], gdal_image.GetGeoTransform()[3]
            
            # Loop over each building/assets in the image
            for i in range(num_buildings):
                
                # Get the polygon points for the asset
                #https://stackoverflow.com/questions/23306653/python-accessing-nested-json-data
                points = gj["features"][i]["geometry"]["coordinates"][0]
                
                # If there is only one point, unwarp it=>check
                if len(points) == 1:
                    points = points[0]

                #Empty lists to store pixel coordinates
                all_points_x, all_points_y = [], []
                
                # Convert the lat/long points to pixel coordinates by substacting origin
                for j in range(len(points)):
                    all_points_x.append(int(round((points[j][0] - originX) / pixel_width)))
                    all_points_y.append(int(round((points[j][1] - originY) / pixel_height)))
                    
                # Create a dictionary to store the asset footprint
                regions[str(i)] = {"shape_attributes":
                                       {"name": "polygon",
                                        "all_points_x": all_points_x,
                                        "all_points_y": all_points_y,
                                        "category": 0
                                       },
                                   "region_attributes": {}
                                  }
                print (regions)
        #Should probably save origin x and y here but we still have the og tiles and imgid and allow to stich tiles back together
        #TODO: same for tiles without annot.Eg create json with empty regions in the else
        dictionary = {"file_ref": '',
                      "size": os.path.getsize(file_path),
                      "filename": file.replace(".tif", ".png"),
                      "base64_img_data": '',
                      "file_attributes": {},
                      "regions": regions,
                      "origin_x": originX,
                      "origin_y": originY
                     }
        print (dictionary)
        train_dict[file.replace(".tif", ".png")] = dictionary
    else:
        continue

with open("../NSO/train/nso.json", "w") as f:
    json.dump(train_dict, f)

Creating JSONs for Detectron2 on train:  74%|███████▎  | 1656/2249 [00:00<00:00, 8459.23it/s]

2
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [503, 487, 476, 506, 492, 535, 542, 569, 566, 585, 591, 615, 634, 646, 625, 622, 551, 578, 499, 488, 504, 503], 'all_points_y': [623, 630, 586, 575, 506, 492, 518, 511, 491, 481, 509, 503, 498, 552, 560, 552, 571, 670, 689, 636, 632, 623], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [503, 487, 476, 506, 492, 535, 542, 569, 566, 585, 591, 615, 634, 646, 625, 622, 551, 578, 499, 488, 504, 503], 'all_points_y': [623, 630, 586, 575, 506, 492, 518, 511, 491, 481, 509, 503, 498, 552, 560, 552, 571, 670, 689, 636, 632, 623], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [503, 487, 476, 506, 492, 535, 542, 569, 566, 585, 591, 615, 634, 646, 625, 622, 551, 578, 499, 488, 504, 503], 'all_points_y': [623, 630, 586, 575, 506, 492, 518, 511, 491, 481, 509, 503, 498, 552, 560, 552, 571, 670, 689, 636, 632, 623], 'categ

Creating JSONs for Detectron2 on train: 100%|██████████| 2249/2249 [00:00<00:00, 8658.63it/s]

3
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [927, 930, 961, 980, 961, 958, 951, 929, 916, 915, 920, 916, 908, 899, 905, 896, 891, 881, 927], 'all_points_y': [388, 399, 434, 453, 473, 466, 467, 484, 466, 460, 451, 444, 450, 440, 435, 428, 433, 427, 388], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_points_x': [927, 930, 961, 980, 961, 958, 951, 929, 916, 915, 920, 916, 908, 899, 905, 896, 891, 881, 927], 'all_points_y': [388, 399, 434, 453, 473, 466, 467, 484, 466, 460, 451, 444, 450, 440, 435, 428, 433, 427, 388], 'category': 0}, 'region_attributes': {}}, '1': {'shape_attributes': {'name': 'polygon', 'all_points_x': [927, 930, 961, 980, 961, 958, 951, 929, 916, 915, 920, 916, 908, 899, 905, 896, 891, 881, 927], 'all_points_y': [388, 399, 434, 453, 473, 466, 467, 484, 466, 460, 451, 444, 450, 440, 435, 428, 433, 427, 388], 'category': 0}, 'region_attributes': {}}}
{'0': {'shape_attributes': {'name': 'polygon', 'all_po




In [50]:
#Validation set
val_dict = {}

for file in tqdm(val, desc="Creating JSONs for Detectron2 on val", ncols=150, bar_format="{l_bar}{bar:10}{r_bar}"):
    file_path = os.path.join(nso_val, file)
    img_id = file.split(".tif")[0]
    geojson_path = os.path.join(nso_geojson, f"{img_id}.geojson")
    if os.path.exists(geojson_path): 
        with open(geojson_path) as f:
            gj = geojson.load(f)

        regions = {}
        num_buildings = len(gj["features"])
        if num_buildings > 0:
            gdal_image = gdal.Open(file_path)
            pixel_width, pixel_height = gdal_image.GetGeoTransform()[1], gdal_image.GetGeoTransform()[5]
            originX, originY = gdal_image.GetGeoTransform()[0], gdal_image.GetGeoTransform()[3]

            for i in range(num_buildings):
                points = gj["features"][i]["geometry"]["coordinates"][0]
                if len(points) == 1:
                    points = points[0]

                all_points_x, all_points_y = [], []
                for j in range(len(points)):
                    all_points_x.append(int(round((points[j][0] - originX) / pixel_width)))
                    all_points_y.append(int(round((points[j][1] - originY) / pixel_height)))

                regions[str(i)] = {"shape_attributes":
                                       {"name": "polygon",
                                        "all_points_x": all_points_x,
                                        "all_points_y": all_points_y,
                                        "category": 0
                                       },
                                   "region_attributes": {}
                                  }

        dictionary = {"file_ref": '',
                      "size": os.path.getsize(file_path),
                      "filename": file.replace(".tif", ".png"),
                      "base64_img_data": '',
                      "file_attributes": {},
                      "regions": regions,
                      "origin_x": originX,
                      "origin_y": originY
                     }

        val_dict[file.replace(".tif", ".png")] = dictionary

with open("../NSO/val/nso.json", "w") as f:
    json.dump(val_dict, f)

Creating JSONs for Detectron2 on val: 100%|██████████| 563/563 [00:00<00:00, 9623.61it/s]


In [51]:
# Create single via_region_data training dataset => can be skipped if only one .json file.=>But then change file name
jsons = ["../NSO/train/nso.json"]

result = {}
for file in jsons:
    with open(file, "r") as f:
        loaded = json.load(f)
        
    #https://realpython.com/iterate-through-dictionary-python/
    for key, value in loaded.items():
        result[key] = value

with open("../NSO/train/via_region_data.json", "w") as file:
    json.dump(result, file)

In [52]:
# Create via_region JSON for entire validation dataset => can be skipped if only one .json file.=>But then change file name
jsons = ["../NSO/val/nso.json"]

result = {}
for file in jsons:
    with open(file, "r") as f:
        loaded = json.load(f)
    for key, value in loaded.items():
        result[key] = value

with open("../NSO/val/via_region_data.json", "w") as file:
    json.dump(result, file)

print("Done creating JSONs")


Done creating JSONs


In [None]:
# TODO: Do the same 2 cells when there is a test set