In [1]:
import os
import json
import geojson
import pandas as pd
from tqdm import tqdm
from osgeo import gdal
from sklearn.model_selection import train_test_split

# custom functions
import sys
sys.path.append('../')
from utils.functions import grab_certain_file

In [5]:
# Creat train val and test folder in NSO directory

root_path = 'NSO'
folders = ['train','test','val']
for folder in folders:
    os.mkdir(os.path.join(root_path,folder))

In [6]:
RANDOM_SEED = 560

nso_train = "NSO/nso_all"
nso_geojson = "NSO/geojsons_annotations_nso"

In [7]:
# Create JSONs for Detectron2
nso_images = grab_certain_file(".tif", nso_train)
train, val = train_test_split(nso_images, test_size=0.2, random_state=RANDOM_SEED)
train_dict = {}

In [8]:
# Training set

#Could probably save origin x and y to stich tiles back together at a later stage
for file in tqdm(train, desc="Creating JSONs for Detectron2 on NSO", ncols=150, bar_format="{l_bar}{bar:10}{r_bar}"):
    file_path = os.path.join(nso_train, file)
    img_id = file.split(".tif")[0]
    geojson_path = os.path.join(nso_geojson, f"{img_id}.geojson")
    #Not all tiles have annotations, thus:
    if os.path.exists(geojson_path):        
        with open(geojson_path) as f:
            gj = geojson.load(f)
        regions = {}
        num_buildings = len(gj["features"])
        
        if num_buildings > 0:
            gdal_image = gdal.Open(file_path)
            #https://www.gis.usu.edu/~chrisg/python/2009/lectures/ospy_slides4.pdf
            pixel_width, pixel_height = gdal_image.GetGeoTransform()[1], gdal_image.GetGeoTransform()[5]
            originX, originY = gdal_image.GetGeoTransform()[0], gdal_image.GetGeoTransform()[3]

            for i in range(num_buildings):
                #https://stackoverflow.com/questions/23306653/python-accessing-nested-json-data
                points = gj["features"][i]["geometry"]["coordinates"][0]
                if len(points) == 1:
                    points = points[0]

                all_points_x, all_points_y = [], []
                for j in range(len(points)):
                    all_points_x.append(int(round((points[j][0] - originX) / pixel_width)))
                    all_points_y.append(int(round((points[j][1] - originY) / pixel_height)))

                regions[str(i)] = {"shape_attributes":
                                       {"name": "polygon",
                                        "all_points_x": all_points_x,
                                        "all_points_y": all_points_y,
                                        "category": 0
                                       },
                                   "region_attributes": {}
                                  }
        #Should probably save origin x and y here but we still have the og tiles and imgid
        dictionary = {"file_ref": '',
                      "size": os.path.getsize(file_path),
                      "filename": file.replace(".tif", ".png"),
                      "base64_img_data": '',
                      "file_attributes": {},
                      "regions": regions
                     }

        train_dict[file.replace(".tif", ".png")] = dictionary
    else:
        continue

with open("NSO/train/nso.json", "w") as f:
    json.dump(train_dict, f)

Creating JSONs for Detectron2 on NSO: 100%|██████████| 4592/4592 [00:00<00:00, 76836.28it/s]


In [9]:
#Validation set
val_dict = {}

for file in tqdm(val, desc="Creating JSONs for Detectron2 on 1_Rio_val", ncols=150, bar_format="{l_bar}{bar:10}{r_bar}"):
    file_path = os.path.join(nso_train, file)
    img_id = file.split(".tif")[0]
    geojson_path = os.path.join(nso_geojson, f"{img_id}.geojson")
    if os.path.exists(geojson_path): 
        with open(geojson_path) as f:
            gj = geojson.load(f)

        regions = {}
        num_buildings = len(gj["features"])
        if num_buildings > 0:
            gdal_image = gdal.Open(file_path)
            pixel_width, pixel_height = gdal_image.GetGeoTransform()[1], gdal_image.GetGeoTransform()[5]
            originX, originY = gdal_image.GetGeoTransform()[0], gdal_image.GetGeoTransform()[3]

            for i in range(num_buildings):
                points = gj["features"][i]["geometry"]["coordinates"][0]
                if len(points) == 1:
                    points = points[0]

                all_points_x, all_points_y = [], []
                for j in range(len(points)):
                    all_points_x.append(int(round((points[j][0] - originX) / pixel_width)))
                    all_points_y.append(int(round((points[j][1] - originY) / pixel_height)))

                regions[str(i)] = {"shape_attributes":
                                       {"name": "polygon",
                                        "all_points_x": all_points_x,
                                        "all_points_y": all_points_y,
                                        "category": 0
                                       },
                                   "region_attributes": {}
                                  }

        dictionary = {"file_ref": '',
                      "size": os.path.getsize(file_path),
                      "filename": file.replace(".tif", ".png"),
                      "base64_img_data": '',
                      "file_attributes": {},
                      "regions": regions
                     }

        val_dict[file.replace(".tif", ".png")] = dictionary

with open("NSO/val/nso.json", "w") as f:
    json.dump(val_dict, f)

Creating JSONs for Detectron2 on 1_Rio_val: 100%|██████████| 1148/1148 [00:00<00:00, 8303.28it/s]


In [10]:
# Create JSON for entire training dataset
jsons = ["NSO/train/nso.json"]

result = {}
for file in jsons:
    with open(file, "r") as f:
        loaded = json.load(f)
        
    #https://realpython.com/iterate-through-dictionary-python/
    for key, value in loaded.items():
        result[key] = value

with open("NSO/train/via_region_data.json", "w") as file:
    json.dump(result, file)

In [11]:
# Create JSON for entire validation dataset
jsons = ["NSO/val/nso.json"]

result = {}
for file in jsons:
    with open(file, "r") as f:
        loaded = json.load(f)
    for key, value in loaded.items():
        result[key] = value

with open("NSO/val/via_region_data.json", "w") as file:
    json.dump(result, file)

print("Done creating JSONs")


Done creating JSONs
