In [1]:
import os
import json
import geojson
import pandas as pd
import numpy as np
import random
from tqdm import tqdm
from osgeo import gdal
import sys
sys.path.append('/scistor/ivm/ako268/nso_repo/detectron2-nso-test/1_data_preperation')
from utils.functions import grab_certain_file

root = "/scistor/ivm/project/NSO/timeseries_50_baseline"
big_tiles_folder = root + "/big_tiles"
small_tiles_folder = root + "/small_tiles"
annotation_path = root + "/annotations_shp/fixed_substations_NL_annotations.shp"
geojson_folder = root + "/geojsons"
json_folder = root + "/jsons"

print('done')

done


In [3]:
def split_data(data, train_ratio=0.6, val_ratio=0.2, test_ratio=0.2, seed=13):
    assert(train_ratio + val_ratio + test_ratio == 1.0)
    random.seed(seed)
    random.shuffle(data)

    # Calculate split sizes
    train_size = int(len(data) * train_ratio)
    val_size = int(len(data) * val_ratio)

    # Split the data
    train_data = data[:train_size]
    val_data = data[train_size:(train_size + val_size)]
    test_data = data[(train_size + val_size):]

    return train_data, val_data, test_data

In [6]:
# This function needs the filestructure to be as follows.
# Top level
# - root
# Sub level: The root folder should contain
# - "small_tiles" folder containing the tiled .tif files
# - "geojsons" folder containing the geojsons annotations
# - "jsons" folder being empty. This is where the created json will be stored

def create_json(set_list, set_type, root=root):
    assert set_type in ['train', 'val', 'test'], "Invalid set_type. Expected 'train', 'val', or 'test'"
    
    small_tiles_folder=root+"/small_tiles"
    geojsons_folder=root+"/geojsons"
    jsons_folder=root+"/jsons"

    print('start processing', len(set_list), 'small tif files for', set_type, 'set')
    set_dict = {}
    for file in tqdm(set_list, desc=f"Creating JSONs for Detectron2 on {set_type}", ncols=150, bar_format="{l_bar}{bar:10}{r_bar}"):
        file_path = os.path.join(small_tiles_folder, file)
        img_id = file.split(".tif")[0]
        geojson_path = os.path.join(geojsons_folder, f"{img_id}.geojson")

        if os.path.exists(geojson_path):
            with open(geojson_path) as f:
                gj = geojson.load(f)

            regions = {}
            num_buildings = len(gj["features"])
            if num_buildings > 0:
                gdal_image = gdal.Open(file_path)
                pixel_width, pixel_height = gdal_image.GetGeoTransform()[1], gdal_image.GetGeoTransform()[5]
                originX, originY = gdal_image.GetGeoTransform()[0], gdal_image.GetGeoTransform()[3]

                for i in range(num_buildings):
                    points = gj["features"][i]["geometry"]["coordinates"][0]

                    if len(points) == 1:
                        points = points[0]

                    all_points_x, all_points_y = [], []
                    for j in range(len(points)):
                        all_points_x.append(int(round((points[j][0] - originX) / pixel_width)))
                        all_points_y.append(int(round((points[j][1] - originY) / pixel_height)))

                    regions[str(i)] = {"shape_attributes":
                                           {"name": "polygon",
                                            "all_points_x": all_points_x,
                                            "all_points_y": all_points_y,
                                            "category": 0
                                           },
                                       "region_attributes": {}
                                      }

            dictionary = {"file_ref": '',
                          "size": os.path.getsize(file_path),
                          "filename": file.replace(".tif", ".png"),
                          "base64_img_data": '',
                          "file_attributes": {},
                          "regions": regions,
                          "origin_x": originX,
                          "origin_y": originY
                         }
            set_dict[file.replace(".tif", ".png")] = dictionary

    print('found a total of', len(set_dict), 'substation annotations for', set_type, 'set')
    
    with open(f"{jsons_folder}/via_region_{set_type}.json", "w") as f:
        json.dump(set_dict, f)

In [5]:
import torch

if torch.cuda.is_available():
    device = torch.cuda.get_device_name(0)
    device_count = torch.cuda.device_count()
    compute_capability = torch.cuda.get_device_capability(0)
    total_memory = torch.cuda.get_device_properties(0).total_memory

    print(f"Device: {device}")
    print(f"Number of GPUs: {device_count}")
    print(f"Compute Capability: {compute_capability}")
    print(f"Total Memory: {total_memory} bytes")
else:
    print("No GPU available.")


No GPU available.


'''
Read NSO tiles and annotations geojsons, convert lat/lon of tile to pixel coordinates and save pixel coordinates into
.json file If more than on .json file can be saved as one via_regions.json.
Source:  https://github.com/rl02898/detectron2-spacenet. JDP Edits:some and saving json with origins of tile in jsons to allow stiching back
of the tiles.
'''

In [7]:
from sklearn.model_selection import train_test_split

df = pd.read_csv('/scistor/ivm/ako268/nso_repo/detectron2-nso-test/1_data_preperation/prepare_imagery/datafiles/cleanbaseline_input.csv')
len(list(set(df['coordinate_str'])))

# Assuming df is your DataFrame
# Split the coordinates into 60% train, 20% val, 20% test
df_train, df_temp = train_test_split(df, test_size=0.4, random_state=13)
df_val, df_test = train_test_split(df_temp, test_size=0.5, random_state=13)

print(sum(df_train['wms_name'].isin(df_val['wms_name'])))
print(sum(df_train['wms_name'].isin(df_test['wms_name'])))

3
3


In [8]:
big_tiles_folder = root + "/big_tiles"
# small_tiles_folder = root + "small_tiles"
# small_tiles_png = root + "small_tiles_png"

files = os.listdir(big_tiles_folder)
tile_names = [file.strip('.tif') for file in files if file.endswith(".tif")]
# create 60% train, 20% validate, 20% test
train, val, test = split_data(tile_names)
print(len(train))
print(len(val))
print(len(test))

22
7
8


In [9]:
# note: these numbers are based on the input. In practice, some subs may fall off because they are on the edge. Also, there may be subs on the tile that we did not select

train_wms = [name.split('_SV_RD_8bit_RGB_50cm_')[0] for name in train]
print('number of subs in train', sum(df['wms_name'].isin(train_wms)))

val_wms = [name.split('_SV_RD_8bit_RGB_50cm_')[0] for name in val]
print('number of subs in val', sum(df['wms_name'].isin(val_wms)))

test_wms = [name.split('_SV_RD_8bit_RGB_50cm_')[0] for name in test]
print('number of subs in test', sum(df['wms_name'].isin(test_wms)))

number of subs in train 26
number of subs in val 11
number of subs in test 10


In [10]:
all_files = grab_certain_file(".tif", small_tiles_folder)

train_set = [file for file in all_files for name in train if name in file]
val_set = [file for file in all_files for name in val if name in file]
test_set = [file for file in all_files for name in test if name in file]

print(len(train_set)/(len(val_set)+len(test_set)+len(train_set)))

0.5958117354343769


In [13]:
create_json(train_set, 'train')
create_json(val_set, 'val')
create_json(test_set, 'test')

small tiles folder exists True
geojsons folder exists True
jsons folder exists True
start processing 14368 small tif files for train set


Creating JSONs for Detectron2 on train: 100%|██████████| 14368/14368 [00:00<00:00, 79384.25it/s]


found a total of 62 substation annotations for train set
small tiles folder exists True
geojsons folder exists True
jsons folder exists True
start processing 4513 small tif files for val set


Creating JSONs for Detectron2 on val: 100%|██████████| 4513/4513 [00:00<00:00, 9656.18it/s]


found a total of 21 substation annotations for val set
small tiles folder exists True
geojsons folder exists True
jsons folder exists True
start processing 5234 small tif files for test set


Creating JSONs for Detectron2 on test: 100%|██████████| 5234/5234 [00:00<00:00, 10768.86it/s]

found a total of 17 substation annotations for test set





In [14]:
jsons_folder=root+"/jsons"

import json

# Specify the path to your JSON file
json_file_path = jsons_folder + "/via_region_test.json"

# Open the JSON file and read its contents
with open(json_file_path, 'r') as json_file:
    json_data = json.load(json_file)

# Now you can access the data from the JSON file
# For example, let's say your JSON file contains a list of objects

In [28]:
key = list(json_data.keys())[0]
json_data[key].keys()
json_data[key] # change to save sub info!


{'file_ref': '',
 'size': 2791197,
 'filename': '20220303_101829_SV1-01_SV_RD_8bit_RGB_50cm_Helmond_20000_23000.png',
 'base64_img_data': '',
 'file_attributes': {},
 'regions': {'0': {'shape_attributes': {'name': 'polygon',
    'all_points_x': [990,
     953,
     927,
     943,
     959,
     953,
     919,
     901,
     933,
     922,
     888,
     872,
     901,
     895,
     914,
     901,
     831,
     946,
     999,
     990],
    'all_points_y': [383,
     366,
     432,
     438,
     445,
     471,
     457,
     498,
     513,
     535,
     524,
     563,
     582,
     602,
     610,
     642,
     607,
     333,
     358,
     383],
    'category': 0},
   'region_attributes': {}}},
 'origin_x': 174196.0,
 'origin_y': 391562.0}