In [27]:
import geopandas as gpd
import pandas as pd
import json
import os
import pygeos
import rasterio
import json
import pyproj
import numpy as np
from pathlib import Path

In [31]:
def split_geotiff(rasters_path, output_dir, width, height, pixel_size):
    """
    Split every GeoTIFF in a folder into tiles of a fixed width and height.

    Each file in ``rasters_path`` whose name ends with ".tif" is read completely
    and cut into non-overlapping tiles, starting at the upper-left corner of the
    raster. Every tile is written to ``output_dir`` as an LZW-compressed GeoTIFF
    named ``<raster id>_<row offset>_<column offset>.tif``; the raster id is the
    1-based position of the source file in the sorted list of ".tif" files and
    the offsets are the pixel offsets of the tile's upper-left corner.

    Only complete tiles are written: pixels left over at the right/bottom edge
    that cannot fill a whole ``width`` x ``height`` tile are dropped.

    Parameters:
    rasters_path (str): File path to the input folder with the rasters.
    output_dir (str): File path to the output folder where the created tiles will be saved.
    width (int): Width of the tiles in pixels.
    height (int): Height of the tiles in pixels.
    pixel_size (float): Pixel size of the raster. It is used both as the pixel
        size of each tile's transform and to convert pixel offsets into
        coordinate offsets from the raster origin.

    Returns:
    None
    """
    # os.listdir() returns names in arbitrary order; sorting keeps the raster
    # ids in the output file names reproducible between runs.
    tif_files = sorted(name for name in os.listdir(rasters_path) if name.endswith(".tif"))

    for raster_id, tif_file in enumerate(tif_files, start=1):
        # Read the whole raster plus the metadata needed to write the tiles
        with rasterio.open(os.path.join(rasters_path, tif_file)) as src:
            image = src.read()
            base_meta = dict(src.meta)
            src_transform = src.transform

        # Origin of the raster: x of the left edge and y of the top edge
        min_x = src_transform[2]
        max_y = src_transform[5]
        n_rows, n_cols = image.shape[1], image.shape[2]

        # Upper-left pixel offsets of every tile that fits completely inside
        # the raster (the "+ 1" keeps a last tile that ends exactly on the edge).
        row_offsets = range(0, n_rows - height + 1, height)
        col_offsets = range(0, n_cols - width + 1, width)

        for row_off in row_offsets:
            for col_off in col_offsets:
                tile = image[:, row_off:row_off + height, col_off:col_off + width]

                # Shift the origin by the tile offset, converted from pixels
                # to coordinates with the pixel size.
                tile_transform = rasterio.transform.from_origin(
                    min_x + col_off * pixel_size,
                    max_y - row_off * pixel_size,
                    pixel_size,
                    pixel_size,
                )

                # Fresh metadata dict per tile so the source metadata is never mutated
                tile_meta = {
                    **base_meta,
                    "driver": "GTiff",
                    "height": tile.shape[1],
                    "width": tile.shape[2],
                    "transform": tile_transform,
                }

                tile_path = os.path.join(
                    output_dir, "{}_{}_{}.tif".format(raster_id, row_off, col_off)
                )
                with rasterio.open(tile_path, "w", compress='lzw', **tile_meta) as dest:
                    dest.write(tile)

In [24]:
def reproject(df_ds, current_crs, approximate_crs):
    """Transform the geometries of a dataframe from one CRS to another.

    Args:
        df_ds (DataFrame): Table whose 'geometry' column holds pygeos geometries.
        current_crs (str): CRS the coordinates are currently expressed in
            (e.g. an EPSG code); handed to pyproj as the source CRS.
        approximate_crs (str): CRS to transform the coordinates to; handed to
            pyproj as the target CRS.

    Returns:
        The result of ``pygeos.set_coordinates`` applied to a copy of the
        'geometry' column: the same geometries with their coordinates replaced
        by the transformed ones. ``df_ds`` itself is not modified.
    """
    source_geoms = df_ds['geometry']

    # always_xy=True asks pyproj for (x, y) coordinate order on both sides
    to_target = pyproj.Transformer.from_crs(current_crs, approximate_crs, always_xy=True)

    # Flat (n_points, 2) array with the coordinates of all geometries
    xy = pygeos.get_coordinates(source_geoms)
    target_x, target_y = to_target.transform(xy[:, 0], xy[:, 1])

    # Work on a copy so the caller's geometries keep their original coordinates
    return pygeos.set_coordinates(source_geoms.copy(), np.array((target_x, target_y)).T)


In [34]:
def split_annotations_to_geojson(df, tiles_path, geojson_folder, crs="epsg:28992"):
    """
    Split annotations data into the raster tiles and convert to the format of train-net.py:
    Pygeos to GPD to geojson.

    For every ".tif" tile in ``tiles_path`` the annotations intersecting the
    tile's bounding box are clipped to that box and written to
    ``<tile name without .tif>.geojson`` in ``geojson_folder``. Tiles without
    any intersecting annotation produce no file. Each written feature gets a
    'properties' value ``{"id": "<n>", "building": "yes"}``, where ``n`` is a
    counter that keeps running across all tiles of one call.

    Args:
    df (DataFrame): Vector dataset containing features to split; its 'geometry'
        column must hold pygeos geometries.
    tiles_path (str): Path to the directory where the raster tiles are stored
    geojson_folder (str): Path to the directory where the output geojson files will be saved
    crs (str): CRS assigned to the written features. Defaults to "epsg:28992".

    Returns:
    None
    """
    # Spatial index over the annotation geometries for fast bounding-box queries
    spatial_index = pygeos.STRtree(df.geometry)
    id_obj = 0

    tif_tiles = [name for name in os.listdir(tiles_path) if name.endswith(".tif")]

    for tile in tif_tiles:
        # Only the extent of the tile is needed, so the pixel data is not read
        with rasterio.open(os.path.join(tiles_path, tile)) as src:
            geom = pygeos.box(*src.bounds)

        # Positions (in df) of the annotations that intersect this tile
        check_overlaps = spatial_index.query(geom, predicate='intersects')
        if len(check_overlaps) == 0:
            continue

        get_matches = df.iloc[check_overlaps]
        # Clip every matching annotation to the tile extent
        get_exact_overlap = pygeos.intersection(get_matches['geometry'].values, geom)

        # Collect all features first and build the frame once: exactly one row
        # per clipped geometry, without growing a DataFrame row by row.
        properties = []
        geometries = []
        for polygon in get_exact_overlap:
            id_obj += 1
            properties.append({"id": "{}".format(id_obj), "building": "yes"})
            geometries.append(pygeos.to_shapely(polygon))

        gdf_obj = gpd.GeoDataFrame(
            {"properties": properties, "geometry": geometries},
            geometry='geometry',
            crs=crs,
        )

        img_id = tile.split(".tif")[0]
        gdf_obj.to_file(os.path.join(geojson_folder, "{}.geojson".format(img_id)), driver="GeoJSON")

In [None]:
### NEW Splitting a folder of rasters into several tiles of x width and y height ###

# Folder that holds the big input rasters (.tif)
nso_folder = Path("../NSO/NSO_big_tiles")

#raster_folder = Path("../gis/tiling/20220811_110732_SV1-04_SV_RD_8bit_RGB_50cm_Abcoude")
tile_output_folder = Path("../NSO/NSO_small_tiles")
width = 1000 #width of the new tiles
height = 1000 #height of the new tiles
nso_pix = 0.5 #pixel size of the tiles

#make output directory if does not exist
os.makedirs(tile_output_folder, exist_ok=True)

# split_geotiff() expects the *folder* with the big rasters and loops over every
# .tif in it itself (numbering the rasters as it goes), so it is called once
# with the folder rather than once per file.
split_geotiff(nso_folder, tile_output_folder, width, height, nso_pix)

In [36]:
### OLD: Splitting a single raster into several tiles of x width and y height ###

# raster_folder = Path("../gis/tiling/20220811_110732_SV1-04_SV_RD_8bit_RGB_50cm_Abcoude")
# tile_output_folder = Path("../gis/tiling/20220811_110732_SV1-04_SV_RD_8bit_RGB_50cm_Abcoude/output_tiles")
# width = 1000 #width of the new tiles
# height = 1000 #height of the new tiles
# nso_pix = 0.5 #pixel size of the tiles

# #make output directory if does not exist
# if not os.path.exists(tile_output_folder):
#     os.makedirs(tile_output_folder)

# split_geotiff(raster_folder, tile_output_folder, width, height, nso_pix)

In [None]:
### Reproject annotation shapefile crs to the same crs as the raster tiles ###

annotations_crs = "epsg:4326"
raster_crs = "epsg:28992"
# tile_output_folder is a pathlib.Path; join with "/" because Path + str raises a TypeError
annotation_folder = Path(tile_output_folder) / "annotations"

# make annotation directory if does not exist
os.makedirs(annotation_folder, exist_ok=True)

# !!!!!! Save GIS made annotations to annotation folder!!!!!! #
annotation_file = "substations_NL_annotations.shp"

# path to the annotation shapefile
shp_path = os.path.join(annotation_folder, annotation_file)
# plain pandas DataFrame copy of the shapefile contents
df = pd.DataFrame(gpd.read_file(shp_path).copy())

# make df geometry column as a pygeos array
df.geometry = pygeos.from_shapely(df.geometry)

# reproject geometry column from the annotation crs to the raster crs
df.geometry = reproject(df, annotations_crs, raster_crs)

In [None]:
### Splitting the annotations shapefile over the raster tiles & saving the result as geojson ###

# Output folder for the per-tile geojson files; created on the first run
json_folder = Path("../NSO/geojsons")
if not json_folder.exists():
    json_folder.mkdir(parents=True)

# Clip the annotations to every raster tile of width X and height Y and write one geojson per tile
split_annotations_to_geojson(df, tile_output_folder, json_folder)

In [None]:
# Pythonic version, used to check that the geojsons are correct:
def split_annotations_to_geojson_pythonic (df, tiles_path, geojson_folder):
    """
    Clip the annotations to each raster tile and write one geojson per tile
    (format of train-net.py: Pygeos to GPD to geojson).

    Tiles that no annotation intersects produce no file. Feature ids restart
    at 1 for every tile and the output takes its CRS from the tile itself.

    Args:
    df (DataFrame): Vector dataset containing features to split; its 'geometry'
        column must hold pygeos geometries.
    tiles_path (str): Path to the directory where the raster tiles are stored
    geojson_folder (str): Path to the directory where the output geojson files will be saved

    Returns:
    None
    """
    # Spatial index over the annotation geometries for fast queries
    tree = pygeos.STRtree(df.geometry)

    tile_names = [entry for entry in os.listdir(tiles_path) if entry.endswith('.tif')]
    for tile in tile_names:
        with rasterio.open(os.path.join(tiles_path, tile)) as src:
            tile_crs = src.crs
            geom = pygeos.box(*src.bounds)
            overlaps = tree.query(geom, predicate='intersects')

        # Nothing intersects this tile: no geojson is written for it
        if len(overlaps) == 0:
            continue

        # Clip the matching annotations to the tile extent
        clipped = pygeos.intersection(df.iloc[overlaps].geometry.values, geom)

        features = [
            {'properties': {'id': number, 'building': 'yes'},
             'geometry': pygeos.to_shapely(polygon)}
            for number, polygon in enumerate(clipped, start=1)
        ]
        gdf = gpd.GeoDataFrame(features, geometry='geometry', crs=tile_crs)

        img_id = tile.split(".tif")[0]
        gdf.to_file(os.path.join(geojson_folder, f"{img_id}.geojson"), driver="GeoJSON")

In [None]:
#delete_me
# #Version 14-03-23: does not work for several objects in the same tile

# def split_annotations_to_geojson_x(df, tiles_path, geojson_folder):
#     """
#     Split annotations data into the raster tiles and convert to the format of train-net.py: 
#     Pygeos to GPD to geojson.

#     Args:
#     df (GeoDataFrame): Vector dataset containing features to split.
#     tiles_path (str): Path to the directory where the raster tiles are stored
#     geojson_folder (str): Path to the directory where the output geojson files will be saved
    
#     Returns:
#     None
#     """
#     # Create spatial index for faster query
#     spatial_index = pygeos.STRtree(df.geometry)
#     id_obj = 0
    
#         # Get a list of all files in the directory
#     files = os.listdir(tiles_path)

#     # Filter for only the files that end with ".tif"
#     tif_tiles = [file for file in files if file.endswith(".tif")]

#     # Iterate through each raster tile
#     for tile in tif_tiles:
#         input_file = os.path.join(tiles_path,tile)
#         with rasterio.open(input_file) as src:
#             out_image = src.read()
#             out_meta = src.meta
#         geom = pygeos.box(*src.bounds)
        
#         # Query overlapping geometries from the spatial index
#         check_overlaps = spatial_index.query(geom, predicate='intersects')
        
#         # Create new geojson file for each overlapping geometry
#         if len(check_overlaps) > 0:
#             id_obj += 1
#             get_matches = df.iloc[check_overlaps]
#             get_exact_overlap = pygeos.intersection(get_matches['geometry'].values, geom)
            
#             geometries_objects = pygeos.to_wkb(get_exact_overlap)
#             df_obj = pd.DataFrame()
#             df_obj['properties'] = [{"id":"{}".format(id_obj),"building":"yes"}]
#             df_obj['geometry'] = gpd.GeoSeries.from_wkb(geometries_objects)
#             gdf_obj = gpd.GeoDataFrame(df_obj, geometry='geometry', crs="epsg:28992")
#             gdf_obj.set_geometry(col='geometry', inplace=True)
            
#             img_id = tile.split(".tif")[0]
#             gdf_obj.to_file(os.path.join(geojson_folder, "{}.geojson".format(img_id)), driver="GeoJSON")

In [None]:
### Delete me please ###

#adjust if more than one object:
###splitting annotations data into the raster tiles
###Adjusting geojson to the format of train-net.py: Pygeos to GPD to geojson

# path = "../gis/tiling/geojsons_annotations"

# spatial_index = pygeos.STRtree(df.geometry)
# id_obj = 0
# for tile in os.listdir(tiles_path):
#     input_file = os.path.join(tiles_path,tile)
#     with rasterio.open(input_file) as src:
#         out_image = src.read()#, out_transform = rasterio.mask.mask(src, shapes, crop=True)
#         out_meta = src.meta
#     geom = pygeos.box(*src.bounds)
#     check_overlaps = spatial_index.query(geom,predicate='intersects')
#     if len(check_overlaps) > 0:
#         id_obj += 1
#         get_matches = df.iloc[check_overlaps]
#         get_exact_overlap = pygeos.intersection(get_matches['geometry'].values,geom)
#         #old: get_exact_overlap = pygeos.intersection(row['geometry'].values,geom)
#     #for object_coord in get_exact_overlap:
#         # Pygeos coordinates to geojson coordinates to geopandas leads to backslashes in the geojson file, so to_wkb is used
#         geometries_objects = pygeos.to_wkb(get_exact_overlap)
#         df_obj = pd.DataFrame()
#         df_obj['properties'] = [{"id":"{}".format(id_obj),"building":"yes"}]
#         df_obj['geometry'] = gpd.GeoSeries.from_wkb(geometries_objects)
#         gdf_obj = gpd.GeoDataFrame(df_obj, geometry='geometry', crs="epsg:28992")
#         gdf_obj.set_geometry(col='geometry', inplace=True)
#         img_id = tile.split(".tif")[0]
#         gdf_obj.to_file(os.path.join(path,"{}.geojson".format(img_id)), driver="GeoJSON")

In [None]:
### Delete me please ###
# Old Version without function

###splitting annotations data into the raster tiles
###Adjusting geojson to the format of train-net.py: Pygeos to GPD to geojson

# path = "../gis/tiling/geojsons_annotations"

# spatial_index = pygeos.STRtree(df.geometry)
# id_obj = 0
# for tile in os.listdir(tiles_path):
#     input_file = os.path.join(tiles_path,tile)
#     with rasterio.open(input_file) as src:
#         out_image = src.read()#, out_transform = rasterio.mask.mask(src, shapes, crop=True)
#         out_meta = src.meta
#     geom = pygeos.box(*src.bounds)
#     check_overlaps = spatial_index.query(geom,predicate='intersects')
#     if len(check_overlaps) > 0:
#         id_obj += 1
#         get_matches = df.iloc[check_overlaps]
#         get_exact_overlap = pygeos.intersection(get_matches.geometry.values,geom)
#         geometries_objects = pygeos.to_wkb(get_exact_overlap)
#         df_obj = pd.DataFrame()
#         df_obj['properties'] = [{"id":"{}".format(id_obj),"building":"yes"}]
#         df_obj['geometry'] = gpd.GeoSeries.from_wkb(geometries_objects)
#         gdf_obj = gpd.GeoDataFrame(df_obj, geometry='geometry', crs="epsg:28992")
#         gdf_obj.set_geometry(col='geometry', inplace=True)
#         img_id = tile.split(".tif")[0]
#         gdf_obj.to_file(os.path.join(out_path,"{}.geojson".format(img_id)), driver="GeoJSON")