# Centroid finder

This notebooks finds the best coordinates for each HRDPS grid cell centroid by minimizing elevation difference between HRDPS cell native elevation and underlying CDEM pixels in a 800 meters radius around cell natural centroid.

In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
from geopandas.tools import sjoin
import rasterio

from rasterio.features import shapes
from shapely import wkt

### Step 0 : Define path to files and project CRS

In [2]:
# Path to the hrdps grid shapefile
hrdps_grid_path = 'C:/Users/PaulBillecocq/Documents/UdS/KRG_DATA/vecteurs/HRDPS/selected_pixels_32618.gpkg'
# Path to the 20 m resolution DEM to use
cdem_path = 'C:/Users/PaulBillecocq/Documents/UdS/KRG_DATA/a3d-prep/utm18n/dem/DEM-hrdps-grid-extent-5m-cleaned_32618.tif'

In [3]:
project_crs = 'EPSG:32618'

In [4]:
dem = rasterio.open(cdem_path)

### Step 1 : Compute DEM cells centroids

In [5]:
# 1. Polygonize CDEM raster
polygonized_pixels = []
with rasterio.open(cdem_path) as src:
    image = src.read(1) # first band
    polygonized_pixels = list(({'properties': {'elevation': v}, 'geometry': s} for i, (s, v) in enumerate(shapes(image, mask=None, transform=src.transform))))

polygonized_raster  = gpd.GeoDataFrame.from_features(polygonized_pixels)
# 2. Compute centroids and ditch the gemoetry column, we won't need it from here
polygonized_raster['centroid'] = polygonized_raster.apply(lambda x : x['geometry'].centroid, axis=1)
polygonized_raster.drop('geometry', inplace=True, axis=1)
polygonized_raster = polygonized_raster.set_geometry('centroid', crs='EPSG:32611')

In [6]:
# 1. Polygonize CDEM raster
polygonized_pixels = []
image = dem.read(1)
polygonized_pixels = list(({'properties': {'elevation': v}, 'geometry': s} for i, (s, v) in enumerate(shapes(image, mask=None, transform=dem.transform))))

polygonized_raster  = gpd.GeoDataFrame.from_features(polygonized_pixels)
# 2. Compute centroids and ditch the gemoetry column, we won't need it from here
polygonized_raster['centroid'] = polygonized_raster.apply(lambda x : x['geometry'].centroid, axis=1)
polygonized_raster.drop('geometry', inplace=True, axis=1)
polygonized_raster.rename(columns={'centroid': 'geometry'}, inplace=True)
polygonized_raster = polygonized_raster.set_geometry('geometry', crs=project_crs)

### Step 2 : For each HRDPS cell get underlying DEM pixels

In [8]:
# Read HRDPS grid and compute centroid
hrdps_grid = gpd.read_file(hrdps_grid_path)
# Add HRDPS centroid information
hrdps_grid['centroid'] = hrdps_grid.centroid

# Function to get elevation at a point
def get_elevation(point, dem):
    row, col = dem.index(point.x, point.y)
    elevation = dem.read(1)[row, col]
    return elevation

# Write the elevation of each HRDPS cell centroid in a new column
hrdps_grid['Elev'] = hrdps_grid['centroid'].apply(lambda point: get_elevation(point, dem))

In [9]:
# Join both spatial dataframes with spatial join. each CDEM pixel will be matched with corresponding HRDPS cell if it's in it.
cdem_hrdps = sjoin(polygonized_raster, hrdps_grid, how="inner")

### Step 3 : Compute distances between HRDPS centroid and underlying CDEM pixels

In [11]:
# Extraxt HRDPS centroids to set it as geometry and compute distance afterwards
hrdps_centroids = cdem_hrdps.loc[:, ['centroid']]
# Get a regular geometry attribute and set is as geometry column for Geopandas to not freak out
hrdps_centroids['geometry'] = hrdps_centroids['centroid']
hrdps_centroids.set_geometry('geometry', crs=project_crs)
# Compute distances
cdem_hrdps['distance'] = cdem_hrdps.geometry.distance(hrdps_centroids.geometry)
cdem_hrdps['elevation_difference'] = np.abs(cdem_hrdps['Elev'] - cdem_hrdps['elevation'])
# Sort geodataframe by station and distance
cdem_hrdps.sort_values(by=['name', 'distance'], ascending=True, inplace=True)

In [12]:
cdem_hrdps['diagonal'] = cdem_hrdps.apply(lambda x : np.sqrt(x['distance'] ** 2 + x['elevation']), axis=1)

In [13]:
cdem_hrdps.groupby('name')['diagonal'].min().reset_index()

Unnamed: 0,name,diagonal
0,KRG_HRDPS_1,32.078904
1,KRG_HRDPS_10,275.194403
2,KRG_HRDPS_11,16.162057
3,KRG_HRDPS_13,7.803948
4,KRG_HRDPS_14,268.200926
5,KRG_HRDPS_15,9.405788
6,KRG_HRDPS_2,13.711572
7,KRG_HRDPS_3,15.916276
8,KRG_HRDPS_5,1193.621944
9,KRG_HRDPS_6,13.913252


In [14]:
# Get the indexes of the minimum values for each group
idx = cdem_hrdps.groupby('name')['diagonal'].idxmin()

# Use loc to get the rows corresponding to these indices
cdem_hrdps.loc[idx].reset_index(drop=True)

Unnamed: 0,elevation,geometry,index_right,rlat,rlon,lat,lon,altitude,name,centroid,Elev,distance,elevation_difference,diagonal
0,0.265265,POINT (461550.706 6895338.641),0,13.1675,17.533775,62.188164,-75.73822,67.624069,KRG_HRDPS_1,POINT (461573.370 6895361.337),0.0,32.074769,0.265265,32.078904
1,0.069845,POINT (466587.719 6898311.162),7,13.212501,17.556282,62.215084,-75.647736,66.988014,KRG_HRDPS_10,POINT (466312.525 6898310.548),0.0,275.194276,0.069845,275.194403
2,258.652771,POINT (468382.634 6897012.819),8,13.212501,17.578773,62.203629,-75.607727,170.914093,KRG_HRDPS_11,POINT (468382.031 6897014.301),258.652771,1.599791,0.0,16.162057
3,57.849968,POINT (465575.202 6901732.733),9,13.235001,17.533775,62.245716,-75.662598,45.117683,KRG_HRDPS_13,POINT (465575.913 6901731.137),57.849968,1.746892,0.0,7.803948
4,0.268478,POINT (467636.031 6900165.937),10,13.235001,17.556282,62.234261,-75.622498,41.099804,KRG_HRDPS_14,POINT (467646.516 6900433.932),0.0,268.200426,0.268478,268.200926
5,83.191589,POINT (469717.314 6899136.048),11,13.235001,17.578773,62.222809,-75.582458,117.480148,KRG_HRDPS_15,POINT (469715.828 6899137.800),83.191589,2.29723,0.0,9.405788
6,180.209442,POINT (463642.217 6894062.263),1,13.1675,17.556282,62.17672,-75.698151,140.585709,KRG_HRDPS_2,POINT (463644.557 6894063.787),180.209442,2.792447,0.0,13.711572
7,251.040298,POINT (465713.273 6892766.361),2,13.1675,17.578773,62.165279,-75.658173,233.387558,KRG_HRDPS_3,POINT (465714.450 6892767.310),251.040298,1.512461,0.0,15.916276
8,0.041154,POINT (463672.899 6896568.650),3,13.190001,17.533775,62.207359,-75.713043,48.433662,KRG_HRDPS_5,POINT (462907.546 6897484.602),0.0,1193.621927,0.041154,1193.621944
9,190.297974,POINT (464976.897 6896187.933),4,13.190001,17.556282,62.195911,-75.672974,99.667091,KRG_HRDPS_6,POINT (464978.538 6896187.167),190.297974,1.811243,0.0,13.913252


In [15]:
centers_df = cdem_hrdps.loc[idx].reset_index(drop=True)

In [16]:
centers_df

Unnamed: 0,elevation,geometry,index_right,rlat,rlon,lat,lon,altitude,name,centroid,Elev,distance,elevation_difference,diagonal
0,0.265265,POINT (461550.706 6895338.641),0,13.1675,17.533775,62.188164,-75.73822,67.624069,KRG_HRDPS_1,POINT (461573.370 6895361.337),0.0,32.074769,0.265265,32.078904
1,0.069845,POINT (466587.719 6898311.162),7,13.212501,17.556282,62.215084,-75.647736,66.988014,KRG_HRDPS_10,POINT (466312.525 6898310.548),0.0,275.194276,0.069845,275.194403
2,258.652771,POINT (468382.634 6897012.819),8,13.212501,17.578773,62.203629,-75.607727,170.914093,KRG_HRDPS_11,POINT (468382.031 6897014.301),258.652771,1.599791,0.0,16.162057
3,57.849968,POINT (465575.202 6901732.733),9,13.235001,17.533775,62.245716,-75.662598,45.117683,KRG_HRDPS_13,POINT (465575.913 6901731.137),57.849968,1.746892,0.0,7.803948
4,0.268478,POINT (467636.031 6900165.937),10,13.235001,17.556282,62.234261,-75.622498,41.099804,KRG_HRDPS_14,POINT (467646.516 6900433.932),0.0,268.200426,0.268478,268.200926
5,83.191589,POINT (469717.314 6899136.048),11,13.235001,17.578773,62.222809,-75.582458,117.480148,KRG_HRDPS_15,POINT (469715.828 6899137.800),83.191589,2.29723,0.0,9.405788
6,180.209442,POINT (463642.217 6894062.263),1,13.1675,17.556282,62.17672,-75.698151,140.585709,KRG_HRDPS_2,POINT (463644.557 6894063.787),180.209442,2.792447,0.0,13.711572
7,251.040298,POINT (465713.273 6892766.361),2,13.1675,17.578773,62.165279,-75.658173,233.387558,KRG_HRDPS_3,POINT (465714.450 6892767.310),251.040298,1.512461,0.0,15.916276
8,0.041154,POINT (463672.899 6896568.650),3,13.190001,17.533775,62.207359,-75.713043,48.433662,KRG_HRDPS_5,POINT (462907.546 6897484.602),0.0,1193.621927,0.041154,1193.621944
9,190.297974,POINT (464976.897 6896187.933),4,13.190001,17.556282,62.195911,-75.672974,99.667091,KRG_HRDPS_6,POINT (464978.538 6896187.167),190.297974,1.811243,0.0,13.913252


### Step 4 : Get closest candidate in elevation within the 800m radius

In [27]:
def coordinates_finder(group):
    '''
    Finds HRDPS best cell center in the CDEM grid. Algorithmn minimizes elevation difference in a 500m radius around the HRDPS cell centoid.
    '''
    center_group = group[group['distance'] <= 500]
    min_elevation_difference = center_group[center_group['elevation_difference'] == center_group['elevation_difference'].min()]
    min_distance = min_elevation_difference[min_elevation_difference['distance'] == min_elevation_difference['distance'].min()]
    
    return min_distance

custom_sorter = cdem_hrdps.groupby(by='name', as_index=False).apply(lambda x : coordinates_finder(x))
# For some weird reason the groupby operator is multiindexing the result. We are dropping the first dimension here which is useless.
custom_sorter.index = custom_sorter.index.droplevel(level=0)
# The apply function is a pandas generic function, and is not able to have the crs informations transit properly throught the operation
# We are going to retrieve the information from the geopandas complete dataframe using the cleaned index we just made
centers_df = cdem_hrdps.loc[custom_sorter.index]

  custom_sorter = cdem_hrdps.groupby(by='name', as_index=False).apply(lambda x : coordinates_finder(x))


#### Visualize resulting DataFrame

In [31]:
centers_df.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 11 entries, 1648260 to 1663667
Data columns (total 14 columns):
 #   Column                Non-Null Count  Dtype   
---  ------                --------------  -----   
 0   elevation             11 non-null     float64 
 1   geometry              11 non-null     geometry
 2   index_right           11 non-null     int64   
 3   rlat                  11 non-null     float64 
 4   rlon                  11 non-null     float64 
 5   lat                   11 non-null     float64 
 6   lon                   11 non-null     float64 
 7   altitude              11 non-null     float64 
 8   name                  11 non-null     object  
 9   centroid              11 non-null     geometry
 10  Elev                  11 non-null     float32 
 11  distance              11 non-null     float64 
 12  elevation_difference  11 non-null     float64 
 13  diagonal              11 non-null     float64 
dtypes: float32(1), float64(9), geometry(2), int64(

#### Save as pickle for later

In [17]:
# Save centers_df in pickle format
centers_df.to_pickle('C:/Users/PaulBillecocq/Documents/UdS/KRG_DATA/a3d-prep/hrdps-subgridding-prep/hrdps-dem-centroid-correspondance_32618.pkl')

In [40]:
centers_df

Unnamed: 0,elevation,geometry,index_right,rlat,rlon,lat,lon,altitude,name,centroid,Elev,distance,elevation_difference,diagonal
0,0.077642,POINT (344449.647 6897411.321),0,13.1675,17.533775,62.188164,-75.73822,67.624069,KRG_HRDPS_1,POINT (344465.222 6897443.735),0.0,35.960907,0.077642,35.961986
1,0.240415,POINT (349410.080 6900526.196),7,13.212501,17.556282,62.215084,-75.647736,66.988014,KRG_HRDPS_10,POINT (349136.238 6900502.834),0.0,274.836425,0.240415,274.836862
2,257.91687,POINT (351235.240 6899256.247),8,13.212501,17.578773,62.203629,-75.607727,170.914093,KRG_HRDPS_11,POINT (351235.864 6899254.471),257.91687,1.881971,0.0,16.169684
3,57.849968,POINT (348319.985 6903906.059),9,13.235001,17.533775,62.245716,-75.662598,45.117683,KRG_HRDPS_13,POINT (348320.341 6903906.496),57.849968,0.564009,0.0,7.6268
4,0.268478,POINT (350420.168 6902391.120),10,13.235001,17.556282,62.234261,-75.622498,41.099804,KRG_HRDPS_14,POINT (350421.091 6902657.216),0.0,266.097753,0.268478,266.098257
5,81.4608,POINT (352520.352 6901411.160),11,13.235001,17.578773,62.222809,-75.582458,117.480148,KRG_HRDPS_15,POINT (352520.530 6901408.970),81.4608,2.197043,0.0,9.289123
6,180.705658,POINT (346564.832 6896196.371),1,13.1675,17.556282,62.17672,-75.698151,140.585709,KRG_HRDPS_2,POINT (346566.531 6896194.107),180.705658,2.830669,0.0,13.73748
7,251.040298,POINT (348665.015 6894946.421),2,13.1675,17.578773,62.165279,-75.658173,233.387558,KRG_HRDPS_3,POINT (348666.529 6894945.513),251.040298,1.765279,0.0,15.942287
8,0.041154,POINT (346539.829 6898696.270),3,13.190001,17.533775,62.207359,-75.713043,48.433662,KRG_HRDPS_5,POINT (345750.262 6899597.976),0.0,1198.537122,0.041154,1198.537139
9,190.880417,POINT (347849.944 6898346.284),4,13.190001,17.556282,62.195911,-75.672974,99.667091,KRG_HRDPS_6,POINT (347851.385 6898348.464),190.880417,2.613945,0.0,14.06105
