# Centroid finder

This notebooks finds the best coordinates for each HRDPS grid cell centroid by minimizing elevation difference between HRDPS cell native elevation and underlying CDEM pixels in a 800 meters radius around cell natural centroid.

In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
from geopandas.tools import sjoin
import rasterio
from rasterio.features import shapes
from shapely import wkt

In [2]:
# Path to the hrdps grid shapefile
hrdps_grid_path = '../ancillary_data/hrdps_grid/hrdps_grid.shp'
# Path to the 20 m resolution DEM to use
cdem_path = ' '

### Step 1 : Compute CDEM cells centoids

In [3]:
# 1. Polygonize CDEM raster
polygonized_pixels = []
with rasterio.open(cdem_path) as src:
    image = src.read(1) # first band
    polygonized_pixels = list(({'properties': {'elevation': v}, 'geometry': s} for i, (s, v) in enumerate(shapes(image, mask=None, transform=src.transform))))

polygonized_raster  = gpd.GeoDataFrame.from_features(polygonized_pixels)
# 2. Compute centroids and ditch the gemoetry column, we won't need it from here
polygonized_raster['centroid'] = polygonized_raster.apply(lambda x : x['geometry'].centroid, axis=1)
polygonized_raster.drop('geometry', inplace=True, axis=1)
polygonized_raster = polygonized_raster.set_geometry('centroid', crs='EPSG:32611')

  arr = construct_1d_object_array_from_listlike(values)


### Step 2 : For each HRDPS cell get underlying CDEM pixels

In [None]:
# Read HRDPS grid and compute centroid
hrdps_grid = gpd.read_file(hrdps_grid_path)
# Join both spatial dataframes with spatial join. each CDEM pixel will be matched with corresponding HRDPS cell if it's in it.
cdem_hrdps = sjoin(polygonized_raster, hrdps_grid, how="inner")
# Add HRDPS centroid information
hrdps_grid['centroid'] = hrdps_grid.centroid
cdem_hrdps['hrdps_centroid'] = cdem_hrdps.apply(lambda x : str(hrdps_grid[hrdps_grid['Station'] == x['Station']]['centroid'].values[0]), axis=1)

### Step 3 : Compute distances between HRDPS centroid and underlying CDEM pixels

In [None]:
# Extraxt HRDPS centroids to set it as geometry and compute distance afterwards
hrdps_centroids = cdem_hrdps.loc[:, ['hrdps_centroid']]
# Get a regular geometry attribute
hrdps_centroids['hrdps_centroid'] = cdem_hrdps['hrdps_centroid'].apply(wkt.loads)
hrdps_centroids['geometry'] = hrdps_centroids['hrdps_centroid']
# transform in geodataframe to compute distance
hrdps_centroids = gpd.GeoDataFrame(hrdps_centroids, crs="EPSG:32611", geometry=hrdps_centroids.geometry)
# Compute distances
cdem_hrdps['distance'] = cdem_hrdps.geometry.distance(hrdps_centroids.geometry)
cdem_hrdps['elevation_difference'] = np.abs(cdem_hrdps['Elev'] - cdem_hrdps['elevation'])
# Sort geodataframe by station and distance
cdem_hrdps.sort_values(by=['Station', 'distance'], ascending=True, inplace=True)

### Step 4 : Get closest candidate in elevation within the 800m radius

In [7]:
def coordinates_finder(group):
    '''
    Finds HRDPS best cell center in the CDEM grid. Algorithmn minimizes elevation difference in a 800m radius around the HRDPS cell centoid.
    '''
    center_group = group[group['distance'] <= 800]
    min_elevation_difference = center_group[center_group['elevation_difference'] == center_group['elevation_difference'].min()]
    min_distance = min_elevation_difference[min_elevation_difference['distance'] == min_elevation_difference['distance'].min()]
    
    return min_distance

custom_sorter = cdem_hrdps.groupby(by='Station', as_index=False).apply(lambda x : coordinates_finder(x))
# For some weird reason the groupby operator is multiindexing the result. We are dropping the first dimension here which is useless.
custom_sorter.index = custom_sorter.index.droplevel(level=0)
# The apply function is a pandas generic function, and is not able to have the crs informations transit properly throught the operation
# We are going to retrieve the information from the geopandas complete dataframe using the cleaned index we just made
centers_df = cdem_hrdps.loc[custom_sorter.index]

In [9]:
# Save centers_df in pickle format
centers_df.to_pickle('../ancillary_data/topographic_data/hrdps_cdem_correspondance_custom_sorter_min_100x100.pkl')

In [8]:
centers_df

Unnamed: 0,elevation,centroid,index_right,VALUE,Elev,Station,hrdps_centroid,distance,elevation_difference
49346,2392.0,POINT (470584.914 5672419.565),0,1818.860962,1933,79761,POINT (471229.43653794087 5672850.9725815775),775.578115,459.0
50427,1664.0,POINT (453300.791 5671918.749),1,1605.248169,1665,80305,POINT (453696.1373626217 5672277.190125488),533.646408,1.0
50713,1856.0,POINT (456697.671 5671818.586),2,1864.007812,1857,80306,POINT (456200.9073078195 5672359.287767434),734.256213,1.0
47241,2140.0,POINT (458595.927 5673220.869),3,2069.360107,2043,80307,POINT (458705.67231314705 5672441.342460476),787.214214,97.0
49747,2294.0,POINT (460494.183 5672219.239),4,2080.091553,2067,80308,POINT (461210.4327585001 5672523.354230062),778.138387,227.0
...,...,...,...,...,...,...,...,...,...
4994,1694.0,POINT (466388.769 5690348.757),65,1578.116455,1629,84168,POINT (465643.9799806197 5690193.546962956),760.789485,65.0
4274,1352.0,POINT (467787.484 5690649.246),66,1286.756714,1351,84169,POINT (468149.06581140205 5690275.406615107),520.093413,1.0
3570,1154.0,POINT (471184.364 5690949.735),67,1451.230591,1465,84170,POINT (470654.1484240883 5690357.226123863),795.107162,311.0
6072,1514.0,POINT (450203.636 5689847.941),68,1524.837158,1515,84713,POINT (450613.37612949684 5689701.544619505),435.107930,1.0
