In [1]:
import glob
import os
from pathlib import Path

import numpy as np
import pandas as pd
import geopandas as gpd
import hydrosensordesign as hsd

# Star imports stay last and in their original order so name resolution is unchanged.
from sensor_network_utils import *
from glofas_processing_utils import *
# need to install cfgrib, rioxarray

In [2]:
# Data locations used by the rest of the notebook.
# ROOT defaults to the original local folder; set the HYDRO_DATA_ROOT environment
# variable to run on another machine without editing this cell.
ROOT = Path(os.environ.get("HYDRO_DATA_ROOT", "C:/Users/bakka/hydro-project/data"))
# Sorted so the GRIB files are always handed on in the same (filename) order.
GLOFAS_FILES = sorted(glob.glob(str(ROOT / "Bangladesh/data_bangladesh_20*.grib")))  # 2020-2023
BND_SHP = ROOT / "gadm41_BGD_shp/gadm41_BGD_0.shp"
GAUGES_CSV = ROOT / "bwdb_gauges.csv"

In [3]:
# Boundary and gauge loading, carried over from sensor_network_Bangladesh.ipynb.
bangladesh_poly = load_boundary_shapefile(BND_SHP)

# Drop gauges without coordinates and rename the coordinate columns to
# gauge_lat / gauge_lon.
bwdb = (
    pd.read_csv(GAUGES_CSV)
    .dropna(subset=["Latitude", "Longitude"])
    .rename(columns={"Latitude": "gauge_lat", "Longitude": "gauge_lon"})
)

# The BWDB file names its station column "Station ID" (with a space), so testing
# for "StationID" alone always fell back to positional ids. Accept either spelling.
station_id_col = next(
    (col for col in ("StationID", "Station ID") if col in bwdb.columns),
    None,
)
# gauge_id is later used with .isin() to pick out matched gauges, so it must be
# complete and unique; otherwise keep the positional fallback.
has_usable_station_ids = (
    station_id_col is not None
    and bwdb[station_id_col].notna().all()
    and bwdb[station_id_col].is_unique
)
bwdb["gauge_id"] = (
    bwdb[station_id_col] if has_usable_station_ids else np.arange(len(bwdb))
)

# Gauge table -> GeoDataFrame (via the project helper).
gauge_gdf = prepare_gauge_geodataframe(bwdb)

# Bounding box handed to clip_to_region.
# NOTE(review): presumably [lon_min, lon_max, lat_min, lat_max] in degrees -- confirm.
extent = [87, 93, 20, 27.5]

# Load the GloFAS files, clip them to the country, and build the data matrix.
glofas = load_glofas_data(GLOFAS_FILES)
glofas_clipped = clip_to_region(glofas, bangladesh_poly, extent)
dis24_da, matrix, valid_lat_lon = prepare_matrix(glofas_clipped)

# Coordinate axes of the clipped grid.
lat_vals, lon_vals = glofas_clipped.latitude.values, glofas_clipped.longitude.values

gauges, sensor_cols, sensor_indices_orig = align_gauges_to_grid(
    gauge_gdf,
    lat_vals,
    lon_vals,
    valid_lat_lon,
)

# Keep only the gauges whose gauge_id appears in sensor_cols.
matched_gauge_ids = sensor_cols["gauge_id"].values
gauge_gdf_matched = gauge_gdf.loc[gauge_gdf["gauge_id"].isin(matched_gauge_ids)].copy()

X_train, X_test, mapping_dict = train_test_split_and_filter(matrix)

points_gdf = create_points_geodataframe(valid_lat_lon, mapping_dict)
points_with_basin = assign_basins(points_gdf, country_name="Bangladesh")

gauge_counts = count_gauges_per_basin(
    gauge_gdf_matched,
    country_name="Bangladesh",
    total_gauges=len(sensor_indices_orig),
)

# Author's note: selected_sensors is reordered, while selected_indices keeps the
# original order.
selected_sensors, selected_indices = qr_pivot_selection(
    X_train,
    points_with_basin,
    gauge_counts,
)
selected_sensors[:10]

Matrix shape: (1461, 4931)
Valid columns: 4,931
168 gauges matched to grid cells
Columns before filter: 4,931
Columns after filter: 4,931

Selected 168 optimal sensor locations


Unnamed: 0,RHI_CD,RHI_NM,matrix_col,lat,lon
0,0,Bangladesh,1,26.525,88.375
1,0,Bangladesh,10,26.425,88.975
2,0,Bangladesh,11,26.375,88.525
3,0,Bangladesh,26,26.325,88.925
4,0,Bangladesh,56,26.225,89.675
5,0,Bangladesh,74,26.175,89.125
6,0,Bangladesh,94,26.125,89.125
7,0,Bangladesh,97,26.125,89.725
8,0,Bangladesh,119,26.075,89.675
9,0,Bangladesh,132,26.025,88.725


In [4]:
# Tutorial: run the selection through hydrosensordesign directly, passing the
# value stored under 'Bangladesh' in gauge_counts as r.
selected_global = hsd.select_sensors(
    X=X_train,
    coords=valid_lat_lon,
    r=gauge_counts['Bangladesh'],
)
selected_global['coords'][:10]

[np.str_('(23.1250, 90.5750)'),
 np.str_('(25.3250, 89.6250)'),
 np.str_('(24.6250, 88.0750)'),
 np.str_('(23.3250, 90.6250)'),
 np.str_('(25.7250, 89.8250)'),
 np.str_('(23.8750, 89.6750)'),
 np.str_('(23.7750, 89.7250)'),
 np.str_('(26.3250, 88.9250)'),
 np.str_('(25.9750, 89.8250)'),
 np.str_('(24.5750, 91.7250)')]

In [5]:
# First ten entries of selected_indices (returned by qr_pivot_selection), shown
# for a side-by-side look against the hsd.select_sensors result.
selected_indices[:10]

[np.int64(3344),
 np.int64(535),
 np.int64(1453),
 np.int64(3102),
 np.int64(349),
 np.int64(2468),
 np.int64(2575),
 np.int64(26),
 np.int64(183),
 np.int64(1609)]

In [6]:
# First ten entries of the 'indices' field returned by hsd.select_sensors, for
# comparison with selected_indices from qr_pivot_selection.
selected_global['indices'][:10]

array([3344,  535, 1453, 3102,  349, 2468, 2575,   26,  183, 1609],
      dtype=int32)

In [7]:
# Count the positions at which the two selections disagree.
# zip() would silently stop at the shorter sequence, so a length mismatch could
# still report 0; check the shapes first, then compare element-wise.
hsd_indices = np.asarray(selected_global['indices'])
qr_indices = np.asarray(selected_indices)
assert hsd_indices.shape == qr_indices.shape, (
    f"selection sizes differ: {hsd_indices.shape} vs {qr_indices.shape}"
)
count = (hsd_indices != qr_indices).sum()
count

np.int64(0)