In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import hydrosensordesign as hsd
from sensor_network_utils import *
from glofas_processing_utils import *
# need to install cfgrib, rioxarray 

In [2]:
ROOT = Path("C:/Users/bakka/hydro-project/data")
GLOFAS_FILES = sorted(glob.glob(str(ROOT / "Bangladesh/data_bangladesh_20*.grib")))  # 2020-2023
BND_SHP = ROOT / "gadm41_BGD_shp/gadm41_BGD_0.shp"
GAUGES_CSV = ROOT / "bwdb_gauges.csv"

In [28]:
'''
From sensor_network_Bangladesh.ipynb
'''
bangladesh_poly = load_boundary_shapefile(BND_SHP)

bwdb = (pd.read_csv(GAUGES_CSV)
          .dropna(subset=["Latitude", "Longitude"])
          .rename(columns={"Latitude": "gauge_lat", "Longitude": "gauge_lon"}))
### bwdb has a column named "Station ID" instead of "StationID" ###
bwdb["gauge_id"] = (
    bwdb.get("StationID")
    if "StationID" in bwdb
    else np.arange(len(bwdb))
)

gauge_gdf = prepare_gauge_geodataframe(bwdb)

extent = [87, 93, 20, 27.5]

glofas = load_glofas_data(GLOFAS_FILES)
glofas_clipped = clip_to_region(glofas, bangladesh_poly, extent)
dis24_da, matrix, valid_lat_lon = prepare_matrix(glofas_clipped)

lat_vals = glofas_clipped.latitude.values
lon_vals = glofas_clipped.longitude.values

gauges, sensor_cols, sensor_indices_orig = align_gauges_to_grid(gauge_gdf, lat_vals, lon_vals, valid_lat_lon)

matched_gauge_ids = sensor_cols['gauge_id'].values
gauge_gdf_matched = gauge_gdf[gauge_gdf['gauge_id'].isin(matched_gauge_ids)].copy()

X_train, X_test, mapping_dict = train_test_split_and_filter(matrix)

points_gdf = create_points_geodataframe(valid_lat_lon, mapping_dict)
points_with_basin = assign_basins(points_gdf, country_name =  "Bangladesh")

gauge_counts = count_gauges_per_basin(gauge_gdf_matched, country_name = "Bangladesh", total_gauges = len(sensor_indices_orig))

Matrix shape: (1461, 4931)
Valid columns: 4,931
168 gauges matched to grid cells
Columns before filter: 4,931
Columns after filter: 4,931


In [4]:
### selected_sensors is reordered but selected_indices is in original order ###
selected_sensors, selected_indices = qr_pivot_selection(X_train, points_with_basin, gauge_counts)
selected_sensors[:10]


Selected 168 optimal sensor locations


Unnamed: 0,RHI_CD,RHI_NM,matrix_col,lat,lon
0,0,Bangladesh,1,26.525,88.375
1,0,Bangladesh,10,26.425,88.975
2,0,Bangladesh,11,26.375,88.525
3,0,Bangladesh,26,26.325,88.925
4,0,Bangladesh,56,26.225,89.675
5,0,Bangladesh,74,26.175,89.125
6,0,Bangladesh,94,26.125,89.125
7,0,Bangladesh,97,26.125,89.725
8,0,Bangladesh,119,26.075,89.675
9,0,Bangladesh,132,26.025,88.725


In [None]:
valid_lat_lon

array(['(26.5750, 88.3750)', '(26.5250, 88.3750)', '(26.5250, 88.4250)',
       ..., '(20.8750, 92.2750)', '(20.8250, 92.3250)',
       '(20.7750, 92.3250)'], shape=(4931,), dtype='<U18')

In [29]:
print(points_gdf.shape)
points_gdf.head()

(4931, 5)


Unnamed: 0,matrix_col,lat,lon,geometry,col_pos
0,0,26.575,88.375,POINT (88.375 26.575),0
1,1,26.525,88.375,POINT (88.375 26.525),1
2,2,26.525,88.425,POINT (88.425 26.525),2
3,3,26.525,88.475,POINT (88.475 26.525),3
4,4,26.475,88.425,POINT (88.425 26.475),4


In [26]:
print(sensor_cols.shape)
sensor_cols.head()

(168, 4)


Unnamed: 0,gauge_id,lat_c,lon_c,matrix_col
0,0,21.475,92.475,4884
1,1,22.175,92.225,4507
2,2,21.775,92.175,4788
3,3,21.675,92.525,4830
4,4,24.825,89.375,1141


In [None]:
points_gdf['idx'] = points_gdf['matrix_col'].astype(int)
gauge_gdf_matched['idx'] = sensor_cols['matrix_col'].astype(int)
print(gauge_gdf_matched.shape)
gauge_gdf_matched[:5]

(168, 13)


Unnamed: 0,SL.,Station ID,Station,River,District,Upazila,gauge_lat,gauge_lon,First Date,Last Date,gauge_id,geometry,idx
0,1,SW203.5,Poamuhuri,Matamuhuri,Bandarban,Alikadam,21.4581,92.4545,1-Mar-19,17-Apr-25,0,POINT (92.4545 21.4581),4884.0
1,2,SW247,Bandarban,Sangu,Bandarban,Bandarban Sadar,22.1966,92.2166,7-Apr-94,21-Apr-25,1,POINT (92.2166 22.1966),4507.0
2,3,SW203,Lama,Matamuhuri,Bandarban,Lama,21.7861,92.1908,24-Aug-81,22-Apr-25,2,POINT (92.1908 21.7861),4788.0
3,4,SW247.4,Remakri,Sangu,Bandarban,Thanchi,21.6761,92.5173,5-Mar-19,14-Apr-25,3,POINT (92.5173 21.6761),4830.0
4,5,SW65,Bogra,Deonai_Charalkata_Jamuneswari_Karatoa,Bogura,Bogura Sadar,24.8459,89.3792,23-Jun-94,20-Feb-25,4,POINT (89.3792 24.8459),1141.0


In [None]:
'''
Tutorial
'''
bangladesh_shp = gpd.read_file(BND_SHP).to_crs("EPSG:4326")

selected_global = hsd.select_sensors(X_train=X_train,
                                     boundaries=bangladesh_shp,basin_field='COUNTRY',
                                     flowlines=points_gdf, flow_id_field='matrix_col',
                                     existing_sensors=gauge_gdf_matched,
                                     expansion=False)


Selected 168 optimal sensor locations


In [40]:
selected_global[:5]

Unnamed: 0,COUNTRY,matrix_col,idx,lat,lon
0,Bangladesh,3344,3344,23.125,90.575
1,Bangladesh,535,535,25.325,89.625
2,Bangladesh,1453,1453,24.625,88.075
3,Bangladesh,3102,3102,23.325,90.625
4,Bangladesh,349,349,25.725,89.825


In [59]:
np.sum(selected_global['idx']==selected_indices)

np.int64(168)

In [66]:
set([1,5,7]) & set([5,6,7])

{5, 7}

In [58]:
len(set(selected_global['matrix_col']) & set(selected_sensors['matrix_col']))

168

In [74]:
selected_sensors['matrix_col'][:12]

0       1
1      10
2      11
3      26
4      56
5      74
6      94
7      97
8     119
9     132
10    145
11    178
Name: matrix_col, dtype: int64

In [71]:
sorted(selected_global['matrix_col'])[:12]

[1, 10, 11, 26, 56, 74, 94, 97, 119, 132, 145, 178]