In [1]:
import os
os.environ['USE_PYGEOS'] = '0'
import geopandas as gpd
from shapely.geometry import Point
import re
import requests
from pathlib import Path
import numpy as np
import pandas as pd
import xarray as xr
from datetime import datetime, timedelta
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import unicodedata

In [22]:
PATH_HDMS = Path('Z:/nahaUsers/casadje/datasets/hDMS')

hdms = gpd.read_file(PATH_HDMS / 'reservoirs' / 'GIS' / 'reservoirs_HDMS_20250305.shp').set_index('EFAS_ID')

In [29]:
hdms.shape

(547, 28)

In [24]:
PATH_GDW = Path('Z:/nahaUsers/casadje/datasets/reservoirs/GDW')
gdw = gpd.read_file(PATH_GDW / 'GDW_v1_0_shp' / 'GDW_barriers_v1_0.shp').set_index('GDW_ID')

In [25]:
extent = [
    hdms.geometry.x.min() - 1,
    hdms.geometry.x.max() + 1,
    hdms.geometry.y.min() - 1,
    hdms.geometry.y.max() + 1,
]

In [26]:
mask_lon = (extent[0] <= gdw.geometry.x) & (gdw.geometry.x <= extent[1])
mask_lat = (extent[2] <= gdw.geometry.y) & (gdw.geometry.y <= extent[3])

In [27]:
gdw = gdw[mask_lon & mask_lat]

In [28]:
gdw.shape

(3717, 71)

***

In [46]:
distance_thr = 0.005

mapping = {}
for efas_id, point in hdms.geometry.items():
    
    distance = gdw.geometry.distance(point)
    
    # find close reservoirs
    if distance.min() < distance_thr:
        gdw_ids = distance[distance < distance_thr].index.tolist()
        if len(gdw_ids) > 0:
            mapping[efas_id] = gdw_ids


  distance = gdw.geometry.distance(point)


In [47]:
len(mapping)

410

In [38]:
a = [ 8004,  8018,  8107,  8109,  8159,  9098,  9130,  9134,  9135,
             9136,  9141,  9143,  9149,  9204,  9258,  9307,  9371,  9372,
             9375,  9387,  9392,  9406,  9409,  9410,  9417,  9418,  9419,
             9420,  9421,  9439,  9507,  9508,  9511,  9517,  9519, 10348,
            10354, 18144, 19247]

In [54]:
hdms.index.difference(mapping).difference(a)

Int64Index([ 8039,  8052,  8059,  8080,  8110,  8151,  8179,  8185,  9097,
             9126,  9137,  9139,  9140,  9144,  9147,  9162,  9164,  9169,
             9175,  9181,  9186,  9205,  9213,  9216,  9221,  9226,  9246,
             9251,  9262,  9272,  9291,  9295,  9304,  9305,  9308,  9309,
             9343,  9348,  9359,  9381,  9386,  9397,  9405,  9414,  9415,
             9422,  9423,  9424,  9426,  9431,  9434,  9438,  9440,  9443,
             9456,  9457,  9458,  9462,  9463,  9465,  9466,  9468,  9469,
             9472,  9473,  9475,  9476,  9477,  9478,  9479,  9480,  9483,
             9487,  9493,  9495,  9500,  9501,  9502,  9503,  9506,  9514,
             9515,  9516,  9518, 10343, 10344, 10345, 10346, 10350, 10355,
            10357, 18123, 18125, 18128, 18129, 18132, 18140, 18145],
           dtype='int64', name='EFAS_ID')

In [48]:
for efas_id, gdw_ids in mapping.items():
    if len(gdw_ids) > 1:
        print(efas_id, gdw_ids)

8168 [4281, 7312]
8180 [4336, 7313]
9315 [4281, 7312]
9318 [4336, 7313]


In [55]:
mapping

{3087: [4046],
 4461: [4005],
 4477: [5989],
 7998: [4043],
 8014: [4032],
 8017: [35790],
 8021: [16108],
 8026: [6003],
 8028: [16113],
 8040: [3926],
 8041: [3939],
 8051: [35816],
 8058: [15970],
 8064: [15962],
 8065: [32170],
 8081: [15964],
 8111: [5943],
 8123: [5945],
 8140: [3940],
 8148: [5938],
 8152: [4308],
 8153: [16401],
 8154: [6105],
 8155: [4337],
 8156: [4341],
 8157: [1562],
 8158: [4370],
 8160: [16371],
 8161: [4311],
 8162: [4307],
 8163: [4292],
 8164: [1549],
 8165: [16450],
 8166: [4312],
 8167: [4278],
 8168: [4281, 7312],
 8169: [4303],
 8170: [4314],
 8171: [16397],
 8172: [7004],
 8173: [4316],
 8174: [4324],
 8175: [37330],
 8176: [4350],
 8177: [4300],
 8178: [1550],
 8180: [4336, 7313],
 8181: [4329],
 8182: [4334],
 8183: [4378],
 8184: [16490],
 8186: [9001],
 8187: [4387],
 8188: [1570],
 8189: [4343],
 8190: [4379],
 8191: [4340],
 8192: [4321],
 8193: [7005],
 8194: [4328],
 8195: [4338],
 8196: [4349],
 8197: [4346],
 8198: [8987],
 8199: [6120],

In [56]:
with open("mapping_HDMS_GDW.yml", "w") as file:
    yaml.dump(mapping, file, default_flow_style=False)

***

In [98]:
with open("mapping_HDMS_GDW.yml", "r") as file:
    map_hdms_gdw = yaml.safe_load(file)

In [99]:
len(map_hdms_gdw)

447

In [100]:
len(np.unique(list(map_hdms_gdw)))

447

In [101]:
len(np.unique([item for sublist in map_hdms_gdw.values() for item in sublist]))

356

In [102]:
df_hdms_gdw = pd.DataFrame(columns=['GDW_ID'], dtype='Int64')
df_hdms_gdw.index.name = 'EFAS_ID'
for efas_id, ls in map_hdms_gdw.items():
    for gdw_id in ls:
        df_hdms_gdw.loc[efas_id, 'GDW_ID'] = gdw_id
df_hdms_gdw.reset_index(inplace=True)
df_hdms_gdw.index.name = 'RES_ID'

In [103]:
df_hdms_gdw.head(2)

Unnamed: 0_level_0,EFAS_ID,GDW_ID
RES_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
0,3087,4046
1,4461,4005


In [111]:
cols = ['YEAR_DAM', 'REM_YEAR', 'TIMELINE', 'DAM_HGT_M', 'CAP_MCM']

In [126]:
reservoirs_table = pd.merge(df_hdms_gdw, gdw[cols], left_on='GDW_ID', right_index=True)
reservoirs_table['SOURCE'] = 'GDW'
reservoirs_table['REPLACED_BY'] = np.nan
reservoirs_table['REPLACED_BY'] = reservoirs_table['REPLACED_BY'].astype('Int64')
reservoirs_table.replace(-99, None, inplace=True)

reservoirs_table.head()

Unnamed: 0_level_0,EFAS_ID,GDW_ID,YEAR_DAM,REM_YEAR,TIMELINE,DAM_HGT_M,CAP_MCM,SOURCE,REPLACED_BY
RES_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,3087,4046,1963,,,84,168.5,GDW,
1,4461,4005,1968,,,63,62.0,GDW,
2,4477,5989,1957,,,60,24.4,GDW,
3,7998,4043,1968,,,135,233.0,GDW,
4,8014,4032,1976,,,102,115.4,GDW,


In [127]:
reservoirs_table[reservoirs_table.REM_YEAR.notnull()]

Unnamed: 0_level_0,EFAS_ID,GDW_ID,YEAR_DAM,REM_YEAR,TIMELINE,DAM_HGT_M,CAP_MCM,SOURCE,REPLACED_BY
RES_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
38,8168,7312,1950,2007,Replaced,41,28.0,GDW,
249,9315,7312,1950,2007,Replaced,41,28.0,GDW,
49,8180,7313,1935,2008,Replaced,54,103.0,GDW,
252,9318,7313,1935,2008,Replaced,54,103.0,GDW,


In [130]:
reservoirs_table.loc[reservoirs_table.EFAS_ID == 8168]

Unnamed: 0_level_0,EFAS_ID,GDW_ID,YEAR_DAM,REM_YEAR,TIMELINE,DAM_HGT_M,CAP_MCM,SOURCE,REPLACED_BY
RES_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
38,8168,7312,1950,2007,Replaced,41,28.0,GDW,


In [None]:
reservoirs_table.loc[reservoirs_table.GDW_ID == 7312, 'REPLACED_BY'] = 4281

In [119]:
gdw.loc[4046, ['YEAR_DAM', 'REM_YEAR']]

YEAR_DAM    1963
REM_YEAR     -99
Name: 4046, dtype: object

In [86]:
df_hdms_gdw.GDW_ID.value_counts()

4017     2
16401    2
4329     2
15970    2
5937     2
        ..
3921     1
3902     1
3884     1
3893     1
15568    1
Name: GDW_ID, Length: 355, dtype: Int64

In [71]:
map_hdms_gdw

{3087: [4046],
 4461: [4005],
 4477: [5989],
 7998: [4043],
 8014: [4032],
 8017: [35790],
 8021: [16108],
 8026: [6003],
 8028: [16113],
 8039: [3918],
 8040: [3926],
 8041: [3939],
 8051: [35816],
 8058: [15970],
 8059: [5937],
 8064: [15962],
 8065: [32170],
 8081: [15964],
 8111: [5943],
 8123: [5945],
 8140: [3940],
 8148: [5938],
 8151: [27999],
 8152: [4308],
 8153: [16401],
 8154: [6105],
 8155: [4337],
 8156: [4341],
 8157: [1562],
 8158: [4370],
 8160: [16371],
 8161: [4311],
 8162: [4307],
 8163: [4292],
 8164: [1549],
 8165: [16450],
 8166: [4312],
 8167: [4278],
 8168: [4281, 7312],
 8169: [4303],
 8170: [4314],
 8171: [16397],
 8172: [7004],
 8173: [4316],
 8174: [4324],
 8175: [37330],
 8176: [4350],
 8177: [4300],
 8178: [1550],
 8180: [4336, 7313],
 8181: [4329],
 8182: [4334],
 8183: [4378],
 8184: [16490],
 8185: [8891],
 8186: [9001],
 8187: [4387],
 8188: [1570],
 8189: [4343],
 8190: [4379],
 8191: [4340],
 8192: [4321],
 8193: [7005],
 8194: [4328],
 8195: [4338]