In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
import geopandas as gpd
import xarray as xr
import rioxarray as rxr

In [4]:
# User directory and the EFASv6 folder inside it; all inputs are read from here.
PATH_USER = Path('Z:/nahaUsers/casadje')
PATH_EFAS6 = PATH_USER.joinpath('EFASv6')

# Common prefix of the table file names (`<file_name>_<variable>_<date>.txt`).
file_name = 'reservoirs_efas6'

## Tables 

In [33]:
# Dates of the table versions to compare; each one is both the file-name
# suffix and the column name in the resulting tables.
dates = [
    '20250303',
    '20250825'
]

# Variables to load; every variable/date pair is stored in its own text file.
variables = [
    '100yr_inflow',
    'avg_inflow',
    'capacity',
    'env_flow',
    # 'years'
]

# One DataFrame per variable: index named GDW_ID, one column per date.
tables = {}
for var in variables:
    print(var)
    series_by_date = []
    for date in dates:
        # The files have no header row: first column is the ID, second the value.
        # Squeezing only the column axis always yields a Series, whereas a bare
        # `squeeze()` would collapse a one-row file into a scalar.
        data = pd.read_csv(
            PATH_EFAS6 / 'tables' / f'{file_name}_{var}_{date}.txt',
            sep='\t',
            index_col=0,
            header=None
        ).squeeze('columns')
        data.index.name = 'GDW_ID'
        data.name = date
        print(f'\t{date}:\t{len(data)}')
        series_by_date.append(data)
    # Align the dates on GDW_ID; IDs absent on one date show up as NaN.
    tables[var] = pd.concat(series_by_date, axis=1)

100yr_inflow
	20250303:	1359
	20250825:	1328
avg_inflow
	20250303:	1359
	20250825:	1328
capacity
	20250303:	1359
	20250825:	1328
env_flow
	20250303:	1359
	20250825:	1328


In [36]:
# Show the capacity table: index is GDW_ID, one column per date in `dates`.
tables['capacity']

Unnamed: 0_level_0,20250303,20250825
GDW_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
33,1.080000e+10,1.080000e+10
35,5.600000e+09,5.600000e+09
36,1.750000e+10,1.750000e+10
37,9.500000e+09,9.500000e+09
39,2.540000e+10,2.540000e+10
...,...,...
99995,2.570000e+08,2.570000e+08
99997,1.600000e+08,1.600000e+08
50001,,3.160000e+08
50002,,6.340000e+08


## Map

In [42]:
def read_map(file: Path) -> xr.DataArray:
    """Read a raster map into memory and mask its zero-valued cells.

    Parameters
    ----------
    file : Path
        Raster file to open with `rioxarray.open_rasterio`.

    Returns
    -------
    xr.DataArray
        The map with length-1 dimensions squeezed out and every cell equal
        to 0 replaced by NaN.
    """
    # The object returned by `open_rasterio` is the one tied to the file;
    # calling `close()` on a squeezed copy presumably would not release it.
    # The context manager closes the file once the values are in memory.
    with rxr.open_rasterio(file) as raster:
        data = raster.squeeze().load()
    return data.where(data != 0, np.nan)

In [43]:
# Common prefix of the static map files (`<map_name>_<date>.nc`).
map_name = 'reservoirs_efas6_01min'

In [46]:
# Reservoir map dated 20250825
reservoir_25 = read_map(PATH_EFAS6 / 'static_maps' / f'{map_name}_20250825.nc')

# Distinct values found in the map, with NaN dropped, as Python integers
values_25 = np.unique(reservoir_25)
ids_25 = [int(value) for value in values_25[~np.isnan(values_25)]]
print(len(ids_25))

# reservoir_25

1328


In [47]:
# Reservoir map dated 20250311
reservoir_11 = read_map(PATH_EFAS6 / 'static_maps' / f'{map_name}_20250311.nc')

# Distinct values found in the map, with NaN dropped, as Python integers
values_11 = np.unique(reservoir_11)
ids_11 = [int(value) for value in values_11[~np.isnan(values_11)]]
print(len(ids_11))

# reservoir_11

1359


In [None]:
# Reservoir map dated 20250303
reservoir_03 = read_map(PATH_EFAS6 / 'static_maps' / f'{map_name}_20250303.nc')

# Distinct values found in the map, with NaN dropped, as Python integers
values_03 = np.unique(reservoir_03)
ids_03 = [int(value) for value in values_03[~np.isnan(values_03)]]
# Report the IDs of this map (`unique_ids` is not defined anywhere in the notebook)
print(len(ids_03))

# reservoir_03

In [41]:
# Number of IDs that appear in both the 20250311 and the 20250303 maps
len(set(ids_11) & set(ids_03))

1359

The intersection has 1359 IDs, so every reservoir in the 20250311 map is also in the 20250303 map: the two maps contain the same reservoirs.

In [43]:
# Is ID 1181 present in the 20250311 map?
1181 in ids_11

False

In [44]:
# Is ID 1181 present in the 20250303 map?
1181 in ids_03

False

In [135]:
# Is ID 50010 present in the 20250311 map?
50010 in ids_11

True

## Points

In [34]:
# Point layer from the shapefile, indexed by GDW_ID
points_file = PATH_EFAS6 / 'reservoirs' / 'tables' / 'efas6_reservoirs+GDW_20250716.shp'
points = gpd.read_file(points_file).set_index('GDW_ID')
print(len(points))

1542


## Inflow

In [121]:
# Folder with the ncextract inputs/outputs, and the subfolder holding the
# inflow files of the 20250716 run
PATH_NCEXTRACT = PATH_EFAS6.joinpath('reservoirs', 'ancillary', 'ncextract')
PATH_INFLOW = PATH_NCEXTRACT.joinpath('natural_flow', 'inflow', '20250716')

In [122]:
# Each NetCDF file in the inflow folder is named after an ID; collect them
inflow_files = list(PATH_INFLOW.glob('*.nc'))
ids_inflow = [int(nc_file.stem) for nc_file in inflow_files]
print(len(ids_inflow))

821


In [127]:
# Collect the batch tables found in the ncextract folder, one CSV per batch,
# tagging every point with the batch number taken from the file name.
csv_file = 'efas6_reservoirs_GDW_20250716_'
batch_tables = []
for file in sorted(PATH_NCEXTRACT.glob(f'{csv_file}*.csv')):
    # Only `<csv_file><batch>.csv` is a batch table. This notebook also writes
    # `<csv_file><batch>_1.csv` / `_2.csv` into the same folder; those match
    # the glob pattern too and would be read as batches 1 and 2 on a re-run.
    suffix = file.stem[len(csv_file):]
    if not suffix.isdecimal():
        continue
    print(file.stem)
    df = pd.read_csv(file).set_index('GDW_ID')
    df['batch'] = int(suffix)
    batch_tables.append(df)
points_nc = pd.concat(batch_tables)

print(len(points_nc))
points_nc.batch.value_counts()

efas6_reservoirs_GDW_20250716_1
efas6_reservoirs_GDW_20250716_2
efas6_reservoirs_GDW_20250716_3
efas6_reservoirs_GDW_20250716_4
efas6_reservoirs_GDW_20250716_5
1542


batch
5    342
1    300
2    300
3    300
4    300
Name: count, dtype: int64

In [131]:
# For every batch, find the points that have no inflow file and write them
# to two new CSV files (presumably to repeat the extraction in smaller runs).
for batch in points_nc.batch.unique():
    # find missing points
    batch_points = points_nc[points_nc.batch == batch]
    missing = batch_points.index.difference(ids_inflow)
    n_missing = len(missing)
    print(f'Batch {batch}:\t missing {n_missing} out of {len(batch_points)} points')
    df_missing = batch_points.loc[missing].drop('batch', axis=1)

    # create new CSV files: first and second half of the missing points
    half = n_missing // 2
    halves = {
        1: df_missing.iloc[:half],
        2: df_missing.iloc[half:],
    }
    for part, subset in halves.items():
        # With 0 or 1 missing points a half is empty; skip it rather than
        # writing a CSV that contains only the header.
        if subset.empty:
            continue
        subset.to_csv(
            PATH_NCEXTRACT / f'{csv_file}{batch}_{part}.csv',
            float_format='%.6f'
        )

Batch 1:	 missing 135 out of 300 points
Batch 2:	 missing 136 out of 300 points
Batch 3:	 missing 136 out of 300 points
Batch 4:	 missing 136 out of 300 points
Batch 5:	 missing 178 out of 342 points
