# Prepare global input data for MizuRoute with Hanasaki 
## Map reservoir parameters onto global river and catchment topology
Inne Vanderkelen - March 2021



See Shervan Gharari's repository for updated scripts to write netCDF files: https://github.com/ShervanGharari/lake_models_python/blob/main/Hanasaki/test_case_creation.ipynb 

In [1]:
# load modules
import xarray as xr
import geopandas as gpd
import pandas as pd
import netCDF4 as nc4
import numpy as np
import os 
import time



In [2]:
# settings
# Export the river network with H06 attributes as a shapefile (takes some minutes).
# If False, the script will still run.
save_river_H06 = False

# irrigation reservoir classification: 0 is no irrig, 1 is main purpose irrig, 2 is a purpose irrig
irrig = 1

# water demand sensitivity testing. Use an empty string if no sensitivity is taken
sensitivity = '_obsscaled'

# define filename of parameters
# Only the main-purpose case (irrig == 1) carries the sensitivity suffix in its
# file names; the other two cases use a fixed suffix.
if irrig == 1:
    fn_suffix = sensitivity
elif irrig == 2:
    fn_suffix = '_allirrig'
elif irrig == 0:
    fn_suffix = '_noirrig'
else:
    # Fail here instead of with a NameError when the file names are first used.
    raise ValueError('irrig must be 0, 1 or 2, got ' + repr(irrig))

H06_param_fn = 'HDMA_H06_parameters' + fn_suffix + '.nc'
topology_fn = 'ntopo_hdma_mod.reorder_lake_H06' + fn_suffix + '.nc'

### Load shapefiles

In [3]:
# Directory that holds all input data read by this notebook
data_dir = './data_for_mizuroute/'

# Original network topology, in which the lakes were already adjusted for D03
ntopo_D03_path = data_dir+'ntopo_hdma_mod.reorder_lake_D03.nc'
ds_ntopo = xr.open_dataset(ntopo_D03_path)

# River shapefile that also contains lake_id: updated HDMA topology including lakes up to 10 km²
river_with_lake_dir = data_dir+'/HDMA_hydrolakes10km_reorder/'
river_shp_path = river_with_lake_dir+'/river_with_lake_flag4_10km_reorder.shp'
gpd_river = gpd.read_file(river_shp_path)

# Reduced version of HydroLAKES with only lakeID and GRanD id
# (the only variables needed to link the two datasets)
hydrolakes_shp_path = data_dir+'/hydrolakes_tolink_GRanD/HydroLAKES_polys_v10_only_GRandID.shp'
gpd_hydrolakes = gpd.read_file(hydrolakes_shp_path)

# GRanD reservoirs
grand_shp_path = data_dir+'/GRanD_Version_1_3/GRanD_reservoirs_v1_3.shp'
gpd_grand = gpd.read_file(grand_shp_path)

In [4]:
# Rename the HydroLAKES id columns so they can be compared with GRanD
# (GRAND_ID) and, later on, with the river shapefile (lakeId)
hydrolakes_column_names = {'Grand_id': 'GRAND_ID', 'Hylak_id': 'lakeId'}
gpd_hydrolakes = gpd_hydrolakes.rename(columns=hydrolakes_column_names)

# Merge HydroLAKES into GRanD so that every GRanD reservoir gets its lakeId
gpd_grand = pd.merge(gpd_grand, gpd_hydrolakes, on='GRAND_ID')

# Selection of the necessary GRanD variables
# NOTE(review): gpd_grand_selection is not referenced again in the visible part of the notebook
grand_columns = ['lakeId', 'CAP_MCM', 'USE_IRRI', 'MAIN_USE', 'RES_NAME', 'YEAR', 'DIS_AVG_LS']
gpd_grand_selection = gpd_grand[grand_columns]


### Merge GRanD attributes for Hanasaki parameters and save into river_with_lake shapefile

In [5]:
### Build the table of H06 parameters that is written into the network topology
df_H06 = pd.DataFrame()  # starts empty: no row data inserted yet

### assigning the lake ids creates the rows (one per row of gpd_grand)
df_H06['lakeId'] = gpd_grand['lakeId']

### constant parameters, the same value for every reservoir
h06_constants = {
    'H06_alpha':       0.85,
    'H06_envfact':     0.9,
    'H06_c1':          0.1,
    'H06_c2':          0.9,
    'H06_exponent':    2,
    'H06_denominator': 0.5,
    'H06_c_compare':   0.5,
    'H06_frac_Sdead':  0.1,
    'H06_I_mem_F':     0,
    'H06_D_mem_F':     0,
    'H06_I_mem_L':     5,
    'H06_D_mem_L':     5,
}
for param_name, param_value in h06_constants.items():
    df_H06[param_name] = param_value

### parameters based on GRanD
grand_capacity = gpd_grand['CAP_MCM']
df_H06['H06_Smax'] = grand_capacity * 1e6   # m³
df_H06['H06_S_ini'] = grand_capacity * 1e6  # m³

# Release coefficient, E_rel = S_ini/(S_max*alpha), with the initial storage taken as dead storage.
# NOTE(review): H06_S_ini above is set to the full capacity while E_rel assumes dead
# storage as initial storage - confirm which of the two is intended.
dead_storage = grand_capacity * df_H06['H06_frac_Sdead']
active_storage = grand_capacity * df_H06['H06_alpha']
df_H06['H06_E_rel'] = dead_storage / active_storage

### reservoir purpose: 0 no irrigation, 1 irrigation (everything starts as 0)
df_H06['H06_purpose'] = 0
if irrig == 1:
    # only reservoirs whose main use is irrigation
    has_irrigation_main_use = gpd_grand['MAIN_USE'] == "Irrigation"
    df_H06.loc[has_irrigation_main_use, 'H06_purpose'] = 1
elif irrig == 2:
    # all irrigation: any entry in USE_IRRI (‘Main’; ‘Major’; or ‘Sec’ = Secondary use) counts
    has_any_irrigation_use = gpd_grand['USE_IRRI'].notnull()
    df_H06.loc[has_any_irrigation_use, 'H06_purpose'] = 1
# irrig == 0: no reservoir is flagged as irrigation, so the column stays 0

### combine gpd river and H06 parameters based on lakeids
gpd_river_H06 = gpd_river.merge(df_H06, on="lakeId", how='left')

### demand and inflow

# load demand and inflow seasonalities; PFAF is cast to str before it is used as merge key
inflow_csv_path = data_dir+'reservoirs_monthly_inflow.csv'
df_inflow = pd.read_csv(inflow_csv_path)
df_inflow['PFAF'] = df_inflow['PFAF'].astype(str)

demand_csv_path = data_dir+'reservoirs_monthly_demand'+sensitivity+'.csv'
df_demand = pd.read_csv(demand_csv_path)
df_demand['PFAF'] = df_demand['PFAF'].astype(str)

# only the inflow is merged here; df_demand is merged in a later cell
gpd_river_H06 = gpd_river_H06.merge(df_inflow, how='left', left_on='PFAF', right_on='PFAF')

### add lake model to shapefile: 0: no lake model, 1: Döll, 2: Hanasaki
gpd_river_H06['lake_model'] = 0

grand_lake_ids = list(gpd_grand['lakeId'].values)
is_grand_reservoir = gpd_river_H06['lakeId'].isin(grand_lake_ids)
is_lake = gpd_river_H06['islake'] == 1

# reservoirs known to GRanD use Hanasaki
gpd_river_H06.loc[is_grand_reservoir, 'lake_model'] = 2
# every other lake uses Döll
gpd_river_H06.loc[is_lake & ~is_grand_reservoir, 'lake_model'] = 1


In [6]:
# Give the columns that are written as integers a zero wherever they have no value,
# to avoid an error in int fill values when reading netcdf in FORTRAN
# error: integer: problem with fill value: in Python script -1, becomes in netcdf –1LL

# everything that is not explicitly flagged as irrigation (missing values included) becomes 0
gpd_river_H06.loc[gpd_river_H06['H06_purpose'] != 1, 'H06_purpose'] = 0

# Fill every column based on its OWN missing values. The memory-length columns
# used to be filled through the null mask of H06_I_mem_F, which is only correct
# when all four columns are missing on exactly the same rows.
for int_column in ('H06_I_mem_L', 'H06_D_mem_L', 'H06_I_mem_F', 'H06_D_mem_F'):
    gpd_river_H06.loc[gpd_river_H06[int_column].isnull(), int_column] = 0


In [7]:
# Optionally export the river network with its H06 attributes as a shapefile
# (! large file, so this takes some minutes)

if save_river_H06:
    river_H06_shp_path = river_with_lake_dir+'river_with_grand.shp'
    gpd_river_H06.to_file(river_H06_shp_path)

### Write parameters from shapefile to netcdf and merge with existing network topology

In [8]:
# Attach the columns of df_demand to the river table, matching rows on their PFAF code.
# Left join: rows of gpd_river_H06 without a matching PFAF in df_demand are kept.
# This happens after the optional shapefile export in the cell above, so that
# export does not contain the demand columns.
gpd_river_H06 = gpd_river_H06.merge(df_demand, how='left', left_on='PFAF', right_on='PFAF')

In [9]:
# write netcdf file with H06 parameters to merge to existing topology


def _write_seg_variable(ncid, name, dtype, fill_value, long_name, unit, values):
    """Create one variable along the 'seg' dimension, set its attributes and fill it.

    ncid       : open, writable netCDF4 dataset that already has a 'seg' dimension
    name       : name of the netCDF variable
    dtype      : netCDF datatype string ('i8' or 'f8' in this notebook)
    fill_value : fill value of the variable
    long_name  : text stored in the long_name attribute
    unit       : text stored in the unit attribute
    values     : array with one value per segment
    """
    varid = ncid.createVariable(name, dtype, ('seg',), fill_value=fill_value)
    varid.long_name = long_name
    varid.unit = unit
    varid[:] = values
    return varid


# define the shapefile
shp = gpd_river_H06

# The 'seg' dimension is taken from the existing topology; every column of the
# shapefile is written along it, so both must have the same number of entries.
# NOTE(review): the rows are also assumed to be in the same order as ds_ntopo.seg - confirm.
n_seg = len(ds_ntopo.seg)
if len(shp) != n_seg:
    raise ValueError('shapefile has ' + str(len(shp)) + ' rows but the topology has '
                     + str(n_seg) + ' segments')

# fill value per netCDF datatype
fill_values = {'i8': -1, 'f8': -9999}

# Variables copied unchanged from a shapefile column:
# (netCDF name, shapefile column, datatype, long_name, unit)
h06_variables = [
    ('H06_Smax', 'H06_Smax', 'f8',
     'Maximal reservoir storage', 'm^3'),
    ('H06_alpha', 'H06_alpha', 'f8',
     'Fraction of active storage compared to total storage', '-'),
    ('H06_envfact', 'H06_envfact', 'f8',
     'Fraction of inflow that can be used to meet demand', '-'),
    # TODO (open question of the original author): remove H06_S_ini?
    ('H06_S_ini', 'H06_S_ini', 'f8',
     'Initial storage, used to calculate release coefficient before start of operational year  ', 'm^3'),
    ('H06_c1', 'H06_c1', 'f8',
     'Coefficient 1, used for calculation of target release for irrigation reservoirs', '-'),
    ('H06_c2', 'H06_c2', 'f8',
     'Coefficient 2, used for calculation of target release for irrigation reservoirs', '-'),
    ('H06_exponent', 'H06_exponent', 'f8',
     'Exponent in actual release calculation for within-a-year reservoir ', '-'),
    ('H06_denominator', 'H06_denominator', 'f8',
     'Denominator in actual release calculation for within-a-year-reservoir', '-'),
    ('H06_c_compare', 'H06_c_compare', 'f8',
     'Criterion to distinguish between multi-year reservoir and whitin-a-year reservoir, compared against c, the ratio between storage capacity and mean total annual inflow', '-'),
    ('H06_frac_Sdead', 'H06_frac_Sdead', 'f8',
     'Fraction of maximum storage that is dead storage ', '-'),
    # TODO (note of the original author): rename the nc variable to H06_E_rel (remove the _ini)
    ('H06_E_rel_ini', 'H06_E_rel', 'f8',
     'Release coefficient (provided with initial value and updated throughout simulation', '-'),
    # the remaining variables are stored as integers
    ('H06_I_mem_F', 'H06_I_mem_F', 'i8',
     'Flag to transition to modeled inflow values for calculation of mean monthly inflow ', '-'),
    ('H06_D_mem_F', 'H06_D_mem_F', 'i8',
     'Flag to transition to modeled demand values for calculation of mean monthly demand ', '-'),
    ('H06_I_mem_L', 'H06_I_mem_L', 'i8',
     'Memory length in years to calculate the mean monthly inflow if flag, H06_I_mem_F is true ', 'year'),
    ('H06_D_mem_L', 'H06_D_mem_L', 'i8',
     'Memory length in years to calculate the mean monthly demand if flag, H06_D_mem_F is true ', 'year'),
    ('H06_purpose', 'H06_purpose', 'i8',
     'Reservoir purpose (0 non-irrigation, 1 irrigation)', '-'),
]

# month abbreviation (used in the column / variable names) and full name (used in long_name)
months      = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
months_long = ['January','February','March','April','May','June','July','August','September','October','November','December']

# open the nc file to write; the with-block closes it even when a write fails
with nc4.Dataset(data_dir+H06_param_fn, "w", format="NETCDF4") as ncid:
    ncid.createDimension('seg', n_seg)

    # lake ids and lake model are cast to int before writing
    _write_seg_variable(ncid, 'lakeId', 'i8', fill_values['i8'],
                        'lakeId corresponding to HydroLAKES and GRanD', '-',
                        np.array(shp['lakeId']).astype(int))
    _write_seg_variable(ncid, 'lake_model', 'i8', fill_values['i8'],
                        'Lake model used (1: Döll, natural lakes; 2: Hanasaki, reservoirs)', '-',
                        np.array(shp['lake_model']).astype(int))

    # lake volume: shapefile value * 1000000, + 1 to avoid division by zero
    _write_seg_variable(ncid, 'lakeVol', 'f8', fill_values['f8'],
                        'Maximum lake storage', '-',
                        np.array(shp['lake_Vol']) * 1000000 + 1)

    # H06 parameters that are copied as they are
    for nc_name, shp_column, dtype, long_name, unit in h06_variables:
        _write_seg_variable(ncid, nc_name, dtype, fill_values[dtype],
                            long_name, unit, np.array(shp[shp_column]))

    # monthly inflow and demand
    for month, month_long in zip(months, months_long):
        _write_seg_variable(ncid, 'H06_I_'+month, 'f8', fill_values['f8'],
                            'Mean reservoir inflow in '+month_long, 'm^3/s',
                            np.array(shp['H06_I_'+month]))
        _write_seg_variable(ncid, 'H06_D_'+month, 'f8', fill_values['f8'],
                            'Mean reservoir demand in '+month_long, 'm^3/s',
                            np.array(shp['H06_D_'+month]))

    # global attributes
    ncid.Conventions = 'CF-1.6'
    ncid.License     = 'The data were written by Inne Vanderkelen. They are under GPL.'
    ncid.history     = 'Created ' + time.ctime()
    ncid.source      = 'Written by prepare_ntopo_H06.ipynb notebook'

In [10]:
# Merge the Hanasaki parameters into the network topology

# re-open the parameter file that was written in the previous cell
H06_param_path = data_dir + H06_param_fn
ds_H06_param = xr.open_dataset(H06_param_path)

# combine the parameters with the existing network topology ...
ds_topology_H06 = ds_H06_param.merge(ds_ntopo, compat='override')

# ... and save the result as the new topology file
ds_topology_H06.to_netcdf(data_dir + topology_fn)