# Postprocessing of CLM simulation for MizuRoute
INCLUDING: 
- 1. Processing of irrigation water demand as input for irrigation topology 
- 2. Inputfile preparation with runoff, precip and evaporation



In [1]:
import xarray as xr
import numpy as np
import pandas as pd
import os
import rasterstats
import utils
from iv_utils import *
import netCDF4 as nc4

# plot settings
utils.set_plot_param()

### Initialisation

In [2]:
### Initialisation

# model directory
outdir = '/glade/scratch/ivanderk/'

# current working directory
scriptsdir = os.getcwd() + '/'

# Define directory where processing is done -- subject to change
procdir =  '/glade/work/ivanderk/data/'

# mizuroute input dir (to save netcdf file to)
mizuroute_dir = '/glade/work/ivanderk/mizuRoute_global/route/'

# mizuroute data dir 
datadir = '/glade/work/ivanderk/data/'

# go to processing directory 
os.chdir(procdir)


In [3]:
# set case name
case ='i.IHistClm50Sp.hcru_hcru.CTL'

# run settings -- change this to terms directly? 
block = 'lnd'  # lnd data
               # atm data
               # rof data
   
    
# define start and end year
nspinupyears = 5
spstartyear = '1960'   # spin up start year 
startyear   = str(int(spstartyear)+nspinupyears)   # start year, spin up excluded (5 years for now, best change to 10 when simulation is ready)
endyear     = '2010'   # last year of the simulation


# open network topology 
ntopo = xr.open_dataset(mizuroute_dir+'ancillary_data/ntopo_hdma_mod.reorder_lake_H06.nc')


## 1. Save irrigation demand seasonality

### Load time series of simulated variables from raw h0 output

In [4]:
# user settings
stream = 'h0'  # h0 output block
               # h1 output block
               # h2 output block

exclude_spinup = True

variables =  ['QIRRIG_FROM_SURFACE']#, 'QRUNOFF'] 

In [5]:
# load history file for every year, extract variables and concatenate timeseries

variables = variables +  ['lat','lon','time','time_bounds']

# set start year for load based on whether or not to exclude spin up
if exclude_spinup:  load_startyear = startyear 
else: load_startyear = spstartyear

# Define directory where timeseries data is stored
filedir = outdir + 'archive/' + case + '/' + block + '/hist/'
tspans = {'h0' : [str(year)+'-02-01-00000' for year in range(int(load_startyear),int(endyear)+1)],
          'h1' : [str(year)+'-01-01-00000' for year in range(int(load_startyear),int(endyear)+1)], 
          'h2' : [str(year)+'-01-01-00000' for year in range(int(load_startyear),int(endyear)+1)]} 

# define filename
for i, year in enumerate(range(int(load_startyear),int(endyear)+1)):
    print('Loading year '+str(year),end='\r')
    fn_in = case + '.clm2.' + stream + '.' + tspans[stream][i] +'.nc'

    # open file
    ds_year = xr.open_dataset(filedir+fn_in)

    # extract necessary variable 
    ds_sel = ds_year[variables]

    # initialise data array
    if i == 0:   ds = ds_sel
    else:  ds = xr.concat([ds, ds_sel], dim="time")



Loading year 1965

FileNotFoundError: [Errno 2] No such file or directory: b'/glade/scratch/ivanderk/archive/i.IHistClm50Sp.hcru_hcru.CTL/lnd/hist/i.IHistClm50Sp.hcru_hcru.CTL.clm2.h0.1965-02-01-00000.nc'

### Calculate and save irrigation seasonality in gridded format

In [None]:
# save rolled variable

da = ds.QIRRIG_FROM_SURFACE

da_seascycle = da.groupby('time.month').mean()

values = np.roll(da_seascycle.values,360, axis=2)

da_roll = xr.DataArray(values, coords={'time':da_seascycle.month.values,'lat': da_seascycle.lat.values, 'lon':  da_seascycle.lon.values},
             dims=['time','lat', 'lon'])

da_roll['lon']  = da_roll['lon']-180

#ds_seas = da_roll.to_dataset(name='QIRRIG')
#da_hru_id = xr.open_dataset(mizuroute_dir+'input/I2000CLM50_exp1.clm2.h1.1980-2000.nc')['hru_id']
#ds_seas['hru_id'] = da_hru_id
#ds_seas.to_netcdf('irrig_seasonality/'+case+'.'+startyear+'-'+endyear+'.QIRRIG_seascycle_rolled.nc')

# save per month
months = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']

ds_per_month = xr.Dataset()

for i,month in enumerate(months): 
    
    da_month = xr.DataArray(da_roll[i,:,:], coords={'lat': da_roll.lat.values, 'lon':  da_roll.lon.values}, dims=['lat', 'lon'])
    ds_per_month['QIRRIG_'+month] = da_month

ds_per_month.to_netcdf(datadir+'irrig_seasonality/'+case+'.'+startyear+'-'+endyear+'.QIRRIG_seascycle.nc')


### Calculate mean irrigation for each hru for every month

In [16]:
# settings

load_geometry_catch_global = False

# load hru id info from catchment shpfile
# really loading takes a long time! (therefore, load csv where geometry is already dropped)
if load_geometry_catch_global:
    catch = gpd.read_file(catch_file)
    catch_ids = catch_global.drop(columns=['geometry'])
    catch_ids.to_csv(datadir+'topology/HDMA_catchment/hdma_global_catch_v2_nogeom.csv')
else: 
    catch_ids = pd.read_csv(datadir+'topology/HDMA_catchment/hdma_global_catch_v2_nogeom.csv')
              

In [18]:
%%time
months = ['Jun','Jul', 'Aug', 'Sep', 'Oct']
# Calculate zonal statistics for every month and save in csv

catch_file = datadir+'topology/HDMA_catchment/hdma_global_catch_v2.gpkg'
nc_file = datadir+'irrig_seasonality/'+case+'.'+startyear+'-'+endyear+'.QIRRIG_seascycle.nc'

for month in months: 
    print(month)
    zonal_statistics = rasterstats.zonal_stats(catch_file, 'netcdf:'+nc_file+':QIRRIG_'+month)

    means = []
    [means.append(item['mean']) for item in zonal_statistics]; 
    catch_ids['QIRRIG_'+month] = means
    catch_ids.to_csv(datadir+'irrig_seasonality/catch_QIRRIG_'+month+'.csv')


Jun
Jul
Aug
Sep
Oct
CPU times: user 1h 59min 16s, sys: 2h 36min 37s, total: 4h 35min 54s
Wall time: 4h 36min 43s


In [50]:
# save QIRRIG per hru in netcdf file (that will be used as input to apply irrigation topology)

ds_qirrig_hru = xr.Dataset()

df_ntopo = ntopo[['hruid']].to_dataframe()

for i, month in enumerate(months): 
    catch_ids = pd.read_csv(datadir+'irrig_seasonality/catch_QIRRIG_'+month+'.csv')
    df_ntopo_merged = df_ntopo.merge(catch_ids, on='hruid')
    ds_qirrig_hru['QIRRIG_'+month] = xr.DataArray(df_ntopo_merged['QIRRIG_'+month], coords=[df_ntopo_merged.index.values], dims=["hru"])

fn = 'remap_monthly_QIRRIG_I2000Clm50Sp.hcru_hcru.nc'
ds_qirrig_hru.to_netcdf(datadir+'irrig_seasonality/'+fn)

## 2. Make timeseries of daily CLM simulations and save as MizuRoute input

In [4]:
# user settings
stream = 'h1'              # h1 output block
               # h2 output block

exclude_spinup = True
save_mizuroute_input = True

variables = ['QIRRIG_FROM_SURFACE', 'QRUNOFF', 'RAIN_FROM_ATM'] 
variables = [ 'QRUNOFF', 'RAIN_FROM_ATM', 'QIRRIG_FROM_SURFACE']#,'SNOW'] 
variables = []
startyear   = '1971'
endyear     = '1996'   # last year of the simulation


In [5]:
%%time

# load history file for every year, extract variables and concatenate timeseries
variables = variables +  ['lat','lon','time','time_bounds']

# set start year for load based on whether or not to exclude spin up
if exclude_spinup:  load_startyear = startyear 
else: load_startyear = spstartyear

# Define directory where timeseries data is stored
filedir = outdir + 'archive/' + case + '/' + block + '/hist/'
tspans = {'h0' : [str(year)+'-02-01-00000' for year in range(int(load_startyear),int(endyear)+1)],
          'h1' : [str(year)+'-01-01-00000' for year in range(int(load_startyear),int(endyear)+1)], 
          'h2' : [str(year)+'-01-01-00000' for year in range(int(load_startyear),int(endyear)+1)]} 




for i, year in enumerate(range(int(load_startyear),int(endyear)+1)):

    # load daily model output
    stream = 'h1'
    print('Loading year '+str(year),end='\r')
    fn_in = case + '.clm2.' + stream + '.' + tspans[stream][i] +'.nc'

    # open file
    ds_year = xr.open_dataset(filedir+fn_in)

    # extract necessary variable 
    ds_sel = ds_year[variables]

    # remove last timestep
    #da = da[:-1,:,:]

    # load subgrid output
    # evaporation: load evaporation for lake landunit, apply vector to grid and save onto netcdf
   
    variable = 'EFLX_LH_TOT'
    stream = 'h2'
    fn_in = case + '.clm2.' + stream + '.' + tspans[stream][i] +'.nc'
    outfile = case + '.clm2.' + stream + '.'+variable+'_lake.' + tspans[stream][i] +'.nc'
    # do vector to grid conversion and select lake land unit

    if not os.path.isfile(filedir+outfile):
        ds_lunit =  lunit2grid(variable,filedir+fn_in,fn_in+outfile,select_lunit=5)
    else: 
        ds_lunit = xr.open_dataset(filedir+outfile)
        
        #ds_lunit_lake = ds_lunit.sel(lunit=5)
        #outfile_lake = case + '.clm2.' + stream + '.'+variable+'_lake.' + tspans[stream][i] +'.nc'
        #ds_lunit_lake.to_netcdf(filedir+outfile_lake)
        
    da_lunit = ds_lunit[variable+'_lunit']
    
    ds_sel[variable+'_lake'] = da_lunit
    
    # store both in data array
    if i == 0:   ds = ds_sel
    else:  ds = xr.concat([ds, ds_sel], dim="time")

# Save variables in one file at MizuRoute input location
if save_mizuroute_input: 
     # add hru_id to ds
    da_hru_id = xr.open_dataset(mizuroute_dir+'input/I2000CLM50_exp1.clm2.h1.1980-2000.nc')['hru_id']
    ds['hru_id'] = da_hru_id
   
    # convert latent heat flux into evaporation (mm/s) 
    # LHF [W/m²]= E [mm/s] * lvap [J/kg]
    lvap = 2.45e6
    ds['EVAP_lake'] = (ds['EFLX_LH_TOT_lake']/lvap)
    ds['EVAP_lake'].attrs = {'long_name':'evaporation from lake land unit, converted from LHF of subgrid output', 
                        'units': 'mm/s'}
    ds = ds.drop_vars('EFLX_LH_TOT_lake')#.astype('float32')
  
    fn_out = case+'.clm2.h1.'+startyear+'-'+endyear+'.nc'
    ds.to_netcdf(mizuroute_dir+'input/'+fn_out)

CPU times: user 1min 5s, sys: 2min 17s, total: 3min 22s
Wall time: 5min 52s


In [6]:
fn_out = case+'.clm2.h1.'+startyear+'-'+endyear+'.nc'
ds.to_netcdf(mizuroute_dir+'input/'+fn_out)

In [9]:
fn_out

'i.IHistClm50Sp.hcru_hcru.CTL.clm2.h1.1971-1996.nc'

## fIX evap lake to bigger dataset

# continue from here

In [4]:
ds = xr.open_dataset('/glade/work/ivanderk/mizuRoute_global/route/input/i.IHistClm50Sp.hcru_hcru.CTL.clm2.h1.1971-2000.nc')
ds.sel(time=slice("1971-01-01", "1996-12-31")).to_netcdf('/glade/work/ivanderk/mizuRoute_global/route/input/i.IHistClm50Sp.hcru_hcru.CTL.clm2.h1.1971-1996.nc')

In [5]:
da_evap = xr.open_dataset(mizuroute_dir+'input/'+'i.IHistClm50Sp.hcru_hcru.CTL.clm2.h1.1971-1996_EVAP_lake.nc')['EVAP_lake']
da_evap_masked = da_evap.where(da_evap!=0, np.nan)
masked_values = da_evap_masked.to_masked_array()

In [6]:
# open the nc file to write
ncid = nc4.Dataset('/glade/work/ivanderk/mizuRoute_global/route/input/i.IHistClm50Sp.hcru_hcru.CTL.clm2.h1.1971-1996.nc', "a", format="NETCDF4")
var_evap = ncid.createVariable('EVAP_lake','float32', ('time','lat','lon'),fill_value=1e+36)
var_evap.long_name       = 'evaporation from lake land unit, converted from LHF of subgrid output'
var_evap.units           = 'mm/s'
var_evap.missing_value   = 1.e+36 

var_evap[:] = masked_values
ncid.close()

##  Test case

In [17]:
%%time
stream = 'h2'
fn_in = case + '.clm2.' + stream + '.' + tspans[stream][i] +'.nc'
infile = filedir+ fn_in
outfile = filedir+case + '.clm2.' + stream + '.'+variable+'_lunit.' + tspans[stream][i] +'.nc'

#outfile = filedir+'i.IHistClm50Sp.f09_g17.CTL.clm2.h2.TSA_lunit.189001-201412.nc'

var  = 'EFLX_LH_TOT'

# lunit vector to grid
ds_lunit =  lunit2grid(var,infile,outfile=outfile)

ds_sel = ds_lunit.isel(lunit=5)[variable+'_lunit'].to_dataset(name=variable+'_lake') 
da = ds_lunit[variable+'_lunit'].isel(lunit=5).mean('time').plot()

# pft vector to grid
#ds_lunit =  lunit2grid(var,infile,outfile)



Manipulating EFLX_LH_TOT data
Time spend: 0:10:21.662639


NameError: name 'variable' is not defined

In [7]:
fn_out = case+'.clm2.h1.'+str(1981)+'-'+str(1990)+'.nc'

ds =     xr.open_dataset(mizuroute_dir+'input/'+fn_out)

In [27]:
ds['QRUNOFF']

In [15]:
ds_lh = xr.open_dataset('/glade/scratch/ivanderk/archive/i.IHistClm50Sp.hcru_hcru.CTL/lnd/hist/i.IHistClm50Sp.hcru_hcru.CTL.clm2.h2.EFLX_LH_TOT_lake.1971-01-01-00000.nc')

In [25]:
ds_lh['EFLX_LH_TOT_lunit']