In [1]:
import glob
from pathlib import Path
import os

import geopandas as gpd
import iris
import iris.pandas
import numpy as np
from esmvalcore import preprocessor
from iris.coords import DimCoord
from iris.cube import Cube
from pathos.threading import ThreadPool as Pool
from datetime import datetime
from datetime import timedelta
import pandas as pd

In [2]:
from f_grid_to_catchments import *
from f_postprocess_timeseries import *
from f_catch_characteristics import *
from f_preprocess_discharge import *

## Define paths

In [3]:
# Root working directory of the workflow. It must already contain
# data/shapes (catchment shapefiles), data/forcing (netCDF forcing) and
# data/gsim_discharge (discharge data); results are written to <work_dir>/output.
work_dir = Path("/work/users/vanoorschot/fransje/scripts/GLOBAL_SR/global_sr_module")

# Create the output directory. exist_ok=True keeps the cell safe to re-run and
# avoids the check-then-create race of os.path.exists() followed by os.makedirs().
os.makedirs(f'{work_dir}/output', exist_ok=True)

## Catchment IDs

In [4]:
# Build the list of catchment ids: one id per shapefile in data/shapes,
# taken from the file name without its '.shp' extension.
SHAPE_DIR = Path(f'{work_dir}/data/shapes/')

# '*.shp' (with the dot) only matches files whose extension is .shp.
# glob returns matches in arbitrary order, so sort them to make the catchment
# order (and everything derived from it) reproducible between runs.
shapefiles = sorted(glob.glob(f"{SHAPE_DIR}/*.shp"))

# Path.stem drops only the final suffix, so ids containing a dot stay intact.
catch_id_list = [Path(shapefile).stem for shapefile in shapefiles]

# Store the ids as plain text, one per line.
np.savetxt(f'{work_dir}/output/catch_id_list.txt', catch_id_list, fmt='%s')

## Preprocess discharge data

In [5]:
# Create the output folders for the discharge results.
# exist_ok=True keeps the cell re-runnable and avoids the check-then-create
# race of os.path.exists() followed by os.makedirs().
os.makedirs(f'{work_dir}/output/discharge/timeseries', exist_ok=True)
os.makedirs(f'{work_dir}/output/discharge/characteristics', exist_ok=True)

# Input folder (data/gsim_discharge) and output folder (output/discharge)
# handed to the preprocessing helper; the trailing slashes are kept as-is.
fol_in = f'{work_dir}/data/gsim_discharge/'
fol_out = f'{work_dir}/output/discharge/'

# Preprocess the discharge data of every catchment, one at a time.
for catch_id in catch_id_list:
    preprocess_gsim_discharge(catch_id, fol_in, fol_out)

## Grid to catchments

In [6]:
# Create the folders for the raw and the processed forcing time series.
# exist_ok=True keeps the cell re-runnable and avoids the check-then-create
# race of os.path.exists() followed by os.makedirs().
os.makedirs(f'{work_dir}/output/forcing_timeseries/raw', exist_ok=True)
os.makedirs(f'{work_dir}/output/forcing_timeseries/processed', exist_ok=True)

# Inputs: catchment shapefiles and netCDF forcing; output: raw time series folder.
SHAPE_DIR = Path(f'{work_dir}/data/shapes/')
NC4_DIR = Path(f'{work_dir}/data/forcing/')
OUT_DIR = Path(f'{work_dir}/output/forcing_timeseries/raw')

# Construct the argument lists for the parallel run.
(
    shapefile_list,
    netcdf_list,
    operator_list,
    output_dir_list,
) = construct_lists_for_parallel_function(NC4_DIR, SHAPE_DIR, OUT_DIR)

# Run the function in parallel. This is the last expression of the cell, so
# its return value is displayed as the cell output.
run_function_parallel(shapefile_list, netcdf_list, operator_list, output_dir_list)

[None, None, None, None, None, None, None, None, None]

## Post-process timeseries

In [7]:
# Process the raw forcing time series of each catchment: the helper gets the
# raw folder as input, the processed folder as output and the variable names
# Ep, P and T.
forcing_dir = f'{work_dir}/output/forcing_timeseries'
fol_in = f'{forcing_dir}/raw'
fol_out = f'{forcing_dir}/processed'
var = ['Ep', 'P', 'T']

for catch_id in catch_id_list:
    process_forcing_timeseries(catch_id, fol_in, fol_out, var)

## Descriptor variables

In [8]:
# Descriptor variables requested from the catchment-characteristics helper.
var = [
    'p_mean', 'ep_mean', 'q_mean', 't_mean',
    'ai', 'rc', 'ea_wb',
    'si_p', 'si_ep', 'phi',
    'st', 'tc', 'ntc', 'nonveg',
    'start_year', 'end_year',
]

# The same output folder is passed as both the input and the output location.
fol_in = fol_out = f'{work_dir}/output/'

# The helper is called once with the complete list of catchment ids.
catch_id = catch_id_list

# Last expression of the cell, so the returned value is displayed.
catch_characteristics(var, catch_id, fol_in, fol_out)

Unnamed: 0,p_mean,ep_mean,q_mean,t_mean,ai,rc,ea_wb,si_p,si_ep,phi,st,tc,ntc,nonveg,start_year,end_year
br_0000495,5.985058,3.635537,3.21034,27.17695,1.646265,,,0.610597,0.051158,6,,,,,,
fr_0000326,3.540341,2.105863,1.701488,10.755472,1.681183,,,0.36535,0.303236,6,,,,,,
us_0002247,4.276623,2.636243,3.9137,16.772313,1.622242,,,0.179611,0.364593,6,,,,,,
