In [1]:
# import packages
import glob
from pathlib import Path
import os
import geopandas as gpd
import iris
import iris.pandas
import numpy as np
from esmvalcore import preprocessor
from iris.coords import DimCoord
from iris.cube import Cube
from pathos.threading import ThreadPool as Pool
from datetime import datetime
from datetime import timedelta
import pandas as pd
import xarray as xr

The history saving thread hit an unexpected error (DatabaseError('database disk image is malformed')).History will not be written to the database.


In [2]:
def find_nearest(array, value):
    array = np.asarray(array)
    idx = (np.abs(array - value)).argmin()
    return idx

In [3]:
work_dir=Path("/mnt/u/LSM root zone/global_sr")

In [4]:
## 1 
def regridding_target_cube(catchment_shapefile, spatial_resolution, buffer=1):
    
    """
    Define the target cube for regridding the input netcdf data
    catchment_shapefile:  str, catchment shapefile
    spatial_resolution:   float, target resolution
    buffer:               int, buffer
    
    returns:
    target cube for regridding
    
    """    
    catchment_bounds = gpd.read_file(catchment_shapefile).bounds

    buffer = 1
    minx = int(catchment_bounds.minx.values[0]) - buffer
    maxx = int(catchment_bounds.maxx.values[0]) + buffer
    miny = int(catchment_bounds.miny.values[0]) - buffer
    maxy = int(catchment_bounds.maxy.values[0]) + buffer

    latitude = DimCoord(
        np.linspace(
            miny,
            maxy,
            int(np.divide(abs(miny-maxy), spatial_resolution)),
            dtype=float,
        ),
        standard_name="latitude",
        units="degrees",
    )
    latitude.guess_bounds()
    
    longitude = DimCoord(
        np.linspace(
            minx,
            maxx,
            int(np.divide(abs(minx-maxx), spatial_resolution)),
            dtype=float,
        ),
        standard_name="longitude",
        units="degrees",
    )
    longitude.guess_bounds()
    
    target_cube = Cube(
        np.zeros((len(latitude.points), len(longitude.points)), np.float32),
        dim_coords_and_dims=[(latitude, 0), (longitude, 1)],
    )

    return target_cube

## STOCKER

In [30]:
def stocker_to_shape_values(catch_id,work_dir):
    catchment_shapefile =  glob.glob(f'{work_dir}/output/selected_shapes/{catch_id}.shp')[0]
    
    # STOCKER CWDX80
    catchment_netcdf= glob.glob(f'{work_dir}/data/reference_sr/stocker/cwdx80_units.nc')[0]
    # catchment_netcdf= glob.glob(f'{work_dir}/data/reference_sr/stocker/cwdx80_cru_grid.nc')[0]

    # Load iris cube of netcdf
    cube = iris.load_cube(catchment_netcdf)
    cube.dim_coords[0].guess_bounds()
    cube.dim_coords[1].guess_bounds()

    # From cube extract shapefile shape
    cube = preprocessor.extract_shape(cube, catchment_shapefile, method="contains") #use all grid cells that lie >50% inside the catchment shape

    # Calculate area weighted statistics of extracted grid cells (inside catchment shape)
    cube_stats = preprocessor.area_statistics(cube, 'mean')

    # from cube to xarray dataarray
    a=xr.DataArray.from_iris(cube_stats)

#     # STOCKER ROOT DEPTH
#     catchment_netcdf= glob.glob(f'{work_dir}/data/reference_sr/stocker/zroot_cwd80_units.nc')[0]

#     # Load iris cube of netcdf
#     cube = iris.load_cube(catchment_netcdf)
#     cube.dim_coords[0].guess_bounds()
#     cube.dim_coords[1].guess_bounds()

#     # From cube extract shapefile shape
#     cube = preprocessor.extract_shape(cube, catchment_shapefile, method="contains") #use all grid cells that lie >50% inside the catchment shape

#     # Calculate area weighted statistics of extracted grid cells (inside catchment shape)
#     cube_stats = preprocessor.area_statistics(cube, 'mean')

#     # from cube to xarray dataarray
#     b=xr.DataArray.from_iris(cube_stats)
    df = pd.DataFrame(index=[catch_id], columns=['stocker_cwd80x_mm','stocker_zroot_cwd80x_mm'])
    df['stocker_cwd80x_mm'] = a.values
    # df['stocker_zroot_cwd80x_mm'] = b.values
    df.to_csv(f'{work_dir}/output/sr_calculation/reference_sr/stocker/{catch_id}.csv')
    
def run_stocker_sr_parallel(
    catch_id_list=list,
    work_dir_list=list,
    # threads=None
    threads=100
):
    """
    Runs function area_weighted_shapefile_rasterstats in parallel.

    catch_list:  str, list, list of catchment ids
    work_dir_list: str, list, list of work directories
    threads:         int,       number of threads (cores), when set to None use all available threads

    Returns: None
    """
    # Set number of threads (cores) used for parallel run and map threads
    if threads is None:
        pool = Pool()
    else:
        pool = Pool(nodes=threads)
    # Run parallel models
    results = pool.map(
        stocker_to_shape_values,
        catch_id_list,
        work_dir_list,
    )
    

In [37]:
data_sources = 'gswp-p_gleam-ep_gswp-t'
df = pd.read_csv(f'{work_dir}/output/catchment_characteristics/master_characteristics_{data_sources}_v2.csv',index_col=0)
df = df[df.p_mean-df.q_mean < df.ep_mean]
df = df[['p_mean','ep_mean', 't_mean','tdiff_mean','idu_mean', 'si_ep','si_p', 'asi','dtb','ia','slp_mean','el_mean','tc','ntc','nonveg','pclay','psand','lai_mean','lai_rsd','sc_mean','sc_rsd','sr_20','lat','lon']]
df = df.dropna()
catch_id_list = df.index

el_id_list=[]
for filepath in glob.iglob(f'{work_dir}/output/sr_calculation/reference_sr/stocker/*.csv'):
    f = os.path.split(filepath)[1] # remove full path
    f = f[:-4] # remove .year extension
    el_id_list.append(f)
dif = list(set(catch_id_list) - set(el_id_list))
len(dif)

203

In [38]:
catch_list = dif[:]
work_dir_list = [work_dir]*len(catch_list)
run_stocker_sr_parallel(catch_list,work_dir_list)

## LAN

In [39]:
def lan_to_shape_values(catch_id,work_dir):
    catchment_shapefile =  glob.glob(f'{work_dir}/output/selected_shapes/{catch_id}.shp')[0]    
    # LAN CRU
    rp = ['2yrs','5yrs','10yrs','20yrs','30yrs','40yrs','50yrs','60yrs','max']
    df_cru = pd.DataFrame(index=[catch_id], columns=[])
    for i in rp:
        catchment_netcdf= glob.glob(f'{work_dir}/data/reference_sr/lan/sr_cru_{i}.nc')[0]

        # Load iris cube of netcdf
        cube = iris.load_cube(catchment_netcdf)
        cube.dim_coords[1].guess_bounds()
        cube.dim_coords[2].guess_bounds()

        # Create target grid and regrid cube to higher resolution (0.1) because 0.5 is coarse
        grid_resolution = 0.1
        target_cube = regridding_target_cube(catchment_shapefile, grid_resolution, buffer=1) #create the regrid target cube
        cube = preprocessor.regrid(cube, target_cube, scheme="nearest") #regrid the netcdf file with nearest neighbour (most logical for sr values I think)

        # From cube extract shapefile shape
        cube = preprocessor.extract_shape(cube, catchment_shapefile, method="contains") #use all grid cells that lie >50% inside the catchment shape

        # Calculate area weighted statistics of extracted grid cells (inside catchment shape)
        cube_stats = preprocessor.area_statistics(cube, 'mean')

        # from cube to xarray dataarray
        a=xr.DataArray.from_iris(cube_stats)
        df_cru[f'lan_cru_{i}'] = a.values

    # LAN CHIRPS
    # rp = ['2yrs','5yrs','10yrs','20yrs','30yrs','40yrs','50yrs','60yrs','max']
    rp = ['20yrs','max']
    df_chirps = pd.DataFrame(index=[catch_id], columns=[])
    for i in rp:
        catchment_netcdf= glob.glob(f'{work_dir}/data/reference_sr/lan/sr_chirps_{i}.nc')[0]

        # Load iris cube of netcdf
        cube = iris.load_cube(catchment_netcdf)
        cube.dim_coords[1].guess_bounds()
        cube.dim_coords[2].guess_bounds()

        # Create target grid and regrid cube to higher resolution (0.1) because 0.5 is coarse
        grid_resolution = 0.1
        target_cube = regridding_target_cube(catchment_shapefile, grid_resolution, buffer=1) #create the regrid target cube
        cube = preprocessor.regrid(cube, target_cube, scheme="nearest") #regrid the netcdf file with nearest neighbour (most logical for sr values I think)

        # From cube extract shapefile shape
        cube = preprocessor.extract_shape(cube, catchment_shapefile, method="contains") #use all grid cells that lie >50% inside the catchment shape

        # Calculate area weighted statistics of extracted grid cells (inside catchment shape)
        cube_stats = preprocessor.area_statistics(cube, 'mean')

        # from cube to xarray dataarray
        a=xr.DataArray.from_iris(cube_stats)
        df_chirps[f'lan_chirps_{i}'] = a.values   

    df = pd.concat([df_chirps,df_cru],axis=1)
    df.to_csv(f'{work_dir}/output/sr_calculation/reference_sr/lan/{catch_id}.csv')
    
    
def run_lan_sr_parallel(
    catch_id_list=list,
    work_dir_list=list,
    # threads=None
    threads=100
):
    """
    Runs function area_weighted_shapefile_rasterstats in parallel.

    catch_list:  str, list, list of catchment ids
    work_dir_list: str, list, list of work directories
    threads:         int,       number of threads (cores), when set to None use all available threads

    Returns: None
    """
    # Set number of threads (cores) used for parallel run and map threads
    if threads is None:
        pool = Pool()
    else:
        pool = Pool(nodes=threads)
    # Run parallel models
    results = pool.map(
        lan_to_shape_values,
        catch_id_list,
        work_dir_list,
    )

In [40]:
data_sources = 'gswp-p_gleam-ep_gswp-t'
df = pd.read_csv(f'{work_dir}/output/catchment_characteristics/master_characteristics_{data_sources}_v2.csv',index_col=0)
df = df[df.p_mean-df.q_mean < df.ep_mean]
df = df[['p_mean','ep_mean', 't_mean','tdiff_mean','idu_mean', 'si_ep','si_p', 'asi','dtb','ia','slp_mean','el_mean','tc','ntc','nonveg','pclay','psand','lai_mean','lai_rsd','sc_mean','sc_rsd','sr_20','lat','lon']]
df = df.dropna()
catch_id_list = df.index

el_id_list=[]
for filepath in glob.iglob(f'{work_dir}/output/sr_calculation/reference_sr/lan/*.csv'):
    f = os.path.split(filepath)[1] # remove full path
    f = f[:-4] # remove .year extension
    el_id_list.append(f)
dif = list(set(catch_id_list) - set(el_id_list))
len(dif)

527

In [41]:
catch_list = dif[:]
work_dir_list = [work_dir]*len(catch_list)
run_lan_sr_parallel(catch_list,work_dir_list)

## Yang

In [156]:
def yang_to_shape_values(catch_id,work_dir):
    catchment_shapefile =  glob.glob(f'{work_dir}/output/selected_shapes/{catch_id}.shp')[0]
    # print(catchment_shapefile)
    catchment_netcdf= glob.glob(f'{work_dir}/data/reference_sr/yang/data/Effective_Rooting_Depth.nc')[0]

    # Load iris cube of netcdf
    cube = iris.load_cube(catchment_netcdf)
    cube.dim_coords[0].guess_bounds()
    cube.dim_coords[1].guess_bounds()

    # From cube extract shapefile shape
    cube = preprocessor.extract_shape(cube, catchment_shapefile, method="contains") #use all grid cells that lie >50% inside the catchment shape

    # Calculate area weighted statistics of extracted grid cells (inside catchment shape)
    cube_stats = preprocessor.area_statistics(cube, 'mean')

    # from cube to xarray dataarray
    a=xr.DataArray.from_iris(cube_stats)
    df = pd.DataFrame(index=[catch_id], columns=['yang_m'])
    df['yang_m'] = a.values
    df.to_csv(f'{work_dir}/output/sr_calculation/reference_sr/yang/{catch_id}.csv')
    
def run_yang_sr_parallel(
    catch_id_list=list,
    work_dir_list=list,
    # threads=None
    threads=100
):
    """
    Runs function area_weighted_shapefile_rasterstats in parallel.

    catch_list:  str, list, list of catchment ids
    work_dir_list: str, list, list of work directories
    threads:         int,       number of threads (cores), when set to None use all available threads

    Returns: None
    """
    # Set number of threads (cores) used for parallel run and map threads
    if threads is None:
        pool = Pool()
    else:
        pool = Pool(nodes=threads)
    # Run parallel models
    results = pool.map(
        yang_to_shape_values,
        catch_id_list,
        work_dir_list,
    )
    

In [157]:
data_sources = 'gswp-p_gleam-ep_gswp-t'
df = pd.read_csv(f'{work_dir}/output/catchment_characteristics/master_characteristics_{data_sources}_v2.csv',index_col=0)
df = df[df.p_mean-df.q_mean < df.ep_mean]
df = df[['p_mean','ep_mean', 't_mean','tdiff_mean','idu_mean', 'si_ep','si_p', 'asi','dtb','ia','slp_mean','el_mean','tc','ntc','nonveg','pclay','psand','lai_mean','lai_rsd','sc_mean','sc_rsd','sr_20','lat','lon']]
df = df.dropna()
catch_id_list = df.index

el_id_list=[]
for filepath in glob.iglob(f'{work_dir}/output/sr_calculation/reference_sr/yang/*.csv'):
    f = os.path.split(filepath)[1] # remove full path
    f = f[:-4] # remove .year extension
    el_id_list.append(f)
dif = list(set(catch_id_list) - set(el_id_list))
len(dif)

455

In [158]:
catch_list = dif[423:425]
work_dir_list = [work_dir]*len(catch_list)
run_yang_sr_parallel(catch_list,work_dir_list)

ValueError: buffer source array is read-only

## Fan

In [52]:
def fan_to_shape_values(catch_id,work_dir):
    catchment_shapefile =  glob.glob(f'{work_dir}/output/selected_shapes/{catch_id}.shp')[0]
    catchment_netcdf= glob.glob(f'{work_dir}/data/reference_sr/fan/maxroot_allregions_025.nc') [0]

    # Load iris cube of netcdf
    cube = iris.load_cube(catchment_netcdf)
    cube.dim_coords[0].guess_bounds()
    cube.dim_coords[1].guess_bounds()

    # From cube extract shapefile shape
    cube = preprocessor.extract_shape(cube, catchment_shapefile, method="contains") #use all grid cells that lie >50% inside the catchment shape

    # Calculate area weighted statistics of extracted grid cells (inside catchment shape)
    cube_stats = preprocessor.area_statistics(cube, 'mean')

    # from cube to xarray dataarray
    a=xr.DataArray.from_iris(cube_stats)
    df = pd.DataFrame(index=[catch_id], columns=['fan_m'])
    df['fan_m'] = a.values
    df.to_csv(f'{work_dir}/output/sr_calculation/reference_sr/fan/{catch_id}.csv')
    
def run_fan_sr_parallel(
    catch_id_list=list,
    work_dir_list=list,
    # threads=None
    threads=100
):
    """
    Runs function area_weighted_shapefile_rasterstats in parallel.

    catch_list:  str, list, list of catchment ids
    work_dir_list: str, list, list of work directories
    threads:         int,       number of threads (cores), when set to None use all available threads

    Returns: None
    """
    # Set number of threads (cores) used for parallel run and map threads
    if threads is None:
        pool = Pool()
    else:
        pool = Pool(nodes=threads)
    # Run parallel models
    results = pool.map(
        fan_to_shape_values,
        catch_id_list,
        work_dir_list,
    )

In [71]:
data_sources = 'gswp-p_gleam-ep_gswp-t'
df = pd.read_csv(f'{work_dir}/output/catchment_characteristics/master_characteristics_{data_sources}_v2.csv',index_col=0)
df = df[df.p_mean-df.q_mean < df.ep_mean]
df = df[['p_mean','ep_mean', 't_mean','tdiff_mean','idu_mean', 'si_ep','si_p', 'asi','dtb','ia','slp_mean','el_mean','tc','ntc','nonveg','pclay','psand','lai_mean','lai_rsd','sc_mean','sc_rsd','sr_20','lat','lon']]
df = df.dropna()
catch_id_list = df.index

el_id_list=[]
for filepath in glob.iglob(f'{work_dir}/output/sr_calculation/reference_sr/fan/*.csv'):
    f = os.path.split(filepath)[1] # remove full path
    f = f[:-4] # remove .year extension
    el_id_list.append(f)
dif = list(set(catch_id_list) - set(el_id_list))
len(dif)

0

In [70]:
catch_list = dif[:]
work_dir_list = [work_dir]*len(catch_list)
run_fan_sr_parallel(catch_list,work_dir_list)

## Schenk2009

In [121]:
data_sources = 'gswp-p_gleam-ep_gswp-t'
df = pd.read_csv(f'{work_dir}/output/catchment_characteristics/master_characteristics_{data_sources}_v2.csv',index_col=0)
df = df[df.p_mean-df.q_mean < df.ep_mean]
df = df[['p_mean','ep_mean', 't_mean','tdiff_mean','idu_mean', 'si_ep','si_p', 'asi','dtb','ia','slp_mean','el_mean','tc','ntc','nonveg','pclay','psand','lai_mean','lai_rsd','sc_mean','sc_rsd','sr_20','lat','lon']]
df = df.dropna()

In [122]:
sc = xr.open_dataset(f'{work_dir}/data/reference_sr/schenk2009/data/processed/95ecosys_rootdepth_1d_crugrid.nc') 
sc = sc['95ecosys_rootdepth_1d'][0]
sc

In [125]:
data_sources = 'gswp-p_gleam-ep_gswp-t'
df = pd.read_csv(f'{work_dir}/output/catchment_characteristics/master_characteristics_{data_sources}_v2.csv',index_col=0)
df = df[df.p_mean-df.q_mean < df.ep_mean]
df = df[['p_mean','ep_mean', 't_mean','tdiff_mean','idu_mean', 'si_ep','si_p', 'asi','dtb','ia','slp_mean','el_mean','tc','ntc','nonveg','pclay','psand','lai_mean','lai_rsd','sc_mean','sc_rsd','sr_20','lat','lon']]
df = df.dropna()
catch_id_list = df.index

el_id_list=[]
for filepath in glob.iglob(f'{work_dir}/output/sr_calculation/reference_sr/schenk2009/*.csv'):
    f = os.path.split(filepath)[1] # remove full path
    f = f[:-4] # remove .year extension
    el_id_list.append(f)
dif = list(set(catch_id_list) - set(el_id_list))
len(dif)

0

In [124]:
for i in range(len(dif)):
    catch_id = dif[i]
    lat = df.lat[i]
    lon = df.lon[i]
    ixlon = find_nearest(sc.lon,lon)
    ixlat = find_nearest(sc[:,ixlon].lat,lat)
    a = sc[ixlat,ixlon].values
    lat = sc.lat[ixlat].values
    lon = sc.lon[ixlon].values
    ddf = pd.DataFrame(index=[catch_id], columns=['schenk_m'])
    ddf['schenk_m'] = a
    ddf.to_csv(f'{work_dir}/output/sr_calculation/reference_sr/schenk2009/{catch_id}.csv')

## KLEIDON

In [5]:
ka = xr.open_dataset(f'{work_dir}/data/reference_sr/Kleidon rooting depth/rootAssMap150_crugrid.nc')
ka = ka['rootAssMap150_m']
ko = xr.open_dataset(f'{work_dir}/data/reference_sr/Kleidon rooting depth/rootOptMap150_crugrid.nc')
ko = ko['rootOptMap150_m']

In [9]:
data_sources = 'gswp-p_gleam-ep_gswp-t'
df = pd.read_csv(f'{work_dir}/output/catchment_characteristics/master_characteristics_{data_sources}_v2.csv',index_col=0)
df = df[df.p_mean-df.q_mean < df.ep_mean]
df = df[['p_mean','ep_mean', 't_mean','tdiff_mean','idu_mean', 'si_ep','si_p', 'asi','dtb','ia','slp_mean','el_mean','tc','ntc','nonveg','pclay','psand','lai_mean','lai_rsd','sc_mean','sc_rsd','sr_20','lat','lon']]
df = df.dropna()
catch_id_list = df.index

el_id_list=[]
for filepath in glob.iglob(f'{work_dir}/output/sr_calculation/reference_sr/kleidon/*.csv'):
    f = os.path.split(filepath)[1] # remove full path
    f = f[:-4] # remove .year extension
    el_id_list.append(f)
dif = list(set(catch_id_list) - set(el_id_list))
len(dif)

0

In [8]:
for i in range(len(dif)):
    catch_id = dif[i]
    lat = df.lat[i]
    lon = df.lon[i]
    ixlon = find_nearest(ko.lon,lon)
    ixlat = find_nearest(ko[:,ixlon].lat,lat)
    a = ko[ixlat,ixlon].values
    b = ka[ixlat,ixlon].values
    lat = ko.lat[ixlat].values
    lon = ko.lon[ixlon].values
    ddf = pd.DataFrame(index=[catch_id], columns=['kleidon_opt_m','kleidon_ass_m'])
    ddf['kleidon_opt_m'] = a
    ddf['kleidon_ass_m'] = b
    ddf.to_csv(f'{work_dir}/output/sr_calculation/reference_sr/kleidon/{catch_id}.csv')

## COMBINE

In [12]:
data_sources = 'gswp-p_gleam-ep_gswp-t'
df = pd.read_csv(f'{work_dir}/output/catchment_characteristics/master_characteristics_{data_sources}_v2.csv',index_col=0)
df = df[df.p_mean-df.q_mean < df.ep_mean]
df = df[['p_mean','ep_mean', 't_mean','tdiff_mean','idu_mean', 'si_ep','si_p', 'asi','dtb','ia','slp_mean','el_mean','tc','ntc','nonveg','pclay','psand','lai_mean','lai_rsd','sc_mean','sc_rsd','sr_20','lat','lon']]
df = df.dropna()
catch_id_list = df.index[:]

ll=[]
for i in range(len(catch_id_list)):
    catch_id = catch_id_list[i]
    sc =f'{work_dir}/output/sr_calculation/reference_sr/schenk2009/{catch_id}.csv'
    s =f'{work_dir}/output/sr_calculation/reference_sr/stocker/{catch_id}.csv'
    f =f'{work_dir}/output/sr_calculation/reference_sr/fan/{catch_id}.csv'
    l = f'{work_dir}/output/sr_calculation/reference_sr/lan/{catch_id}.csv'
    y =f'{work_dir}/output/sr_calculation/reference_sr/yang/{catch_id}.csv'
    k =f'{work_dir}/output/sr_calculation/reference_sr/kleidon/{catch_id}.csv'
    if((os.path.exists(sc)==True)&(os.path.exists(l)==True)&(os.path.exists(f)==True)&(os.path.exists(y)==True)&(os.path.exists(s)==True)&(os.path.exists(k)==True)):
        scd = pd.read_csv(sc,index_col=0)
        fd = pd.read_csv(f,index_col=0)
        ld = pd.read_csv(l,index_col=0)
        ld = ld[['lan_cru_20yrs','lan_cru_max']]
        yd = pd.read_csv(y,index_col=0)
        sd = pd.read_csv(s,index_col=0)
        kd = pd.read_csv(k,index_col=0)
        kd = kd[['kleidon_opt_m','kleidon_ass_m']]
        d = pd.concat([scd,fd,ld,yd,sd,kd],axis=1)
        ll.append(d)

In [13]:
b = pd.concat(ll)
b

Unnamed: 0,schenk_m,fan_m,lan_cru_20yrs,lan_cru_max,yang_m,stocker_cwd80x_mm,stocker_zroot_cwd80x_mm,kleidon_opt_m,kleidon_ass_m
ca_0002351,0.7,4.577707e-01,147.24728,117.729965,0.200000,82.57038,1278.1450,0.043325,0.037156
br_0001191,,1.591897e+01,170.89000,151.390000,2.190000,844.23170,22685.3520,0.056484,0.056080
br_0000469,3.1,3.920803e+00,427.26190,352.917330,1.920000,497.65894,12994.8480,0.608276,0.609227
bw_0000006,1.4,2.384222e+00,137.19330,120.664550,0.340000,508.08330,14102.9730,0.285656,0.279685
ca_0001997,0.8,1.730958e+00,159.87488,127.689860,0.380289,86.47423,1141.4482,0.099059,0.099025
...,...,...,...,...,...,...,...,...,...
2342933,1.2,2.365148e+00,314.10000,250.940000,0.510000,244.95515,,0.207024,0.208007
8155200,1.0,5.684342e-14,547.51000,502.120000,0.720000,273.59580,,0.143921,0.079751
2472000,1.8,7.405374e+00,375.06967,319.821440,0.560000,319.91632,,0.090336,0.011000
6784000,0.7,5.237949e+00,165.46872,137.190050,0.422193,123.50533,,0.157498,0.117505


In [14]:
b.to_csv(f'{work_dir}/output/sr_calculation/reference_sr/combined.csv')

In [167]:
b.corr()

Unnamed: 0,schenk_m,fan_m,lan_cru_20yrs,lan_cru_max,yang_m
schenk_m,1.0,0.299307,0.387099,0.385578,0.043266
fan_m,0.299307,1.0,0.364344,0.361156,0.010062
lan_cru_20yrs,0.387099,0.364344,1.0,0.993585,0.068508
lan_cru_max,0.385578,0.361156,0.993585,1.0,0.075622
yang_m,0.043266,0.010062,0.068508,0.075622,1.0


In [168]:
ll2=[]
for i in range(len(catch_id_list)):
    catch_id = catch_id_list[i]
    s =f'{work_dir}/output/sr_calculation/reference_sr/stocker/{catch_id}.csv'
    if((os.path.exists(s)==True)):
        d = pd.read_csv(s,index_col=0)
        ll2.append(d)

In [169]:
b2 = pd.concat(ll2)
b2

Unnamed: 0,stocker_cwd80x_mm,stocker_zroot_cwd80x_mm
ca_0002351,82.57038,1278.1450
cz_0000019,70.48158,1045.5525
br_0001191,844.23170,22685.3520
br_0000469,497.65894,12994.8480
bw_0000006,508.08330,14102.9730
...,...,...
2342933,244.95515,
8155200,273.59580,
2472000,319.91632,
6784000,123.50533,


In [171]:
b3 = pd.concat([b,b2],axis=1)
b3

Unnamed: 0,schenk_m,fan_m,lan_cru_20yrs,lan_cru_max,yang_m,stocker_cwd80x_mm,stocker_zroot_cwd80x_mm
ca_0002351,0.7,0.457771,147.24728,117.729965,0.200000,82.57038,1278.1450
br_0001191,,15.918975,170.89000,151.390000,2.190000,844.23170,22685.3520
br_0000469,3.1,3.920803,427.26190,352.917330,1.920000,497.65894,12994.8480
bw_0000006,1.4,2.384222,137.19330,120.664550,0.340000,508.08330,14102.9730
ca_0001997,0.8,1.730958,159.87488,127.689860,0.380289,86.47423,1141.4482
...,...,...,...,...,...,...,...
se_0000034,,,,,,94.89175,1619.0137
ITSAR236,,,,,,454.97665,
fr_0000610,,,,,,197.26073,3473.9656
in_0000255,,,,,,437.28250,10109.3240


In [172]:
b3.to_csv(f'{work_dir}/output/sr_calculation/reference_sr/combined.csv')

## SCHENK JACKSON points

In [46]:
#Schenk and Jackson observed rooting depths
sj = pd.read_csv(f'{work_dir}/data/reference_sr/Schenk+Jackson2003-rootzone-point-data.csv',index_col=0,delimiter=';')
lats = sj.Latitude.values
lons = sj.Longitude.values

# get unique values
sj = sj[['Latitude','Longitude','D50','D95',' D50_extrapolated','D95_extrapolated']]
ll=[]
for i in range(len(lats)):
    a = lats[i]
    b = lons[i]
    c = f'{a}_{b}'
    ll.append(c)
sj.index = ll
sjj = sj.groupby(level=0).agg('mean')

In [47]:
sjj

Unnamed: 0,Latitude,Longitude,D50,D95,D50_extrapolated,D95_extrapolated
-0.62_-72.45,-0.62,-72.45,0.090000,0.343333,0.116667,0.533333
-1.08_-47.33,-1.08,-47.33,0.360000,3.760000,0.360000,3.760000
-1.18_-47.57,-1.18,-47.57,1.220000,5.290000,1.220000,5.290000
-1.18_36.63,-1.18,36.63,0.140000,3.330000,0.140000,3.330000
-1.43_-48.48,-1.43,-48.48,0.090000,0.305000,0.125000,0.570000
...,...,...,...,...,...,...
9.15_-79.85,9.15,-79.85,0.110000,0.570000,0.130000,0.970000
9.2_-79.38,9.20,-79.38,0.053333,0.360000,0.070000,0.610000
9.38_-1.98,9.38,-1.98,0.160000,0.490000,0.170000,0.620000
9.88_-83.67,9.88,-83.67,0.126667,0.600000,0.140000,0.906667


In [48]:
pp = xr.open_dataset(f'{work_dir}/output/gridded_sr/data_xr_mean.nc')
pp = pp.sr_p

ll=[]
for i in range(len(sjj)):
    lat = sjj.Latitude[i]
    lon = sjj.Longitude[i]
    ixlon = find_nearest(pp.lon,lon)
    ixlat = find_nearest(pp[:,ixlon].lat,lat)
    a = pp[ixlat,ixlon].values
    lat = pp.lat[ixlat].values
    lon = pp.lon[ixlon].values
    ll.append(a)
sjj['sr_p'] = ll

In [49]:
pp = xr.open_dataset(f'{work_dir}/data/reference_sr/fan/maxroot_allregions_crugrid2.nc') 
pp = pp.root_depth
ll=[]
for i in range(len(sjj)):
    lat = sjj.Latitude[i]
    lon = sjj.Longitude[i]
    ixlon = find_nearest(pp.lon,lon)
    ixlat = find_nearest(pp[:,ixlon].lat,lat)
    a = pp[ixlat,ixlon].values
    lat = pp.lat[ixlat].values
    lon = pp.lon[ixlon].values
    ll.append(a)
sjj['fan'] = ll

In [50]:
pp = xr.open_dataset(f'{work_dir}/data/reference_sr/yang/data/Effective_Rooting_Depth.nc')
pp = pp.Band1
ll=[]
for i in range(len(sjj)):
    lat = sjj.Latitude[i]
    lon = sjj.Longitude[i]
    ixlon = find_nearest(pp.lon,lon)
    ixlat = find_nearest(pp[:,ixlon].lat,lat)
    a = pp[ixlat,ixlon].values
    lat = pp.lat[ixlat].values
    lon = pp.lon[ixlon].values
    ll.append(a)
sjj['yang'] = ll

In [51]:
pp = xr.open_dataset(f'{work_dir}/data/reference_sr/lan/sr_cru_20yrs.nc')
pp = pp.sr_cru_20yrs[0]
ll=[]
for i in range(len(sjj)):
    lat = sjj.Latitude[i]
    lon = sjj.Longitude[i]
    ixlon = find_nearest(pp.lon,lon)
    ixlat = find_nearest(pp[:,ixlon].lat,lat)
    a = pp[ixlat,ixlon].values
    lat = pp.lat[ixlat].values
    lon = pp.lon[ixlon].values
    ll.append(a)
sjj['lan'] = ll

In [52]:
pp = xr.open_dataset(f'{work_dir}/data/reference_sr/stocker/cwdx80_cru_grid.nc')
pp = pp.cwdx80
ll=[]
for i in range(len(sjj)):
    lat = sjj.Latitude[i]
    lon = sjj.Longitude[i]
    ixlon = find_nearest(pp.lon,lon)
    ixlat = find_nearest(pp[:,ixlon].lat,lat)
    a = pp[ixlat,ixlon].values
    lat = pp.lat[ixlat].values
    lon = pp.lon[ixlon].values
    ll.append(a)
sjj['stocker'] = ll

In [55]:
pp = xr.open_dataset(f'{work_dir}/data/reference_sr/schenk2009/data/processed/95ecosys_rootdepth_1d_crugrid.nc') 
pp = pp['95ecosys_rootdepth_1d'][0]
ll=[]
for i in range(len(sjj)):
    lat = sjj.Latitude[i]
    lon = sjj.Longitude[i]
    ixlon = find_nearest(pp.lon,lon)
    ixlat = find_nearest(pp[:,ixlon].lat,lat)
    a = pp[ixlat,ixlon].values
    lat = pp.lat[ixlat].values
    lon = pp.lon[ixlon].values
    ll.append(a)
sjj['schenk2009'] = ll

In [56]:
pp = xr.open_dataset(f'{work_dir}/data/reference_sr/Kleidon rooting depth/rootAssMap150_crugrid.nc')
pp = pp['rootAssMap150_m']
ll=[]
for i in range(len(sjj)):
    lat = sjj.Latitude[i]
    lon = sjj.Longitude[i]
    ixlon = find_nearest(pp.lon,lon)
    ixlat = find_nearest(pp[:,ixlon].lat,lat)
    a = pp[ixlat,ixlon].values
    lat = pp.lat[ixlat].values
    lon = pp.lon[ixlon].values
    ll.append(a)
sjj['kleidon_ass'] = ll

In [58]:
pp = xr.open_dataset(f'{work_dir}/data/reference_sr/Kleidon rooting depth/rootOptMap150_crugrid.nc')
pp = pp['rootOptMap150_m']
ll=[]
for i in range(len(sjj)):
    lat = sjj.Latitude[i]
    lon = sjj.Longitude[i]
    ixlon = find_nearest(pp.lon,lon)
    ixlat = find_nearest(pp[:,ixlon].lat,lat)
    a = pp[ixlat,ixlon].values
    lat = pp.lat[ixlat].values
    lon = pp.lon[ixlon].values
    ll.append(a)
sjj['kleidon_opt'] = ll

In [59]:
sjj

Unnamed: 0,Latitude,Longitude,D50,D95,D50_extrapolated,D95_extrapolated,sr_p,fan,yang,lan,stocker,schenk2009,kleidon_ass,kleidon_opt
-0.62_-72.45,-0.62,-72.45,0.090000,0.343333,0.116667,0.533333,52.49619742370786,1.294471818152742,0.7099999785423279,55.71900177001953,72.3171615600586,0.800000011920929,0.04357379016687468,0.04534874088508932
-1.08_-47.33,-1.08,-47.33,0.360000,3.760000,0.360000,3.760000,190.4808991082117,2.7419042285213844,10.75,352.95001220703125,368.4580078125,1.7000000476837158,0.3346817718994602,0.3359395693820126
-1.18_-47.57,-1.18,-47.57,1.220000,5.290000,1.220000,5.290000,190.4808991082117,2.7419042285213844,10.75,352.95001220703125,368.4580078125,1.7000000476837158,0.3346817718994602,0.3359395693820126
-1.18_36.63,-1.18,36.63,0.140000,3.330000,0.140000,3.330000,249.99765047458445,3.8069921575206687,0.9300000071525574,221.4199981689453,88.83419799804688,1.600000023841858,0.24591681121042702,0.306336693130581
-1.43_-48.48,-1.43,-48.48,0.090000,0.305000,0.125000,0.570000,123.26284927865352,1.1437774208355858,10.050000190734863,267.8800048828125,,1.2999999523162842,0.3346817718994602,0.3359395693820126
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9.15_-79.85,9.15,-79.85,0.110000,0.570000,0.130000,0.970000,102.27914270872266,5.266478854981188,2.6700000762939453,422.3699951171875,452.0565185546875,,0.2213348234432965,0.25846390636582084
9.2_-79.38,9.20,-79.38,0.053333,0.360000,0.070000,0.610000,209.78403294840095,4.840256980704527,1.8600000143051147,,,,0.2213348234432965,0.25846390636582084
9.38_-1.98,9.38,-1.98,0.160000,0.490000,0.170000,0.620000,390.1853738374098,3.525511020233262,0.41999998688697815,167.39999389648438,355.42120361328125,2.799999952316284,0.1370233060602281,0.42419556743580755
9.88_-83.67,9.88,-83.67,0.126667,0.600000,0.140000,0.906667,,1.495549910672282,,190.91000366210938,141.08363342285156,1.5,0.28810416760106156,0.2995461664328056


In [60]:
sjj.to_csv(f'{work_dir}/data/reference_sr/Schenk+Jackson2003-rootzone-point-data_processed.csv')