In [1]:
# import packages
import glob
from pathlib import Path
import scipy.stats
import os
import numpy as np
from datetime import datetime
from datetime import timedelta
import pandas as pd
import calendar
import matplotlib.pyplot as plt
import matplotlib as mpl
import math
import cartopy.crs as ccrs
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
from pathos.threading import ThreadPool as Pool
from scipy.stats import gaussian_kde
import xarray as xr
from matplotlib import cm
import matplotlib
from collections import Counter

In [2]:
import warnings
# Silence every warning for the rest of the session (deprecation notices included).
warnings.filterwarnings('ignore')

In [3]:
from f_sr_calculation import *
from f_snow_module import *

In [4]:
# Root folder that all input and output paths in this notebook are built from.
# The commented line is an alternative location of the same folder.
# work_dir=Path("/scratch/fransjevanoors/global_sr")
work_dir=Path("/mnt/u/LSM root zone/global_sr")

In [5]:
def process_forcing_timeseries(catch_id,fol_in,fol_in2,fol_out,var):
    """
    Combine the raw forcing timeseries of one catchment and store aggregated csvs.

    catch_id:   str, catchment id
    fol_in:     str, dir, folder with raw output csvs from grid-catchment extraction;
                files whose path contains 'gswp_p' are skipped
    fol_in2:    str, dir, folder with the precipitation csvs used instead of the
                'gswp_p' file (the first match in sorted order is used)
    fol_out:    str, dir, folder where to store the processed dataframes
    var:        list of str, new column names for, in this order, potential
                evaporation, precipitation and air temperature

    returns:
    None; stores csvs of daily, monthly, yearly, climatology and mean timeseries
    in the subfolders daily/monthly/yearly/climatology/mean of fol_out

    raises:
    FileNotFoundError when no csv for catch_id is found via fol_in2
    """
    # The folder strings are used as glob prefixes ("<folder>*/<id>*.csv").
    # glob returns matches in arbitrary order, so sort them to make the column
    # order of the output (and the chosen precipitation file) reproducible.
    raw_files = sorted(glob.glob(fol_in + f"*/{catch_id}*.csv"))
    p_files = sorted(glob.glob(fol_in2 + f"*/{catch_id}*.csv"))
    if not p_files:
        raise FileNotFoundError(
            f"no precipitation csv found for catchment {catch_id} in {fol_in2}"
        )

    # keep every raw variable except the gswp precipitation, then add the
    # replacement precipitation file
    files = [name for name in raw_files if 'gswp_p' not in name]
    files.append(p_files[0])

    # read each variable and restrict it to the 1981-2010 period
    frames = []
    for filename in files:
        df = pd.read_csv(filename, index_col=0, header=0)
        df.index = pd.to_datetime(df.index)
        frames.append(df.loc['1981-01-01':'2010-12-31'])

    # one column per variable, aligned on the date index
    d = pd.concat(frames, axis=1, ignore_index=False)
    d = d.rename(columns={
        'Potential evaporation from GLEAM v3.5a': f'{var[0]}',
        'precipitation': f'{var[1]}',
        'air_temperature': f'{var[2]}',
    })

    # first and last year of the data, used in all output file names
    y_start,y_end = d.index[0].year, d.index[-1].year

    # Offset objects instead of the 'M' / 'Y' string aliases: the aliases are
    # deprecated in recent pandas, the objects work in every version.
    df_m = d.groupby(pd.Grouper(freq=pd.offsets.MonthEnd())).mean()
    outputs = {
        'daily': d,
        'monthly': df_m,
        'climatology': df_m.groupby([df_m.index.month]).mean(),
        'yearly': d.groupby(pd.Grouper(freq=pd.offsets.YearEnd())).mean(),
        'mean': d.mean(),
    }
    for subfolder, data in outputs.items():
        # exist_ok avoids the check-then-create race when several threads
        # process catchments at the same time
        os.makedirs(f'{fol_out}/{subfolder}', exist_ok=True)
        data.to_csv(f'{fol_out}/{subfolder}/{catch_id}_{y_start}_{y_end}.csv')

In [6]:
def run_processing_function_parallel(
    catch_list=list,
    fol_in_list=list,
    fol_in2_list=list,
    fol_out_list=list,
    var_list=list,
    threads=None
    # threads=100
):
    """
    Run process_forcing_timeseries for many catchments through a thread pool.

    The lists are passed side by side to pool.map, so they are expected to
    have one entry per catchment.

    catch_list:     list of str, catchment ids
    fol_in_list:    list of str, input folders (fol_in argument)
    fol_in2_list:   list of str, second input folders (fol_in2 argument)
    fol_out_list:   list of str, output folders
    var_list:       list of list of str, variable name lists
    threads:        int or None, number of pool nodes; with None the pool is
                    created with its default size

    Returns: None
    """
    # choose the pool size: library default unless a node count is given
    pool = Pool() if threads is None else Pool(nodes=threads)
    # one call of process_forcing_timeseries per catchment
    pool.map(
        process_forcing_timeseries,
        catch_list,
        fol_in_list,
        fol_in2_list,
        fol_out_list,
        var_list,
    )
    

In [49]:
def process_forcing_timeseries2(catch_id,fol_in,fol_in2,fol_out):
    """
    Swap the 'ep' column of a processed daily timeseries for the 'ep_hs' column
    of a second file and store the aggregated csvs.

    catch_id:   str, catchment id
    fol_in:     str, dir, folder holding '{catch_id}_1981_2010.csv'; its 'ep'
                column is dropped
    fol_in2:    str, dir, folder holding '{catch_id}.csv'; its 'tas', 'tasmax'
                and 'tasmin' columns are dropped and 'ep_hs' is renamed to 'ep'
    fol_out:    str, dir, folder where to store the processed dataframes

    returns:
    None; stores csvs of daily, monthly, yearly, climatology and mean timeseries
    in the subfolders daily/monthly/yearly/climatology/mean of fol_out.
    Nothing is written when one of the two input files is missing.
    """
    base_file = f'{fol_in}/{catch_id}_1981_2010.csv'
    ep_file = f'{fol_in2}/{catch_id}.csv'

    # catchments without both inputs are skipped silently
    if not (os.path.exists(base_file) and os.path.exists(ep_file)):
        return

    base = pd.read_csv(base_file,index_col=0)
    base.index = pd.to_datetime(base.index)
    base = base.drop(columns='ep')

    ep = pd.read_csv(ep_file,index_col=0)
    ep.index = pd.to_datetime(ep.index)
    ep = ep.drop(columns=['tas','tasmax','tasmin'])

    # remaining columns of both files side by side, aligned on the date index
    d = pd.concat([base,ep],axis=1).rename(columns={'ep_hs':'ep'})

    # first and last year of the data, used in all output file names
    y_start,y_end = d.index[0].year, d.index[-1].year

    # Offset objects instead of the 'M' / 'Y' string aliases: the aliases are
    # deprecated in recent pandas, the objects work in every version.
    df_m = d.groupby(pd.Grouper(freq=pd.offsets.MonthEnd())).mean()
    outputs = {
        'daily': d,
        'monthly': df_m,
        'climatology': df_m.groupby([df_m.index.month]).mean(),
        'yearly': d.groupby(pd.Grouper(freq=pd.offsets.YearEnd())).mean(),
        'mean': d.mean(),
    }
    for subfolder, data in outputs.items():
        # create the output subfolder when needed; exist_ok keeps this safe
        # when several threads reach this point together
        os.makedirs(f'{fol_out}/{subfolder}', exist_ok=True)
        data.to_csv(f'{fol_out}/{subfolder}/{catch_id}_{y_start}_{y_end}.csv')
    
def run_processing_function_parallel2(
    catch_list=list,
    fol_in_list=list,
    fol_in2_list=list,
    fol_out_list=list,
    threads=None
    # threads=100
):
    """
    Run process_forcing_timeseries2 for many catchments through a thread pool.

    The lists are passed side by side to pool.map, so they are expected to
    have one entry per catchment.

    catch_list:     list of str, catchment ids
    fol_in_list:    list of str, input folders (fol_in argument)
    fol_in2_list:   list of str, second input folders (fol_in2 argument)
    fol_out_list:   list of str, output folders
    threads:        int or None, number of pool nodes; with None the pool is
                    created with its default size

    Returns: None
    """
    # choose the pool size: library default unless a node count is given
    pool = Pool() if threads is None else Pool(nodes=threads)
    # one call of process_forcing_timeseries2 per catchment
    pool.map(
        process_forcing_timeseries2,
        catch_list,
        fol_in_list,
        fol_in2_list,
        fol_out_list,
    )

In [46]:
# extend the catchment id list with the Italian (Po basin) catchments
catch_id_list = np.genfromtxt(f'{work_dir}/output/gsim_aus_catch_id_list_lo_sel.txt',dtype='str')[:]

# catchment id = shapefile name without its 4-character '.shp' suffix
it_list = [
    os.path.split(filepath)[1][:-4]
    for filepath in glob.iglob(f'{work_dir}/data/po_basin/organized_data/selected_shapes/*.shp')
]
it = np.array(it_list)

# store the combined id list
catch_id_list2 = np.concatenate([catch_id_list,it])
np.savetxt(f'{work_dir}/output/gsim_aus_catch_id_list_lo_sel_it.txt',catch_id_list2,fmt='%s')

In [44]:
# show the Italian catchment ids collected from the shapefile names
it

array(['ITEMI002', 'ITEMI073', 'ITEMI101', 'ITLOM122', 'ITLOM124',
       'ITLOM128', 'ITLOM130', 'ITLOM132', 'ITLOM137', 'ITPIE168',
       'ITPIE186', 'ITPIE189', 'ITPIE194', 'ITSAR226', 'ITSAR230',
       'ITSAR233', 'ITSAR236', 'ITSAR237', 'ITTOS329', 'ITTOS331',
       'ITTOS338', 'ITTOS339', 'ITTOS340', 'ITTOS343', 'ITTOS345',
       'ITTOS349', 'ITTOS353', 'ITTOS372', 'ITTOS379', 'ITTOS384',
       'ITTRE240', 'ITTRE243', 'ITTRE248', 'ITTRE262', 'ITTRE264',
       'ITTRE267', 'ITUMB274', 'ITUMB275', 'ITUMB276', 'ITUMB277',
       'ITUMB283', 'ITUMB285', 'ITVAL288', 'ITVEN427', 'ITVEN429',
       'ITVEN430', 'ITVEN433', 'ITVEN434', 'ITVEN435'], dtype='<U8')

In [80]:
# p
# Merge the raw precipitation csvs of every Italian catchment into one csv per catchment.
fol_in=f'{work_dir}/output/forcing_timeseries/raw'
for catch_id in it:
    # glob returns matches in arbitrary order; sort the names so the merge is reproducible
    l = sorted(glob.glob(fol_in + f"*/{catch_id}*P*.csv"))

    # read every file and use its 'time' column as datetime index
    li=[] #make empty list
    for filename in l:
        df = pd.read_csv(filename, index_col=0, header=0)
        df.index = pd.to_datetime(df.time)
        li.append(df) #append dataframe to list

    # stack the pieces along time and make sure the result is chronological
    frame = pd.concat(li, axis=0, ignore_index=False) #concatenate dataframes in li
    frame = frame.drop(columns='time').sort_index(kind='mergesort')
    frame.to_csv(f'{fol_in}/{catch_id}_gswp_p_1981_2010.csv')

In [81]:
# tas
# Merge the raw temperature csvs of every Italian catchment into one csv per catchment.
fol_in=f'{work_dir}/output/forcing_timeseries/raw'
for catch_id in it:
    # glob returns matches in arbitrary order; sort the names so the merge is reproducible
    l = sorted(glob.glob(fol_in + f"*/{catch_id}*T*.csv"))

    # read every file and use its 'time' column as datetime index
    li=[] #make empty list
    for filename in l:
        df = pd.read_csv(filename, index_col=0, header=0)
        df.index = pd.to_datetime(df.time)
        li.append(df) #append dataframe to list

    # stack the pieces along time and make sure the result is chronological
    frame = pd.concat(li, axis=0, ignore_index=False) #concatenate dataframes in li
    frame = frame.drop(columns='time').sort_index(kind='mergesort')
    frame.to_csv(f'{fol_in}/{catch_id}_gswp_tas_1981_2010.csv')

In [83]:
# ep
# Merge the raw potential evaporation csvs of every Italian catchment into one csv per catchment.
fol_in=f'{work_dir}/output/forcing_timeseries/raw'
for catch_id in it[:]:
    # glob returns matches in arbitrary order; sort the names so the merge is reproducible
    l = sorted(glob.glob(fol_in + f"*/{catch_id}*Ep*.csv"))

    # read every file and use its 'time' column as datetime index
    li=[] #make empty list
    for filename in l:
        df = pd.read_csv(filename, index_col=0, header=0)
        df.index = pd.to_datetime(df.time)
        li.append(df) #append dataframe to list

    # stack the pieces along time and make sure the result is chronological
    frame = pd.concat(li, axis=0, ignore_index=False) #concatenate dataframes in li
    frame = frame.drop(columns='time').sort_index(kind='mergesort')
    frame.to_csv(f'{fol_in}/{catch_id}_gleam_ep_1980_2011.csv')

## processed timeseries with mswep p and gleam ep and gswp t

In [13]:
# input folders: raw extraction output and the processed mswep precipitation
fol_in=f'{work_dir}/output/forcing_timeseries/raw'
fol_in2=f'{work_dir}/output/forcing_timeseries/mswep_p/processed_timeseries'

# output folder
fol_out=f'{work_dir}/output/forcing_timeseries/processed_mswep_gleam_gswp'

# all catchment ids that should be processed
catch_id_list = np.genfromtxt(f'{work_dir}/output/gsim_aus_catch_id_list_lo_sel_it.txt',dtype='str')[:]

# ids that already have a daily csv: file name minus its last 14 characters
# (e.g. '_1981_2010.csv')
el_id_list = [
    os.path.split(filepath)[1][:-14]
    for filepath in glob.iglob(f'{work_dir}/output/forcing_timeseries/processed_mswep_gleam_gswp/daily/*.csv')
]

# catchments that still have to be processed
dif = list(set(catch_id_list) - set(el_id_list))
print(len(dif))
catch_list = dif

# names given to the evaporation, precipitation and temperature columns
var = ['ep','p','tas']

# sequential alternative to the parallel run:
# for catch_id in dif:
#     process_forcing_timeseries(catch_id,fol_in,fol_in2,fol_out,var)

0


In [12]:
# build one argument list per parameter, each as long as the list of missing catchments
catch_id_list = dif[:]
n_catch = len(catch_id_list)
fol_in_list = [fol_in] * n_catch
fol_in2_list = [fol_in2] * n_catch
fol_out_list = [fol_out] * n_catch
var_list = [var] * n_catch

# process the missing catchments in parallel
run_processing_function_parallel(catch_id_list,fol_in_list,fol_in2_list,fol_out_list,var_list)

In [14]:
# show the first rows of the processed daily csv of the first catchment in catch_id_list
catch_id = catch_id_list[0]
# the file name carries a year suffix, so look the file up with a wildcard
f = glob.glob(f'{fol_out}/daily/{catch_id}*.csv')
print(f)
# the first match is read; this fails when no daily csv exists for the catchment
c = pd.read_csv(f[0], index_col=0)
c.head()

['/mnt/u/LSM root zone/global_sr/output/forcing_timeseries/processed_mswep_gleam_gswp/daily/102101A_1981_2010.csv']


Unnamed: 0_level_0,ep,tas,p
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1981-01-01,4.744433,27.895155,2.787765
1981-01-02,4.475309,27.476841,1.442021
1981-01-03,6.261349,27.55455,47.797848
1981-01-04,4.613283,25.598467,16.89649
1981-01-05,3.863077,25.903513,6.125936


## processed timeseries with mswep p and hargreaves ep and gswp t

In [83]:
# input folders: processed daily timeseries and the hargreaves ep timeseries
fol_in=f'{work_dir}/output/forcing_timeseries/processed_mswep_gleam_gswp/daily'
fol_in2=f'{work_dir}/output/forcing_timeseries/ep_hargreaves'

# output folder
fol_out=f'{work_dir}/output/forcing_timeseries/processed_mswep_hs_gswp'

# all catchment ids that should be processed
catch_id_list = np.genfromtxt(f'{work_dir}/output/gsim_aus_catch_id_list_lo_sel_it.txt',dtype='str')[:]

# ids that already have a daily csv: file name minus its last 14 characters
el_id_list = [
    os.path.split(filepath)[1][:-14]
    for filepath in glob.iglob(f'{work_dir}/output/forcing_timeseries/processed_mswep_hs_gswp/daily/*.csv')
]

# catchments that still have to be processed
dif = list(set(catch_id_list) - set(el_id_list))
print(len(dif))
catch_list = dif

0


In [82]:
# build one argument list per parameter, each as long as the list of missing catchments
catch_id_list = dif[:]
n_catch = len(catch_id_list)
fol_in_list = [fol_in] * n_catch
fol_in2_list = [fol_in2] * n_catch
fol_out_list = [fol_out] * n_catch

# process the missing catchments in parallel
run_processing_function_parallel2(catch_id_list,fol_in_list,fol_in2_list,fol_out_list)

## processed timeseries with gswp p and hargreaves ep and gswp t

In [80]:
# input folders: processed daily timeseries and the hargreaves ep timeseries
fol_in=f'{work_dir}/output/forcing_timeseries/processed/daily'
fol_in2=f'{work_dir}/output/forcing_timeseries/ep_hargreaves'

# output folder
fol_out=f'{work_dir}/output/forcing_timeseries/processed_gswp_hs_gswp'

# all catchment ids that should be processed
catch_id_list = np.genfromtxt(f'{work_dir}/output/gsim_aus_catch_id_list_lo_sel_it.txt',dtype='str')[:]

# ids that already have a daily csv: file name minus its last 14 characters
el_id_list = [
    os.path.split(filepath)[1][:-14]
    for filepath in glob.iglob(f'{work_dir}/output/forcing_timeseries/processed_gswp_hs_gswp/daily/*.csv')
]

# catchments that still have to be processed
dif = list(set(catch_id_list) - set(el_id_list))
print(len(dif))
catch_list = dif

0


In [79]:
# build one argument list per parameter, each as long as the list of missing catchments
catch_id_list = dif[:]
n_catch = len(catch_id_list)
fol_in_list = [fol_in] * n_catch
fol_in2_list = [fol_in2] * n_catch
fol_out_list = [fol_out] * n_catch

# process the missing catchments in parallel
run_processing_function_parallel2(catch_id_list,fol_in_list,fol_in2_list,fol_out_list)

## Run snow calculation with mswep

In [15]:
# snow catchments: more than 5% of the days have temperature <0 together with
# precipitation >1 (without precipitation no snow is possible)
catch_id_list = np.genfromtxt(f'{work_dir}/output/gsim_aus_catch_id_list_lo_sel_it.txt',dtype='str')[:]
snow_catch=[]
for catch_id in catch_id_list:
    a = pd.read_csv(f'{work_dir}/output/forcing_timeseries/processed_mswep_gleam_gswp/daily/{catch_id}_1981_2010.csv',index_col=0)
    # number of cold days with precipitation
    n_snow = len(a[(a.tas<0)&(a.p>1)])
    if n_snow>0 and n_snow>0.05*len(a):
        snow_catch.append(catch_id)

# store the selected ids
np.savetxt(f'{work_dir}/output/snow/catch_id_list_snow_t_and_p_mswep.txt',snow_catch,fmt='%s')


In [5]:
# read the stored snow catchment ids back and show how many there are
snow_id_list = np.genfromtxt(f'{work_dir}/output/snow/catch_id_list_snow_t_and_p_mswep.txt',dtype='str')[:]
len(snow_id_list)

2571

In [15]:
# snow catchments that should have a snow timeseries
catch_id_list = np.genfromtxt(f'{work_dir}/output/snow/catch_id_list_snow_t_and_p_mswep.txt',dtype='str')

# ids that already have one: file name without its 4-character '.csv' suffix
el_id_list = [
    os.path.split(filepath)[1][:-4]
    for filepath in glob.iglob(f'{work_dir}/output/snow/timeseries_mswep/*.csv')
]

# catchments that still have to be processed
dif = list(set(catch_id_list) - set(el_id_list))
print(len(dif))
catch_list = dif

3


In [None]:
# run the snow calculation in parallel for the catchments in catch_list
# (catch_list was set by the missing-catchment check)
catch_list = catch_list[:]
# one work_dir entry per catchment
work_dir_list = [work_dir] * len(catch_list)
# run_function_parallel_snow_mswep is not defined in this notebook;
# presumably it comes from the f_snow_module star import — TODO confirm
run_function_parallel_snow_mswep(catch_list, work_dir_list)

## Run Sd calculation

In [104]:
ir_case = 'iaf' # or 'ni' or 'iwu' or 'iaf'
pep_dir = f'{work_dir}/output/forcing_timeseries/processed_mswep_gleam_gswp/daily'
q_dir = f'{work_dir}/output/q_timeseries_selected'
out_dir = f'{work_dir}/output/sr_calculation/sd_catchments_mswep_gleam'
# Use the snow catchment list that was derived from the mswep forcing, so that it
# matches the mswep snow timeseries folder below. With the list from the other
# forcing, a catchment can be flagged as snow catchment without having a file in
# snow_dir.
snow_ids = np.genfromtxt(f'{work_dir}/output/snow/catch_id_list_snow_t_and_p_mswep.txt',dtype='str')
snow_dir = f'{work_dir}/output/snow/timeseries_mswep'

catch_list = np.genfromtxt(f'{work_dir}/output/gsim_aus_catch_id_list_lo_sel_area_wb.txt',dtype='str')[:]
# itlist = np.genfromtxt(f'{work_dir}/data/po_basin/organized_data/it_selected_catchments.txt',dtype='str')
# catch_list = itlist

# # check which catchments are missing
# el_id_list=[]
# for filepath in glob.iglob(f'{work_dir}/output/sr_calculation/sd_catchments_mswep_gleam/irri/f0.9ia/sd/*.csv'):
#     f = os.path.split(filepath)[1] # remove full path
#     f = f[:-11] # remove .year extension
#     el_id_list.append(f)
# dif = list(set(catch_list) - set(el_id_list))
# print(len(dif))
# catch_list = dif

# NOTE: this override restricts the run to a single catchment and discards the
# list read from file above; remove it to process all catchments
catch_list = ['ca_0004894']#,'fr_0000874']

# build one argument list per parameter, each as long as the catchment list
catch_id_list = catch_list
n_catch = len(catch_id_list)
pep_dir_list = [pep_dir] * n_catch
q_dir_list = [q_dir] * n_catch
out_dir_list = [out_dir] * n_catch
snow_id_list = [snow_ids] * n_catch
snow_dir_list = [snow_dir] * n_catch
work_dir_list = [work_dir] * n_catch
ir_case_list = [ir_case] * n_catch

run_sd_calculation_parallel(catch_id_list,pep_dir_list,q_dir_list,out_dir_list,snow_id_list,snow_dir_list,work_dir_list,ir_case_list)

IndexError: list index out of range

In [107]:
# debugging: pick one catchment and the snow id array to inspect its inputs by hand
catch_id = 'ca_0004894'
snow_id_list = snow_ids

In [111]:
# debugging: the snow / no-snow switch is disabled here, so the forcing is
# always read from pep_dir
# if catch_id in snow_id_list:
#     s = 1 # snow is yes
#     f_pep = glob.glob(f'{snow_dir}/{catch_id}*.csv')

# else:
#     s = 0 # snow is no
#     # get P Ep and Q files for catch id
f_pep = glob.glob(f'{pep_dir}/{catch_id}*.csv')

# landscape characteristics of the catchment; the 'ia' column is kept as ir_area
# NOTE(review): this folder is named gswp-p_gleam-ep_gswp-t while the forcing above is mswep based — confirm
cc = pd.read_csv(f'{work_dir}/output/catchment_characteristics/gswp-p_gleam-ep_gswp-t/landscape/{catch_id}.csv',index_col=0)
ir_area = cc.ia.values

# list the discharge files of the catchment
f_q = glob.glob(f'{q_dir}/{catch_id}*.csv')

# read the first match of each list with a datetime index
# (indexing with [0] fails when a list is empty)
q_ts = pd.read_csv(f_q[0],index_col=0)
q_ts.index = pd.to_datetime(q_ts.index)
pep_ts = pd.read_csv(f_pep[0],index_col=0)
pep_ts.index = pd.to_datetime(pep_ts.index)

In [112]:
# show the forcing timeseries read for the debugged catchment
pep_ts

Unnamed: 0_level_0,ep,tas,p
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1981-01-01,0.095865,-3.937836,0.000000
1981-01-02,0.022581,-5.562408,0.000000
1981-01-03,0.036574,-3.593994,0.000000
1981-01-04,0.082080,-3.011627,0.158732
1981-01-05,0.033199,-2.026978,2.009772
...,...,...,...
2010-12-27,0.000000,-3.407257,0.000000
2010-12-28,0.039419,-5.179138,0.632407
2010-12-29,0.000000,-9.887177,0.000000
2010-12-30,0.000000,-16.543243,0.000000


## SR CALCULATION

In [18]:
# ids with a gumbelfit result: file name minus its last 21 characters
sd_list2 = [
    os.path.split(filepath)[1][:-21]
    for filepath in glob.iglob(f'{work_dir}/output/sr_calculation/sd_catchments_mswep_gleam/irri/f0.9ia/sr/*gumbelfit.csv')
]
len(sd_list2)

4514

In [19]:
# ids that are in sd_list2 but not in sd_list1
# NOTE(review): sd_list1 is not defined in the cells visible here, so this
# relies on leftover kernel state — confirm where it should come from
dif = list(set(sd_list2) - set(sd_list1))
len(dif)

0

In [None]:
# irrigation case passed to the Sr calculation
ir_case = 'iaf' #'ni' or iwu or iaf
# NOTE(review): catch_list is read here but not used further down in this cell
catch_list = np.genfromtxt(f'{work_dir}/output/gsim_aus_catch_id_list_lo_sel_area_wb.txt',dtype='str')[:]
# itlist = np.genfromtxt(f'{work_dir}/data/po_basin/organized_data/it_selected_catchments.txt',dtype='str')

# ids with an Sd result in the mswep_gleam folder
# NOTE(review): sd_list is built but not used further down in this cell
sd_list=[]
for filepath in glob.iglob(f'{work_dir}/output/sr_calculation/sd_catchments_mswep_gleam/irri/f0.9ia/sd/*.csv'):
    f = os.path.split(filepath)[1] # remove full path
    f = f[:-11] # drop the last 11 characters of the file name
    sd_list.append(f)

# define directories
# NOTE(review): these point to sd_catchments while the listing above uses
# sd_catchments_mswep_gleam — confirm which folder is intended
sd_dir = f'{work_dir}/output/sr_calculation/sd_catchments'
out_dir = f'{work_dir}/output/sr_calculation/sd_catchments'

# define return periods
rp_array = [1.5,2,3,5,10,20,30,40,50,60,70,80]
# NOTE(review): dif comes from another cell, so the catchments processed here
# depend on which cell was run last — confirm
catch_id_list = dif
# one argument entry per catchment
sd_dir_list = [sd_dir] * len(catch_id_list)
out_dir_list = [out_dir] * len(catch_id_list) # same folder for input and output
rp_array_list = [rp_array] * len(catch_id_list) # same return periods for every catchment
ir_case_list = [ir_case] * len(catch_id_list)

run_sr_calculation_parallel(catch_id_list,rp_array_list,sd_dir_list,out_dir_list,ir_case_list)

Combine calculated Sr values in one dataframe for each irrigation case:

In [16]:
# combine Sr output in dataframe - f0.9ia
# ids with a gumbelfit result: file name minus its last 21 characters
# ('_f0.9ia_gumbelfit.csv')
c_list=[]
for filepath in glob.iglob(f'{work_dir}/output/sr_calculation/sd_catchments_mswep_gleam/irri/f0.9ia/sr_rzyear/*gumbelfit.csv'):
    f = os.path.split(filepath)[1] # remove full path
    c_list.append(f[:-21])
print(len(c_list))

# empty result tables: one row per catchment
# - sr_df_gf: first row of the gumbelfit csv, one column per return period
# - sr_df_p:  mean Sd of the 3 (k3) and 5 (k5) points around the 2 year return period
sr_df_gf = pd.DataFrame(index=c_list, columns=['1.5','2.0','3.0','5.0','10.0','20.0','30.0','40.0','50.0','60.0','70.0','80.0'])
sr_df_p = pd.DataFrame(index=c_list, columns=['k3','k5'])
# sr_df_gf.to_csv(f'{work_dir}/output/sr_calculation/sd_catchments/irri/f0.9ia/sr_irri_f0.9ia_combined_gumbelfit2_rzyear.csv')
# sr_df_p.to_csv(f'{work_dir}/output/sr_calculation/sd_catchments/irri/f0.9ia/sr_irri_f0.9ia_combined_points2_rzyear.csv')

# sr_df_gf = pd.read_csv(f'{work_dir}/output/sr_calculation/sd_catchments/irri/f0.9ia/sr_irri_f0.9ia_combined_gumbelfit_rzyear.csv',index_col=0)
# sr_df_p = pd.read_csv(f'{work_dir}/output/sr_calculation/sd_catchments/irri/f0.9ia/sr_irri_f0.9ia_combined_points_rzyear.csv',index_col=0)
# sr_n = sr_df_p.dropna()

# catchments that are already filled in (none, unless the lines above are re-enabled)
p=[]
# p = sr_n.index
di = list(set(c_list) - set(p)) #missing catchments in dataframe

for catch_id in di[:]:
    gumbel_file = f'{work_dir}/output/sr_calculation/sd_catchments_mswep_gleam/irri/f0.9ia/sr_rzyear/{catch_id}_f0.9ia_gumbelfit.csv'
    points_file = f'{work_dir}/output/sr_calculation/sd_catchments_mswep_gleam/irri/f0.9ia/sr_rzyear/{catch_id}_f0.9ia_points.csv'
    if not os.path.exists(gumbel_file):
        continue

    d = pd.read_csv(gumbel_file,index_col=0)
    sr_df_gf.loc[catch_id] = d.iloc[0]

    d = pd.read_csv(points_file,index_col=0)
    sd_values = d.sd.values
    # position of the point whose return period T_a is closest to 2 years
    # (find_nearest presumably returns an integer position — TODO confirm)
    ix1 = find_nearest(d['T_a'].values,2)

    for column, half_width in (('k3',1),('k5',2)):
        # positions ix1, ix1-1, ix1+1, (ix1-2, ix1+2), limited to valid positions:
        # a negative position would wrap around to the end of the array and a
        # position past the end would raise an IndexError
        window = [ix1]
        for step in range(1, half_width+1):
            window.extend([ix1-step, ix1+step])
        window = [j for j in window if 0 <= j < len(sd_values)]
        # assign with a single .loc call: the chained form .loc[row][col] = value
        # writes to a temporary object when pandas copy-on-write is active
        sr_df_p.loc[catch_id, column] = np.mean(sd_values[window])

sr_df_gf.to_csv(f'{work_dir}/output/sr_calculation/sd_catchments_mswep_gleam/irri/f0.9ia/sr_irri_f0.9ia_combined_gumbelfit2_rzyear.csv')
sr_df_p.to_csv(f'{work_dir}/output/sr_calculation/sd_catchments_mswep_gleam/irri/f0.9ia/sr_irri_f0.9ia_combined_points2_rzyear.csv')

4513
