In [70]:
%matplotlib inline

In [71]:
# This notebook reads streamflow measurements and meteorological data
# and exports a dataframe with water balance statistics and streamflow indices.
# Winter streamflow data is missing for some years. Only years with >90% temporal data coverage are considered.
# Gauges with less than 3 years of valid data are omitted.

In [72]:
import sys
sys.path.append(r'C:\Users\hordurbhe\Dropbox\UW\lamah_ice\code\HydroAnalysis')
import hydroanalysis
from pathlib import Path
import pickle
import pandas as pds
import numpy as np
import geopandas as gpds
import datetime as dt
import matplotlib.pyplot as plt
pds.set_option('display.max_rows', 500)
from pathlib import Path

# Define a function to read csv files containing ERA5-Land data
def read_era_csv(path,start,end):
    """Read a csv table of daily values and return the rows between start and end.

    The 'YYYY MM DD' column becomes the datetime index, the remaining column
    labels are cast to int, and the columns are returned in ascending order.
    """
    table = pds.read_csv(path).set_index('YYYY MM DD')
    table.index = pds.to_datetime(table.index)
    # Label-based slice on the datetime index; both end points are included
    window = table.loc[start:end]
    window.columns = window.columns.astype(int)
    return window.sort_index(axis=1)

def combine_csv_files(csv_directory):
    """Combine the per-file 'PET' columns of a folder of csv files into one table.

    Every ``*.csv`` file in ``csv_directory`` is read, its 'PET' column is renamed
    to the file stem (used as the catchment ID) and the tables are outer-merged
    on their 'date' column.

    Parameters
    ----------
    csv_directory : str or pathlib.Path
        Folder with the csv files; each file needs a 'date' and a 'PET' column.

    Returns
    -------
    pandas.DataFrame
        One column per file (named by the file stem), indexed by the parsed dates.

    Raises
    ------
    FileNotFoundError
        If the folder contains no csv files.
    """
    import pandas as pd
    import glob
    from pathlib import Path

    # Sorted so the column order does not depend on the file system's listing order
    csv_files = sorted(glob.glob(str(Path(csv_directory) / '*.csv')))
    if not csv_files:
        raise FileNotFoundError('No csv files found in %s' % csv_directory)

    combined_df = None
    for file in csv_files:
        # The file stem is the catchment ID and becomes the column header
        df = pd.read_csv(file).rename(columns={'PET': Path(file).stem})

        # `None` marks "nothing merged yet". Testing `.empty` instead would throw
        # away a first file that has a header but no rows.
        if combined_df is None:
            combined_df = df
        else:
            combined_df = pd.merge(combined_df, df, on='date', how='outer')

    combined_df = combined_df.set_index('date')
    # Use the local pandas alias so the function does not rely on a notebook global
    combined_df.index = pd.to_datetime(combined_df.index)
    return combined_df

# Read meteorological data and measured streamflow

In [73]:
# Analysis period and the ERA5-Land inputs: precipitation, total evaporation and PET
start = '1981-10-01'
end = '2018-09-30'
path_precip = Path(r'C:\Users\hordurbhe\Documents\Vinna\lamah\lamah_ice\era5_land\1950-2021\daily\ERA5L_total_precipitation.csv')
path_total_evap = Path(r'C:\Users\hordurbhe\Documents\Vinna\lamah\lamah_ice\era5_land\1950-2021\daily\ERA5L_total_evaporation.csv')
path_potential_evap = Path(r'C:\Users\hordurbhe\Documents\Vinna\lamah\lamah_ice\era5_land\1950-2021\daily\ERA5L_potential_evaporation.csv')

precip, evap, pet = (
    read_era_csv(era_path, start, end)
    for era_path in (path_precip, path_total_evap, path_potential_evap)
)

# RAV-II daily precipitation: 'Time' becomes the datetime index, columns are integer ids
rav_data = pds.read_csv(Path(r"C:\Users\hordurbhe\Documents\Vinna\lamah\lamah_ice\rav2\rav2_precip_daily_Basins_A.csv"))
rav_data['Time'] = pds.to_datetime(rav_data['Time'])
rav_data = rav_data.set_index('Time').loc[start:end]
rav_data.columns = rav_data.columns.astype(int)
rav_data = rav_data.sort_index(axis=1)

# RAV-II ET comes as hourly values and is summed to daily totals
rav_ET_data = pds.read_csv(Path(r"C:\Users\hordurbhe\Documents\Vinna\lamah\lamah_ice\rav2\rav2_ET_hourly_Basins_A.csv"))
rav_ET_data['Time'] = pds.to_datetime(rav_ET_data['Time'])
rav_ET_data = rav_ET_data.set_index('Time')
rav_ET_sum = rav_ET_data.resample('D').sum().loc[start:end]
rav_ET_sum.columns = rav_ET_sum.columns.astype(int)
rav_ET_sum = rav_ET_sum.sort_index(axis=1)

# RAV-II PET: one csv per catchment, combined into a single table
csv_directory = Path(r"C:\Users\hordurbhe\Documents\Vinna\lamah\lamah_ice\PET_Calculations\ref_ET_from_RAV_PM")
PET_PM_ravII_grdflx = combine_csv_files(csv_directory).loc[start:end]
PET_PM_ravII_grdflx.columns = pds.to_numeric(PET_PM_ravII_grdflx.columns)
PET_PM_ravII_grdflx = PET_PM_ravII_grdflx.sort_index(axis=1)
# After the numeric sort, the three columns with the largest ids are dropped
PET_PM_ravII_grdflx = PET_PM_ravII_grdflx[PET_PM_ravII_grdflx.columns[:-3]]
PET_PM_ravII_grdflx.columns = PET_PM_ravII_grdflx.columns.astype(int)
PET_PM_ravII_grdflx = PET_PM_ravII_grdflx.sort_index(axis=1)

# CARRA daily precipitation (not cut to start:end here)
combined_carra = pds.read_csv(Path(r"C:\Users\hordurbhe\Documents\Vinna\lamah\lamah_ice\carra\precip_daily\carra_precip_daily_Basins_A.csv"))
combined_carra['time'] = pds.to_datetime(combined_carra['time'])
combined_carra = combined_carra.set_index('time')
combined_carra.columns = combined_carra.columns.astype(int)
combined_carra = combined_carra.sort_index(axis=1)

# Read the measured streamflow from the four pickled gauge dictionaries.
# NOTE(review): pickle can run arbitrary code while loading - only load files
# that were produced by this project.
# Each file is opened in a `with` block so the handle is closed after loading
# (the handles were previously left open).
save_date = 'dec6_2023'

# File suffix "raw"
savepath = Path(r"C:\Users\hordurbhe\Dropbox\UW\lamah_ice\discharge_measurements\processed_by_hh\combined_gauges_LV_VI_raw_%s.p" % save_date)
with open(savepath, "rb") as pickle_file:
    combined_dict_npc_met_office = pickle.load(pickle_file)

# File suffix "raw_splitted"
savepath = Path(r"C:\Users\hordurbhe\Dropbox\UW\lamah_ice\discharge_measurements\processed_by_hh\combined_gauges_LV_VI_raw_splitted_%s.p" % save_date)
with open(savepath, "rb") as pickle_file:
    splitted_gauge_dict = pickle.load(pickle_file)

# File suffix "highqual"
savepath = Path(r"C:\Users\hordurbhe\Dropbox\UW\lamah_ice\discharge_measurements\processed_by_hh\combined_gauges_LV_VI_highqual_%s.p" % save_date)
with open(savepath, "rb") as pickle_file:
    combined_dict_high_qual = pickle.load(pickle_file)

# File suffix "highqual_splitted"
savepath = Path(r"C:\Users\hordurbhe\Dropbox\UW\lamah_ice\discharge_measurements\processed_by_hh\combined_gauges_LV_VI_highqual_splitted_%s.p" % save_date)
with open(savepath, "rb") as pickle_file:
    splitted_gauge_dict_high_qual = pickle.load(pickle_file)

# Read the gauges shapefile and the combined attributes file 

In [74]:
# Gauge shapefile, indexed by the integer gauge id and sorted by that id
gauges = gpds.read_file(r'C:\Users\hordurbhe\Documents\Vinna\lamah\lamah_ice\lamah_ice\D_gauges\3_shapefiles\gauges.shp')
gauges.index = pds.Index(gauges['id'].astype(int), name='id')
gauges = gauges.sort_index()

# Catchment attributes table, with the gauge coordinates attached (aligned on the id index)
catchment_attrs_path = Path(r"C:\Users\hordurbhe\Documents\Vinna\lamah\lamah_ice\lamah_ice\A_basins_total_upstrm\1_attributes\Catchment_attributes.csv")
catchment_attrs = pds.read_csv(catchment_attrs_path, sep=';').set_index('id')
catchment_attrs = catchment_attrs.assign(lat=gauges.geometry.y, lon=gauges.geometry.x)

# Export a water balance .csv file and calculate signatures

In [24]:
# We consider two versions of the streamflow observations: Unfiltered data (all measurements) and filtered data.

# Filtered observations

In [75]:
# Prepare the filtered measurements in the layout expected by the hydroanalysis package.
# Split records carrying a "_1" suffix are stored under their proper gauge name.
split_name_map = {'V112_1': 'V112', 'V68_1': 'V68', 'V100_1': 'V100'}

meas_dict = {}
for gauge, source_df in splitted_gauge_dict_high_qual.items():
    df = source_df.copy()
    df.columns = ['Value', 'Quality']

    # HydroAnalysis expects the quality flag as 0 (good) or 1 (not used).
    # In the filtered data, codes above 100 only occur where data is missing (QC=250).
    df.loc[df['Quality'] <= 100, 'Quality'] = 0
    df.loc[df['Quality'] > 100, 'Quality'] = 1

    meas_dict[split_name_map.get(gauge, gauge)] = df

# Build one daily table per gauge with the columns:
# streamflow (Q), quality flag, precipitation / PET / ET from ERA5-Land and RAV-II,
# and the water year. Only water years with enough complete data are kept.
# Gauges with fewer than 3 valid water years are not stored.

data_for_valid_years = dict()

# Number of valid water years per gauge id (recorded for every gauge, stored or not)
valid_years_lengths = dict()

# Minimum share of a water year (counted as 365 days) that must have complete data
thresh = 0.9
min_valid_years = 3

# Output column name -> table holding one column per gauge id
met_sources = {
    'P_ERA5L': precip,
    'PET_ERA5L': pet,
    'ET_ERA5L': evap,
    'P_rav': rav_data,
    'PET_rav': PET_PM_ravII_grdflx,
    'ET_rav': rav_ET_sum,
}

for gauge in meas_dict.keys():
    # Fetch the gauge ID from the station name
    gauge_id = gauges[gauges['V_no']==gauge].index[0]

    # Load the discharge values
    df = meas_dict[gauge].copy()

    # Only the "index has no .date attribute" case is tolerated; a bare except
    # would also hide unrelated errors.
    try:
        df.index = df.index.date
    except AttributeError:
        print('df index is already on date format for %s' %gauge)

    # Attach the meteorological series of this gauge, aligned on the date index
    for column, source in met_sources.items():
        met_df = pds.DataFrame(source[gauge_id])
        met_df.columns = [column]
        df[column] = met_df

    # Water years run from 1 October to 30 September and are labelled by the
    # calendar year in which they start. Derived from the month directly:
    # subtracting a fixed 273 days put 30 September of leap years in the
    # following water year.
    df['water_year'] = [d.year if d.month >= 10 else d.year - 1 for d in df.index]

    # Coverage per water year = share of days where every column has a value
    meas_dropna = df.dropna()
    coverage_water_years = meas_dropna['Value'].groupby(meas_dropna['water_year']).count()/365

    valid = coverage_water_years.loc[coverage_water_years>=thresh]
    yearcount = len(valid)

    # Get all data for the water years that are valid
    df_valid = df[df['water_year'].isin(valid.index)].copy()

    # Convert streamflow from daily mean m3/s to mm/d
    # (area_calc is scaled by 1e6, so it is presumably in km2 - TODO confirm)
    area = catchment_attrs.loc[[gauge_id], 'area_calc'].values[0]
    df_valid['Q'] = 1000*(df_valid['Value'].values*86400/(area*1000000))

    if yearcount >= min_valid_years:
        data_for_valid_years[gauge] = df_valid[['Q','Quality','P_ERA5L','PET_ERA5L','ET_ERA5L','water_year','P_rav','PET_rav','ET_rav']]
    valid_years_lengths[gauge_id] = {'year_count': yearcount}

# One row per gauge id, sorted by id
valid_years_df = pds.DataFrame(valid_years_lengths)
valid_years_df = valid_years_df.reindex(sorted(valid_years_df.columns), axis=1)
valid_years_df = valid_years_df.T

# Water balance table: the column means of every stored gauge table, keyed by gauge id
w_balance_dict = {
    catchment_attrs.loc[gauges[gauges['V_no']==station].index].index.values[0]: station_df.mean()
    for station, station_df in data_for_valid_years.items()
}

# Gauges as rows, sorted by id
wb_df = pds.DataFrame(w_balance_dict).sort_index(axis=1)
final_wb_df = wb_df.T
final_wb_df.index.name = 'id'

# Write the water balance table as a ';'-separated csv
folder = Path(r"C:\Users\hordurbhe\Dropbox\UW\lamah_ice\lamah_ice\A_basins_total_upstrm\1_attributes\final_attributes")
path = folder/'water_balance.csv'
wb_columns = ['Q','P_ERA5L','PET_ERA5L','ET_ERA5L','water_year','P_rav','PET_rav','ET_rav']
final_wb_df[wb_columns].to_csv(path,sep=';')

# Hydrological signatures for every gauge that passed the valid-years check
signature_functions = (
    hydroanalysis.streamflow_signatures.calculate_q_mean,
    hydroanalysis.streamflow_signatures.calculate_runoff_ratio,
    hydroanalysis.streamflow_signatures.calculate_stream_elas,
    hydroanalysis.streamflow_signatures.calculate_slope_fdc,
    hydroanalysis.streamflow_signatures.calculate_baseflow_index,
    hydroanalysis.streamflow_signatures.calculate_hfd_mean,
    hydroanalysis.streamflow_signatures.calculate_q_5,
    hydroanalysis.streamflow_signatures.calculate_q_95,
    hydroanalysis.streamflow_signatures.calculate_high_q_freq_dur,
    hydroanalysis.streamflow_signatures.calculate_low_q_freq_dur,
    hydroanalysis.streamflow_signatures.calculate_zero_q_freq,
)

signs_dict = dict()
for gauge, df in data_for_valid_years.items():
    # Fetch the gauge ID from the station name
    gauge_id = gauges[gauges['V_no']==gauge].index[0]

    # Inputs: streamflow, quality flag, RAV-II precipitation and water year.
    # A fresh list is handed over on every call.
    try:
        signs_dict[gauge_id] = hydroanalysis.utils.calculate_multiple_signatures(
            list(signature_functions),
            df['Q'].values,
            df['Quality'].values,
            df['P_rav'].values,
            df['water_year'].values,
        )
    except TypeError:
        print('Wrong output type for %s' % gauge)

# Gauges as rows, sorted by id
signs_df = pds.DataFrame(signs_dict)
signs_df = signs_df.sort_index(axis=1)
final_df = signs_df.T

# Keep the signature columns in this order and give them their export names
column_names = {
    'calculate_q_mean': 'q_mean',
    'calculate_runoff_ratio': 'runoff_ratio',
    'calculate_stream_elas_Sankarasubramanian': 'stream_elas',
    'calculate_slope_fdc_Addor': 'slope_fdc',
    'calculate_baseflow_index': 'baseflow_index_ladson',
    'calculate_hfd_mean_hfd_mean': 'hfd_mean',
    'calculate_q_5': 'Q5',
    'calculate_q_95': 'Q95',
    'calculate_high_q_freq_dur_hq_freq': 'high_q_freq',
    'calculate_high_q_freq_dur_hq_dur': 'high_q_dur',
    'calculate_low_q_freq_dur_lq_freq': 'low_q_freq',
    'calculate_low_q_freq_dur_lq_dur': 'low_q_dur',
    'calculate_zero_q_freq': 'zero_q_freq',
}
final_df = final_df[list(column_names)].rename(columns=column_names)

# Export the valid-year counts and the signatures
valid_years_df.index.name = 'id'
path = folder/'valid_years_10_perc_tol_1981_2018_ravII.csv'
valid_years_df.to_csv(path,sep=';')

final_df.index.name = 'id'
path = folder/'hydro_indices.csv'
final_df.to_csv(path,sep=';')

# Mean number of valid years over the gauges with more than 2 valid years
print('Mean valid years: %s' %valid_years_df[valid_years_df>2].mean())

Mean valid years: year_count    15.027397
dtype: float64


In [84]:
# Number of rows (gauge ids) in the signature table
len(final_df)

73

In [77]:
# Number of gauges whose valid-year count is greater than 2
len(valid_years_df[valid_years_df>2].dropna())

73

# Unfiltered observations:

In [87]:
# Same processing as for the filtered observations, now with splitted_gauge_dict,
# which includes all streamflow measurements (unfiltered version).

# Prepare the measurements in the layout expected by the hydroanalysis package.
# Split records carrying a "_1" suffix are stored under their proper gauge name.
split_name_map = {'V112_1': 'V112', 'V68_1': 'V68', 'V100_1': 'V100'}

meas_dict = {}
for gauge, source_df in splitted_gauge_dict.items():
    df = source_df.copy()
    df.columns = ['Value', 'Quality']

    # Quality codes up to 200 are flagged 0, codes above 200 are flagged 1
    df.loc[df['Quality']<=200, 'Quality'] = 0
    df.loc[df['Quality']>200, 'Quality'] = 1

    meas_dict[split_name_map.get(gauge, gauge)] = df

# Build one daily table per gauge with the columns:
# streamflow (Q), quality flag, precipitation / PET / ET from ERA5-Land and RAV-II,
# and the water year. Only water years with enough complete data are kept.
# Gauges with fewer than 3 valid water years are not stored.

data_for_valid_years = dict()

# Number of valid water years per gauge id (recorded for every gauge, stored or not)
valid_years_lengths = dict()

# Minimum share of a water year (counted as 365 days) that must have complete data
thresh = 0.9
min_valid_years = 3

# Output column name -> table holding one column per gauge id
met_sources = {
    'P_ERA5L': precip,
    'PET_ERA5L': pet,
    'ET_ERA5L': evap,
    'P_rav': rav_data,
    'PET_rav': PET_PM_ravII_grdflx,
    'ET_rav': rav_ET_sum,
}

for gauge in meas_dict.keys():
    # Fetch the gauge ID from the station name
    gauge_id = gauges[gauges['V_no']==gauge].index[0]

    # Load the discharge values
    df = meas_dict[gauge].copy()

    # Only the "index has no .date attribute" case is tolerated; a bare except
    # would also hide unrelated errors.
    try:
        df.index = df.index.date
    except AttributeError:
        print('df index is already on date format for %s' %gauge)

    # Attach the meteorological series of this gauge, aligned on the date index
    for column, source in met_sources.items():
        met_df = pds.DataFrame(source[gauge_id])
        met_df.columns = [column]
        df[column] = met_df

    # Water years run from 1 October to 30 September and are labelled by the
    # calendar year in which they start. Derived from the month directly:
    # subtracting a fixed 273 days put 30 September of leap years in the
    # following water year.
    df['water_year'] = [d.year if d.month >= 10 else d.year - 1 for d in df.index]

    # Coverage per water year = share of days where every column has a value
    meas_dropna = df.dropna()
    coverage_water_years = meas_dropna['Value'].groupby(meas_dropna['water_year']).count()/365

    valid = coverage_water_years.loc[coverage_water_years>=thresh]
    yearcount = len(valid)

    # Get all data for the water years that are valid
    df_valid = df[df['water_year'].isin(valid.index)].copy()

    # Convert streamflow from daily mean m3/s to mm/d
    # (area_calc is scaled by 1e6, so it is presumably in km2 - TODO confirm)
    area = catchment_attrs.loc[[gauge_id], 'area_calc'].values[0]
    df_valid['Q'] = 1000*(df_valid['Value'].values*86400/(area*1000000))

    if yearcount >= min_valid_years:
        data_for_valid_years[gauge] = df_valid[['Q','Quality','P_ERA5L','PET_ERA5L','ET_ERA5L','water_year','P_rav','PET_rav','ET_rav']]
    else:
        print('Only %s valid years for %s' % (yearcount,gauge))
    valid_years_lengths[gauge_id] = {'year_count': yearcount}

# One row per gauge id, sorted by id
valid_years_df = pds.DataFrame(valid_years_lengths)
valid_years_df = valid_years_df.reindex(sorted(valid_years_df.columns), axis=1)
valid_years_df = valid_years_df.T

# Water balance table: the column means of every stored gauge table, keyed by gauge id
w_balance_dict = {
    catchment_attrs.loc[gauges[gauges['V_no']==station].index].index.values[0]: station_df.mean()
    for station, station_df in data_for_valid_years.items()
}

# Gauges as rows, sorted by id
wb_df = pds.DataFrame(w_balance_dict).sort_index(axis=1)
final_wb_df = wb_df.T
final_wb_df.index.name = 'id'

# Write the water balance table as a ';'-separated csv
folder = Path(r"C:\Users\hordurbhe\Dropbox\UW\lamah_ice\lamah_ice\A_basins_total_upstrm\1_attributes\final_attributes")
path = folder/'water_balance_unfiltered.csv'
wb_columns = ['Q','P_ERA5L','PET_ERA5L','ET_ERA5L','water_year','P_rav','PET_rav','ET_rav']
final_wb_df[wb_columns].to_csv(path,sep=';')

# Hydrological signatures for every gauge that passed the valid-years check
signature_functions = (
    hydroanalysis.streamflow_signatures.calculate_q_mean,
    hydroanalysis.streamflow_signatures.calculate_runoff_ratio,
    hydroanalysis.streamflow_signatures.calculate_stream_elas,
    hydroanalysis.streamflow_signatures.calculate_slope_fdc,
    hydroanalysis.streamflow_signatures.calculate_baseflow_index,
    hydroanalysis.streamflow_signatures.calculate_hfd_mean,
    hydroanalysis.streamflow_signatures.calculate_q_5,
    hydroanalysis.streamflow_signatures.calculate_q_95,
    hydroanalysis.streamflow_signatures.calculate_high_q_freq_dur,
    hydroanalysis.streamflow_signatures.calculate_low_q_freq_dur,
    hydroanalysis.streamflow_signatures.calculate_zero_q_freq,
)

signs_dict = dict()
for gauge, df in data_for_valid_years.items():
    # Fetch the gauge ID from the station name
    gauge_id = gauges[gauges['V_no']==gauge].index[0]

    # Inputs: streamflow, quality flag, RAV-II precipitation and water year.
    # A fresh list is handed over on every call.
    try:
        signs_dict[gauge_id] = hydroanalysis.utils.calculate_multiple_signatures(
            list(signature_functions),
            df['Q'].values,
            df['Quality'].values,
            df['P_rav'].values,
            df['water_year'].values,
        )
    except TypeError:
        print('Wrong output type for %s' % gauge)

# Gauges as rows, sorted by id
signs_df = pds.DataFrame(signs_dict)
signs_df = signs_df.sort_index(axis=1)
final_df = signs_df.T

# Keep the signature columns in this order and give them their export names
column_names = {
    'calculate_q_mean': 'q_mean',
    'calculate_runoff_ratio': 'runoff_ratio',
    'calculate_stream_elas_Sankarasubramanian': 'stream_elas',
    'calculate_slope_fdc_Addor': 'slope_fdc',
    'calculate_baseflow_index': 'baseflow_index_ladson',
    'calculate_hfd_mean_hfd_mean': 'hfd_mean',
    'calculate_q_5': 'Q5',
    'calculate_q_95': 'Q95',
    'calculate_high_q_freq_dur_hq_freq': 'high_q_freq',
    'calculate_high_q_freq_dur_hq_dur': 'high_q_dur',
    'calculate_low_q_freq_dur_lq_freq': 'low_q_freq',
    'calculate_low_q_freq_dur_lq_dur': 'low_q_dur',
    'calculate_zero_q_freq': 'zero_q_freq',
}
final_df = final_df[list(column_names)].rename(columns=column_names)

# Export the valid-year counts and the signatures
valid_years_df.index.name = 'id'
path = folder/'valid_years_10_perc_tol_1981_2018_ravII_unfiltered.csv'
valid_years_df.to_csv(path,sep=';')

final_df.index.name = 'id'
path = folder/'hydro_indices_unfiltered.csv'
final_df.to_csv(path,sep=';')

# Mean number of valid years over the gauges with more than 2 valid years
print('Mean valid years: %s' %valid_years_df[valid_years_df>2].mean())

Only 0 valid years for V100
Only 0 valid years for V100_2
Only 0 valid years for V597
Only 2 valid years for 154259
Only 2 valid years for V515
Only 0 valid years for V125
Only 0 valid years for V94
Only 0 valid years for V95
Only 0 valid years for V98
Only 2 valid years for V280
Only 0 valid years for V99
Only 0 valid years for V252
Only 0 valid years for V132
Only 2 valid years for V618
Mean valid years: year_count    22.0
dtype: float64


In [88]:
# Number of rows (gauge ids) in the unfiltered signature table
len(final_df)

97

In [None]:
# We have more gauges here than reported in the gauge data availability analysis in the paper (97 vs. 94)
# because this notebook uses the split gauge records.

# Calculate a "water balance" .csv file with only meteorological time series means

In [None]:
# In this version, no years are skipped

In [62]:
# Period means of every meteorological series per catchment, 1981-10-01 to 2018-09-30
start = '1981-10-01'
end = '2018-09-30'
start_year = start[:4]

# (column name template, source table) in output column order
mean_sources = [
    ('P_ERA5L_%s', precip),
    ('ET_ERA5L_%s', evap),
    ('PET_ERA5L_%s', pet),
    ('P_ravII_%s', rav_data),
    ('ET_ravII_%s', rav_ET_sum),
    ('PET_ravII_%s', PET_PM_ravII_grdflx),
]
merged_df = pds.concat(
    [
        table.loc[start:end].mean().sort_index().rename(template % start_year)
        for template, table in mean_sources
    ],
    axis=1,
)
# The last seven rows of the merged table are left out of the export
merged_df = merged_df.iloc[:-7]
savepath = Path('C:\\Users\\hordurbhe\\Documents\\Vinna\\lamah\\lamah_ice\\lamah_ice\\A_basins_total_upstrm\\1_attributes\\meteorological_data_means_1981_to_2018.csv')
merged_df.to_csv(savepath)


# Now we export carra too for 1991-2018

In [63]:
# Period means of every meteorological series per catchment, 1991-10-01 to 2018-09-30,
# this time including CARRA precipitation
start = '1991-10-01'
end = '2018-09-30'
start_year = start[:4]

# (column name template, source table) in output column order
mean_sources = [
    ('P_ERA5L_%s', precip),
    ('ET_ERA5L_%s', evap),
    ('PET_ERA5L_%s', pet),
    ('P_ravII_%s', rav_data),
    ('ET_ravII_%s', rav_ET_sum),
    ('PET_ravII_%s', PET_PM_ravII_grdflx),
    ('P_carra_%s', combined_carra),
]
merged_df = pds.concat(
    [
        table.loc[start:end].mean().sort_index().rename(template % start_year)
        for template, table in mean_sources
    ],
    axis=1,
)
# The last seven rows of the merged table are left out of the export
merged_df = merged_df.iloc[:-7]
savepath = Path(r'C:\Users\hordurbhe\Documents\Vinna\lamah\lamah_ice\lamah_ice\A_basins_total_upstrm\1_attributes\meteorological_data_means_1991_to_2018.csv')
merged_df.to_csv(savepath)