In [1]:
%matplotlib inline

In [2]:
# This notebook reads streamflow measurements and meteorological data
# and exports CSV files with water balance statistics and streamflow indices.
# Winter streamflow data is missing for some years, so only water years with at least 90% temporal data coverage are considered.
# Gauges with fewer than 3 valid water years are omitted.
# We consider two versions of the streamflow observations: unfiltered data (all measurements) and filtered data.

In [94]:
import sys
sys.path.append(r'C:\Users\hordurbhe\Dropbox\UW\lamah_ice\code\HydroAnalysis')
import hydroanalysis
import pandas as pd
import numpy as np
import geopandas as gpds
import datetime as dt
from pathlib import Path
import hydroanalysis.utils
import hydroanalysis.streamflow_signatures

# Root folder of the local LamaH-Ice dataset; every path below is derived from it
path_lamahice = Path(r"C:\Users\hordurbhe\Documents\Vinna\lamah\lamah_ice\lamah_ice")

# Output folders for the exported CSV files
savepath_signatures = path_lamahice / "D_gauges/1_attributes"
savepath_attributes = path_lamahice / "A_basins_total_upstrm/1_attributes"

# Input files and folders
path_meteorological = path_lamahice / "A_basins_total_upstrm/2_timeseries/daily/meteorological_data"
path_catchment_attrs = path_lamahice / "A_basins_total_upstrm/1_attributes/Catchment_attributes.csv"
path_gauges = path_lamahice / "D_gauges/3_shapefiles/gauges.shp"

# Gauge shapefile, indexed by integer gauge id (the 'id' column itself is kept) and sorted by it
gauges = gpds.read_file(path_gauges)
gauges = gauges.set_index(gauges['id'].astype(int).rename('id')).sort_index()

# Catchment attribute table, indexed by catchment id
catchment_attrs = pd.read_csv(path_catchment_attrs, sep=';').set_index('id')

# Attach the gauge point coordinates (y -> lat, x -> lon); values are aligned on the id index
catchment_attrs = catchment_attrs.assign(lat=gauges.geometry.y, lon=gauges.geometry.x)

def _read_daily_csv(csv_path):
    """Read a ';'-separated daily CSV and index it by a DatetimeIndex named 'date'.

    The index is assembled from the file's ``YYYY``, ``MM`` and ``DD`` columns,
    which are kept as regular columns.
    """
    table = pd.read_csv(csv_path, sep=";")
    # Vectorised assembly from year/month/day columns (no per-row string joining).
    dates = pd.to_datetime(
        table[['YYYY', 'MM', 'DD']].rename(columns={'YYYY': 'year', 'MM': 'month', 'DD': 'day'})
    )
    return table.set_index(dates.rename('date'))


def _water_year(dates):
    """Label each date of a DatetimeIndex with the calendar year in which its water year starts.

    Water years run from 1 October to 30 September, so October-December keep
    their calendar year and January-September get the previous one. Using the
    month (instead of shifting by a fixed number of days) keeps 30 September of
    leap years in the correct water year.
    """
    return np.where(dates.month >= 10, dates.year, dates.year - 1)


def process_streamflow_data(path_gauges_ts, water_balance_filename, hydro_indices_filename, unfiltered=False,
                            start='1981-10-01', end='2018-09-30', thresh=0.9, min_valid_years=3):
    """Process streamflow data and save water balance and hydrological indices.

    For every ``ID_<id>.csv`` gauge file, the streamflow series is joined with the
    catchment's meteorological series, restricted to ``start``..``end``, and reduced
    to the water years with sufficient data coverage. Two ';'-separated CSV files are
    written: mean water balance terms (to ``savepath_attributes``) and streamflow
    signatures (to ``savepath_signatures``).

    Relies on the module-level ``catchment_attrs``, ``path_meteorological``,
    ``savepath_attributes`` and ``savepath_signatures``.

    Parameters:
    - path_gauges_ts: Path to the streamflow measurement files (filtered/unfiltered).
    - water_balance_filename: Name of the water balance CSV output.
    - hydro_indices_filename: Name of the hydrological indices CSV output.
    - unfiltered: Specifies how we convert the quality flag. If True, flags > 200 are
      marked bad (1); otherwise flags > 100 are marked bad.
    - start, end: First and last date (inclusive) of the analysis period.
    - thresh: Minimum fraction of a water year (complete rows / 365) required for
      the year to count as valid.
    - min_valid_years: Gauges with fewer valid water years are omitted.

    Returns:
    - None. Results are written to disk.

    Raises:
    - KeyError: if a processed gauge id is missing from ``catchment_attrs``, if an
      expected column is missing, or if no gauge/signature produced output columns.
    """
    meteo_columns = ['P_ERA5L', 'PET_ERA5L', 'ET_ERA5L', 'P_rav', 'PET_rav', 'ET_rav']
    water_balance_columns = ['Q'] + meteo_columns

    # Per-catchment daily data restricted to valid water years
    data_for_valid_years = {}

    # Loop through gauge files
    for gauge_file in path_gauges_ts.glob("ID_*.csv"):
        catchment_id = gauge_file.stem.split("_")[1]  # Extract ID

        # Skip if no meteorological data available
        meteo_file = path_meteorological / f"ID_{catchment_id}.csv"
        if not meteo_file.exists():
            continue

        df = _read_daily_csv(gauge_file).rename(columns={'qobs': 'Value', 'qc_flag': 'Quality'})

        # Convert quality flag: 0 (good), 1 (bad)
        if unfiltered:
            # Missing flags stay NaN here (comparisons with NaN are False in both masks).
            df.loc[df['Quality'] <= 200, 'Quality'] = 0
            df.loc[df['Quality'] > 200, 'Quality'] = 1
        else:
            # Missing flags become 0 here (NaN > 100 is False).
            df['Quality'] = np.where(df['Quality'] > 100, 1, 0)

        meteo_df = _read_daily_csv(meteo_file).rename(columns={
            'prec': 'P_ERA5L',
            'pet': 'PET_ERA5L',
            'total_et': 'ET_ERA5L',
            'prec_rav': 'P_rav',
            'ref_et_rav': 'PET_rav',
            'total_et_rav': 'ET_rav'
        })

        # Merge streamflow data with meteorological data and restrict to the analysis period
        df = df.join(meteo_df[meteo_columns], how="left").loc[start:end]

        # Compute water year (labelled by the year in which it starts)
        df['water_year'] = _water_year(df.index)

        # Find years with enough valid data: only rows without any missing value count
        valid_years = df.dropna().groupby('water_year')['Value'].count() / 365
        valid_years = valid_years[valid_years >= thresh]

        # Gauges with too few valid years are omitted
        if len(valid_years) < min_valid_years:
            continue

        # Filter for valid years
        df_valid = df[df['water_year'].isin(valid_years.index)].copy()

        # Convert streamflow to mm/day
        # (assumes 'Value' is in m^3/s and 'area_calc' in km^2 -- TODO confirm)
        catchment_area = catchment_attrs.loc[int(catchment_id)]['area_calc']
        df_valid['Q'] = 1000 * (df_valid['Value'] * 86400 / (catchment_area * 1000000))

        data_for_valid_years[catchment_id] = df_valid[['Q', 'Quality', 'water_year'] + meteo_columns]

    # Save water balance data: mean of each term over all days of the valid years
    wb_df = pd.DataFrame({cid: frame[water_balance_columns].mean()
                          for cid, frame in data_for_valid_years.items()}).T
    wb_df.index.name = 'id'
    wb_df.index = wb_df.index.astype(int)
    wb_df = wb_df.sort_index()
    wb_df[water_balance_columns].to_csv(savepath_attributes / water_balance_filename, sep=";")

    # Calculate hydrological signatures
    signature_functions = [
        hydroanalysis.streamflow_signatures.calculate_q_mean,
        hydroanalysis.streamflow_signatures.calculate_runoff_ratio,
        hydroanalysis.streamflow_signatures.calculate_stream_elas,
        hydroanalysis.streamflow_signatures.calculate_slope_fdc,
        hydroanalysis.streamflow_signatures.calculate_baseflow_index,
        hydroanalysis.streamflow_signatures.calculate_hfd_mean,
        hydroanalysis.streamflow_signatures.calculate_q_5,
        hydroanalysis.streamflow_signatures.calculate_q_95,
        hydroanalysis.streamflow_signatures.calculate_high_q_freq_dur,
        hydroanalysis.streamflow_signatures.calculate_low_q_freq_dur,
        hydroanalysis.streamflow_signatures.calculate_zero_q_freq
    ]
    signs_dict = {}
    for catchment_id, frame in data_for_valid_years.items():
        try:
            signs_dict[catchment_id] = hydroanalysis.utils.calculate_multiple_signatures(
                signature_functions,
                frame['Q'].values, frame['Quality'].values, frame['P_rav'].values, frame['water_year'].values)
        except TypeError:
            # Best effort: report the gauge and continue with the remaining ones
            print(f"Signature calculation failed for {catchment_id}")

    # Convert to DataFrame
    signs_df = pd.DataFrame(signs_dict).T
    signs_df.index.name = 'id'
    signs_df.index = signs_df.index.astype(int)
    signs_df = signs_df.sort_index()

    # Rename columns from the hydroanalysis output names to the exported names
    signs_df = signs_df.rename(columns={
        'calculate_q_mean': 'q_mean',
        'calculate_runoff_ratio': 'runoff_ratio',
        'calculate_stream_elas_Sankarasubramanian': 'stream_elas',
        'calculate_slope_fdc_Addor': 'slope_fdc',
        'calculate_baseflow_index': 'baseflow_index_ladson',
        'calculate_hfd_mean_hfd_mean': 'hfd_mean',
        'calculate_q_5': 'Q5',
        'calculate_q_95': 'Q95',
        'calculate_high_q_freq_dur_hq_freq': 'high_q_freq',
        'calculate_high_q_freq_dur_hq_dur': 'high_q_dur',
        'calculate_low_q_freq_dur_lq_freq': 'low_q_freq',
        'calculate_low_q_freq_dur_lq_dur': 'low_q_dur',
        'calculate_zero_q_freq': 'zero_q_freq'
    })

    # Save hydrological indices
    signs_df[['q_mean', 'runoff_ratio', 'stream_elas',
              'slope_fdc', 'baseflow_index_ladson',
              'hfd_mean', 'Q5',
              'Q95', 'high_q_freq', 'high_q_dur', 'low_q_freq',
              'low_q_dur', 'zero_q_freq']].to_csv(savepath_signatures / hydro_indices_filename, sep=";")

# Run the processing once per streamflow variant.
# Each entry: (timeseries folder, water balance output, indices output, unfiltered flag);
# the filtered measurements are processed first, then the unfiltered ones.
for _ts_folder, _wb_name, _indices_name, _is_unfiltered in (
    ("D_gauges/2_timeseries/daily_filtered", "water_balance.csv",
     "hydro_indices_1981_2018.csv", False),
    ("D_gauges/2_timeseries/daily", "water_balance_unfiltered.csv",
     "hydro_indices_1981_2018_unfiltered.csv", True),
):
    process_streamflow_data(
        path_gauges_ts=path_lamahice / _ts_folder,
        water_balance_filename=_wb_name,
        hydro_indices_filename=_indices_name,
        unfiltered=_is_unfiltered,
    )


