In [1]:
import pandas as pd

# This script prepares the observation summary behind the hydrological signature indices for the
# reference period (Finnish water years 1991–2020). It processes discharge data to determine, for each gauge,
# the number of observations, the equivalent number of years, and the first and last dates with data.
# Special cases with fewer than 5 years of observations in the reference period use all available data instead.

In [2]:
def read_daily_timeseries_csv(path, time_column='date'):
    """
    Load a daily timeseries from a CSV file, indexed by its parsed time column.

    Parameters:
    path (str): Location of the CSV file to read.
    time_column (str): Name of the column in the CSV file that holds the timestamps.

    Returns:
    DataFrame: The file contents with `time_column` removed from the columns,
    converted to datetimes and used as the index.
    """
    # Move the time column into the index first, then parse the index labels.
    frame = pd.read_csv(path).set_index(time_column)
    frame.index = pd.to_datetime(frame.index)
    return frame

In [None]:
# Location of the source CSV file with the discharge data, and the timeseries loaded from it
src_path = '/path/to/discharge_spec.csv'
data = read_daily_timeseries_csv(path=src_path)

In [None]:
# Restrict the data to the reference period, Finnish water years 1991–2020:
# label-based slicing keeps rows from 1990-09-01 through 2020-08-31, both ends included.
data_ref = data.loc[slice('1990-09-01', '2020-08-31')]

In [5]:
# Calculate signature indices for the reference period
DAYS_PER_YEAR = 365.25      # mean year length used to express observation counts in years
MIN_REFERENCE_YEARS = 5     # gauges with less coverage than this fall back to all available data


def _summarise_observations(frame):
    """
    Summarise the data coverage of every column (gauge) of a timeseries frame.

    Parameters:
    frame (DataFrame): Timeseries with one column per gauge.

    Returns:
    DataFrame: One row per column of `frame`, holding the number of non-missing
    observations, that count expressed in years, and the first and last index
    labels that carry a valid value.
    """
    summary = pd.DataFrame(frame.count(axis=0), columns=['sign_number_of_obs'])
    summary['sign_number_of_years'] = summary['sign_number_of_obs'] / DAYS_PER_YEAR
    summary['sign_start_date'] = frame.apply(lambda col: col.first_valid_index())
    summary['sign_last_date'] = frame.apply(lambda col: col.last_valid_index())
    return summary


signature_indices = _summarise_observations(data_ref)

# The gauges with less than 5 years of observations during the reference period
# (fewer than 5 * 365.25 days, i.e. at most 1826 observations) use all available data
is_special_case = signature_indices['sign_number_of_years'] < MIN_REFERENCE_YEARS
special_cases_idx = signature_indices[is_special_case].index

special_cases = data[special_cases_idx]
special_case_indices = _summarise_observations(special_cases)

# Update the signature indices with special cases
signature_indices.update(special_case_indices)

# update() aligns the special cases to the full gauge index before merging, which
# introduces missing values and may leave the integer counts stored as floats on
# some pandas versions. Counts are never missing, so casting back is lossless.
signature_indices['sign_number_of_obs'] = signature_indices['sign_number_of_obs'].astype('int64')

signature_indices = signature_indices.round(2)

In [17]:
# Write the per-gauge signature indices out as a CSV file
signature_indices.to_csv(path_or_buf='/path/to/signature_dates.csv')

In [None]:
# Display the signature indices and special cases.
# A notebook cell only renders its final bare expression, so listing both frames
# on separate lines would show special_cases alone. display() (made available by
# the IPython kernel) renders each frame explicitly.
display(signature_indices, special_cases)