-----

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import os
import re
from math import log, exp
from pathlib import Path

# Function to parse duration strings and convert them to hours
def parse_duration(duration_str):
    """
    Convert a duration label such as "5-min:" into a number of hours.

    The label must start (after surrounding whitespace is stripped) with
    "<integer>-<unit>:". Recognised units, case-insensitively, are
    min/minute/minutes, hr/hour/hours and day/days.

    Examples:
        "5-min:" -> 0.0833 hours
        "2-hr:" -> 2 hours
        "2-day:" -> 48 hours

    Raises:
        ValueError: if the label does not have the expected shape or the
            unit is not one of the recognised names.
    """
    parsed = re.match(r'(\d+)-(\w+):', duration_str.strip())
    if parsed is None:
        raise ValueError(f"Invalid duration format: {duration_str}")

    amount = int(parsed.group(1))
    unit_name = parsed.group(2).lower()

    # Minutes yield a float; hours and days stay integers.
    if unit_name in ('min', 'minute', 'minutes'):
        return amount / 60.0
    if unit_name in ('hr', 'hour', 'hours'):
        return amount
    if unit_name in ('day', 'days'):
        return amount * 24
    raise ValueError(f"Unknown time unit in duration: {unit_name}")

# Function to read and process the precipitation frequency CSV
def read_precipitation_data(csv_file):
    """
    Load the precipitation frequency table from a CSV file.

    The file is scanned for the first line that begins with
    "by duration for ari" (case-insensitive); the comma-separated cells after
    the first one on that line become the ARI column labels. Every following
    non-empty line that starts with "<n>-min:", "<n>-hr:" or "<n>-day:" is
    read as a data row, and reading stops at the first line that does not.

    Returns:
        pandas.DataFrame indexed by duration in hours (index name
        'Duration_hours') with one column per ARI label. Depth cells that
        cannot be parsed as floats become NaN, and rows containing any NaN
        are dropped before returning.

    Raises:
        ValueError: if the header line is missing, has fewer than two cells,
            or a data row has a different number of cells than the header.
    """
    with open(csv_file, 'r') as handle:
        raw_lines = handle.readlines()

    # Find the position of the table header (first match wins).
    is_header = re.compile(r'^by duration for ari', re.IGNORECASE).match
    header_pos = next(
        (pos for pos, text in enumerate(raw_lines) if is_header(text.strip().lower())),
        None,
    )
    if header_pos is None:
        raise ValueError('Header line for precipitation frequency estimates not found in CSV file.')

    column_names = [cell.strip() for cell in raw_lines[header_pos].strip().split(',')]
    if len(column_names) < 2:
        raise ValueError('Insufficient number of ARI columns found in the header line.')

    # The first header cell labels the duration column; the rest are ARIs.
    ari_labels = column_names[1:]

    is_data_row = re.compile(r'^\d+-(min|hr|day):').match

    row_hours = []
    depth_columns = {label: [] for label in ari_labels}

    for raw in raw_lines[header_pos + 1:]:
        text = raw.strip()
        if not text:
            continue  # blank lines inside the table are ignored
        if not is_data_row(text):
            break  # first non-duration line marks the end of the table

        cells = [cell.strip() for cell in text.split(',')]
        if len(cells) != len(column_names):
            raise ValueError(f"Data row does not match header columns: {text}")

        try:
            hours = parse_duration(cells[0])
        except ValueError as ve:
            print(f"Skipping line due to error: {ve}")
            continue  # skip the whole row so the columns stay aligned

        row_hours.append(hours)
        for label, cell in zip(ari_labels, cells[1:]):
            try:
                depth_columns[label].append(float(cell))
            except ValueError:
                # Unparseable depth: keep the slot so the row can be dropped later.
                depth_columns[label].append(np.nan)

    table = pd.DataFrame(depth_columns, index=row_hours)
    table.index.name = 'Duration_hours'

    # Discard incomplete rows.
    return table.dropna()

# Function to perform log-log linear interpolation for each ARI
def interpolate_depths(df, total_duration):
    """
    Interpolate cumulative precipitation depths on a log-log scale.

    For every ARI column of ``df`` the depth is evaluated at each whole hour
    1, 2, ..., ``total_duration`` by linear interpolation of log(depth)
    against log(duration).

    Parameters:
    - df (pandas.DataFrame): depths indexed by duration in hours, one column
      per ARI. Rows may be in any order; they are sorted by duration here
      because ``np.interp`` silently returns meaningless values when its
      x-coordinates are not increasing.
    - total_duration (int): total storm duration in hours.

    Returns:
    - dict mapping each ARI column label to a NumPy array of length
      ``total_duration`` holding the interpolated depth at each hour.
      Hours outside the tabulated duration range take the nearest tabulated
      depth (``np.interp`` clamps to the end values).

    Raises:
    - ValueError: if any depth in a column is zero or negative, since its
      logarithm is undefined.
    """
    T = total_duration
    t_hours = np.arange(1, T + 1)

    # These do not depend on the ARI, so compute them once outside the loop.
    log_t = np.log(t_hours)
    durations = np.asarray(df.index.values, dtype=float)
    order = np.argsort(durations, kind='stable')
    log_durations = np.log(durations[order])

    D = {}
    for ari in df.columns:
        depths = df[ari].values
        # Ensure all depths are positive before taking logarithms
        if np.any(depths <= 0):
            raise ValueError(f"Non-positive depth value in ARI {ari}")
        # Reorder depths with the same permutation used for the durations
        log_depths = np.log(depths[order])
        D[ari] = np.exp(np.interp(log_t, log_durations, log_depths))
    return D

# Function to compute incremental precipitation depths
def compute_incremental_depths(D, total_duration):
    """
    Turn cumulative depths into per-hour increments for every ARI.

    For each array in ``D`` the result holds I(t) = D(t) - D(t-1) with
    D(0) taken as 0, so the first entry equals the first cumulative depth.

    Parameters:
    - D (dict): ARI label -> NumPy array of cumulative depths.
    - total_duration (int): number of hourly steps; sets the output length.

    Returns:
    - dict with the same keys as ``D`` mapping to arrays of increments.
    """
    def _hourly_increments(cumulative):
        steps = np.empty(total_duration)
        steps[0] = cumulative[0]          # first hour: D(1) - D(0) with D(0) = 0
        steps[1:] = np.diff(cumulative)   # remaining hours: successive differences
        return steps

    return {ari: _hourly_increments(D_t) for ari, D_t in D.items()}

# Function to assign incremental depths using the Alternating Block Method
def assign_alternating_block(sorted_depths, max_depth, central_index, T):
    """
    Lay out incremental depths around a peak using the Alternating Block Method.

    ``max_depth`` is placed at ``central_index``. One occurrence of it is
    removed from a copy of ``sorted_depths`` and the remaining values are
    placed, in the order given, alternately to the right and to the left of
    the peak, starting with the right. When the preferred side has no free
    slot the other side is used instead.

    Parameters:
    - sorted_depths (list): incremental depths in the order they should be
      placed; the caller's list is not modified.
    - max_depth: the peak value; must be present in ``sorted_depths``.
    - central_index (int): slot that receives the peak.
    - T (int): number of slots in the hyetograph.

    Returns:
    - list of length ``T`` with the arranged depths.
    """
    blocks = [0.0] * T
    blocks[central_index] = max_depth

    pending = sorted_depths.copy()
    pending.remove(max_depth)

    next_left, next_right = central_index - 1, central_index + 1
    prefer_right = True  # the first block after the peak goes to the right

    for amount in pending:
        room_right = next_right < T
        room_left = next_left >= 0
        if not (room_right or room_left):
            print("Warning: Not all incremental depths assigned.")
            break

        # Take the preferred side when it has room, otherwise the other one.
        use_right = room_right if prefer_right else not room_left
        if use_right:
            blocks[next_right] = amount
            next_right += 1
        else:
            blocks[next_left] = amount
            next_left -= 1

        prefer_right = not prefer_right

    return blocks

# Function to generate the hyetograph for a given ARI
def generate_hyetograph(incremental_depths, position_percent, T):
    """
    Build a hyetograph for one ARI with the Alternating Block Method.

    Parameters:
    - incremental_depths (numpy.ndarray): per-hour incremental depths.
    - position_percent (number): where the peak block sits, as a percentage
      of the storm duration.
    - T (int): total number of hourly blocks.

    Returns:
    - list of length ``T`` with the depths arranged around the peak.
    """
    peak = np.max(incremental_depths)
    descending = sorted(incremental_depths.tolist(), reverse=True)

    # Convert the percentage to a zero-based slot, then clamp it into range
    # (upper bound first, lower bound second).
    peak_slot = int(round(T * position_percent / 100)) - 1
    if peak_slot > T - 1:
        peak_slot = T - 1
    if peak_slot < 0:
        peak_slot = 0

    return assign_alternating_block(descending, peak, peak_slot, T)

# Function to save the hyetograph to a CSV file
def save_hyetograph(hyetograph, ari, output_dir, position_percent, total_duration):
    """
    Write one hyetograph to a CSV file and report where it was saved.

    The file has two columns, 'Time_hour' (1..total_duration) and
    'Precipitation_in' (the hyetograph values), and is named
    'hyetograph_ARI_<ari>_years_pos<position_percent>pct_<total_duration>hr.csv'
    inside ``output_dir``.
    """
    hours = np.arange(1, total_duration + 1)
    table = pd.DataFrame({'Time_hour': hours, 'Precipitation_in': hyetograph})

    output_file = os.path.join(
        output_dir,
        f'hyetograph_ARI_{ari}_years_pos{position_percent}pct_{total_duration}hr.csv',
    )
    table.to_csv(output_file, index=False)
    print(f"Hyetograph for ARI {ari} years saved to {output_file}")

# User Inputs
# --------------------
# Set the path to your input CSV file from NOAA Atlas 14.
# The path is assembled with os.path.join so it resolves on any operating
# system; a literal backslash separator only works on Windows and forms an
# invalid string escape sequence.
input_csv = os.path.join('data', 'PF_Depth_English_PDS_DavisCA.csv')  # Update this path if necessary

# Set the output directory where hyetograph CSV files will be saved
output_dir = 'hyetographs'

# Set the position percentage for the maximum incremental depth block
# Choose from 25, 33, 50, 67, or 75
position_percent = 50  # Default is 50

# Set the total storm duration in hours
total_duration = 24  # Default is 24 hours

# Ensure the output directory exists
Path(output_dir).mkdir(parents=True, exist_ok=True)
print(f"Output directory is set to: {output_dir}")

# Read precipitation data; report the problem, then re-raise so the run stops
try:
    df = read_precipitation_data(input_csv)
    print("Successfully read the input CSV file.")
except Exception as e:
    print(f"Error reading input CSV: {e}")
    raise

# Display the first few rows of the DataFrame to verify
# (display() is provided by the notebook environment)
print("\nPrecipitation Frequency Data:")
display(df.head())

# Interpolate cumulative depths at each hour of the storm
try:
    D = interpolate_depths(df, total_duration)
    print("Successfully interpolated precipitation depths.")
except Exception as e:
    print(f"Error during interpolation: {e}")
    raise

# Display interpolated depths for the first ARI
first_ari = df.columns[0]
print(f"\nInterpolated Depths for ARI {first_ari} years:")
print(D[first_ari])

# Compute incremental depths
I = compute_incremental_depths(D, total_duration)
print("Successfully computed incremental depths.")

# Generate and save hyetographs for each ARI
for ari, incremental_depths in I.items():
    hyetograph = generate_hyetograph(incremental_depths, position_percent, total_duration)
    save_hyetograph(hyetograph, ari, output_dir, position_percent, total_duration)

print("\nAll hyetographs have been generated and saved.")


In [None]:
# Plotting the hyetographs (final request from o1-mini)

import matplotlib.pyplot as plt

# Function to plot multiple hyetographs on the same plot
def plot_multiple_hyetographs(aris, position_percent, total_duration, output_dir='hyetographs'):
    """
    Draw the saved hyetographs of several ARIs as overlaid bar charts.

    Each ARI's data is read from the CSV file named
    'hyetograph_ARI_<ari>_years_pos<position_percent>pct_<total_duration>hr.csv'
    in ``output_dir``. ARIs whose file is missing or cannot be read are
    reported and skipped.

    Parameters:
    - aris (list of str or int): Annual Recurrence Intervals to plot (e.g., [1, 2, 5, 10])
    - position_percent (int): Position percentage of the maximum incremental depth block (25, 33, 50, 67, or 75)
    - total_duration (int): Total storm duration in hours
    - output_dir (str): Directory where the hyetograph CSV files are saved
    """
    bar_style = dict(width=0.8, edgecolor='black', alpha=0.5)

    plt.figure(figsize=(14, 7))

    # File names always use the string form of the ARI.
    for ari_str in map(str, aris):
        filename = f'hyetograph_ARI_{ari_str}_years_pos{position_percent}pct_{total_duration}hr.csv'
        filepath = os.path.join(output_dir, filename)

        if not os.path.exists(filepath):
            print(f"Warning: File '{filename}' does not exist in the directory '{output_dir}'. Skipping this ARI.")
            continue

        try:
            hyetograph_df = pd.read_csv(filepath)
            print(f"Successfully read the hyetograph data from '{filename}'.")
        except Exception as read_error:
            print(f"Error reading the hyetograph CSV file '{filename}': {read_error}")
            continue

        # Semi-transparent bars keep overlapping series visible.
        plt.bar(
            hyetograph_df['Time_hour'],
            hyetograph_df['Precipitation_in'],
            label=f'ARI {ari_str} years',
            **bar_style,
        )

    # Axis labels, title, legend and grid
    title_text = (
        f'Comparison of Hyetographs for ARIs {aris}\n'
        f'Position: {position_percent}% | Duration: {total_duration} Hours'
    )
    tick_step = max(1, total_duration // 24)  # thin out x-ticks for long storms

    plt.xlabel('Time (Hour)', fontsize=14)
    plt.ylabel('Incremental Precipitation (inches)', fontsize=14)
    plt.title(title_text, fontsize=16)
    plt.legend()
    plt.xticks(range(1, total_duration + 1, tick_step))
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.tight_layout()
    plt.show()

# Plot Settings for Multiple ARIs
# --------------------
# Directory holding the hyetograph CSV files (must match the directory they were written to)
output_dir = 'hyetographs'

# Total storm duration in hours
total_duration = 24

# Position percentage of the maximum incremental depth block
position_percent = 50

# Annual Recurrence Intervals to include in the comparison plot
aris_to_plot = [1, 2, 5, 10, 25, 50, 100, 200, 500, 1000]

# Draw all selected hyetographs on one figure
plot_multiple_hyetographs(
    aris_to_plot,
    position_percent,
    total_duration,
    output_dir=output_dir,
)


TODO: Revise the code below to run the Davis, CA case and extract the results.