In [1]:
import os
import pandas as pd
import glob

## Compute multi-indicator agreement at the event level

In [11]:
# Where derived files are written, and the folder holding the input CSV files
output_dir = 'path'
input_dir = 'path/Flash_Dr_CONUS/FDextracted/'

In [3]:
# Names of the per-method indicator columns read from every input CSV
# (edit this list so it matches your actual method column names).
method_columns = [
    'Alencar et al.',
    'Noguera et al.',
    'Osman et al.',
    'Ford and Labosier',
    'Pendergrass et al.',
    'Christian et al.',
]

In [6]:
# Part 1: Create event columns for all CSVs
def create_event_columns(file, methods=None, out_dir=None):
    """Add method-agreement threshold flags to one CSV and save the result.

    For every threshold ``i`` from 1 to the number of method columns, a column
    ``event_{i}_methods`` is added. It holds 1 on rows where the row-wise sum
    of the method columns is at least ``i`` and 0 otherwise.

    Parameters
    ----------
    file : str
        Path to an input CSV. The text after the last underscore in the file
        name (without extension) is used as the HUC4 code in the output name.
    methods : list of str, optional
        Method columns to sum. Defaults to the notebook-level ``method_columns``.
    out_dir : str, optional
        Directory to write to. Defaults to the notebook-level ``output_dir``.
        It is created if it does not exist.

    Returns
    -------
    str
        Path of the CSV that was written.

    Raises
    ------
    KeyError
        If any of the method columns is missing from the CSV.
    """
    if methods is None:
        methods = method_columns
    if out_dir is None:
        out_dir = output_dir

    df = pd.read_csv(file)
    huc4 = os.path.basename(file).split('_')[-1].split('.')[0]

    # The agreement count does not depend on the threshold, so compute it once.
    agreement = df[methods].sum(axis=1)
    for i in range(1, len(methods) + 1):
        df[f'event_{i}_methods'] = (agreement >= i).astype(int)

    # to_csv does not create missing folders; an empty out_dir means the CWD.
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    output_path = os.path.join(out_dir, f"result_HUC4_{huc4}_with_multiple_event_columns.csv")
    df.to_csv(output_path, index=False)
    return output_path

# Apply to all input files.
# Files that already carry the derived suffix are skipped: the input pattern
# also matches them, so when input_dir and output_dir are the same folder a
# re-run would otherwise feed earlier outputs back in as inputs.
derived_suffix = "_with_multiple_event_columns.csv"
input_files = sorted(
    path
    for path in glob.glob(os.path.join(input_dir, "result_HUC4_*.csv"))
    if not path.endswith(derived_suffix)
)
for file in input_files:
    create_event_columns(file)

print("Event columns created and files saved.")

# Part 2: Analyze event duration and frequency
# One threshold column per method, matching what create_event_columns writes.
event_cols = [f'event_{i}_methods' for i in range(1, len(method_columns) + 1)]
summary_results = []

Event columns created and files saved.


In [7]:
def _event_durations(dates, flags):
    """Return the length in days of each consecutive run of 1s in ``flags``.

    ``dates`` is a pandas Series of timestamps aligned with ``flags``. A run
    starts at the first row whose flag is 1 and ends at the first later row
    whose flag is 0. Rows whose flag is neither 0 nor 1 (e.g. NaN) neither
    start nor end a run.
    """
    durations = []
    run_start = None

    for date, flag in zip(dates, flags):
        if flag == 1 and run_start is None:
            run_start = date
        elif flag == 0 and run_start is not None:
            durations.append((date - run_start).days)
            run_start = None

    if run_start is not None:
        # The run is still open at the end of the record. Closed runs are
        # measured up to the first inactive row, so extend this one by the
        # last sampling step to measure it the same way.
        if len(dates) > 1:
            step = dates.iloc[-1] - dates.iloc[-2]
        else:
            step = pd.Timedelta(0)
        durations.append((dates.iloc[-1] - run_start + step).days)

    return durations


def summarize_events(file, event_columns=None, results=None):
    """Count events and compute their mean duration for one HUC4 CSV.

    An event is a run of consecutive rows where an event column equals 1.
    One summary row is produced per event column, with the keys
    ``huc4_code``, ``event_type``, ``number_of_events`` and ``mean_duration``
    (in days; 0 when the column has no events).

    Parameters
    ----------
    file : str
        Path to a CSV with a ``Date`` column and the event columns. The third
        underscore-separated part of the file name is used as the HUC4 code.
    event_columns : list of str, optional
        Event columns to summarize. Defaults to the notebook-level
        ``event_cols``.
    results : list, optional
        List the summary rows are appended to. Defaults to the notebook-level
        ``summary_results``.

    Returns
    -------
    list of dict
        The summary rows produced for this file.
    """
    columns = event_cols if event_columns is None else list(event_columns)
    sink = summary_results if results is None else results

    huc4 = os.path.basename(file).split('_')[2]
    df = pd.read_csv(file, usecols=['Date'] + columns)
    df['Date'] = pd.to_datetime(df['Date'])

    rows = []
    for col in columns:
        durations = _event_durations(df['Date'], df[col])
        rows.append({
            'huc4_code': huc4,
            'event_type': col,
            # Every run that starts yields exactly one duration.
            'number_of_events': len(durations),
            'mean_duration': sum(durations) / len(durations) if durations else 0
        })

    sink.extend(rows)
    return rows

In [8]:
# Apply to all output files with event columns.
# Empty the list first so re-running this cell does not append duplicate rows,
# and sort the paths because glob returns them in arbitrary order.
summary_results.clear()
for file in sorted(glob.glob(os.path.join(output_dir, "result_HUC4_*_with_multiple_event_columns.csv"))):
    summarize_events(file)

In [None]:
# Write the collected summary rows to a single CSV inside input_dir
summary_file = os.path.join(input_dir, 'huc4_event_summary_by_method_threshold.csv')

# Drop the trailing 6 rows (as requested) before saving.
# NOTE(review): the drop is positional, so which rows are removed depends on
# the order in which files were summarized — confirm this is still intended.
results_df = pd.DataFrame(summary_results).iloc[:-6]
results_df.to_csv(summary_file, index=False)

print(f"Event summary saved to {summary_file}")

Event summary saved to C:/Users/gqg5266/OneDrive - The Pennsylvania State University/PennState/FlashDrought_main/Flash_Dr_CONUS/FDextracted_v5_2025/huc4_event_summary_by_method_threshold.csv
