# Packages

In [39]:
import os
import glob
import pandas as pd

# Functions and constants

In [40]:
# Function to filter extreme rainfall events
def filter_extreme_rainfall(file_path, output_dir, thresholds=None):
    """Save the rows of one rainfall CSV that reach their station's threshold.

    The file at ``file_path`` is read with ``pd.read_csv`` and must contain the
    columns ``date``, ``station`` and ``rr``. A row is kept when its ``rr``
    value is greater than or equal to the threshold registered for its
    ``station``; rows whose station has no registered threshold are dropped.
    The kept rows are written to ``output_dir`` under the input's file name.

    Parameters
    ----------
    file_path : str
        Path of the CSV file to filter.
    output_dir : str
        Directory the filtered CSV is written into. It is not created here.
    thresholds : mapping, optional
        Station name -> minimum ``rr`` value. When omitted, the notebook-level
        ``threshold_dict`` is used, so existing two-argument calls keep working.

    Returns
    -------
    None
        Nothing is written (and a message is printed) when a required column
        is missing or when no row reaches its threshold.
    """
    if thresholds is None:
        # Fall back to the lookup built in the analysis cell of the notebook.
        thresholds = threshold_dict

    # Read the CSV file
    df = pd.read_csv(file_path)

    # Ensure the required columns exist
    required_columns = {"date", "station", "rr"}
    if not required_columns.issubset(df.columns):
        print(f"⚠ Warning: Missing columns in {file_path}. Skipping this file.")
        return

    # Look up each row's threshold in one vectorised pass. Stations absent from
    # the mapping become NaN, and any comparison with NaN is False, so those
    # rows are excluded.
    station_threshold = df["station"].map(thresholds)
    df_filtered = df[df["rr"] >= station_threshold]

    # Skip saving if no rows match
    if df_filtered.empty:
        print(f"⚠ No extreme rainfall events found in {file_path}. Skipping save.")
        return

    # Save the filtered data
    output_file = os.path.join(output_dir, os.path.basename(file_path))  # Keep the same filename
    df_filtered.to_csv(output_file, index=False)

    print(f"✅ Filtered data saved: {output_file}")

# Read data

In [43]:
# Define folder paths
# NOTE: `path` is an absolute location on the author's machine; change it to
# your local copy of the project's precipitation folder before running.
path = "/Users/edolatamafebrinal/Library/CloudStorage/OneDrive-MonashUniversity/Study@Monash/Semester 3/Data Analysis in Earth Sciences/github/daes_project/precipitation/"
input_dir = os.path.join(path, "10m_precipitation")  # Directory with rainfall data
output_dir = os.path.join(path, "extreme_days")  # Directory to save filtered data
threshold_file = os.path.join(path, "percentile.csv")  # Threshold file

# Ensure output directory exists; exist_ok makes this safe to re-run and
# removes the separate check-then-create step.
os.makedirs(output_dir, exist_ok=True)

# Load 99.9th percentile threshold data (one row per station)
threshold_df = pd.read_csv(threshold_file)
threshold_df

Unnamed: 0,station,99.9th percentile
0,AAWS Balitbu Sumani,4.1
1,AAWS GAW Bukit Kototabang,5.7
2,AAWS Harau (AWS ),6.2
3,AAWS Staklim Padang Pariaman,10.4
4,ARG Ampek Nagari,9.2
5,ARG Batang Kapas,7.6
6,ARG Guguak,6.6
7,ARG Kapur IX,10.2
8,ARG Linggosari Baganti,8.4
9,ARG Rao Pasaman,6.2


# Analysis

In [44]:
# Convert the threshold table to a station -> threshold dictionary for easy lookup
threshold_dict = dict(zip(threshold_df["station"], threshold_df["99.9th percentile"]))


# Process each file in the input directory.
# os.listdir returns entries in arbitrary order, so sort them to make the
# processing order (and the log below) reproducible between runs.
for file_name in sorted(os.listdir(input_dir)):
    if file_name.endswith('.csv'):  # Only process CSV files
        file_path = os.path.join(input_dir, file_name)
        filter_extreme_rainfall(file_path, output_dir)

print("🚀 Processing complete! All extreme rainfall events have been saved.")

✅ Filtered data saved: /Users/edolatamafebrinal/Library/CloudStorage/OneDrive-MonashUniversity/Study@Monash/Semester 3/Data Analysis in Earth Sciences/github/daes_project/precipitation/extreme_days/AWS PASAMAN BARAT_160040_2015-2023.csv
✅ Filtered data saved: /Users/edolatamafebrinal/Library/CloudStorage/OneDrive-MonashUniversity/Study@Monash/Semester 3/Data Analysis in Earth Sciences/github/daes_project/precipitation/extreme_days/ARG LINGGOSARI BAGANTI_150145_2015-2023.csv
✅ Filtered data saved: /Users/edolatamafebrinal/Library/CloudStorage/OneDrive-MonashUniversity/Study@Monash/Semester 3/Data Analysis in Earth Sciences/github/daes_project/precipitation/extreme_days/ARG SMPK TANAH DATAR_STA3258_2015-2023.csv
✅ Filtered data saved: /Users/edolatamafebrinal/Library/CloudStorage/OneDrive-MonashUniversity/Study@Monash/Semester 3/Data Analysis in Earth Sciences/github/daes_project/precipitation/extreme_days/ARG SOLOK_STA0091_2015-2023.csv
✅ Filtered data saved: /Users/edolatamafebrinal/Li

# Plots