# Packages

In [1]:
import os
import glob
import pandas as pd

# Functions and constants

# Read data

In [8]:
# Base directory holding the precipitation data.
# Set the DAES_PRECIP_DIR environment variable to run this notebook on another
# machine; when it is not set, the original local path below is used.
path = os.environ.get(
    "DAES_PRECIP_DIR",
    "/Users/edolatamafebrinal/Library/CloudStorage/OneDrive-MonashUniversity/Study@Monash/Semester 3/Data Analysis in Earth Sciences/github/daes_project/precipitation/",
)

# Define folder paths
input_dir = os.path.join(path, "extreme_days")  # Directory with filtered rainfall data
output_dir = os.path.join(path, "event_number")  # Directory to save results
output_file = os.path.join(output_dir, "monthly_event_number.csv")  # Output file path

# Ensure output directory exists (exist_ok avoids a separate check-then-create step)
os.makedirs(output_dir, exist_ok=True)

# Analysis

In [11]:
# Count extreme-rainfall events per calendar month (year ignored) for every
# station file in `input_dir`, combine them into one table (rows = months,
# columns = stations, plus an all-station total) and write it to `output_file`.

ALL_MONTHS = [f"{month:02d}" for month in range(1, 13)]  # '01' .. '12'
REQUIRED_COLUMNS = {"date", "station", "rr"}

# Per-station Series of event counts, keyed by station name
monthly_counts = {}

# Iterate through each file in extreme_days (filtered extreme events).
# Sorted so the station columns come out in the same order on every run;
# os.listdir returns entries in arbitrary order.
for file_name in sorted(os.listdir(input_dir)):
    if not file_name.endswith('.csv'):  # Only CSV files are processed
        continue

    file_path = os.path.join(input_dir, file_name)
    df = pd.read_csv(file_path)

    # Ensure necessary columns exist
    if not REQUIRED_COLUMNS.issubset(df.columns):
        print(f"⚠ Warning: Missing columns in {file_name}. Skipping.")
        continue

    # A header-only file has no first row to read the station name from
    if df.empty:
        print(f"⚠ Warning: No rows in {file_name}. Skipping.")
        continue

    # Station name is taken from the first row of the file
    station_name = df["station"].iloc[0]
    if station_name in monthly_counts:
        # Same behavior as before (the later file wins), but no longer silent
        print(f"⚠ Warning: Station {station_name} appears in more than one file; using {file_name}.")

    # Convert 'date' to datetime & keep only the month as a 2-digit string
    df["Month"] = pd.to_datetime(df["date"]).dt.strftime('%m')  # '01' to '12'

    # Count non-null 'rr' values per month (ignoring year)
    month_counts = df.groupby("Month")["rr"].count()

    # Store in dictionary (each station becomes a column)
    monthly_counts[station_name] = month_counts

# Build the month x station table. Reindexing to ALL_MONTHS keeps months in
# which no station recorded an event (they would otherwise be missing from the
# table entirely) and fixes the row order; counts are cast back to integers
# because the NaN fill turns them into floats.
df_monthly = (
    pd.DataFrame(monthly_counts)
    .reindex(ALL_MONTHS)
    .fillna(0)
    .astype(int)
)

# Add "Monthly Event Number" (sum across stations)
df_monthly["Monthly Event Number"] = df_monthly.sum(axis=1).astype(int)

# Make 'Month' the first column; naming the index explicitly avoids depending
# on whether pandas carried the index name through the steps above
df_monthly = df_monthly.rename_axis("Month").reset_index()

# Save to CSV
df_monthly.to_csv(output_file, index=False)

print(f"✅ Monthly events number saved: {output_file}")

✅ Monthly events number saved: /Users/edolatamafebrinal/Library/CloudStorage/OneDrive-MonashUniversity/Study@Monash/Semester 3/Data Analysis in Earth Sciences/github/daes_project/precipitation/event_number/monthly_event_number.csv


# Plots