In [1]:
import xarray as xr
import numpy as np
import os
from glob import glob
from tqdm import tqdm
import pandas as pd
import math
from multiprocess import Pool
import warnings

# Silence FutureWarning messages only; every other warning category is still shown
warnings.filterwarnings('ignore', category=FutureWarning)
## Work from the project directory so the relative "./data/..." paths used in this
## notebook resolve. This is an absolute, user-specific path: adjust it when running elsewhere.
os.chdir("/g/data/k10/dl6968/Semi-variogram_AU/")

In [2]:
## daily station metadata table
df = pd.read_csv("./data/BoM_daily_stations_500km.csv")
## manually drop the known-faulty stations, and any station whose End_Year is before 1960
faulty_ids = [40592, 40593, 58090]
keep_rows = ~df["ID"].isin(faulty_ids) & (df["End_Year"] >= 1960)
df = df[keep_rows]
## station coordinates as plain lists, in the same row order as df
daily_lat = list(df["Latitude"].to_numpy())
daily_lon = list(df["Longitude"].to_numpy())

In [3]:
## quantile level (as a fraction, 0.90 = 90th percentile) passed to .quantile()
## when the seasonal rainfall thresholds are computed
percentile = 0.90

## build the per-station percentile dataset first

In [4]:
# Seasonal rainy-day quantile (level given by `percentile`) for every station.
# Column-oriented: entry i of each list belongs to the same station.
# A value of -1 marks "no estimate" (no rainy days at all, or none in that season).
stn_bom_p90 = {"ID": [], "DJF": [], "MAM": [], "JJA": [], "SON": []}
seasons = ["DJF", "MAM", "JJA", "SON"]

# IDs of stations whose NetCDF file could not be opened; these stations get
# no row in stn_bom_p90.
faulty_stn = []

for stn_id in tqdm(df["ID"], leave=True, position=0):
    # Station files are named after the ID zero-padded to six characters
    bom_id = str(stn_id).zfill(6)

    try:
        ds = xr.open_dataset(f"/g/data/k10/dl6968/BoM_daily_station/prcp_pc_ts_qc/{bom_id}.nc")
    except Exception:
        # Best effort: a missing or unreadable file just flags the station.
        # `Exception` (not a bare except) so Ctrl-C / kernel interrupt still
        # stops this long-running loop instead of being recorded as a bad station.
        faulty_stn.append(stn_id)
        continue

    # The context manager closes the file even if the processing below raises
    with ds:
        precip = ds["prcp"].sel(time=slice("1940-03-02", "2024-06-30"))

        # Keep rainy days only (precip > 1, i.e. > 1 mm per the original note)
        rainy_days = precip.where(precip > 1, drop=True)

        stn_bom_p90["ID"].append(stn_id)

        if len(rainy_days) == 0:
            # No rainy days at all: store the -1 sentinel for every season
            for season_name in seasons:
                stn_bom_p90[season_name].append(-1)
        else:
            p90_season = rainy_days.groupby('time.season').quantile(percentile, dim='time', skipna=True)
            # Store the result for each season
            for season_name in seasons:
                try:
                    stn_bom_p90[season_name].append(p90_season.sel(season=season_name).values)
                except KeyError:
                    # This season has no rainy days, so it is absent from the
                    # grouped result; fall back to the -1 sentinel
                    stn_bom_p90[season_name].append(-1)


100%|██████████| 9835/9835 [12:20<00:00, 13.29it/s]  


In [5]:
# Tabulate the collected lists: an ID column plus one column per season
df_p90 = pd.DataFrame(stn_bom_p90)

In [6]:
# Write the seasonal threshold table to CSV (path is relative to the current working directory).
# NOTE(review): with to_csv's default index=True the row index is also written as an
# unnamed first column — confirm downstream readers expect that before changing it.
df_p90.to_csv("./data/BoM_stn_p90_seasonally.csv")