In [3]:
import pandas as pd
import numpy as np
import pandas as pd
from pathlib import Path

In [4]:
# Folder holding one parquet file per site; the file stem becomes the site key.
dir_sfcwind = Path("../data/processed/wind_srepi")

# glob() yields entries in arbitrary, filesystem-dependent order and also
# returns hidden entries (e.g. .ipynb_checkpoints, .DS_Store) that are not
# site data. Skip those and sort so the site order is reproducible.
files_sfcwind = sorted(
    path
    for path in dir_sfcwind.glob("*")
    if not path.name.startswith(".")
)
dict_wind = {file.stem: pd.read_parquet(file) for file in files_sfcwind}

In [None]:
def identify_drought_events(
    df: pd.DataFrame,
    srepi_col: str,
    time_col: str = "time",
    q: float = 0.10,
    min_duration_hours: int = 2,
    max_duration_hours: int = 15 * 24,
):
    """Find runs of consecutive low-index samples ("drought events").

    A sample is flagged when its value in ``srepi_col`` is less than or equal
    to the ``q``-quantile of that column. After sorting by ``time_col``, each
    maximal run of consecutive flagged rows forms one candidate event.

    Parameters
    ----------
    df : pd.DataFrame
        Input table containing ``time_col`` and ``srepi_col``. Not modified.
    srepi_col : str
        Name of the index column to threshold.
    time_col : str
        Name of the timestamp column (anything ``pd.to_datetime`` accepts).
    q : float
        Quantile of ``srepi_col`` used as the drought threshold.
    min_duration_hours, max_duration_hours : int
        Events whose duration is below the minimum or above the maximum are
        discarded.

    Returns
    -------
    (events_df, threshold)
        ``events_df`` has one row per retained event with columns
        ``start_time``, ``end_time``, ``duration_hours``, ``n_points``,
        ``min_srepi``, ``mean_srepi`` and ``threshold``; the columns are
        present even when no event is retained. ``threshold`` is the quantile
        value that was applied.

    Notes
    -----
    Duration is ``end_time - start_time`` (last minus first timestamp of the
    run), so a run made of a single sample has a duration of 0 hours.
    Runs are defined on consecutive rows only; gaps in the timestamps are not
    detected.

    Raises
    ------
    KeyError
        If ``time_col`` or ``srepi_col`` is not a column of ``df``.
    """
    event_columns = [
        "start_time",
        "end_time",
        "duration_hours",
        "n_points",
        "min_srepi",
        "mean_srepi",
        "threshold",
    ]

    # Work on the two relevant columns only; a fresh positional index makes
    # the boolean masks below safe even if the caller's index has duplicates.
    d = df[[time_col, srepi_col]].copy()
    d[time_col] = pd.to_datetime(d[time_col])
    d = d.sort_values(time_col).reset_index(drop=True)

    threshold = d[srepi_col].quantile(q)

    # NaN values compare False and therefore never belong to an event.
    is_drought = d[srepi_col] <= threshold
    if not is_drought.any():
        return pd.DataFrame(columns=event_columns), threshold

    # A run starts where a flagged row follows an unflagged one (or the top of
    # the table) and ends where it precedes an unflagged one (or the bottom).
    is_start = is_drought & ~is_drought.shift(1, fill_value=False)
    is_end = is_drought & ~is_drought.shift(-1, fill_value=False)

    starts = d.loc[is_start, time_col].reset_index(drop=True)
    ends = d.loc[is_end, time_col].reset_index(drop=True)

    # The running count of starts labels every flagged row with its run number.
    run_id = is_start.cumsum()
    stats = (
        d.loc[is_drought, srepi_col]
        .groupby(run_id[is_drought].to_numpy())
        # Flagged rows are never NaN, so "count" equals the run length.
        .agg(n_points="count", min_srepi="min", mean_srepi="mean")
        .reset_index(drop=True)
    )

    events_df = pd.DataFrame(
        {
            "start_time": starts,
            "end_time": ends,
            "duration_hours": (ends - starts).dt.total_seconds() / 3600.0,
            "n_points": stats["n_points"],
            "min_srepi": stats["min_srepi"],
            "mean_srepi": stats["mean_srepi"],
            "threshold": threshold,
        }
    )

    # Drop only events that are provably too short or too long; an undefined
    # (NaN) duration fails both comparisons and is therefore kept.
    duration = events_df["duration_hours"]
    out_of_range = (duration < min_duration_hours) | (duration > max_duration_hours)
    events_df = events_df.loc[~out_of_range, event_columns].reset_index(drop=True)

    return events_df, threshold


In [None]:
def drought_events_all_sites(
    dic_wind_srepi: dict,
    windows: list[str],
    time_col: str = "time",
    q: float = 0.10,
    min_duration_hours: int = 2,
    max_duration_hours: int = 15 * 24,
):
    """Run ``identify_drought_events`` for every site and every window.

    Parameters
    ----------
    dic_wind_srepi : dict
        Mapping ``site -> DataFrame``. Each frame must contain ``time_col``
        and one column named ``srepi_w<window>`` per requested window.
    windows : list[str]
        Window labels (e.g. ``"24h"``). Duplicates are ignored.
    time_col, q, min_duration_hours, max_duration_hours
        Forwarded unchanged to ``identify_drought_events``.

    Returns
    -------
    (events_by_window, thresholds)
        ``events_by_window[w]`` is a DataFrame with the events of all sites
        for window ``w`` plus ``site`` and ``window`` columns (an empty
        DataFrame when no site produced an event).
        ``thresholds[w][site]`` is the threshold used for that site/window.
    """
    # Materialise once and drop repeated labels (keeping first-seen order):
    # ``windows`` is iterated several times, so a one-shot iterable would
    # otherwise be exhausted after the first pass, and a repeated label would
    # collect each site's events twice.
    windows = list(dict.fromkeys(windows))

    frames_by_window = {w: [] for w in windows}
    thresholds = {w: {} for w in windows}

    for site, df in dic_wind_srepi.items():
        for w in windows:
            events_df, thr = identify_drought_events(
                df=df,
                srepi_col=f"srepi_w{w}",
                time_col=time_col,
                q=q,
                min_duration_hours=min_duration_hours,
                max_duration_hours=max_duration_hours,
            )

            # The threshold is recorded even when the site has no events.
            thresholds[w][site] = thr

            if events_df.empty:
                continue
            frames_by_window[w].append(events_df.assign(site=site, window=w))

    events_by_window = {
        w: pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
        for w, frames in frames_by_window.items()
    }

    return events_by_window, thresholds


In [7]:
# Window labels to analyse; each label w is looked up as column "srepi_w<w>".
WINDOWS_WIND = [f"{hours}h" for hours in (1, 4, 12, 24, 48, 72, 120)]

# Detect events for every site/window: 10 % quantile threshold, keeping
# events that last between 2 hours and 15 days.
wind_events_by_window, wind_thresholds = drought_events_all_sites(
    dict_wind,
    WINDOWS_WIND,
    time_col="time",
    q=0.10,
    min_duration_hours=2,
    max_duration_hours=15 * 24,
)


In [None]:

def merge_drought_events(events_by_window: dict) -> pd.DataFrame:
    """Stack the per-window event tables into a single DataFrame.

    Parameters
    ----------
    events_by_window : dict
        Mapping ``window label -> DataFrame`` (values may be ``None`` or
        empty; those entries are skipped). The input frames are not modified.

    Returns
    -------
    pd.DataFrame
        All non-empty tables concatenated with a fresh integer index. The
        ``window`` column is set to the dictionary key, and ``start_time`` /
        ``end_time`` (when present) are converted to datetimes, with
        unparseable values becoming ``NaT``. An empty DataFrame is returned
        when there is nothing to merge.
    """
    labelled = [
        frame.assign(window=label)
        for label, frame in events_by_window.items()
        if frame is not None and not frame.empty
    ]
    if not labelled:
        return pd.DataFrame()

    merged = pd.concat(labelled, ignore_index=True)

    time_columns = [name for name in ("start_time", "end_time") if name in merged.columns]
    for name in time_columns:
        merged[name] = pd.to_datetime(merged[name], errors="coerce")

    return merged
# Combine the per-window event tables into one long table.
wind_events = merge_drought_events(wind_events_by_window)

In [16]:
# Preview the merged event table (the rendered output is truncated to the first and last rows).
wind_events

Unnamed: 0,start_time,end_time,duration_hours,n_points,min_srepi,mean_srepi,threshold,site,window
0,2014-01-06 11:00:00,2014-01-06 14:00:00,3.0,4,-1.097019,-1.097019,-1.097019,0WAMBOWF,1h
1,2014-01-11 17:00:00,2014-01-11 23:00:00,6.0,7,-1.097019,-1.097019,-1.097019,0WAMBOWF,1h
2,2014-01-12 17:00:00,2014-01-12 20:00:00,3.0,4,-1.097019,-1.097019,-1.097019,0WAMBOWF,1h
3,2014-01-18 01:00:00,2014-01-18 03:00:00,2.0,3,-1.097019,-1.097019,-1.097019,0WAMBOWF,1h
4,2014-01-18 17:00:00,2014-01-19 01:00:00,8.0,9,-1.097019,-1.097019,-1.097019,0WAMBOWF,1h
...,...,...,...,...,...,...,...,...,...
464523,2023-07-25 17:00:00,2023-07-27 07:00:00,38.0,39,-1.734504,-1.528800,-1.280706,YSWF,120h
464524,2023-08-16 04:00:00,2023-08-16 23:00:00,19.0,20,-1.449346,-1.355444,-1.280706,YSWF,120h
464525,2023-10-10 07:00:00,2023-10-11 08:00:00,25.0,26,-1.658804,-1.505744,-1.280706,YSWF,120h
464526,2023-11-21 03:00:00,2023-11-22 07:00:00,28.0,29,-1.532508,-1.431066,-1.280706,YSWF,120h


In [17]:
# Persist the merged events. Create the destination folder first so the write
# does not fail on a fresh checkout where ../data/curated does not exist yet.
path_wind_events = Path("../data/curated/wind_drought_events.parquet")
path_wind_events.parent.mkdir(parents=True, exist_ok=True)
wind_events.to_parquet(path_wind_events)