# In [None]:
"""
Drought indicators from P75 daily climatology CSVs (not raw daily series).

Pipeline (per grid cell):
1) Convert precipitation from kg m^-2 s^-1 to mm/day; shift longitudes 0–360 -> -180–180.
2) Map day_of_year -> TRUE calendar months using a NON-LEAP year reference (2021).
   (If DOY=366 appears, it is assigned to December.)
3) Aggregate daily precipitation to MONTHLY TOTALS for historical (2015–2030) and future (2080–2100).
4) Compute monthly anomaly: future - historical.
5) Sort by (lat, lon, month) and compute a 12-month rolling cumulative anomaly per cell (min_periods=1, so each month holds the running sum from month 1 up to that month).
6) Mean annual rainfall per period = SUM of that period's 12 monthly totals.
7) DSI-12 (%) = cumulative anomaly / mean annual rainfall of the respective period * 100.
8) Indicators per location:
   - Probability future DSI-12 exceeds the historical max DSI-12.
   - Change factor = (max future DSI-12) / (max historical DSI-12).
"""

import argparse
import os
import pandas as pd

def doy_to_month_nonleap(series: pd.Series) -> pd.Series:
    """
    Map day_of_year (1..365/366) to calendar month (1..12) using a NON-LEAP year (2021).

    DOY 366 has no date in a non-leap year, so it is assigned to December (month=12).

    Parameters
    ----------
    series : pd.Series
        Day-of-year values. They are cast with ``astype(int)``, so entries that
        cannot be cast (e.g. NaN) raise during the cast.

    Returns
    -------
    pd.Series
        Month numbers (1..12) carrying the same index as ``series``.

    Raises
    ------
    ValueError
        If any value lies outside the range 1..366.
    """
    base = pd.Timestamp('2021-01-01')  # non-leap year
    s = series.astype(int)

    # Reject impossible days instead of silently mapping them to a wrong month
    # (e.g. DOY 0 would otherwise land on Dec 31 of the previous year).
    out_of_range = (s < 1) | (s > 366)
    if out_of_range.any():
        bad = sorted(set(s[out_of_range].tolist()))
        raise ValueError(f"day_of_year values must be within 1..366, got: {bad}")

    # Clipping sends DOY 366 to DOY 365 (Dec 31), which already yields December.
    dates = base + pd.to_timedelta(s.clip(upper=365) - 1, unit='D')
    # Timestamp + Series gives a datetime Series, so the month is read via `.dt`.
    return dates.dt.month

def main(argv=None):
    """
    Command-line entry point: compute drought indicators per (lat, lon) cell.

    Reads the historical and future daily-climatology CSVs (required columns:
    lat, lon, day_of_year, pr), aggregates precipitation to monthly totals,
    derives DSI-12 values, and writes one row per cell with the probability
    that future DSI-12 exceeds the historical maximum, both maxima, and their
    ratio (change factor).

    Parameters
    ----------
    argv : list of str, optional
        Argument list to parse instead of ``sys.argv[1:]``. The default
        (``None``) keeps the normal command-line behavior.

    Raises
    ------
    ValueError
        If either input CSV lacks one of the required columns.
    """
    ap = argparse.ArgumentParser(description="Compute drought indicators (probability & change factor) from P75 climatology CSVs.")
    ap.add_argument("--historical_csv", required=True, help="Historical P75 daily climatology CSV (2015–2030).")
    ap.add_argument("--future_csv", required=True, help="Future P75 daily climatology CSV (2080–2100).")
    ap.add_argument("--output_csv", required=True, help="Output CSV path for drought indicators.")
    args = ap.parse_args(argv)

    # ----------------------------
    # Load data
    # ----------------------------
    hist = pd.read_csv(args.historical_csv)
    fut  = pd.read_csv(args.future_csv)

    # Basic checks: fail early with the names of the absent columns
    required = {'lat', 'lon', 'day_of_year', 'pr'}
    for df_name, df in [("historical", hist), ("future", fut)]:
        missing = required - set(df.columns)
        if missing:
            raise ValueError(f"{df_name} CSV missing columns: {missing}")

    # ----------------------------
    # Units & longitude
    # ----------------------------
    # pr: kg m^-2 s^-1 -> mm/day
    hist['pr'] *= 86400.0
    fut['pr']  *= 86400.0

    # longitudes: 0–360 -> -180–180 (vectorized; values <= 180 are kept as-is)
    for df in (hist, fut):
        df['lon'] = df['lon'].where(df['lon'] <= 180, df['lon'] - 360)

    # ----------------------------
    # True calendar months from DOY (non-leap reference)
    # ----------------------------
    hist['month'] = doy_to_month_nonleap(hist['day_of_year'])
    fut['month']  = doy_to_month_nonleap(fut['day_of_year'])

    # ----------------------------
    # Monthly totals (sum daily -> monthly)
    # ----------------------------
    hist_m = (
        hist.groupby(['lat', 'lon', 'month'], as_index=False)['pr']
            .sum()
            .rename(columns={'pr': 'pr_hist'})
    )
    fut_m = (
        fut.groupby(['lat', 'lon', 'month'], as_index=False)['pr']
           .sum()
           .rename(columns={'pr': 'pr_fut'})
    )

    # ----------------------------
    # Merge & monthly anomalies
    # ----------------------------
    # Inner join: only (lat, lon, month) combinations present in both periods survive.
    merged = pd.merge(hist_m, fut_m, on=['lat', 'lon', 'month'], how='inner')
    merged['pr_anomaly'] = merged['pr_fut'] - merged['pr_hist']

    # Ensure chronological order before rolling
    merged = merged.sort_values(['lat', 'lon', 'month'])

    # ----------------------------
    # 12-month cumulative anomaly
    # ----------------------------
    # Each cell has at most 12 monthly rows, so with min_periods=1 the value at
    # a given month is the running sum of anomalies from the first month up to
    # that month. Dropping the group levels restores the row index so the
    # assignment aligns with `merged`.
    merged['cumulative_anomaly'] = (
        merged.groupby(['lat','lon'])['pr_anomaly']
              .rolling(window=12, min_periods=1)
              .sum()
              .reset_index(level=[0,1], drop=True)
    )

    # ----------------------------
    # Mean annual rainfall per period = SUM of 12 monthly totals
    # ----------------------------
    mean_annual_hist = (
        hist_m.groupby(['lat','lon'], as_index=False)['pr_hist']
              .sum()
              .rename(columns={'pr_hist': 'mean_annual_rainfall_historical'})
    )
    mean_annual_fut = (
        fut_m.groupby(['lat','lon'], as_index=False)['pr_fut']
             .sum()
             .rename(columns={'pr_fut': 'mean_annual_rainfall_future'})
    )
    rainfall = pd.merge(mean_annual_hist, mean_annual_fut, on=['lat','lon'], how='inner')

    merged = pd.merge(merged, rainfall, on=['lat','lon'], how='left')

    # ----------------------------
    # DSI-12 (%) normalized by respective period mean annual rainfall
    # ----------------------------
    # Both series share the same numerator (the future - historical cumulative
    # anomaly); only the normalizing annual total differs.
    merged['DSI-12_hist'] = (merged['cumulative_anomaly'] / merged['mean_annual_rainfall_historical']) * 100.0
    merged['DSI-12_fut']  = (merged['cumulative_anomaly'] / merged['mean_annual_rainfall_future']) * 100.0

    # ----------------------------
    # Max DSI-12 per period & indicators
    # ----------------------------
    max_hist = (
        merged.groupby(['lat','lon'], as_index=False)['DSI-12_hist']
              .max()
              .rename(columns={'DSI-12_hist':'max_historical_dsi12'})
    )
    max_fut = (
        merged.groupby(['lat','lon'], as_index=False)['DSI-12_fut']
              .max()
              .rename(columns={'DSI-12_fut':'max_future_dsi12'})
    )

    merged = pd.merge(merged, max_hist, on=['lat','lon'], how='left')
    merged = pd.merge(merged, max_fut, on=['lat','lon'], how='left')

    # Probability that future monthly DSI-12 exceeds the max historical DSI-12:
    # the mean of the 0/1 flags is the fraction of a cell's months above that max.
    merged['above_max_historical'] = (merged['DSI-12_fut'] > merged['max_historical_dsi12']).astype(int)
    probability = (
        merged.groupby(['lat','lon'], as_index=False)['above_max_historical']
              .mean()
              .rename(columns={'above_max_historical':'prob_future_gt_hist_max'})
    )

    # Change factor of maximum drought severity.
    # NOTE(review): a zero historical maximum is not guarded here; pandas
    # division then produces inf/NaN rather than raising.
    indicators = pd.merge(max_hist, max_fut, on=['lat','lon'], how='inner')
    indicators['change_factor'] = indicators['max_future_dsi12'] / indicators['max_historical_dsi12']

    # ----------------------------
    # Final output
    # ----------------------------
    final = pd.merge(probability, indicators, on=['lat','lon'], how='inner')
    # A bare filename has an empty dirname, and os.makedirs("") raises, so the
    # directory is only created when the path actually names one.
    out_dir = os.path.dirname(args.output_csv)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    final.to_csv(args.output_csv, index=False)
    print(f"Saved drought indicators to: {args.output_csv}")

# Run the pipeline only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()