In [2]:
from __future__ import annotations
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import xarray as xr
import fsspec
import s3fs
from datetime import date, datetime
from typing import Any, Dict, Iterable, List, Optional
import asyncio
from pathlib import Path
import zarr

In [3]:
# REST endpoint of the POWER daily point API.
NASA_POWER_BASE = "https://power.larc.nasa.gov/api/temporal/daily/point"

# Fallback HTTPS locations of the daily LST Zarr stores in the public
# (anonymous-access) POWER bucket on AWS S3.
SYN1DAILY_ZARR_HINT = "https://nasa-power.s3.us-west-2.amazonaws.com/syn1deg/temporal/power_syn1deg_daily_temporal_lst.zarr"
MERRA2DAILY_ZARR_HINT = "https://nasa-power.s3.us-west-2.amazonaws.com/merra2/temporal/power_merra2_daily_temporal_lst.zarr"

# Default variable sets.
# Solar: SRAD source in W m^-2, converted downstream to MJ m^-2 d^-1.
SOLAR_VARS = "ALLSKY_SFC_SW_DWN".split()
# Meteorology: temperatures, precipitation, dew point, wind speed, humidity.
MET_VARS = "T2M T2M_MAX T2M_MIN PRECTOTCORR T2MDEW WS2M RH2M".split()

In [4]:
# Helper functions

# Find the daily LST Zarr store under a given bucket prefix.
def _discover_daily_zarr(prefix: str) -> str:
    """Discover a DAILY temporal LST Zarr under a given POWER product prefix.
    prefix examples: "nasa-power/syn1deg/temporal/" or "nasa-power/merra2/temporal/"
    Returns an HTTPS URL.
    """
    fs = s3fs.S3FileSystem(anon=True)
    keys = [p for p in fs.ls(prefix) if p.endswith(".zarr")]
    # Prefer names containing daily + temporal + lst
    for k in keys:
        low = k.lower()
        if ("daily" in low) and ("temporal" in low) and ("lst" in low):
            # Strip leading bucket name when forming HTTPS URL
            path = k.split("nasa-power/", 1)[1]
            return f"https://nasa-power.s3.us-west-2.amazonaws.com/{path}"
    # Fallback: if nothing matches, raise
    raise RuntimeError(f"No DAILY LST Zarr found under {prefix}")

def _open_power_zarr(zarr_url: str) -> xr.Dataset:
    store = fsspec.get_mapper(zarr_url)
    return xr.open_zarr(store, consolidated=True)


def _slice_point(ds: xr.Dataset,
                 latitude: float,
                 longitude: float,
                 start_date: date,
                 end_date: date,
                 variables: Iterable[str]) -> xr.Dataset:
    avail = [v for v in variables if v in ds.data_vars]
    if not avail:
        raise KeyError("None of the requested variables are present. Available examples: "
                       + ", ".join(list(ds.data_vars)[:25]))
    sub = ds[avail].sel(lat=latitude, lon=longitude, method="nearest").sel(
        time=slice(datetime.combine(start_date, datetime.min.time()),
                   datetime.combine(end_date, datetime.min.time()))
    )
    return sub

In [5]:
async def get_power_s3_daily(latitude: float,
                             longitude: float,
                             start_date: date,
                             end_date: date,
                             include_srad: bool = True,
                             include_met: bool = True,
                             syn1_url: Optional[str] = None,
                             merra2_url: Optional[str] = None):
    """Fetch daily POWER data for one point from the S3/Zarr stores and merge solar + meteorology.

    - Solar radiation comes from the SYN1deg store: ALLSKY_SFC_SW_DWN (W m^-2)
      is multiplied by 0.0864 and exposed as SRAD (MJ m^-2 d^-1).
    - Meteorology (MET_VARS) comes from the MERRA-2 store; T2M_MAX, T2M_MIN and
      PRECTOTCORR are renamed to TMAX, TMIN and RAIN.

    Args:
        latitude, longitude: Point of interest; the nearest grid cell is used.
        start_date, end_date: Date window passed to ``_slice_point``.
        include_srad: Include the solar (SYN1deg) source.
        include_met: Include the meteorology (MERRA-2) source.
        syn1_url, merra2_url: Explicit Zarr URLs. When omitted the store is
            discovered by listing the bucket, falling back to the built-in hints.

    Returns:
        * ``pandas.DataFrame`` with a ``time`` column plus the selected variables
          on success. When both sources are requested the frames are
          inner-joined on ``time``. Request metadata (source, coordinates,
          dates and the resolved store URLs) is attached to ``DataFrame.attrs``.
        * ``dict`` (request metadata plus an ``"error"`` message) when both
          ``include_srad`` and ``include_met`` are False.
        * ``None`` -- or the meteorology-only frame if the failure happened
          while processing the solar source -- when a step fails. The failure is
          logged with its traceback rather than raised.
    """
    import logging  # function-scope import keeps this block self-contained

    # 86400 s per day / 1e6 J per MJ: mean power (W m^-2) -> daily energy (MJ m^-2 d^-1).
    wm2_to_mj_per_day = 0.0864

    out: Dict[str, Any] = {
        "source": "s3-zarr",
        "latitude": latitude,
        "longitude": longitude,
        "start": start_date.isoformat(),
        "end": end_date.isoformat(),
    }

    # Nothing requested: report it without touching the network.
    if not (include_srad or include_met):
        return {**out, "error": "No data sources selected: set include_srad and/or include_met."}

    # Resolve URLs (provided first; else discover; else fall back to hints).
    # Discovery lists the bucket over the network, so callers run these helpers
    # through asyncio.to_thread.
    def _resolve_syn1() -> str:
        if syn1_url:
            return syn1_url
        try:
            return _discover_daily_zarr("nasa-power/syn1deg/temporal/")
        except Exception:
            return SYN1DAILY_ZARR_HINT

    def _resolve_merra2() -> str:
        if merra2_url:
            return merra2_url
        try:
            return _discover_daily_zarr("nasa-power/merra2/temporal/")
        except Exception:
            return MERRA2DAILY_ZARR_HINT

    def _to_frame(sub):
        # Converting the selection to a DataFrame is where the values are
        # actually read, so this also runs in a worker thread.
        return sub.to_dataframe().reset_index()

    df = None
    try:
        # Open datasets (in threads to avoid blocking the event loop).
        ds_sol = None
        ds_met = None
        if include_srad:
            url_sol = await asyncio.to_thread(_resolve_syn1)
            ds_sol = await asyncio.to_thread(_open_power_zarr, url_sol)
            out["syn1_url"] = url_sol
        if include_met:
            url_met = await asyncio.to_thread(_resolve_merra2)
            ds_met = await asyncio.to_thread(_open_power_zarr, url_met)
            out["merra2_url"] = url_met

        # Slice and materialise each source.
        if ds_met is not None:
            sub_met = await asyncio.to_thread(
                _slice_point, ds_met, latitude, longitude, start_date, end_date, MET_VARS
            )
            df_met = await asyncio.to_thread(_to_frame, sub_met)
            df = df_met.rename(
                columns={"T2M_MAX": "TMAX", "T2M_MIN": "TMIN", "PRECTOTCORR": "RAIN"}
            )
        if ds_sol is not None:
            sub_sol = await asyncio.to_thread(
                _slice_point, ds_sol, latitude, longitude, start_date, end_date, SOLAR_VARS
            )
            df_sol = await asyncio.to_thread(_to_frame, sub_sol)
            df_sol = df_sol.assign(
                SRAD=df_sol["ALLSKY_SFC_SW_DWN"].astype(float) * wm2_to_mj_per_day
            )[["time", "SRAD"]]
            df = df_sol if df is None else pd.merge(df, df_sol, on="time", how="inner")

        # Keep the request metadata with the data instead of discarding it.
        df.attrs.update(out)
    except Exception:
        # Best-effort contract is preserved (no exception escapes), but the
        # exception type and traceback are now reported.
        logging.getLogger(__name__).exception(
            "get_power_s3_daily failed (lat=%s, lon=%s, %s..%s)",
            latitude, longitude, out["start"], out["end"],
        )
    return df

In [6]:
df = await get_power_s3_daily(
        latitude=42.0,
        longitude=-93.5,
        start_date=date(2020, 1, 1),
        end_date=date(2020, 3, 31),
        include_srad=True,
        include_met=True
    )

In [7]:
# Add a compact YYYYMMDD string column derived from the timestamp column.
df["date"] = df["time"].pipe(pd.to_datetime).dt.strftime("%Y%m%d")

In [8]:
# Output column order: the date string first, then every data column
# (timestamp and coordinate columns are left out).
cols = ["date", *[name for name in df.columns if name not in {"time", "lat", "lon", "date"}]]

In [9]:
# Round every data column to one decimal place. Columns that cannot be cast to
# float (e.g. text) are intentionally left untouched.
for c in cols:
    if c == "date":
        continue
    try:
        df[c] = df[c].astype(float).round(1)
    except (TypeError, ValueError):
        # Only conversion failures are expected here; any other error should
        # surface instead of being silently ignored.
        continue

In [22]:
# Request parameters recorded in the metadata dictionary; these repeat the
# literals passed to get_power_s3_daily and must be kept in sync with that call.
latitude, longitude = 42.0, -93.5
start_date, end_date = date(2020, 1, 1), date(2020, 3, 31)

In [23]:
# Metadata envelope describing the request; records are attached in a later cell.
out: Dict[str, Any] = dict(
    source="s3-zarr",
    latitude=latitude,
    longitude=longitude,
    start=start_date.isoformat(),
    end=end_date.isoformat(),
)

In [24]:
# One dictionary per day, restricted to the output columns, stored on the envelope.
out["records"] = records = df[cols].to_dict(orient="records")
# Data variable names, i.e. every output column except the date string.
out["variables"] = list(filter(lambda name: name != "date", cols))

In [28]:
def convert_to_wth_format(data_dict: Dict[str, Any],
                         station_name: str = "S3PWR",
                         elevation: float = 0.0) -> str:
    """Convert NASA POWER data to ICASA .wth format.

    Args:
        data_dict: Dictionary with a 'records' key containing daily data. Each
            record needs a 'date' string in YYYYMMDD form. Optional 'latitude'
            and 'longitude' keys default to 0.0.
        station_name: Station identifier. Only the first four characters are
            written, so the fixed-width station line stays aligned with its
            header.
        elevation: Station elevation in meters.

    Returns:
        String in ICASA .wth format (lines joined with newlines, no trailing
        newline). Dates are written as YYYYDDD; missing values as -99.0.

    Raises:
        ValueError: If ``data_dict`` carries an 'error' key or has no records.
    """
    if "error" in data_dict:
        raise ValueError(f"Cannot convert data with error: {data_dict['error']}")

    records = data_dict.get("records", [])
    if not records:
        raise ValueError("No data records found")

    # Extract metadata
    latitude = data_dict.get("latitude", 0.0)
    longitude = data_dict.get("longitude", 0.0)

    missing = -99.0  # sentinel written for unknown / absent values

    # (record key, column label) pairs in output column order.
    variable_map = (
        ("T2M", "T2M"),      # Average temperature (°C)
        ("TMAX", "TMAX"),    # Maximum temperature (°C)
        ("TMIN", "TMIN"),    # Minimum temperature (°C)
        ("RAIN", "RAIN"),    # Precipitation (mm)
        ("SRAD", "SRAD"),    # Solar radiation (MJ/m²/day)
        ("T2MDEW", "TDEW"),  # Dew point temperature (°C)
        ("WS2M", "WIND"),    # Wind speed (m/s)
        ("RH2M", "RH2M"),    # Relative humidity (%)
    )

    # Station line. Every field is exactly as wide as its header column:
    # INSI 6, LAT 9, LONG 9, ELEV 6, then four 6-wide fields (TAV, AMP, REFHT,
    # WNDHT). The identifier is cut to 4 characters so a longer name cannot
    # shift the columns that follow.
    insi = station_name[:4]
    station_line = (
        f"  {insi:>4} {latitude:8.3f} {longitude:8.3f} {elevation:5.0f}"
        + f" {missing:5.1f}" * 4
    )

    wth_lines = [
        "*WEATHER DATA : NASA POWER via S3/Zarr",
        "",
        "@ INSI      LAT     LONG  ELEV   TAV   AMP REFHT WNDHT",
        station_line,
        "",
    ]

    # The set of data columns is decided from the first record only.
    sample_record = records[0]
    available_vars = [
        (nasa_var, icasa_var)
        for nasa_var, icasa_var in variable_map
        if nasa_var in sample_record
    ]
    wth_lines.append(
        "@  DATE" + "".join(f"{icasa_var:>8}" for _, icasa_var in available_vars)
    )

    # Add data records
    for record in records:
        date_str = record["date"]
        day = datetime(int(date_str[:4]), int(date_str[4:6]), int(date_str[6:8]))
        # YYYYDDD: four-digit year followed by the zero-padded day of year.
        formatted_date = f"{day.year}{day.timetuple().tm_yday:03d}"

        cells = []
        for nasa_var, _ in available_vars:
            value = record.get(nasa_var, missing)
            if value is None or pd.isna(value):
                value = missing
            cells.append(f"{value:8.1f}")

        wth_lines.append(f"{formatted_date:>7}" + "".join(cells))

    return "\n".join(wth_lines)

In [29]:
# Render the collected records as .wth text for station "NASA" at 40 m elevation.
data_dict, station_name, elevation = out, "NASA", 40.0
wth_text = convert_to_wth_format(data_dict, station_name, elevation)
print(wth_text)

*WEATHER DATA : NASA POWER via S3/Zarr

@ INSI      LAT     LONG  ELEV   TAV   AMP REFHT WNDHT
  NASA   42.000  -93.500    40  -99.0  -99.0  -99.0  -99.0

@  DATE     T2M    TMAX    TMIN    RAIN    SRAD    TDEW    WIND    RH2M
2020001    -1.2     4.9    -7.1     0.4     4.6    -2.4     4.1    91.7
2020002     0.3     5.0    -3.6     5.1     5.6    -0.5     2.7    94.5
2020003    -1.2     2.2    -3.8     2.3     2.7    -2.2     3.3    93.5
2020004    -3.6     0.7    -7.5     0.0     2.7    -6.1     3.5    84.8
2020005     0.2     5.0    -3.7     0.0     7.4    -2.5     6.0    83.5
2020006    -1.4     2.2    -4.0     0.3     4.0    -3.4     1.5    87.2
2020007    -2.9     3.0    -7.4     0.0     6.5    -5.5     3.6    84.3
2020008    -7.5    -3.3   -12.6     0.0     8.2   -12.5     5.5    67.6
2020009     2.5    10.5    -2.8     0.0     5.8     1.0     5.9    90.4
2020010    -4.3    -0.8    -9.2     2.8     4.8    -7.6     5.7    78.8
2020011   -10.4    -6.7   -13.1     0.1     7.5   -13

In [30]:
# Dump the full metadata dictionary, including every per-day record, to stdout.
print(out)

{'source': 's3-zarr', 'latitude': 42.0, 'longitude': -93.5, 'start': '2020-01-01', 'end': '2020-03-31', 'records': [{'date': '20200101', 'T2M': -1.2, 'TMAX': 4.9, 'TMIN': -7.1, 'RAIN': 0.4, 'T2MDEW': -2.4, 'WS2M': 4.1, 'RH2M': 91.7, 'SRAD': 4.6}, {'date': '20200102', 'T2M': 0.3, 'TMAX': 5.0, 'TMIN': -3.6, 'RAIN': 5.1, 'T2MDEW': -0.5, 'WS2M': 2.7, 'RH2M': 94.5, 'SRAD': 5.6}, {'date': '20200103', 'T2M': -1.2, 'TMAX': 2.2, 'TMIN': -3.8, 'RAIN': 2.3, 'T2MDEW': -2.2, 'WS2M': 3.3, 'RH2M': 93.5, 'SRAD': 2.7}, {'date': '20200104', 'T2M': -3.6, 'TMAX': 0.7, 'TMIN': -7.5, 'RAIN': 0.0, 'T2MDEW': -6.1, 'WS2M': 3.5, 'RH2M': 84.8, 'SRAD': 2.7}, {'date': '20200105', 'T2M': 0.2, 'TMAX': 5.0, 'TMIN': -3.7, 'RAIN': 0.0, 'T2MDEW': -2.5, 'WS2M': 6.0, 'RH2M': 83.5, 'SRAD': 7.4}, {'date': '20200106', 'T2M': -1.4, 'TMAX': 2.2, 'TMIN': -4.0, 'RAIN': 0.3, 'T2MDEW': -3.4, 'WS2M': 1.5, 'RH2M': 87.2, 'SRAD': 4.0}, {'date': '20200107', 'T2M': -2.9, 'TMAX': 3.0, 'TMIN': -7.4, 'RAIN': 0.0, 'T2MDEW': -5.5, 'WS2M': 3.