In [None]:
"""
NLDAS Noah Actual Evapotranspiration (ET) Download for Iowa (2019-2023)

This notebook downloads NLDAS-2 Noah Land Surface Model monthly actual
evapotranspiration data for Iowa using NASA's earthaccess library.

PURPOSE:
--------
Download actual ET from the Noah LSM to compare with NLDAS forcing PET
for irrigation detection:
    PET - ET = Irrigation signal (human-induced ET)

Where ET > PET (i.e. PET - ET is negative), water inputs beyond atmospheric
demand indicate irrigation.

Dataset: NLDAS Noah Land Surface Model L4 Monthly 0.125 x 0.125 degree V2.0
Short Name: NLDAS_NOAH0125_M
DOI: 10.5067/WB224IA3PVOJ
Variable: EVPsfc (Total Evapotranspiration)
Units: kg/m2 (monthly accumulated, equivalent to mm)

Noah ET Components (energy units, W/m2):
  - EVBSsfc: Direct soil evaporation
  - EVCWsfc: Canopy water evaporation
  - TRANSsfc: Plant transpiration
  - SBSNOsfc: Snow sublimation

Note: The monthly data is accumulated from hourly Noah model output.
      EVPsfc is already a monthly total in kg/m2 (= mm water equivalent).
      No rate conversion is needed.

Requirements:
- earthaccess library
- NASA Earthdata account (~/.netrc)

Adapted for SIF-ET-Irrigation Analysis Project - Iowa 2019-2023
"""

In [None]:
# Install earthaccess if needed. In a notebook, prefer running
# `%pip install earthaccess` in its own cell so the package is installed into
# the kernel's environment (left disabled here):
# pip install earthaccess

import earthaccess
# Echo the installed earthaccess version in the cell output.
print(f"earthaccess version: {earthaccess.__version__}")

In [None]:
from pathlib import Path

# Authenticate with NASA Earthdata (uses ~/.netrc credentials)
auth = earthaccess.login()
if not auth.authenticated:
    # Fail here with a clear message instead of letting the later download
    # step fail with a less obvious authorization error.
    raise RuntimeError(
        "NASA Earthdata login failed; check the credentials in ~/.netrc"
    )

# =============================================================================
# Configuration for Iowa 2019-2023
# =============================================================================

# Temporal range: whole calendar years from start_year through end_year
start_year = 2019
end_year = 2023
temporal_range = (f"{start_year}-01-01", f"{end_year}-12-31")

# One monthly granule is expected per month of the requested period; both
# numbers are derived from the years above so the message below stays correct
# if the period is changed.
n_years = end_year - start_year + 1
expected_granules = n_years * 12

# Iowa bounding box (west, south, east, north)
iowa_bbox = (-96.64, 40.38, -90.14, 43.50)

# Project paths - relative to notebook location
project_root = Path("../..").resolve()
local_path = project_root / "data" / "raw" / "NLDAS_Noah"
local_path.mkdir(parents=True, exist_ok=True)

print(f"Project root: {project_root}")
print(f"Download path: {local_path}")
print(f"Temporal range: {temporal_range}")
print(f"Expected granules: {expected_granules} (12 months x {n_years} years)")
print(f"Bounding box (Iowa): {iowa_bbox}")

# =============================================================================
# Search and Download NLDAS Noah LSM Monthly Data
# =============================================================================

print("\nSearching NLDAS Noah LSM monthly data...")

# Query by DOI, restricted to the configured period and the Iowa bounding box.
results = earthaccess.search_data(
    doi="10.5067/WB224IA3PVOJ",  # NLDAS Noah Land Surface Model L4 Monthly
    temporal=temporal_range,
    bounding_box=iowa_bbox
)

print(f"Found {len(results)} granules")

# One monthly granule is expected per month of the requested period. A
# different count is reported (not treated as fatal) so that gaps or extra
# granules are noticed before the data is processed.
expected_count = (end_year - start_year + 1) * 12
if results and len(results) != expected_count:
    print(f"Warning: expected {expected_count} monthly granules, found {len(results)}")

if results:
    downloaded_files = earthaccess.download(results, local_path=str(local_path))
    print(f"\nDownloaded {len(downloaded_files)} files to: {local_path}")
    # Only report completion when a download was actually attempted.
    print("\nDownload complete!")
else:
    print("No data found for the specified parameters")

In [None]:
"""
Process and Clip NLDAS Noah Actual ET Data to Iowa Boundary

This cell:
1. Loads each downloaded Noah LSM NetCDF file
2. Extracts the total evapotranspiration (EVPsfc) variable
3. Clips to Iowa state boundary
4. Saves clipped files and computes the 2019-2023 mean monthly ET raster

EVPsfc units: kg/m2 (monthly accumulated total, equivalent to mm/month)
No rate conversion needed.
"""

import xarray as xr
import rioxarray as rxr
import geopandas as gpd
import pandas as pd
from pathlib import Path

# =============================================================================
# File Paths
# =============================================================================

project_root = Path("../..").resolve()

# Raw downloads are read from input_folder; clipped outputs are written to a
# separate output_folder, which is created if it does not exist yet.
input_folder = project_root / "data" / "raw" / "NLDAS_Noah"
output_folder = project_root / "data" / "processed" / "NLDAS_Noah_Iowa"
output_folder.mkdir(parents=True, exist_ok=True)

iowa_boundary = project_root / "data" / "aoi" / "iowa.geojson"

print(f"Input folder: {input_folder}")
print(f"Output folder: {output_folder}")

# Sorted so files are processed in filename order.
nc_files = sorted(input_folder.glob("*.nc"))
print(f"Found {len(nc_files)} NetCDF files to process")

# Inspect variables in first file. The context manager closes the file even
# if printing the variables/attributes raises.
if nc_files:
    with xr.open_dataset(nc_files[0]) as ds_sample:
        print(f"\nVariables in dataset: {list(ds_sample.data_vars)}")
        if "EVPsfc" in ds_sample:
            print(f"EVPsfc attributes: {ds_sample['EVPsfc'].attrs}")
        else:
            # The processing loop reads ds["EVPsfc"], so flag its absence early.
            print(f"Warning: 'EVPsfc' not found in {nc_files[0].name}")

# =============================================================================
# Load Iowa Boundary
# =============================================================================

gdf = gpd.read_file(iowa_boundary)
print(f"\nIowa boundary CRS: {gdf.crs}")

# =============================================================================
# Process Each NetCDF File - Extract EVPsfc
# =============================================================================

# CRS written onto every EVPsfc array below; the boundary is matched to it.
TARGET_CRS = "EPSG:4326"

all_clipped = []   # clipped EVPsfc arrays, one per successfully clipped file
failed_files = []  # names of files that raised during processing

# The target CRS is the same for every file, so reproject the boundary once
# instead of once per file.
gdf_projected = gdf.to_crs(TARGET_CRS)

for file in nc_files:
    try:
        # Open inside a context manager so the file handle is released after
        # each iteration instead of staying open for every file.
        with xr.open_dataset(file) as ds:
            data = ds["EVPsfc"].rio.write_crs(TARGET_CRS)

            # Clip to Iowa, then load the result into memory so it stays
            # usable after the source file is closed.
            clipped = data.rio.clip(
                gdf_projected.geometry, gdf_projected.crs, drop=True
            ).load()

        all_clipped.append(clipped)

        # Save clipped file
        filename = file.stem + "_EVPsfc_Iowa.nc"
        output_path = output_folder / filename
        clipped.to_netcdf(output_path)

        print(f"Processed: {file.name} -> {filename}")

    except Exception as e:
        # Best effort: keep going with the remaining files, but remember the
        # failure so it is visible in the summary below.
        failed_files.append(file.name)
        print(f"Error processing {file.name}: {e}")

print(f"\nProcessed {len(all_clipped)} files total")
if failed_files:
    print(f"Files with errors ({len(failed_files)}): {failed_files}")

# =============================================================================
# Compute Multi-Year Mean Monthly ET
# =============================================================================

if all_clipped:
    combined = xr.concat(all_clipped, dim="time")

    # Report how many time steps go into the mean: files that failed in the
    # processing loop are absent from all_clipped, so this can be fewer than
    # the number of downloaded files.
    n_months = combined.sizes["time"]
    print(f"\nAveraging {n_months} monthly fields")

    # Overall mean monthly ET across all available months
    mean_et = combined.mean(dim="time", keep_attrs=True)

    # If latitude is stored in ascending order, flip it to descending before
    # writing the raster.
    if 'lat' in mean_et.dims and mean_et.lat[0] < mean_et.lat[-1]:
        mean_et = mean_et.sortby('lat', ascending=False)

    mean_output = output_folder / "ET_mean_monthly_2019_2023_Iowa.tif"
    mean_et.rio.to_raster(mean_output)
    print(f"\nSaved 5-year mean monthly ET: {mean_output}")

    print(f"\nMean monthly ET: {float(mean_et.mean()):.1f} mm/month")
    print("Note: EVPsfc values are monthly totals in kg/m2 (= mm)")
else:
    # Nothing was clipped, so no mean raster is written and mean_et is not set.
    print("\nNo clipped data available; skipping mean monthly ET summary")

print("\nProcessing complete!")

In [None]:
"""
Visualize Noah Actual Evapotranspiration for Iowa (2019-2023)
"""

import matplotlib.pyplot as plt
import numpy as np

# Summary statistics of the mean ET field, printed as a quick sanity check
# before plotting.
et_min = float(mean_et.min())
et_max = float(mean_et.max())
et_avg = float(mean_et.mean())

print("Mean monthly ET statistics (mm/month):")
print(f"  Min: {et_min:.1f}")
print(f"  Max: {et_max:.1f}")
print(f"  Mean: {et_avg:.1f}")

# Map of the mean monthly ET field on a single axes
fig, ax = plt.subplots(figsize=(10, 7))

colorbar_options = {'label': 'Mean Monthly ET (mm/month)'}
mean_et.plot(ax=ax, cmap='YlGnBu', cbar_kwargs=colorbar_options)

ax.set_title(
    'NLDAS Noah Mean Monthly ET - Iowa (2019-2023)',
    fontsize=14,
    fontweight='bold',
)
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')

plt.tight_layout()
plt.show()

# Closing reminder about how this product is meant to be used downstream.
closing_notes = (
    "\nNote: These are actual ET values from the Noah Land Surface Model.",
    "Compare with NLDAS forcing PET to compute irrigation signal:",
    "  PET - ET = Irrigation signal",
)
for note in closing_notes:
    print(note)