In [None]:
"""
NLDAS Potential Evaporation (PET) Download for Iowa (2023)

This notebook downloads NLDAS (North American Land Data Assimilation System) 
monthly potential evaporation data for Iowa using NASA's earthaccess library.

PURPOSE:
--------
Download PET to compare with ECOSTRESS actual ET:
    ET - PET = Irrigation signal (human-induced ET)

Where ET > PET, water inputs beyond atmospheric demand indicate irrigation.

Dataset: NLDAS Primary Forcing Data L4 Monthly 0.125 x 0.125 degree
DOI: 10.5067/2DPKB5B5N14O
Variable: PEVAP (Potential Evaporation)

Requirements:
- earthaccess library
- NASA Earthdata account

Adapted for SIF-ET-Irrigation Analysis Project - Iowa 2023
"""

In [None]:
import earthaccess
from pathlib import Path

# Authenticate with NASA Earthdata.
# Credentials are taken from ~/.netrc, otherwise an interactive login is prompted.
auth = earthaccess.login()

# Fail fast: without an authenticated session the download step further down
# would fail with a much less obvious error.
if not getattr(auth, "authenticated", False):
    raise RuntimeError(
        "NASA Earthdata login failed - check ~/.netrc or your Earthdata credentials"
    )

# =============================================================================
# Configuration for Iowa 2023
# =============================================================================

# Year to download (just 2023 for this project); the temporal range spans the
# full calendar year as (start, end) ISO date strings.
year = 2023
temporal_range = (f"{year}-01-01", f"{year}-12-31")

# Iowa bounding box (west, south, east, north)
# Coordinates from Iowa state boundary
iowa_bbox = (-96.64, 40.38, -90.14, 43.50)

# Project paths - resolved relative to the current working directory, which is
# the notebook's own folder when the kernel is started from the notebook.
project_root = Path("../..").resolve()
local_path = project_root / "data" / "raw" / "NLDAS"
local_path.mkdir(parents=True, exist_ok=True)

# Echo the configuration so the run is self-documenting.
print(f"Project root: {project_root}")
print(f"Download path: {local_path}")
print(f"Temporal range: {temporal_range}")
print(f"Bounding box (Iowa): {iowa_bbox}")

# =============================================================================
# Download NLDAS Monthly Primary Forcing Data (contains Potential Evaporation)
# =============================================================================

print(f"\nSearching NLDAS data for: {temporal_range[0]} to {temporal_range[1]}")

# Search the collection by DOI, restricted to the configured year and the
# Iowa bounding box.
results = earthaccess.search_data(
    doi="10.5067/2DPKB5B5N14O",  # NLDAS Primary Forcing Monthly
    temporal=temporal_range,
    bounding_box=iowa_bbox,
)

print(f"Found {len(results)} granules")

# Always defined, so later cells can inspect it even when nothing was found.
downloaded_files = []

if results:
    downloaded_files = earthaccess.download(results, local_path=str(local_path))
    print(f"\nDownloaded {len(downloaded_files)} files to: {local_path}")
    # Only report completion when a download actually happened.
    print("\nDownload complete!")
else:
    print("No data found for the specified parameters")

In [None]:
"""
Process and Clip NLDAS Potential Evaporation Data to Iowa Boundary

This cell:
1. Loads each downloaded NetCDF file
2. Extracts the potential evaporation (PEVAP) variable
3. Clips to Iowa state boundary
4. Saves clipped files and computes the annual mean across the monthly files

PEVAP units: kg/m²/s (mass flux rate)
To convert to mm/day: multiply by 86400
"""

import os
import glob
import xarray as xr
import rioxarray as rxr
import geopandas as gpd
import pandas as pd
from pathlib import Path

# =============================================================================
# File Paths - Iowa Project
# =============================================================================

project_root = Path("../..").resolve()
data_root = project_root.joinpath("data")

# Raw NLDAS downloads are read from here
input_folder = data_root.joinpath("raw", "NLDAS")

# Clipped / processed outputs are written here (created on demand)
output_folder = data_root.joinpath("processed", "NLDAS_Iowa")
output_folder.mkdir(parents=True, exist_ok=True)

# Dissolved Iowa area-of-interest polygon used for clipping
iowa_boundary = data_root.joinpath("aoi", "iowa.geojson")

print(f"Input folder: {input_folder}")
print(f"Output folder: {output_folder}")
print(f"Iowa boundary: {iowa_boundary}")

# Collect the NetCDF files waiting to be processed and report how many there are
nc_files = [*input_folder.glob("*.nc")]
print(f"\nFound {len(nc_files)} NetCDF files to process")

# =============================================================================
# Load Iowa Boundary
# =============================================================================

# Read the boundary into a GeoDataFrame and show which CRS it carries
gdf = gpd.read_file(iowa_boundary)
print(f"Iowa boundary CRS: {gdf.crs}")

# =============================================================================
# Process Each NetCDF File - Extract PEVAP
# =============================================================================

PET_VARIABLE = "PEVAP"    # name of the potential evaporation variable to extract
TARGET_CRS = "EPSG:4326"  # CRS assigned to the NLDAS grid before clipping

# The CRS written onto every file is the same, so the boundary only needs to
# be reprojected once rather than on every loop iteration.
gdf_projected = gdf.to_crs(TARGET_CRS)

datasets_2023 = []   # clipped arrays, one per successfully processed file
failed_files = []    # names of files that raised during processing

for file in sorted(nc_files):
    filename = file.stem + "_PEVAP_Iowa.nc"
    output_path = output_folder / filename

    try:
        # The context manager guarantees the file handle is released even if
        # clipping fails part-way through.
        with xr.open_dataset(file) as ds:
            if PET_VARIABLE not in ds.data_vars:
                raise ValueError(
                    f"variable {PET_VARIABLE!r} not found; "
                    f"available variables: {list(ds.data_vars)}"
                )

            # Extract POTENTIAL EVAPORATION variable and tag it with a CRS
            data = ds[PET_VARIABLE].rio.write_crs(TARGET_CRS)

            # Clip to Iowa boundary. .load() pulls the values into memory so
            # the array stays usable after the dataset is closed.
            clipped = data.rio.clip(
                gdf_projected.geometry, gdf_projected.crs, drop=True
            ).load()

        # Store for averaging
        datasets_2023.append(clipped)

        # Save clipped file
        clipped.to_netcdf(output_path)

        print(f"Processed: {file.name} -> {filename}")

    except Exception as e:
        # Best-effort: one bad file must not abort the remaining months,
        # but remember it so the failures are summarised below.
        failed_files.append(file.name)
        print(f"Error processing {file.name}: {e}")

if failed_files:
    print(f"\n{len(failed_files)} of {len(nc_files)} files failed: {failed_files}")

# =============================================================================
# Compute and Save Annual Mean PET
# =============================================================================

if not datasets_2023:
    # Nothing was clipped successfully, so there is nothing to average
    print("No data processed - check input files")
else:
    print(f"\nComputing annual mean PET from {len(datasets_2023)} months...")

    # Stack the clipped months along time, then collapse that axis to a mean
    monthly_stack = xr.concat(datasets_2023, dim="time")
    mean_pet = monthly_stack.mean(dim="time", keep_attrs=True)

    # Flip to north-up when latitude runs south-to-north
    lat_is_ascending = 'lat' in mean_pet.dims and mean_pet.lat[0] < mean_pet.lat[-1]
    if lat_is_ascending:
        mean_pet = mean_pet.sortby('lat', ascending=False)

    # GeoTIFF in the native units of the source variable
    mean_output = output_folder / "PET_mean_2023_Iowa.tif"
    mean_pet.rio.to_raster(mean_output)
    print(f"Saved annual mean PET to: {mean_output}")

    # Second GeoTIFF scaled by 86400 (kg/m²/s -> mm/day) for easier reading
    mean_pet_mm_day = mean_pet * 86400
    mean_output_mm = output_folder / "PET_mean_2023_Iowa_mm_day.tif"
    mean_pet_mm_day.rio.to_raster(mean_output_mm)
    print(f"Saved annual mean PET (mm/day) to: {mean_output_mm}")

print("\nProcessing complete!")

In [None]:
"""
Visualize Mean Potential Evaporation for Iowa 2023
"""

import matplotlib.pyplot as plt

# This cell relies on `mean_pet`, which the processing cell only creates when at
# least one file was clipped. Fail with an actionable message instead of a bare
# NameError when that did not happen.
if "mean_pet" not in globals():
    raise RuntimeError(
        "mean_pet is not defined - run the processing cell first and make sure "
        "at least one NLDAS file was processed"
    )

# Convert to mm/day for easier interpretation (kg/m²/s * seconds per day)
SECONDS_PER_DAY = 86400
mean_pet_mm_day = mean_pet * SECONDS_PER_DAY

# Plot the annual mean PET
fig, ax = plt.subplots(figsize=(10, 8))

mean_pet_mm_day.plot(
    ax=ax,
    cmap='YlOrRd',
    cbar_kwargs={'label': 'Potential Evaporation (mm/day)'}
)

ax.set_title('NLDAS Mean Potential Evaporation (PET) - Iowa 2023', fontsize=14, fontweight='bold')
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')

plt.tight_layout()
plt.show()

# Summary statistics over the mean field, each reduced to a plain float once
pet_min = float(mean_pet_mm_day.min())
pet_max = float(mean_pet_mm_day.max())
pet_mean = float(mean_pet_mm_day.mean())

print("\nPotential Evaporation Statistics (Iowa 2023):")
print(f"  Min: {pet_min:.2f} mm/day")
print(f"  Max: {pet_max:.2f} mm/day")
print(f"  Mean: {pet_mean:.2f} mm/day")
print("\nNote: Compare with ECOSTRESS ET to identify irrigation:")
print("  Where ET > PET → likely irrigated")
print("  Where ET < PET → water-stressed or rainfed")