# NLDAS Noah Actual Evapotranspiration (ET) Visualization - Iowa 2019-2023

This notebook processes and visualizes NLDAS Noah Land Surface Model actual
evapotranspiration data for Iowa.

**Workflow:**
1. Load raw NLDAS Noah NetCDF files
2. Extract EVPsfc (Total Evapotranspiration) variable
3. Clip to Iowa boundary
4. Calculate monthly statewide statistics
5. Visualize temporal and spatial patterns
6. Save clipped monthly GeoTIFFs and a summary CSV

**Data Notes:**
- Variable: EVPsfc (Total Evapotranspiration)
- Units: kg/m2 (monthly accumulated, equivalent to mm/month)
- No rate conversion needed - values are already monthly totals
- Source: NLDAS Noah LSM L4 Monthly (NLDAS_NOAH0125_M)

In [1]:
import xarray as xr
import rioxarray as rxr
import geopandas as gpd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import warnings
# NOTE(review): this silences every warning category for the whole session,
# not just a specific noisy one — consider narrowing the filter.
warnings.filterwarnings('ignore')

# Plot defaults: reset to matplotlib's stock style, then set the
# figure size and base font size used by the figures below.
plt.style.use('default')
plt.rcParams.update({
    'figure.figsize': (12, 8),
    'font.size': 11,
})

print("Libraries loaded successfully!")

Libraries loaded successfully!


## 1. Setup Paths and Load Iowa Boundary

In [None]:
# Project paths, all resolved relative to the directory two levels above
# the current working directory.
project_root = Path("../..").resolve()
input_folder = project_root.joinpath("data", "raw", "NLDAS_Noah")
output_folder = project_root.joinpath("data", "processed", "NLDAS_Noah_Iowa")
figures_dir = project_root.joinpath("figures", "nldas")

# Iowa boundary (GeoJSON)
iowa_boundary = project_root.joinpath("data", "aoi", "iowa.geojson")

# Make sure both output locations exist before anything is written (no-op if present)
for _out_dir in (output_folder, figures_dir):
    _out_dir.mkdir(parents=True, exist_ok=True)

print(
    f"Input folder: {input_folder}",
    f"Output folder: {output_folder}",
    f"Figures folder: {figures_dir}",
    sep="\n",
)

# List available files; sorting by name keeps them in a stable order
nc_files = sorted(input_folder.glob("*.nc"))
print(f"\nFound {len(nc_files)} NetCDF files")
print(f"Expected: 60 files (12 months x 5 years)")

In [3]:
# Read the Iowa boundary into a GeoDataFrame and report its CRS and
# bounding box as a quick sanity check.
iowa_gdf = gpd.read_file(iowa_boundary)
print(
    f"Iowa boundary CRS: {iowa_gdf.crs}",
    f"Iowa bounds: {iowa_gdf.total_bounds}",
    sep="\n",
)

Iowa boundary CRS: EPSG:4326
Iowa bounds: [-96.63949339  40.37544738 -90.14006749  43.50120364]


## 2. Process NLDAS Noah Files - Extract and Clip EVPsfc

In [None]:
# Process each file and extract EVPsfc (Total Evapotranspiration).
#
# Results are collected in two parallel lists that later cells index together:
#   monthly_data[i]  -> clipped EVPsfc grid for one file
#   month_labels[i]  -> (year, month_num, month_name) parsed from that file's name
monthly_data = []
month_labels = []  # (year, month_num, month_name)
failed_files = []  # (filename, error message) for every file that could not be processed

# Every grid is tagged EPSG:4326 below, so the boundary only needs to be
# brought into that CRS once rather than once per file.
iowa_projected = iowa_gdf.to_crs("EPSG:4326")

for file in nc_files:
    fname = file.name
    try:
        # Extract year and month from filename
        # NLDAS_NOAH0125_M.AYYYYMM.020.nc
        year_month = fname.split('.A')[1][:6]
        year = int(year_month[:4])
        month_num = int(year_month[4:6])
        month_name = pd.Timestamp(f"{year}-{month_num:02d}-01").strftime("%B")

        # The context manager closes the file handle even when clipping fails.
        with xr.open_dataset(file) as ds:
            # Extract EVPsfc (Total Evapotranspiration, kg/m2 = mm/month) and set CRS
            et = ds["EVPsfc"].rio.write_crs("EPSG:4326")

            # Clip to Iowa, then pull the values into memory so the result
            # stays usable after the dataset is closed.
            clipped = et.rio.clip(iowa_projected.geometry, iowa_projected.crs, drop=True)
            clipped = clipped.load()
    except Exception as e:
        # Best-effort: keep going, but remember the failure so it is reported below.
        failed_files.append((fname, str(e)))
        print(f"Error processing {fname}: {e}")
        continue

    monthly_data.append(clipped)
    month_labels.append((year, month_num, month_name))
    print(f"Processed: {fname} -> {year} {month_name}")

print(f"\nProcessed {len(monthly_data)} months total")
if failed_files:
    # Make skipped inputs impossible to miss: later statistics and plots
    # silently exclude these months.
    print(f"WARNING: {len(failed_files)} of {len(nc_files)} files failed and are "
          f"missing from all later results")

In [None]:
# Inspect the first processed month: structure, dimensions, shape and
# the first non-NaN cell value.
sample_month = monthly_data[0]
sample_values = sample_month.squeeze().values
first_valid = float(sample_values[~np.isnan(sample_values)][0])

print("Sample month structure:")
print(sample_month)
print(f"\nDimensions: {sample_month.dims}")
print(f"Shape: {sample_month.shape}")
print(f"\nUnits: kg/m2 (= mm/month, monthly accumulated)")
print(f"Sample values (first non-NaN): {first_valid:.2f} mm")

## 3. Calculate Statistics

EVPsfc is already in kg/m2 (= mm) as a monthly accumulated total. No rate conversion needed.

In [None]:
# Calculate per-month statistics across the clipped Iowa grid (values are mm/month).
# Each list has one entry per processed month, in the same order as month_labels.
squeezed_months = [month_grid.squeeze() for month_grid in monthly_data]

monthly_means = [float(grid.mean()) for grid in squeezed_months]
monthly_mins = [float(grid.min()) for grid in squeezed_months]
monthly_maxs = [float(grid.max()) for grid in squeezed_months]
monthly_stds = [float(grid.std()) for grid in squeezed_months]

# Create summary dataframe: one row per month, labels first, then the statistics
summary_df = pd.DataFrame({
    'Year': [yr for yr, _, _ in month_labels],
    'Month': [name for _, _, name in month_labels],
    'Month_Num': [num for _, num, _ in month_labels],
    'Mean_ET_mm': monthly_means,
    'Min_ET_mm': monthly_mins,
    'Max_ET_mm': monthly_maxs,
    'Std_ET_mm': monthly_stds,
})

print("Monthly ET Statistics for Iowa (mm/month):")
print("=" * 80)
print(summary_df.to_string(index=False))

## 4. Temporal Visualization - ET Through the Year

In [None]:
# Plot monthly ET time series (all years): statewide mean with a ±1 std-dev band.
fig, ax = plt.subplots(figsize=(16, 6))

# One timestamp (first day of the month) per processed file
dates = [pd.Timestamp(year=y, month=m, day=1) for y, m, _ in month_labels]
mean_et = summary_df['Mean_ET_mm']
std_et = summary_df['Std_ET_mm']

ax.plot(dates, mean_et, 'o-', color='steelblue', linewidth=1.5,
        markersize=5, label='Mean ET')
ax.fill_between(dates, mean_et - std_et, mean_et + std_et,
                alpha=0.2, color='steelblue', label='±1 Std Dev')

ax.set_xlabel('Date', fontsize=12)
ax.set_ylabel('Actual Evapotranspiration (mm/month)', fontsize=12)
ax.set_title('NLDAS Noah Actual ET - Iowa (2019-2023)\nMonthly Mean Across State',
             fontsize=14, fontweight='bold')

ax.legend(loc='upper right')
ax.grid(True, alpha=0.3)

# Faint dotted separators at the start of each calendar year
for year in range(2019, 2024):
    year_start = pd.Timestamp(f"{year}-01-01")
    ax.axvline(year_start, color='gray', linestyle=':', alpha=0.3)

plt.tight_layout()
plt.savefig(figures_dir / 'nldas_et_timeseries.png', dpi=150, bbox_inches='tight')
plt.show()

In [None]:
# Year-over-year seasonal cycle comparison: one line per year, months on the x-axis.
# colors_yr is reused by the annual-totals bar chart in a later cell.
colors_yr = {2019: '#1f77b4', 2020: '#ff7f0e', 2021: '#2ca02c', 2022: '#d62728', 2023: '#9467bd'}
month_tick_labels = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']

fig, ax = plt.subplots(figsize=(12, 6))

for year, line_color in colors_yr.items():
    year_data = summary_df.loc[summary_df['Year'] == year]
    if year_data.empty:
        # No processed months for this year; leave it out of the plot
        continue
    ax.plot(year_data['Month_Num'], year_data['Mean_ET_mm'], 'o-',
            color=line_color, linewidth=2, markersize=6, label=str(year))

ax.set_xlabel('Month', fontsize=12)
ax.set_ylabel('Mean ET (mm/month)', fontsize=12)
ax.set_title('NLDAS Noah ET - Year-over-Year Seasonal Cycle\nIowa', fontsize=14, fontweight='bold')
ax.set_xticks(range(1, 13))
ax.set_xticklabels(month_tick_labels)
ax.legend(title='Year')
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(figures_dir / 'nldas_et_seasonal_byyear.png', dpi=150, bbox_inches='tight')
plt.show()

In [None]:
# Monthly ET heatmap (Year x Month)
#
# The pivot only contains columns for months that actually occur in summary_df.
# Reindexing to 1..12 guarantees a fixed 12-column grid (missing months become
# NaN), so the tick labels line up and the annotation loop cannot index past
# the last column when a month is absent from the processed data.
pivot_et = (
    summary_df
    .pivot(index='Year', columns='Month_Num', values='Mean_ET_mm')
    .reindex(columns=range(1, 13))
)
heat_values = pivot_et.values
n_years, n_months = heat_values.shape

fig, ax = plt.subplots(figsize=(14, 4))
im = ax.imshow(heat_values, cmap='YlGnBu', aspect='auto')

ax.set_xticks(range(n_months))
ax.set_xticklabels(['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'])
ax.set_yticks(range(n_years))
ax.set_yticklabels(pivot_et.index)

# Cells above 60% of the overall maximum sit on dark colours and get white text.
# The threshold is loop-invariant, so compute it once; it is NaN (and unused)
# when there is no finite value at all.
if np.isfinite(heat_values).any():
    white_text_threshold = np.nanmax(heat_values) * 0.6
else:
    white_text_threshold = np.nan

# Annotate with values (NaN cells, i.e. months without data, are left blank)
for i in range(n_years):
    for j in range(n_months):
        val = heat_values[i, j]
        if not np.isnan(val):
            ax.text(j, i, f'{val:.0f}', ha='center', va='center', fontsize=9,
                    color='white' if val > white_text_threshold else 'black')

cbar = plt.colorbar(im, ax=ax, shrink=0.8)
cbar.set_label('ET (mm/month)')
ax.set_title('NLDAS Noah Monthly ET Heatmap - Iowa (2019-2023)', fontsize=14, fontweight='bold')
ax.set_xlabel('Month')
ax.set_ylabel('Year')

plt.tight_layout()
plt.savefig(figures_dir / 'nldas_et_heatmap.png', dpi=150, bbox_inches='tight')
plt.show()

## 5. Annual Total ET and Distribution

In [None]:
# Annual total ET bar chart + seasonal distributions
def _valid_cells(grid):
    """Return the grid's values flattened to 1-D with NaN entries removed."""
    flat = grid.squeeze().values.flatten()
    return flat[~np.isnan(flat)]


fig, axes = plt.subplots(1, 2, figsize=(16, 6))
ax_annual, ax_dist = axes

# --- Left panel: annual total ET (sum of the monthly statewide means per year) ---
annual_totals = (
    summary_df.groupby('Year')['Mean_ET_mm']
    .sum()
    .rename('Annual_ET_mm')
    .reset_index()
)

bar_colors = [colors_yr.get(y, 'gray') for y in annual_totals['Year']]
bars = ax_annual.bar(annual_totals['Year'], annual_totals['Annual_ET_mm'],
                     color=bar_colors, edgecolor='black', linewidth=0.5, alpha=0.8)

# Value label slightly above each bar
for rect, total in zip(bars, annual_totals['Annual_ET_mm']):
    label_x = rect.get_x() + rect.get_width()/2
    label_y = rect.get_height() + 5
    ax_annual.text(label_x, label_y, f'{total:.0f}',
                   ha='center', fontsize=11, fontweight='bold')

mean_annual = annual_totals['Annual_ET_mm'].mean()
ax_annual.axhline(mean_annual, color='black', linestyle='--', linewidth=1.5,
                  label=f'5-year mean: {mean_annual:.0f} mm')
ax_annual.set_xlabel('Year', fontsize=12)
ax_annual.set_ylabel('Annual Total ET (mm/year)', fontsize=12)
ax_annual.set_title('Annual Total ET', fontsize=13, fontweight='bold')
ax_annual.legend()
ax_annual.grid(True, alpha=0.3, axis='y')

# --- Right panel: seasonal distribution of per-cell values ---
# all_values pools every valid cell of every month; the summary cell at the
# end of the notebook reads it.
all_values = []
for month_grid in monthly_data:
    all_values.extend(_valid_cells(month_grid))

seasons = {
    'Winter (DJF)': [12, 1, 2],
    'Spring (MAM)': [3, 4, 5],
    'Summer (JJA)': [6, 7, 8],
    'Fall (SON)': [9, 10, 11]
}

season_colors = ['#4393c3', '#66c2a5', '#fc8d62', '#e78ac3']
for (season_name, season_months), color in zip(seasons.items(), season_colors):
    # Pool valid cells from every month (any year) that belongs to this season
    season_vals = []
    for (_, month_num, _), month_grid in zip(month_labels, monthly_data):
        if month_num in season_months:
            season_vals.extend(_valid_cells(month_grid))
    if season_vals:
        ax_dist.hist(season_vals, bins=30, alpha=0.5, label=season_name, color=color)

ax_dist.set_xlabel('Actual ET (mm/month)', fontsize=11)
ax_dist.set_ylabel('Frequency', fontsize=11)
ax_dist.set_title('ET Distribution by Season', fontsize=13, fontweight='bold')
ax_dist.legend()
ax_dist.grid(True, alpha=0.3)

plt.suptitle('NLDAS Noah Actual ET - Iowa (2019-2023)', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.savefig(figures_dir / 'nldas_et_annual_and_distribution.png', dpi=150, bbox_inches='tight')
plt.show()

## 6. Spatial Maps

Spatial patterns of ET across Iowa: seasonal comparison for 2023 and annual totals by year.

In [None]:
# Seasonal spatial maps: January, July, October 2023
target_months = {'January': 1, 'July': 7, 'October': 10}

# Look up the first processed grid matching each (2023, month); months that
# were not processed are simply left out.
plot_data = {}
for month_name, month_num in target_months.items():
    match_idx = next(
        (idx for idx, (yr, mo, _) in enumerate(month_labels)
         if yr == 2023 and mo == month_num),
        None,
    )
    if match_idx is not None:
        plot_data[month_name] = monthly_data[match_idx].squeeze()

fig, axes = plt.subplots(1, 3, figsize=(16, 5))

# Shared colour limits so the three panels are directly comparable
panel_mins = [float(grid.min()) for grid in plot_data.values()]
panel_maxs = [float(grid.max()) for grid in plot_data.values()]
vmin = min(panel_mins)
vmax = max(panel_maxs)

for ax, (month_name, data) in zip(axes, plot_data.items()):
    im = data.plot(ax=ax, cmap='YlGnBu', vmin=vmin, vmax=vmax, add_colorbar=False)
    iowa_gdf.boundary.plot(ax=ax, color='black', linewidth=1)
    panel_mean = float(data.mean())
    ax.set_title(f'{month_name} 2023\nMean: {panel_mean:.1f} mm',
                 fontsize=12, fontweight='bold')
    ax.set_xlabel('Longitude')
    ax.set_ylabel('Latitude')

# One horizontal colourbar shared by all panels (uses the last plotted mappable)
cbar = fig.colorbar(im, ax=axes, orientation='horizontal', fraction=0.05, pad=0.15)
cbar.set_label('Actual ET (mm/month)', fontsize=11)

plt.suptitle('NLDAS Noah Actual ET - Iowa 2023\nSeasonal Comparison',
             fontsize=14, fontweight='bold', y=1.02)
plt.tight_layout()
plt.savefig(figures_dir / 'nldas_et_maps_seasonal_2023.png', dpi=150, bbox_inches='tight')
plt.show()

In [None]:
# Annual total ET maps for each year (2019-2023)
fig, axes = plt.subplots(1, 5, figsize=(22, 5))

# Per-cell annual total = sum of that year's monthly grids.
# Years with no processed months are skipped.
annual_maps = {}
for year in range(2019, 2024):
    year_data = [
        monthly_data[idx].squeeze()
        for idx, (yr, _, _) in enumerate(month_labels)
        if yr == year
    ]
    if year_data:
        annual_maps[year] = sum(year_data)  # Sum monthly totals -> annual total

# Consistent colorbar limits across all years
yearly_mins = [float(grid.min()) for grid in annual_maps.values()]
yearly_maxs = [float(grid.max()) for grid in annual_maps.values()]
vmin = min(yearly_mins)
vmax = max(yearly_maxs)

for ax, (year, data) in zip(axes, annual_maps.items()):
    im = data.plot(ax=ax, cmap='YlGnBu', vmin=vmin, vmax=vmax, add_colorbar=False)
    iowa_gdf.boundary.plot(ax=ax, color='black', linewidth=1)
    statewide_mean = float(data.mean())
    ax.set_title(f'{year}\n{statewide_mean:.0f} mm/yr', fontsize=12, fontweight='bold')
    # Compact panels: drop axis labels and tick labels
    ax.set_xlabel('')
    ax.set_ylabel('')
    ax.set_xticklabels([])
    ax.set_yticklabels([])

cbar = fig.colorbar(im, ax=axes, orientation='horizontal', fraction=0.04, pad=0.1)
cbar.set_label('Annual Total ET (mm/year)', fontsize=11)

plt.suptitle('NLDAS Noah Annual Total ET - Iowa (2019-2023)', fontsize=14, fontweight='bold', y=1.02)
plt.tight_layout()
plt.savefig(figures_dir / 'nldas_et_maps_annual.png', dpi=150, bbox_inches='tight')
plt.show()

## 7. Save Processed Data

In [None]:
# Save individual months as GeoTIFFs, one file per processed month
for (year, month_num, month_name), month_grid in zip(month_labels, monthly_data):
    month_path = output_folder / f'ET_{year}_{month_num:02d}_{month_name}_Iowa.tif'
    month_grid.squeeze().rio.to_raster(month_path)

print(f"Saved {len(month_labels)} monthly GeoTIFFs to {output_folder}")

# Save summary CSV (the per-month statistics table, without the index column)
summary_path = output_folder / 'ET_monthly_summary_2019_2023_Iowa.csv'
summary_df.to_csv(summary_path, index=False)
print(f"Saved monthly summary: {summary_path}")

# Recap of the figures written by the plotting cells above
print(f"\nFigures saved to: {figures_dir}")
for figure_name in (
    'nldas_et_timeseries.png',
    'nldas_et_seasonal_byyear.png',
    'nldas_et_heatmap.png',
    'nldas_et_annual_and_distribution.png',
    'nldas_et_maps_seasonal_2023.png',
    'nldas_et_maps_annual.png',
):
    print(f"  - {figure_name}")

## 8. Summary Statistics

In [None]:
# Final text summary. Relies on all_values (pooled per-cell values) built in
# the annual/distribution plotting cell and on summary_df from the statistics cell.
banner = "=" * 70
print(banner)
print("NLDAS Noah Actual ET Summary - Iowa 2019-2023")
print(banner)

# Overall statistics over every valid grid cell of every month
all_vals = np.array(all_values)
print(f"\nOverall Statistics (all months, all years):")
print(f"  Mean ET: {all_vals.mean():.1f} mm/month")
print(f"  Min ET:  {all_vals.min():.1f} mm/month")
print(f"  Max ET:  {all_vals.max():.1f} mm/month")

# Annual totals per year: sum of the monthly statewide means
print(f"\nAnnual Total ET by Year:")
for year in range(2019, 2024):
    in_year = summary_df['Year'] == year
    if in_year.any():
        annual_total = summary_df.loc[in_year, 'Mean_ET_mm'].sum()
        print(f"  {year}: {annual_total:.0f} mm/year")

print(f"\nData Resolution: 0.125° (~12 km)")
print(f"Source: NLDAS-2 Noah Land Surface Model Monthly (NLDAS_NOAH0125_M)")
print(f"Variable: EVPsfc (Total Evapotranspiration, monthly accumulated)")
print(f"\nOutput files saved to: {output_folder}")