# Santa Ana Winds Notebook

Santa Ana winds are intense dry winds that blow from the Mojave Desert downslope to the Southern California coast (Seto et al. 2025). Upper-level temperature and winds, along with the gradient in sea level pressure, can be used to identify conditions that support Santa Ana wind occurrence.

**Intended Application:** As a user, I want to generate timeseries of key Santa Ana Wind indicators.

**Runtime:** On the Analytics Engine Jupyter Hub platform this notebook will take approximately X minutes to run.

**References:**


Seto, D., C. Jones, D. Siuta, N. Wagenbrenner, C. Thompson, and N. Quinn, 2025: Evaluation of HRRR Wind Speed Forecast and WindNinja Downscaling Accuracy during Santa Ana Wind Events in Southern California. Wea. Forecasting, 40, 525–541, https://doi.org/10.1175/WAF-D-24-0013.1.

In [None]:
# TODO: delete in final version
# %load_ext autoreload
# %autoreload 2

In [None]:
# TODO: Need h5netcdf in environment
#!pip install h5netcdf

In [None]:
from climakitae.core.constants import WRF_CRS
from climakitae.core.data_export import export
from climakitae.new_core.processors.warming_level import WarmingLevel
from climakitae.util.utils import add_dummy_time_to_wl, get_closest_gridcell
from climakitae.tools.derived_variables import (
    compute_wind_dir,
    compute_sea_level_pressure,
)

import geopandas as gpd
import numpy as np
import pyproj
from pyproj import CRS, Geod, Proj
import s3fs
from shapely.geometry import box, mapping, LineString
import xarray as xr
import matplotlib.pyplot as plt

# Setup

### Choose points
The value at `gradient_point_2` will be subtracted from the value at `gradient_point_1`.

In [None]:
# Points of interest as (latitude, longitude) in decimal degrees; the two
# elements are passed to get_closest_gridcell in that order further down.
gradient_point_1 = (34.031899244954694, -118.47507877286985)  # Santa Monica
gradient_point_2 = (34.31392420709518, -116.16003299789273)  # Mojave Desert

### Choose global warming levels

In [None]:
# Global warming levels to compare. Both values are handed to the warming
# level processing for the pressure-level data and to the catalog query for
# the sea level pressure inputs.
# NOTE(review): presumably degrees C of global warming — confirm the reference period.
baseline_gwl = 0.8
future_gwl = 2.0

# Temperature and Geostrophic Wind

## 1. Get Warming Levels

Access the WRF data for temperature and geopotential height.

In [None]:
# Zarr stores holding temperature and geopotential height on pressure levels
hist_path = "s3://santa-ana-winds/miroc6_r1i1p1f1_historical/d01/temperature_geopotential_plevs.zarr"
ssp370_path = "s3://santa-ana-winds/miroc6_r1i1p1f1_ssp370/d01/temperature_geopotential_plevs.zarr/"

# Both stores are opened lazily with the same reader settings
zarr_kwargs = dict(consolidated=True, zarr_format=2, engine="zarr", chunks={})
ds_hist = xr.open_dataset(hist_path, **zarr_kwargs)
ds_ssp370 = xr.open_dataset(ssp370_path, **zarr_kwargs)

# For each experiment: trim the time range, add a member_id dimension for the
# single ensemble member, and switch to a no-leap calendar.
ds_hist = (
    ds_hist.sel(time=slice("1980-09-01", "2014-08-31 13:00"))
    .expand_dims({"member_id": ["r1i1p1f1"]})
    .convert_calendar("noleap")
)
ds_ssp370 = (
    ds_ssp370.sel(time=slice("2014-09-01", "2066-08-04 13:00"))
    .expand_dims({"member_id": ["r1i1p1f1"]})
    .convert_calendar("noleap")
)

Get the warming levels.

In [None]:
# Configure the warming level processor: both warming levels, all 12 months
values = {
    "warming_levels": [baseline_gwl, future_gwl],
    "warming_level_months": list(range(1, 13)),
}
WL = WarmingLevel(values)

# The processor receives the datasets keyed by a dotted simulation identifier.
# NOTE(review): the keys are labelled d03 while the zarr paths opened earlier
# point at d01 — confirm which grid label the processor expects.
data = {
    "WRF.UCLA.MIROC6.ssp370.1hr.d03": ds_ssp370,
    "WRF.UCLA.MIROC6.historical.1hr.d03": ds_hist,
}
context = {"activity_id": "WRF"}
wls = WL.execute(data, context)

# Pull out the single result, tag its frequency, and add a dummy time axis
ds_wl = wls["WRF.UCLA.MIROC6.ssp370.1hr.d03.r1i1p1f1"]
ds_wl.attrs["frequency"] = "1hr"
ds_wl = add_dummy_time_to_wl(ds_wl)

## 3. Temperature gradient

Extract the temperature data array from the warming levels dataset.

In [None]:
# Temperature variable from the warming level dataset (indexed by
# pressure_level in the cells below)
temperature = ds_wl["temperature"]

Find the temperature at the two points of interest and get the difference.

In [None]:
# Temperature at the gridcell closest to each point of interest
temp_pt_1 = get_closest_gridcell(temperature, gradient_point_1[0], gradient_point_1[1])
temp_pt_2 = get_closest_gridcell(temperature, gradient_point_2[0], gradient_point_2[1])

# Point 1 minus point 2: the convention stated in the "Choose points" section
# and the one used for the sea level pressure gradient.
temperature_diff = temp_pt_1 - temp_pt_2
temperature_diff.name = "temperature_gradient"
# Metadata lives in the .attrs dictionary of the DataArray
temperature_diff.attrs["long_name"] = "Temperature Gradient on Pressure Level"

Pick a pressure level to display and export. Valid levels are 1000, 925, 850, 700, 500, 300, or 200.

In [None]:
# Pressure level (hPa) selected in the temperature plots that follow and used
# in the export filename
level = 700

This plot shows two years of the difference data for the 700 hPa pressure level.

In [None]:
# First 2 * 365 * 24 hourly steps of the first warming level
first_two_years = slice(0, 365 * 24 * 2)
hourly_subset = temperature_diff.isel(warming_level=0, time=first_two_years).sel(
    pressure_level=level
)
hourly_subset.compute().plot()
plt.title(f"Hourly {level} hPa temperature difference, WL {baseline_gwl} year 1-2")

In this plot, the data is resampled to a daily mean value and shown for the entire 30-year warming level period.

In [None]:
# Daily means of the hourly difference
temperature_diff_daily = temperature_diff.resample(time="1D").mean()

# Plot the first warming level at the selected pressure level
daily_subset = temperature_diff_daily.isel(warming_level=0).sel(pressure_level=level)
daily_subset.compute().plot()
plt.title(f"Daily mean {level} hPa temperature difference, WL {baseline_gwl}")

Run the cell below to export the hourly temperature data to file. To export the daily mean data as well, uncomment the last export call in the cell.

In [None]:
# Export hourly data
export(
    temperature_diff,
    f"temperature_gradient_{level}hPa.nc",
    format="NetCDF",
    mode="local",
)

# Uncomment to export daily mean data
# export(temperature_diff_daily,f"temperature_gradient_{level}hPa.nc",format="NetCDF",mode="local")

## 4. Geostrophic Wind Gradient

These functions are used to compute the geostrophic wind.

In [None]:
def deltas(h: xr.DataArray) -> tuple[xr.DataArray, xr.DataArray]:
    """Get the actual x and y grid spacing in meters.

    Distances between neighbouring gridcells are geodesic distances computed
    with ``pyproj.Geod`` on a sphere. A distance is made negative when the
    step between the two cells points west (for x) or south (for y).

    Parameters
    ----------
    h : xr.DataArray
        DataArray with ``y`` and ``x`` coordinates and two-dimensional
        ``lat`` and ``lon`` coordinates ordered (y, x), e.g. data on the
        WRF grid.

    Returns
    -------
    tuple[xr.DataArray, xr.DataArray]
        ``(dx, dy)``. ``dx`` has one column fewer than ``h`` and ``dy`` one
        row fewer; each value carries the coordinates of the terminus
        gridcell of the step it measures.
    """
    g = Geod(ellps="sphere")
    # Plain numpy arrays so the in-place sign flips below are well defined
    lon = h.lon.values
    lat = h.lat.values

    # Step along y: a forward azimuth in the southern half-plane means the
    # step runs north-to-south, so the distance is signed negative.
    forward_az, _, dy = g.inv(lon[0:-1, :], lat[0:-1, :], lon[1:, :], lat[1:, :])
    dy[(forward_az < -90.0) | (forward_az > 90.0)] *= -1

    # Step along x: the distance is negative only when the step points west.
    # The test differs from the y test; reusing the +/-90 degree test here
    # would flip any eastward step whose azimuth exceeds 90 degrees.
    forward_az, _, dx = g.inv(lon[:, 0:-1], lat[:, 0:-1], lon[:, 1:], lat[:, 1:])
    dx[(forward_az < 0.0) | (forward_az > 180.0)] *= -1

    # Convert to data arrays with coordinates of the terminus point
    dx = xr.DataArray(
        data=dx,
        dims=["y", "x"],
        coords={
            "y": (["y"], h.y.data),
            "x": (["x"], h.x.data[1:]),
            "lon": (["y", "x"], lon[:, 1:]),
            "lat": (["y", "x"], lat[:, 1:]),
        },
    )
    dy = xr.DataArray(
        data=dy,
        dims=["y", "x"],
        coords={
            "y": (["y"], h.y.data[1:]),
            "x": (["x"], h.x.data),
            "lon": (["y", "x"], lon[1:, :]),
            "lat": (["y", "x"], lat[1:, :]),
        },
    )
    return dx, dy


def get_dhdx(h: xr.DataArray, center: xr.DataArray) -> xr.DataArray:
    """Get the spatial derivative in the x direction at a single gridcell.

    Uses a three-point finite difference for an unevenly spaced grid,
    built from the gridcell before ``center``, ``center`` itself, and the
    gridcell after it along x.

    Parameters
    ----------
    h : xr.DataArray
        Data on WRF grid. Must include the x-neighbours of ``center``;
        neighbours are looked up with nearest-neighbour selection.
    center : xr.DataArray
        Single center point extracted from h dataset.

    Returns
    -------
    xr.DataArray
        Derivative of h with respect to x
    """
    delta_x, _ = deltas(h)
    nominal_spacing = 45000.0  # WRF projection

    # Neighbouring gridcells one nominal grid step either side of the center
    back_one = h.sel(
        x=(center.x.data - nominal_spacing), y=center.y.data, method="nearest"
    )
    forward_one = h.sel(
        x=(center.x.data + nominal_spacing), y=center.y.data, method="nearest"
    )

    # delta coordinates are for terminus point of delta, so the delta stored
    # at the center is the back->center step and the delta stored one step
    # forward is the center->forward step.
    diff_one = delta_x.sel(x=center.x.data, y=center.y.data, method="nearest")
    diff_two = delta_x.sel(
        x=center.x.data + nominal_spacing, y=center.y.data, method="nearest"
    )

    # Derivative on an uneven grid (same form as the y-direction derivative)
    derivative = (
        (-diff_two) / ((diff_one + diff_two) * diff_one) * back_one
        + (diff_two - diff_one) / (diff_one * diff_two) * center
        + (diff_one) / ((diff_one + diff_two) * diff_two) * forward_one
    )
    return derivative


def get_dhdy(h: xr.DataArray, center: xr.DataArray) -> xr.DataArray:
    """Get the spatial derivative in the y direction at a single gridcell.

    Parameters
    ----------
    h : xr.DataArray
        Data on WRF grid
    center : xr.DataArray
        Single center point extracted from h dataset.

    Returns
    -------
    xr.DataArray
        Derivative of h with respect to y
    """
    _, delta_y = deltas(h)
    nominal_spacing = 45000.0  # WRF projection
    x0 = center.x.data
    y0 = center.y.data

    # Gridcells one nominal step before and after the center along y
    h_prev = h.sel(x=x0, y=(y0 - nominal_spacing), method="nearest")
    h_next = h.sel(x=x0, y=(y0 + nominal_spacing), method="nearest")

    # Each delta is stored at the terminus point of its step
    step_in = delta_y.sel(x=x0, y=y0, method="nearest")
    step_out = delta_y.sel(x=x0, y=y0 + nominal_spacing, method="nearest")

    # Three-point weights for a derivative on an uneven grid, mimicking the
    # metpy first derivative.
    weight_prev = (-step_out) / ((step_in + step_out) * step_in)
    weight_center = (step_out - step_in) / (step_in * step_out)
    weight_next = (step_in) / ((step_in + step_out) * step_out)

    return weight_prev * h_prev + weight_center * center + weight_next * h_next


def _get_rotated_geostrophic_wind(u, v, point, gridlabel):
    """Rotate grid-relative wind components to earth-relative components.

    Parameters
    ----------
    u, v : xr.DataArray
        Wind components relative to the WRF grid.
    point : tuple[float]
        Location forwarded to ``get_closest_gridcell`` as
        ``(point[0], point[1])``.
    gridlabel : str
        Grid label selecting the angle file ``wrf_angles_<gridlabel>.zarr``,
        which holds SINALPHA and COSALPHA for the WRF grid.

    Returns
    -------
    tuple
        ``(Uearth, Vearth)``, named "u" and "v".
    """
    # The angle file depends on the data resolution
    angles_url = f"s3://cadcat/tmp/era/wrf/wrf_angles_{gridlabel}.zarr/"
    angles = xr.open_zarr(angles_url, storage_options={"anon": True})
    angles_at_point = get_closest_gridcell(angles, point[0], point[1])
    sin_a = angles_at_point.SINALPHA
    cos_a = angles_at_point.COSALPHA

    # Rotate the wind components
    Uearth = u * cos_a - v * sin_a
    Vearth = v * cos_a + u * sin_a

    Uearth.name = "u"
    Vearth.name = "v"
    return Uearth, Vearth

def geostrophic_wind_single_point(
    geopotential_height: xr.DataArray, point: tuple[float, float]
) -> tuple[xr.DataArray, xr.DataArray]:
    """Compute the earth-relative geostrophic wind at the gridcell nearest a point.

    The grid-relative components are ``u = -(g/f) dh/dy`` and
    ``v = (g/f) dh/dx``, with the Coriolis parameter ``f`` evaluated at the
    latitude of the selected gridcell. They are then rotated to
    earth-relative components using the d01 WRF grid angles.

    Parameters
    ----------
    geopotential_height : xr.DataArray
        Geopotential height on the WRF grid. It is loaded into memory to
        extract the center gridcell.
    point : tuple[float, float]
        Location handed to ``get_closest_gridcell`` as
        ``(point[0], point[1])``, i.e. (latitude, longitude).

    Returns
    -------
    tuple[xr.DataArray, xr.DataArray]
        ``(u, v)`` earth-relative geostrophic wind components.
    """
    center = get_closest_gridcell(geopotential_height.compute(), point[0], point[1])

    lat_rad = center.lat.data * np.pi / 180
    omega = 7292115e-11  # rad/s
    g = 9.81  # m/s2
    f = 2 * omega * np.sin(lat_rad)
    norm_factor = g / f

    dhdx = get_dhdx(geopotential_height, center)
    dhdy = get_dhdy(geopotential_height, center)

    # These components are u and v on the WRF grid
    geo_u, geo_v = -norm_factor * dhdy, norm_factor * dhdx

    # Rotate these components to an earth-relative E/W orientation
    geo_u_earth, geo_v_earth = _get_rotated_geostrophic_wind(
        geo_u, geo_v, (point[0], point[1]), "d01"
    )

    geo_u_earth.name = "u"
    geo_u_earth.attrs["long_name"] = "Geostrophic Wind U Component"
    geo_v_earth.name = "v"
    geo_v_earth.attrs["long_name"] = "Geostrophic Wind V Component"

    # Callers unpack the two components
    return geo_u_earth, geo_v_earth


First, the dataset is clipped to a large box around the points of interest. This helps with memory management.

In [None]:
# The geostrophic wind calculation needs a smaller area, so build a
# bounding box around the area of interest. Corners are (lat, lon).
corner_ur = (35.996934928227034, -114.96313953879522)  # upper right
corner_ll = (32.00137436601684, -121.07369629580487)  # lower left

lat_min, lon_min = corner_ll
lat_max, lon_max = corner_ur
bounding_box = box(minx=lon_min, miny=lat_min, maxx=lon_max, maxy=lat_max)

geom = gpd.GeoDataFrame(geometry=[bounding_box], crs=pyproj.CRS.from_epsg(4326))

# Clip to the box, then re-attach the CRS of the unclipped dataset.
# NOTE(review): this relies on ds_wl already carrying a CRS — confirm.
clip_shapes = geom.geometry.apply(mapping)
ds_wl_clipped = ds_wl.rio.clip(clip_shapes, geom.crs, drop=True, all_touched=True)
ds_wl_clipped = ds_wl_clipped.rio.write_crs(ds_wl.rio.crs)

Then the geostrophic wind is calculated for a point in between the points of interest. The `get_closest_gridcell` function will print the latitude and longitude of the selected gridpoint. Verify that the same coordinates are picked each time.

In [None]:
geopotential_height = ds_wl_clipped["geopotential_height"]

# Midpoint between the two points of interest, kept in the same
# (latitude, longitude) order as the points themselves. For a two-point
# segment this equals the centroid, without swapping axis order to
# shapely's (x, y) convention.
point = (
    (gradient_point_1[0] + gradient_point_2[0]) / 2,
    (gradient_point_1[1] + gradient_point_2[1]) / 2,
)

# Get the geostrophic wind components at the gridcell nearest this midpoint
geo_wind_u_earth, geo_wind_v_earth = geostrophic_wind_single_point(
    geopotential_height, point
)

print("Done")

Select a pressure level before computing the magnitude and direction of the geostrophic wind. Valid levels are 1000, 925, 850, 700, 500, 300, or 200.

In [None]:
level = 850
# Use the component names assigned when the geostrophic wind was computed
geo_u_level = geo_wind_u_earth.sel(pressure_level=level)
geo_v_level = geo_wind_v_earth.sel(pressure_level=level)

# Magnitude
magnitude = np.sqrt(geo_u_level**2 + geo_v_level**2)
magnitude.name = "magnitude"
magnitude.attrs["long_name"] = "Geostrophic Wind Magnitude"
magnitude.attrs["units"] = "m/s"

# Direction
direction = compute_wind_dir(geo_u_level, geo_v_level)
direction.name = "wind_direction"
direction.attrs["long_name"] = "Geostrophic Wind Direction"
# Units added by compute_wind_dir

Here are some visualizations of one year of data for each of these variables.

In [None]:
# First 365 * 24 hourly steps of the baseline warming level
magnitude.sel(warming_level=baseline_gwl).isel(time=slice(0, 365 * 24)).plot()
# Title follows the pattern of the temperature plots (level, variable, warming level)
plt.title(f"Hourly {level} hPa geostrophic wind magnitude, WL {baseline_gwl} year 1")

In [None]:
# First 365 * 24 hourly steps of the baseline warming level
first_year = slice(0, 365 * 24)
baseline_direction = direction.sel(warming_level=baseline_gwl)
baseline_direction.isel(time=first_year).plot()

Finally, export the timeseries for this level to file.

In [None]:
# Write magnitude and direction to separate local NetCDF files named by level
for wind_field, short_name in ((magnitude, "mag"), (direction, "dir")):
    export(
        wind_field,
        f"geostrophic_wind_{short_name}_{level}hPa.nc",
        format="NetCDF",
        mode="local",
    )

# Sea Level Pressure

Sea level pressure is computed using surface pressure, air temperature, and mixing ratio. Then the difference between two points is calculated and saved to file.

### Setup
Choose the time frequency in the cell below. This section will use the warming levels and locations selected in the "Setup" section at the top of the notebook.

In [None]:
# Used as the catalog table_id for the sea level pressure inputs and read by
# get_slp_at_point: "1hr" takes the hourly branch, any other value the daily one.
frequency = "1hr"  # 1hr or day

This function will handle the sea level pressure calculation for the selected gridpoints.

In [None]:
def get_slp_at_point(
    psfc: xr.DataArray,
    t2: xr.DataArray,
    q2: xr.DataArray,
    elevation: xr.DataArray,
    point: tuple[float, float],
) -> xr.DataArray:
    """Extract point of interest and return sea level pressure at that point.

    This function uses the default settings for `compute_sea_level_pressure`
    including for lapse rate. It reads the notebook-level ``frequency``
    variable: ``"1hr"`` uses the default call, any other value passes
    ``average_t2=False``.

    Parameters
    ----------
    psfc : xr.DataArray
        Surface pressure in Pascals
    t2 : xr.DataArray
        Surface air temperature in Kelvin
    q2 : xr.DataArray
        Surface mixing ratio
    elevation : xr.DataArray
        Elevation in meters
    point : tuple[float, float]
        Location handed to ``get_closest_gridcell`` as
        ``(point[0], point[1])``, i.e. (latitude, longitude).

    Returns
    -------
    xr.DataArray
        Sea level pressure in Hectopascals with a dummy time axis
    """

    def _at_point(da: xr.DataArray) -> xr.DataArray:
        # Attach the WRF CRS, then take the gridcell closest to the point
        return get_closest_gridcell(da.rio.write_crs(WRF_CRS), point[0], point[1])

    # Extract data at point of interest
    psfc_pt = _at_point(psfc)
    t2_pt = _at_point(t2)
    q2_pt = _at_point(q2)
    elev_pt = _at_point(elevation)

    hourly = frequency == "1hr"
    if hourly:
        slp_pt = compute_sea_level_pressure(psfc_pt, t2_pt, q2_pt, elev_pt)
    else:
        slp_pt = compute_sea_level_pressure(
            psfc_pt, t2_pt, q2_pt, elev_pt, average_t2=False
        )

    # Convert to hPa. Arithmetic drops attrs under xarray's default
    # keep_attrs setting, so keep them explicitly and set the frequency
    # attribute after the conversion rather than before it.
    with xr.set_options(keep_attrs=True):
        slp_pt = slp_pt / 100.0
    slp_pt.attrs["units"] = "hPa"
    slp_pt.attrs["frequency"] = "hourly" if hourly else "daily"

    # Add dummy time axis
    slp_pt = add_dummy_time_to_wl(slp_pt)

    return slp_pt

Access the three variables (surface pressure, air temperature, and mixing ratio) we need to get sea level pressure. Results are clipped to the two points of interest. This cell will take several minutes to run.

In [None]:
import climakitae as ck

cd = ck.ClimateData(verbosity=-1)


def _get_wrf_variable(variable_id):
    """Query one WRF/UCLA d03 variable at the two points for both warming levels."""
    return (
        cd.catalog("cadcat")
        .activity_id("WRF")
        .institution_id("UCLA")
        .table_id(frequency)
        .grid_label("d03")
        .variable(variable_id)
        .processes(
            {
                "clip": [gradient_point_1, gradient_point_2],
                "warming_level": {
                    "warming_levels": [baseline_gwl, future_gwl],
                    "warming_level_window": 5,  # TODO: normal window in final version
                },
            }
        )
        .get()
    )


# The three queries run in the same order as before: pressure, temperature, mixing ratio
psfc = _get_wrf_variable("psfc")  # Surface pressure
t2 = _get_wrf_variable("t2")  # Air temperature
q2 = _get_wrf_variable("q2")  # Mixing ratio

Get the elevation data for the d03 grid.

In [None]:
# Open the elevation file anonymously from S3 and read it with h5netcdf
elevation_url = "s3://cadcat/wrf/cae/elevation_wrf.nc"
fs = s3fs.S3FileSystem(anon=True)
elevation_file = fs.open(elevation_url)
elevation = xr.open_dataset(elevation_file, engine="h5netcdf")

Calculate the sea level pressure at each point and get the difference.

In [None]:
# Sea level pressure at each point of interest, from the same four inputs
slp_inputs = (psfc["psfc"], t2["t2"], q2["q2"], elevation["elevation"])
slp_pt_1 = get_slp_at_point(*slp_inputs, gradient_point_1)
slp_pt_2 = get_slp_at_point(*slp_inputs, gradient_point_2)

# Point 1 minus point 2
slp_dif = slp_pt_1 - slp_pt_2

# Wrap in a dataset under the variable name "slp_gradient"
slp_ds = slp_dif.to_dataset(name="slp_gradient")

This figure demonstrates the resulting gradient for a single model using one year (the fourth) of hourly data from the baseline warming level.

In [None]:
# Steps 365*24*3 up to 365*24*4 of the time axis: one 365-day year when the
# data are hourly
fourth_year = slice(365 * 24 * 3, 365 * 24 * 4)
slp_gradient = slp_ds["slp_gradient"]
slp_gradient.isel(sim=0, warming_level=0, time=fourth_year).plot()

Run this cell to export the difference timeseries to file.

In [None]:
# Write the sea level pressure gradient dataset to a local NetCDF file
export(slp_ds, "slp_gradient.nc", format="NetCDF", mode="local")