In [20]:
from datetime import datetime, timedelta, timezone

import cartopy
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
from awips.dataaccess import DataAccessLayer
from dynamicserialize.dstypes.com.raytheon.uf.common.time import TimeRange
from metpy.calc import lat_lon_grid_deltas
from pyproj import Proj, transform
from scipy.interpolate import griddata
from scipy.linalg import inv
from scipy.optimize import minimize
from siphon.catalog import TDSCatalog

# Set the time window (timezone-aware)
start_time = datetime.now(timezone.utc) - timedelta(days=1)

# Define THREDDS catalog and dataset for NCEP RAP CONUS
catalog_url = 'https://thredds.ucar.edu/thredds/catalog/grib/NCEP/RAP/CONUS_13km/catalog.xml'
catalog = TDSCatalog(catalog_url)
dataset = catalog.datasets[0]
ncep_data = dataset.remote_access(use_xarray=True)

# Load the temperature data from the prediction window
temperature_data = ncep_data.metpy.parse_cf('Temperature_isobaric')

# Get initial time point and subsequent prediction window
time = temperature_data.coords['time']
initial_time = time[0]
start_time = initial_time
prediction_window = time[-1] - time[0]

# Get lat/lon grid for interpolation
lon = temperature_data['x'].values
lat = temperature_data['y'].values
temp_data_initial = temperature_data.sel(time=initial_time)

# Use PyProj to get Lambert Conformal projection details
proj = Proj(proj='lcc', lat_1=38.5, lat_2=38.5, lat_0=38.5, lon_0=-97.5)

# Ingest METAR temperature observations from AWIPS using DataAccessLayer
DataAccessLayer.changeEDEXHost("edex-cloud.unidata.ucar.edu")
request = DataAccessLayer.newDataRequest()
request.setDatatype('obs')
request.setParameters('temperature')
request.setLocationNames('CONUS')

# Specify the request time for the first half of the prediction window
request.setStartTime(start_time + timedelta(hours=3))
response = DataAccessLayer.getGeometryData(request)

# Extract METAR data (lat/lon/temperature)
metar_lon = np.array([ob.getNumber('longitude') for ob in response])
metar_lat = np.array([ob.getNumber('latitude') for ob in response])
metar_temp = np.array([ob.getNumber('temperature') for ob in response])

# Transform METAR lat/lon to Lambert conformal coordinates for interpolation
x, y = transform(Proj(init='epsg:4326'), proj, metar_lon, metar_lat)

# Interpolate METAR data onto HRRR grid
hrrr_x, hrrr_y = np.meshgrid(lon, lat)
metar_temp_on_hrrr_grid = griddata((x, y), metar_temp, (hrrr_x, hrrr_y), method='linear')

# **Background Error Covariance (B) Estimation**
# - Use NMC method: compute differences between 24h and 48h forecast fields
B = np.eye(len(lon) * len(lat)) * 0.1  # For simplicity, actual method can be applied with data

# **Observation Error Covariance (R) Estimation**
# Assuming 0.5 degree standard deviation in observation error
R = np.eye(len(metar_temp)) * 0.25

# Calculate the innovation d = (y - H * x_b)
# x_b is the initial model state (background) temperature
x_b = temp_data_initial.values.flatten()  # Background state from initial temperature data
H = griddata((lon.flatten(), lat.flatten()), x_b, (metar_lon, metar_lat), method='linear')
y = metar_temp  # Observations from METAR data
d = y - H

# Set up the 4DVAR cost function
def cost_function(x, x_b, B, H, y, R):
    xb_diff = x - x_b
    hx_diff = H @ x - y
    return 0.5 * (xb_diff.T @ inv(B) @ xb_diff) + 0.5 * (hx_diff.T @ inv(R) @ hx_diff)

# Gradient of the cost function
def gradient(x, x_b, B, H, y, R):
    xb_diff = x - x_b
    hx_diff = H @ x - y
    return inv(B) @ xb_diff + H.T @ inv(R) @ hx_diff

# Solve the optimization problem
from scipy.optimize import minimize

x0 = x_b.copy()  # Initial guess is the background state
result = minimize(cost_function, x0, args=(x_b, B, H, y, R), jac=gradient, method='BFGS')

# The result of the optimization is the analysis state
x_analysis = result.x

# Reshape the analysis state back into the grid for plotting
analysis_temp_grid = x_analysis.reshape(lat.shape)

# Fetch METAR observations from the second half of the window
request.setStartTime(start_time + timedelta(hours=prediction_window.total_seconds() / 2))
response_second_half = DataAccessLayer.getGeometryData(request)
metar_temp_second_half = np.array([ob.getNumber('temperature') for ob in response_second_half])

# RMSE calculation
from sklearn.metrics import mean_squared_error
rmse_before = np.sqrt(mean_squared_error(metar_temp, H @ x_b))  # RMSE before 4DVAR
rmse_after = np.sqrt(mean_squared_error(metar_temp_second_half, H @ x_analysis))  # RMSE after 4DVAR

print(f"RMSE before 4DVAR: {rmse_before}")
print(f"RMSE after 4DVAR: {rmse_after}")

# Plot the analysis temperature field with Cartopy
fig, ax = plt.subplots(subplot_kw={'projection': ccrs.LambertConformal()})
ax.coastlines()
ax.add_feature(cartopy.feature.BORDERS)
ax.contourf(lon, lat, analysis_temp_grid, transform=ccrs.PlateCarree(), cmap='coolwarm')
plt.show()


KeyError: "no index found for coordinate 'time'"

In [24]:
temperature_data.where(time==temperature_data.time[0])

HTTPError: Error accessing https://thredds.ucar.edu/thredds/cdmremote/grib/NCEP/RAP/CONUS_13km/TwoD?req=data&var=%2FTemperature_isobaric
Server Error (403: Request Too Large: RequestTooLarge: Len greater that 100M )

In [9]:
temperature_data.coords['time']

In [17]:
time.sel(time=slice(start_time, prediction_window))

KeyError: "no index found for coordinate 'time'"

In [11]:
time

In [12]:
time

In [13]:
slice(start_time, start_time + prediction_window)

slice(datetime.datetime(2024, 9, 27, 17, 44, 26, 572477, tzinfo=datetime.timezone.utc), datetime.datetime(2024, 9, 27, 23, 44, 26, 572477, tzinfo=datetime.timezone.utc), None)

In [16]:
time.time[start_time]

TypeError: invalid indexer array, does not have integer dtype: array(datetime.datetime(2024, 9, 27, 17, 44, 26, 572477, tzinfo=datetime.timezone.utc),
      dtype=object)