## Demonstration of Herbie for easily getting NWP/AIFS/etc. datasets

[![Latest release](https://badgen.net/github/release/Naereen/Strapdown.js)](https://github.com/eabarnes1010/ai_weather_to_climate_ats780A8/tree/main/lecture_code)
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eabarnes1010/ai_weather_to_climate_ats780A8/blob/main/lecture_code/herbie_examples.ipynb)

In [None]:
from herbie import Herbie

import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

### these are plotting functions built in to Herbie, they are optional to use here
from herbie import paint
from herbie.toolbox import EasyMap, pc

### get AIFS forecast grids

In [None]:
# Model initialization time: 00:00 on 15 October 2024.
init = pd.Timestamp(year=2024, month=10, day=15, hour=0)

# Forecast lead time in hours; start with the 72-h forecast as an example.
fxx = 72

# Herbie handle for the AIFS "oper" product at this initialization and lead time.
H = Herbie(
    init.strftime("%Y-%m-%d %H:%M"),
    model="aifs",
    product="oper",
    fxx=fxx,
)

#### you can look at the full inventory this way, though it's a little cumbersome

In [None]:
# With no search string, this lists the full inventory for the AIFS file selected above.
H.inventory()

#### better if you have an idea of what level or variable you want to look at:

In [None]:
# Passing a search string narrows the inventory to the variable/level of interest
# (here the "z" field at level 500).
H.inventory(":z:500")

### if you want to make a really quick map, the built-in functions in Herbie make that really easy. This reads the data into xarray, and plots it

#### this will temporarily download the needed file to $HOME/data/aifs, and then delete it. If you don't want it deleted, you can add "remove_grib=False" when calling xarray below

In [None]:
# Read the "z" field at level 500 into an xarray Dataset.
ds = H.xarray(":z:500")

# Quick-look map: draw the field on the axes of a Herbie EasyMap, with a
# horizontal colorbar underneath.
ds["z"].plot(
    x="longitude",
    y="latitude",
    ax=EasyMap().ax,
    transform=pc,
    cbar_kwargs={"orientation": "horizontal", "shrink": 0.9, "pad": 0.01},
)

# Last expression in the cell, so the notebook displays the Dataset summary.
ds

### It turns out that the AIFS output is geopotential rather than geopotential height. We'll fix that and make a more traditional plot over North America

In [None]:
# Coordinate reference system for plain longitude/latitude data.
datacrs = ccrs.PlateCarree()

# Map projection used for the figures below: Lambert conformal centred on
# longitude -100, latitude 37.5.
plotcrs = ccrs.LambertConformal(central_longitude=-100, central_latitude=37.5)

def plot_background(ax, lonmin=-118.7, lonmax=-83.75, latmin=27.75, latmax=48.25):
    """Set the map extent and draw coastlines and state borders on ``ax``.

    Parameters
    ----------
    ax : map axes
        Axes to decorate. It must provide ``set_extent``, ``coastlines`` and
        ``add_feature``; the notebook passes axes created with
        ``projection=plotcrs``.
    lonmin, lonmax, latmin, latmax : float, optional
        Edges of the bounding box handed to ``ax.set_extent`` (presumably
        degrees of longitude/latitude). The defaults are the fixed box the
        notebook originally hard-coded, so ``plot_background(ax)`` behaves
        exactly as before.

    Returns
    -------
    tuple
        ``(lonmin, lonmax, latmin, latmax)`` as applied, so callers can reuse
        the bounds.
    """
    # No explicit crs is passed, so the axes interpret the box with their
    # default handling for set_extent.
    ax.set_extent([lonmin, lonmax, latmin, latmax])
    ax.coastlines('50m', edgecolor='black', linewidth=0.9)
    ax.add_feature(cfeature.STATES.with_scale('10m'), linewidth=1.2)

    return lonmin, lonmax, latmin, latmax

In [None]:
fig = plt.figure(figsize=(16, 8.6))
ax = fig.add_subplot(1, 1, 1, projection=plotcrs)

lonmin, lonmax, latmin, latmax = plot_background(ax)

# Valid time comes from the dataset itself; the lead time (whole hours) is
# recomputed from it so the title reflects the data actually loaded.
vtime = pd.to_datetime(ds.valid_time.values)
fcst_lead = int((vtime - init) / pd.Timedelta('1 hour'))

print(f"lead {fcst_lead}, valid {vtime}")

lon2d, lat2d = np.meshgrid(ds.longitude, ds.latitude)

# AIFS provides geopotential; dividing by 9.80665 converts it to geopotential
# height. Compute the field and the contour levels once and share them
# between the filled and line contours.
z500_height = ds.z / 9.80665
height_levels = np.arange(4800, 6060, 60)  # 4800, 4860, ..., 6000

### height
cf = ax.contourf(lon2d, lat2d, z500_height, height_levels,
                 extend='both',
                 transform=datacrs,
                 transform_first=True)
cb = plt.colorbar(cf, shrink=0.9, pad=0.01)

cs1 = ax.contour(lon2d, lat2d, z500_height, height_levels,
                 colors='black', linewidths=2,
                 transform=datacrs,
                 transform_first=True)
plt.clabel(cs1, fontsize=11, inline=True, inline_spacing=5, fmt='%i',
           rightside_up=True, use_clabeltext=True)

# Left title names the model/field; right title gives the run and valid times.
time_fmt = "%H%M UTC %a %d %b %Y"
ax.set_title("ECMWF AIFS\n500-hPa geopotential height",
             loc="left", horizontalalignment='left', fontsize=10.5, fontweight='bold')
ax.set_title(f"initialized {init.strftime(time_fmt)}\n"
             f"{fcst_lead}-h forecast valid {vtime.strftime(time_fmt)}",
             loc="right", horizontalalignment='right', fontsize=9.5)

plt.show()

plt.close('all')

### now we could also easily get the IFS forecast valid at the same time, and see how they differ

In [None]:
# Same initialization time and lead time as the AIFS request, but for the
# IFS "oper" product.
H_ifs = Herbie(
    init.strftime("%Y-%m-%d %H:%M"),
    model="ifs",
    product="oper",
    fxx=fxx,
)

#### unlike AIFS, the IFS gives geopotential height rather than geopotential

In [None]:
# Read the IFS "gh" field at level 500 into an xarray Dataset.
ds_ifs = H_ifs.xarray(":gh:500")
# Last expression in the cell, so the notebook displays the Dataset summary.
ds_ifs

### plot map

In [None]:
fig = plt.figure(figsize=(16, 8.6))
ax = fig.add_subplot(1, 1, 1, projection=plotcrs)

lonmin, lonmax, latmin, latmax = plot_background(ax)

# Valid time comes from the dataset itself; the lead time (whole hours) is
# recomputed from it so the title reflects the data actually loaded.
vtime = pd.to_datetime(ds_ifs.valid_time.values)
fcst_lead = int((vtime - init) / pd.Timedelta('1 hour'))

print(f"lead {fcst_lead}, valid {vtime}")

lon2d, lat2d = np.meshgrid(ds_ifs.longitude, ds_ifs.latitude)

# The IFS "gh" field is plotted as-is (no division by 9.80665, unlike AIFS).
# Field and contour levels are named once and shared by both contour calls.
z500_height = ds_ifs.gh
height_levels = np.arange(4800, 6060, 60)  # 4800, 4860, ..., 6000

### height
cf = ax.contourf(lon2d, lat2d, z500_height, height_levels,
                 extend='both',
                 transform=datacrs,
                 transform_first=True)
cb = plt.colorbar(cf, shrink=0.9, pad=0.01)

cs1 = ax.contour(lon2d, lat2d, z500_height, height_levels,
                 colors='black', linewidths=2,
                 transform=datacrs,
                 transform_first=True)
# NOTE(review): unlike the AIFS and GFS maps, contour labels (plt.clabel on
# cs1) are switched off for this panel -- confirm whether that is deliberate.

# Left title names the model/field; right title gives the run and valid times.
time_fmt = "%H%M UTC %a %d %b %Y"
ax.set_title("ECMWF IFS\n500-hPa geopotential height",
             loc="left", horizontalalignment='left', fontsize=10.5, fontweight='bold')
ax.set_title(f"initialized {init.strftime(time_fmt)}\n"
             f"{fcst_lead}-h forecast valid {vtime.strftime(time_fmt)}",
             loc="right", horizontalalignment='right', fontsize=9.5)

plt.show()

plt.close('all')

### now let's look at the difference

In [None]:
# AIFS supplies geopotential, so divide by 9.80665 to get geopotential height
# before subtracting the IFS height field.
z500_diff = (ds["z"] / 9.80665) - ds_ifs["gh"]

# Last expression in the cell, so the notebook displays the result.
z500_diff

In [None]:
fig = plt.figure(figsize=(16, 8.6))
ax = fig.add_subplot(1, 1, 1, projection=plotcrs)

lonmin, lonmax, latmin, latmax = plot_background(ax)

# Valid time comes from the difference field itself; the lead time (whole
# hours) is recomputed from it for the title.
vtime = pd.to_datetime(z500_diff.valid_time.values)
fcst_lead = int((vtime - init) / pd.Timedelta('1 hour'))

print(f"lead {fcst_lead}, valid {vtime}")

lon2d, lat2d = np.meshgrid(z500_diff.longitude, z500_diff.latitude)

### height difference (AIFS minus IFS)
# Levels are symmetric about zero so the blue-white-red colormap is centred
# on "no difference".
diff_levels = np.arange(-60, 66, 6)  # -60, -54, ..., 60
cf = ax.contourf(lon2d, lat2d, z500_diff, diff_levels,
                 extend='both',
                 cmap='bwr',
                 transform=datacrs,
                 transform_first=True)
cb = plt.colorbar(cf, shrink=0.9, pad=0.01)

# Left title names the field; right title gives the run and valid times.
time_fmt = "%H%M UTC %a %d %b %Y"
ax.set_title("ECMWF AIFS minus IFS\n500-hPa geopotential height",
             loc="left", horizontalalignment='left', fontsize=10.5, fontweight='bold')
ax.set_title(f"initialized {init.strftime(time_fmt)}\n"
             f"{fcst_lead}-h forecast valid {vtime.strftime(time_fmt)}",
             loc="right", horizontalalignment='right', fontsize=9.5)

plt.show()

plt.close('all')

### you could get just about any model from Herbie this same way, just check the herbie docs for what the "products" are. Ensembles too!

#### NOAA and ECMWF models do have slightly different names for their variables as well

In [None]:
# GFS: the search string uses "HGT" where the ECMWF requests used "gh"/"z".
H_gfs = Herbie(
    init.strftime("%Y-%m-%d %H:%M"),
    model="gfs",
    product="pgrb2.0p25",
    fxx=fxx,
)
ds_gfs = H_gfs.xarray(":HGT:500")

# NAM: same initialization time, lead time and search string, different
# model/product.
H_nam = Herbie(
    init.strftime("%Y-%m-%d %H:%M"),
    model="nam",
    product="awphys",
    fxx=fxx,
)
ds_nam = H_nam.xarray(":HGT:500")


In [None]:
fig = plt.figure(figsize=(16, 8.6))
ax = fig.add_subplot(1, 1, 1, projection=plotcrs)

lonmin, lonmax, latmin, latmax = plot_background(ax)

# Valid time comes from the dataset itself; the lead time (whole hours) is
# recomputed from it so the title reflects the data actually loaded.
vtime = pd.to_datetime(ds_gfs.valid_time.values)
fcst_lead = int((vtime - init) / pd.Timedelta('1 hour'))

print(f"lead {fcst_lead}, valid {vtime}")

lon2d, lat2d = np.meshgrid(ds_gfs.longitude, ds_gfs.latitude)

# The field was requested as "HGT" but is read from the dataset as "gh".
# Field and contour levels are named once and shared by both contour calls.
z500_height = ds_gfs.gh
height_levels = np.arange(4800, 6060, 60)  # 4800, 4860, ..., 6000

### height
cf = ax.contourf(lon2d, lat2d, z500_height, height_levels,
                 extend='both',
                 transform=datacrs,
                 transform_first=True)
cb = plt.colorbar(cf, shrink=0.9, pad=0.01)

cs1 = ax.contour(lon2d, lat2d, z500_height, height_levels,
                 colors='black', linewidths=2,
                 transform=datacrs,
                 transform_first=True)
plt.clabel(cs1, fontsize=11, inline=True, inline_spacing=5, fmt='%i',
           rightside_up=True, use_clabeltext=True)

# Left title names the model/field; right title gives the run and valid times.
time_fmt = "%H%M UTC %a %d %b %Y"
ax.set_title("NCEP GFS\n500-hPa geopotential height",
             loc="left", horizontalalignment='left', fontsize=10.5, fontweight='bold')
ax.set_title(f"initialized {init.strftime(time_fmt)}\n"
             f"{fcst_lead}-h forecast valid {vtime.strftime(time_fmt)}",
             loc="right", horizontalalignment='right', fontsize=9.5)

plt.show()

plt.close('all')

### now what gets really cool is FastHerbie, which uses multiprocessing to get lots of files at once (https://herbie.readthedocs.io/en/stable/user_guide/tutorial/fast.html) 

In [None]:
from herbie import FastHerbie

### get two initializations, one day apart (00:00 on 14 and 15 Oct 2024)
# "1D" is the canonical pandas alias for a one-day frequency; the lowercase
# spelling is deprecated in recent pandas releases.
DATES = pd.date_range(start="2024-10-14 00:00", periods=2, freq="1D")

# Create a range of forecast lead times: every 6 h from 0 through 126 h
# (the stop value 132 is exclusive).
# NOTE: this rebinds `fxx`, which held the single 72-h lead time earlier in
# the notebook; re-run that cell before re-running the single-forecast cells.
fxx = range(0, 132, 6)

In [None]:
# Make the FastHerbie object: IFS "oper" product for every initialization in
# DATES at every lead time in fxx.
FH = FastHerbie(DATES, model="ifs", product="oper", fxx=fxx)
# Last expression in the cell, so the notebook displays FH.objects.
FH.objects

In [None]:
# Get 500-hPa height for everything in FH as one dataset!
# remove_grib=False keeps the downloaded GRIB files instead of deleting them.
# NOTE: this rebinds `ds`, which earlier cells use for the single AIFS
# forecast; re-run the AIFS cell before re-running the earlier plots.
ds = FH.xarray(":gh:500:", remove_grib=False)


In [None]:
# Display the dataset returned by FH.xarray above.
ds