In [None]:
import xarray as xr
import numpy as np
import scipy.stats
import src.evt
import metpy.calc
import metpy.units
import matplotlib.pyplot as plt
import seaborn as sns
import os.path
import cartopy.crs as ccrs
import matplotlib.patches as mpatches
import cmocean
import os
import time
import pathlib

## plotting style: white axes background, grid lines switched off
sns.set(
    rc={
        "axes.grid": False,
        "axes.facecolor": "white",
    }
)

#### Functions

In [None]:
def load_whoi_data():
    """Open the pre-computed dataset originally obtained from WHOI's data server.

    Returns whatever ``xr.open_dataset`` yields for ``../data/whoi_data_US.nc``
    (the path is relative to the current working directory).
    """

    ## pre-computed file; an earlier version combined the raw files instead:
    # xr.open_mfdataset("../data/*ure.nc").compute()
    whoi_path = "../data/whoi_data_US.nc"

    return xr.open_dataset(whoi_path)


def plot_setup_simple(fig, projection, lon_range, lat_range):
    """Create a map subplot on `fig` and return its Axes.

    Args:
        fig: figure that receives the new subplot.
        projection: map projection handed to ``fig.add_subplot``.
        lon_range: longitude limits, interpreted in PlateCarree coordinates.
        lat_range: latitude limits, interpreted in PlateCarree coordinates.

    Returns:
        The new Axes, cropped to the requested region with coastlines drawn.
    """

    ## new subplot in the requested projection
    map_ax = fig.add_subplot(projection=projection)

    ## crop to the region: longitude limits first, then latitude limits
    region = list(lon_range) + list(lat_range)
    map_ax.set_extent(region, crs=ccrs.PlateCarree())

    ## thin coastlines
    map_ax.coastlines(linewidths=0.5)

    return map_ax


def plot_setup_US(fig):
    """Set up a map of the region 230-290E, 15-60N with Bartusek's box marked.

    Uses an orthographic projection centered on 255E, 35N and outlines the
    20-degree by 20-degree box whose lower-left corner is (-130, 40) in
    lon/lat. Returns the Axes produced by `plot_setup_simple`.
    """

    ## Orthographic projection centered on the plotted region
    # (alternative tried previously: ccrs.PlateCarree(central_longitude=240))
    ortho = ccrs.Orthographic(central_longitude=255, central_latitude=35)

    ## shrink the projection's private threshold by a factor of 1000
    ## (presumably so lon/lat-straight edges render as smooth curves -- confirm)
    ortho._threshold /= 1000

    ax = plot_setup_simple(fig, ortho, lon_range=[230, 290], lat_range=[15, 60])

    ## Outline Bartusek's box (unfilled, magenta edge, drawn on top)
    box = mpatches.Rectangle(
        xy=[-130, 40],
        width=20,
        height=20,
        facecolor="none",
        edgecolor="magenta",
        transform=ccrs.PlateCarree(),
        zorder=10,
    )
    ax.add_patch(box)

    return ax


def make_cb_range(amp, delta):
    """Make symmetric colorbar levels (zero excluded) for cmo.balance.

    Levels are the integer multiples of `delta` with magnitude between
    `delta` and `amp`, mirrored about zero, e.g. ``make_cb_range(1, 0.25)``
    gives ``[-1, -0.75, -0.5, -0.25, 0.25, 0.5, 0.75, 1]``.

    Args:
        amp: largest magnitude allowed for a level.
        delta: spacing between adjacent levels; must be positive.

    Returns:
        1D np.ndarray of levels in increasing order, symmetric about zero.

    Raises:
        ValueError: if `delta` is not positive.
    """
    if delta <= 0:
        raise ValueError("delta must be positive")

    ## Number of levels on each side of zero. The small tolerance guards
    ## against amp / delta landing just below an integer (0.3 / 0.1 = 2.99...).
    n_levels = int(np.floor(amp / delta + 1e-9))

    ## Build both halves from the same integer multiples of delta. Calling
    ## np.arange with a float step can return one element too many on one
    ## side (e.g. np.arange(0.1, 0.4, 0.1) includes 0.4), which breaks symmetry.
    positive = delta * np.arange(1, n_levels + 1)

    return np.concatenate([-positive[::-1], positive])


def get_mse(data):
    """Compute moist static energy from `data`.

    Reads the "geopotential", "temperature" and "specific_humidity" entries
    of `data`, attaches units (m^2/s^2, kelvin and kg/kg respectively) and
    returns the result of ``metpy.calc.moist_static_energy``.
    """

    units = metpy.units.units

    ## attach units to temperature and humidity
    temperature = data["temperature"] * units.kelvin
    specific_humidity = data["specific_humidity"] * units("kg/kg")

    ## convert geopotential to height
    geopotential = data["geopotential"] * units("m^2/s^2")
    height = metpy.calc.geopotential_to_height(geopotential)

    return metpy.calc.moist_static_energy(
        height=height,
        temperature=temperature,
        specific_humidity=specific_humidity,
    )

# Load and prep data

In [None]:
## load data, drop un-needed vars and strip any attached units
data = (
    load_whoi_data()
    .drop_vars(["d2m", "sp"])
    .metpy.dequantify()
)

## annual maximum of every variable, and the matching year coordinate
data_annual_max = data.groupby("time.year").max()
year = data_annual_max["year"]

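Function to estimate the return period of the maximum event, using a fit to all years and a leave-one-out (LOO) fit that excludes the maximum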

In [105]:
def get_tr_max(X):
    """Estimate the return period of the maximum value, w/ and w/o LOO training.

    Two GEV models (``scipy.stats.genextreme``) are fit with
    ``src.evt.fit_model``: one to all of `X`, and one to `X` with its maximum
    removed ("leave-one-out", LOO). For each model, the return period whose
    return level lies closest to the largest empirical return level is
    looked up on a fixed grid of 100 log-spaced return periods.

    Args:
        X: 1D np.array of values (here: one value per year).

    Returns:
        A complex number packing both estimates into one scalar (so the
        function can be used with ``xr.apply_ufunc(..., vectorize=True)``):
        the real part is the estimate from the full fit and the imaginary
        part is the estimate from the LOO fit.
    """

    ## Parameter bounds for the fit.
    ## NOTE(review): the lower bound on `scale` is negative -- confirm that
    ## src.evt.fit_model tolerates this.
    bounds = dict(c=[-2, 2], loc=[200, 400], scale=[-1e2, 1e2])

    ## "Leave-one-out" version of the data: drop the first occurrence of the
    ## maximum (np.argmax is evaluated once rather than once per element)
    X_LOO = np.delete(X, np.argmax(X))

    ## fit models
    kwargs = dict(model_class=scipy.stats.genextreme, bounds=bounds)
    model = src.evt.fit_model(X, **kwargs)
    model_LOO = src.evt.fit_model(X_LOO, **kwargs)

    ## Get return levels for each model on a shared grid of return periods
    return_periods = np.logspace(0.01, 5, 100)
    Xr, tr = src.evt.get_return_levels(model, return_periods=return_periods)
    Xr_LOO, _ = src.evt.get_return_levels(model_LOO, return_periods=return_periods)

    ## Empirical return levels; only the last entry is used below
    ## (presumably the largest observed value -- verify against src.evt)
    _, Xr_empirical = src.evt.get_empirical_return_period(X)
    X_max = Xr_empirical[-1]

    ## Estimated return time for the max event: nearest grid point for each
    ## model. `tr` is indexed in both cases because both models were
    ## evaluated at the same return periods.
    tr_max = tr[np.argmin(np.abs(X_max - Xr))]
    tr_max_LOO = tr[np.argmin(np.abs(X_max - Xr_LOO))]

    return tr_max + 1j * tr_max_LOO

#### Do the computation (slow!)

In [None]:
def _compute_and_save_tr_max(varname, label, filename, out_dir):
    """Apply `get_tr_max` along "year" for one variable, save it and return it.

    Args:
        varname: name of the variable in `data_annual_max` to process.
        label: name shown in the progress message.
        filename: name of the netCDF file written inside `out_dir`.
        out_dir: pathlib.Path of the output directory.

    Returns:
        The result of ``xr.apply_ufunc``; `get_tr_max` returns complex
        scalars (real part: full fit, imaginary part: LOO fit).
    """
    print(f"Computing for {label}...")
    t_start = time.time()
    tr_max = xr.apply_ufunc(
        get_tr_max,
        data_annual_max[varname],
        input_core_dims=[["year"]],
        vectorize=True,
    )
    t_end = time.time()
    print(f"{t_end-t_start:.2f}")

    ## save to file
    ## NOTE(review): the values are complex -- confirm the netCDF backend in
    ## use can serialise complex dtypes.
    tr_max.to_netcdf(out_dir / filename)

    return tr_max


## Resolve the output directory *before* the slow computations, so an unset
## DATA_FP fails immediately instead of after the results are computed
tr_max_out_dir = pathlib.Path(os.environ["DATA_FP"])

tr_max_t2m = _compute_and_save_tr_max("t2m", "T2m", "tr_max_T2m.nc", tr_max_out_dir)
tr_max_mse = _compute_and_save_tr_max("mse", "mse", "tr_max_mse.nc", tr_max_out_dir)

Computing for T2m...
