In [1]:
import numpy as np
from sklearn.linear_model import LinearRegression
import xarray as xr
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('default')
sns.set_palette("colorblind")
from matplotlib import rcParams
rcParams['font.family'] = 'sans-serif'
rcParams['font.weight'] = 'light'
rcParams['mathtext.fontset'] = 'cm'
rcParams['mathtext.rm'] = 'serif'
mpl.rcParams["figure.dpi"] = 500
import cartopy.crs as ccrs
import cartopy as ct
import matplotlib.colors as c
import regionmask
import cmasher as cmr
import scipy
from cartopy.util import add_cyclic_point
mpl.rcParams['hatch.linewidth'] = 0.375
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
from geocat.comp import eofunc_eofs, eofunc_pcs
from datetime import datetime
import warnings
from matplotlib.patches import Rectangle
import pdo_functions
import importlib
importlib.reload(pdo_functions)
import random
import numba
import statsmodels.api as sm
from sklearn.metrics import r2_score
from scipy.stats import pearsonr
import pandas as pd

In [2]:
# Open the ERA5 daily-maximum MUCAPE files and join them along the 'year' dimension
cape = xr.open_mfdataset(
    '/hurrell-scratch2/ivyglade/pdo/wrf_cape/era5_daily_max_mucape_*s.nc',
    combine='nested',
    concat_dim='year',
)['__xarray_dataarray_variable__']

In [3]:
# Open the ERA5 S06 files and join them along the 'time' dimension
s06 = xr.open_mfdataset(
    '/hurrell-scratch2/ivyglade/pdo/ERA5_S06/era5_s06_*s.nc',
    combine='nested',
    concat_dim='time',
)['__xarray_dataarray_variable__']

In [4]:
# Open the NDSEV file (single dataset; the path indicates MAM, 1940-2024)
ndsev = xr.open_dataset(
    '/hurrell-scratch2/ivyglade/pdo/ndsev/diff_trapp_ndsev_mam_1940-2024.nc'
)['__xarray_dataarray_variable__']

In [9]:
# Restrict every field to the CONUS box: 125W-66W, 49N-24N.
# Longitudes go through pdo_functions.convert_longitudes (presumably -180..180 -> 0..360; confirm in the helper).
# The latitude slice runs from 49 down to 24, i.e. it assumes latitude is stored north-to-south.
conus_lon = slice(pdo_functions.convert_longitudes(-125), pdo_functions.convert_longitudes(-66))
conus_lat = slice(49, 24)

cape_conus = cape.sel(longitude=conus_lon, latitude=conus_lat)
s06_conus = s06.sel(longitude=conus_lon, latitude=conus_lat)
ndsev_conus = ndsev.sel(longitude=conus_lon, latitude=conus_lat)

In [6]:
# Total NDSEV per March-May season: one 2-D field for each of the 85 years starting in 1940.
# Despite its name, ndsev_monthly holds seasonal sums (92 consecutive daily fields per year),
# not monthly values.
# NOTE(review): assumes the time axis holds exactly 92 MAM days per year, back to back - confirm.
ndsev_monthly = np.zeros((85, 101, 237))  # (year, latitude, longitude)
for i in range(85):
    # Pull one season of daily fields into memory ...
    season_days = slice(i*92, (i+1)*92)
    daily_fields = ndsev_conus.isel(time=season_days).transpose('time', 'latitude', 'longitude').values

    # ... and add them up over the season
    ndsev_monthly[i] = daily_fields.sum(axis=0)

    # Progress
    print(f'{1940+i} is complete.')

1940 is complete.
1941 is complete.
1942 is complete.
1943 is complete.
1944 is complete.
1945 is complete.
1946 is complete.
1947 is complete.
1948 is complete.
1949 is complete.
1950 is complete.
1951 is complete.
1952 is complete.
1953 is complete.
1954 is complete.
1955 is complete.
1956 is complete.
1957 is complete.
1958 is complete.
1959 is complete.
1960 is complete.
1961 is complete.
1962 is complete.
1963 is complete.
1964 is complete.
1965 is complete.
1966 is complete.
1967 is complete.
1968 is complete.
1969 is complete.
1970 is complete.
1971 is complete.
1972 is complete.
1973 is complete.
1974 is complete.
1975 is complete.
1976 is complete.
1977 is complete.
1978 is complete.
1979 is complete.
1980 is complete.
1981 is complete.
1982 is complete.
1983 is complete.
1984 is complete.
1985 is complete.
1986 is complete.
1987 is complete.
1988 is complete.
1989 is complete.
1990 is complete.
1991 is complete.
1992 is complete.
1993 is complete.
1994 is complete.
1995 is co

In [12]:
# Collapse the separate (year, date) dimensions of the CAPE array into a single datetime 'time' axis.
cape_conus_stack = cape_conus.stack(time=('year', 'date'))

# Build one "<year>-<date>" string per (year, date) pair. indexing='ij' followed by ravel()
# yields year-major order, which is expected to match the order produced by stack(('year', 'date')).
year_bcast, date_bcast = np.meshgrid(cape_conus['year'].values, cape_conus['date'].values, indexing='ij')

year_strs = year_bcast.astype(str)
date_strs = date_bcast.astype(str)

full_date_strs = np.char.add(np.char.add(year_strs, '-'), date_strs)

time = pd.to_datetime(full_date_strs.ravel())

# stack() leaves 'time' as a MultiIndex with levels 'year' and 'date'. Overwriting that coordinate
# directly makes xarray warn (and is slated to become an error), so drop the MultiIndex and its
# levels explicitly before attaching the plain datetime coordinate.
cape_conus_stack = (
    cape_conus_stack
    .drop_vars(['time', 'year', 'date'])
    .assign_coords(time=('time', time))
)

    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array.reshape(shape)

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    >>> array.reshape(shape, limit='128 MiB')
  return func(*args, **kwargs)
  cape_conus_stack = cape_conus_stack.assign_coords(time=('time', time))


In [13]:
# Keep March-May samples only, then average within each calendar year ('YE' = year-end bins).
# The result is one MAM-mean field per year (not a monthly series); all-NaN time steps are
# dropped and the data are loaded into memory.
cape_is_mam = cape_conus_stack.time.dt.month.isin([3, 4, 5])
cape_conus_mam = (
    cape_conus_stack
    .sel(time=cape_is_mam)
    .resample(time='YE')
    .mean()
    .dropna(dim='time')
    .load()
)

s06_is_mam = s06_conus.time.dt.month.isin([3, 4, 5])
s06_conus_mam = (
    s06_conus
    .sel(time=s06_is_mam)
    .resample(time='YE')
    .mean()
    .dropna(dim='time')
    .load()
)

In [14]:
# Wrap the NumPy seasonal NDSEV totals in a DataArray, borrowing the time/latitude/longitude
# coordinates of the seasonal CAPE field so both share one grid and time axis.
ndsev_conus_mam = xr.DataArray(
    ndsev_monthly,
    dims=['time', 'latitude', 'longitude'],
    coords={
        'time': cape_conus_mam['time'],
        'latitude': cape_conus_mam['latitude'],
        'longitude': cape_conus_mam['longitude'],
    },
)

In [15]:
# Detrend each field along 'time' via pdo_functions.detrend_dim with degree argument 2
# (a quadratic fit, per the original note; the helper's internals are not visible here).
cape_conus_mam_detrend, s06_conus_mam_detrend, ndsev_conus_mam_detrend = (
    pdo_functions.detrend_dim(field, 'time', 2)
    for field in (cape_conus_mam, s06_conus_mam, ndsev_conus_mam)
)

In [16]:
# Remove the mean seasonal cycle: subtract, from every sample, the time-mean of its calendar month.
# The subtraction used to be commented out, which left the *_anoms names bound to bare GroupBy
# objects; those have no .latitude / .shape and cannot be indexed as [:, i, j] by later cells.
# NOTE(review): if the inputs are yearly values stamped at year end (resample(time='YE')), every
# sample falls in the same month group and this simply removes the overall time mean.
cape_conus_mam_anoms = (
    cape_conus_mam_detrend.groupby('time.month')
    - cape_conus_mam_detrend.groupby('time.month').mean()
)
s06_conus_mam_anoms = (
    s06_conus_mam_detrend.groupby('time.month')
    - s06_conus_mam_detrend.groupby('time.month').mean()
)
ndsev_conus_mam_anoms = (
    ndsev_conus_mam_detrend.groupby('time.month')
    - ndsev_conus_mam_detrend.groupby('time.month').mean()
)

In [17]:
# HadISST sea-surface temperature field
sst = xr.open_dataset('/hurrell-scratch2/ivyglade/pdo/HadISST_sst.nc')['sst']

# PDO and Nino3.4 indices from the project helpers. Both receive 1980 and 2010 as year arguments
# (presumably the climatological base period - confirm in pdo_functions).
pdo, nino_34 = (
    pdo_functions.pdo_from_hadisst(sst, 1980, 2010),
    pdo_functions.calc_nino_34_timeseries(sst, False, 1980, 2010),
)

In [18]:
# Z-score the Nino3.4 series: remove its mean, then divide by its standard deviation
nino_34_centered = nino_34 - nino_34.mean()
nino_34_std = nino_34_centered / nino_34.std()

In [20]:
# Put the PDO index on the same time axis as the Nino3.4 series
pdo_xr = xr.DataArray(pdo, dims=['time'], coords={'time': nino_34['time']})

# Keep the years 1940-2024 ...
pdo_in_period = pdo_xr.time.dt.year.isin(np.arange(1940, 2025, 1))
pdo_1940_2024 = pdo_xr.sel(time=pdo_in_period)

# ... and, within those, only March, April and May
pdo_in_mam = pdo_1940_2024.time.dt.month.isin([3, 4, 5])
pdo_1940_2024_mam = pdo_1940_2024.sel(time=pdo_in_mam)
# NOTE(review): this keeps every selected time step of the index (no yearly averaging; the
# .resample(time='YE').mean() step is disabled), whereas the CAPE/S06/NDSEV fields are reduced
# to one value per year. Confirm both sides share a time axis before regressing.

In [21]:
# 3-month centred running mean of the Nino3.4 series
nino_34_rolling = nino_34.rolling(time=3, center=True).mean()

# ENSO state per time step: +1 when the running mean has exceeded +0.5 for five consecutive
# steps ending at that step, -1 when it has been below -0.5 for five consecutive steps, else 0.
# The series length is taken from the data (it was hard-coded to 1860) and the five-step test
# is vectorised instead of indexing the DataArray element by element.
ENSO_THRESHOLD = 0.5
ENSO_RUN_LENGTH = 5

rolling_vals = nino_34_rolling.values
n_steps = rolling_vals.size
nino_34_calc = np.zeros(n_steps)

if n_steps > ENSO_RUN_LENGTH:
    # Window k covers steps k .. k+4, so it "ends" at step k+4. NaNs compare False and
    # therefore can never be part of a qualifying run.
    warm_runs = np.lib.stride_tricks.sliding_window_view(
        rolling_vals > ENSO_THRESHOLD, ENSO_RUN_LENGTH
    ).all(axis=1)
    cold_runs = np.lib.stride_tricks.sliding_window_view(
        rolling_vals < -ENSO_THRESHOLD, ENSO_RUN_LENGTH
    ).all(axis=1)

    # As before, classification starts at step index 5, hence the first window is skipped.
    nino_34_calc[ENSO_RUN_LENGTH:][warm_runs[1:]] = 1
    nino_34_calc[ENSO_RUN_LENGTH:][cold_runs[1:]] = -1

# convert nino_34_calc to xr
nino_34_calc_xr = xr.DataArray(nino_34_calc, coords={'time':sst['time']}, dims=['time'])

# 1940-2024 only, then March-May only
nino_34_1940_2024 = nino_34_calc_xr.sel(time=nino_34_calc_xr.time.dt.year.isin(np.arange(1940, 2025, 1)))
nino_34_1940_2024_mam = nino_34_1940_2024.sel(time=nino_34_1940_2024.time.dt.month.isin([3, 4, 5]))

In [None]:
# Grid dimensions used to size the regression outputs. They are read from the NDSEV field
# (the variable that is regressed below), which carries explicit latitude/longitude dimensions;
# cape_conus_mam_anoms is not a safe source because it may be a GroupBy object without .latitude.
lat_len = ndsev_conus_mam.sizes['latitude']
lon_len = ndsev_conus_mam.sizes['longitude']

In [None]:
# Sanity check: display the shape of the NDSEV anomalies (expected order: time, latitude, longitude - confirm)
np.shape(ndsev_conus_mam_anoms)

In [None]:
# Grid-point multiple linear regression of the NDSEV anomalies on two predictors
# (column 0: PDO index, column 1: ENSO state), plus the squared partial correlation of the
# response with each predictor after the other predictor has been regressed out.
coef       = np.full((lat_len, lon_len, 2), np.nan)  # slopes: [..., 0] PDO, [..., 1] Nino
intercept  = np.full((lat_len, lon_len), np.nan)
r2_partial = np.full((lat_len, lon_len, 2), np.nan)  # squared partial correlation per predictor
r2_total   = np.full((lat_len, lon_len), np.nan)     # R^2 of the two-predictor model

model = LinearRegression()


def _residuals(target, predictor):
    """Return `target` minus its least-squares linear fit on the 1-D array `predictor`."""
    design = predictor.reshape(-1, 1)
    fit = LinearRegression().fit(design, target)
    return target - fit.predict(design)


def _partial_r2(y, x, control):
    """Squared partial correlation between `y` and `x` with `control` regressed out of both.

    Returns NaN when either residual series is constant, since the correlation is then undefined.
    """
    resid_y = _residuals(y, control)
    resid_x = _residuals(x, control)
    if np.std(resid_y) > 0 and np.std(resid_x) > 0:
        r, _ = pearsonr(resid_y, resid_x)
        return r ** 2
    return np.nan


# Loop invariants: the predictor matrix and its NaN mask do not depend on the grid point.
X_full = np.column_stack([pdo_1940_2024_mam.values, nino_34_1940_2024_mam.values])
predictors_ok = ~np.isnan(X_full).any(axis=1)

# One NumPy copy of the response up front; indexing the DataArray per grid point is far slower.
# Indexed as [time, latitude, longitude].
ndsev_vals = ndsev_conus_mam_anoms.values

if ndsev_vals.shape[0] != X_full.shape[0]:
    raise ValueError(
        f'Predictors have {X_full.shape[0]} time steps but the response has {ndsev_vals.shape[0]}; '
        'put the PDO/Nino3.4 series and the NDSEV anomalies on the same time axis before regressing.'
    )

for i in range(lat_len):
    for j in range(lon_len):
        # Named `y`, not `cape`: the old name overwrote the CAPE dataset opened at the top of the notebook.
        y = ndsev_vals[:, i, j]

        valid = predictors_ok & ~np.isnan(y)

        # Need at least three complete samples to fit two slopes and an intercept
        if np.sum(valid) < 3:
            continue

        X_valid = X_full[valid]
        y_valid = y[valid]

        # Full two-predictor model
        model.fit(X_valid, y_valid)
        r2_total[i, j] = model.score(X_valid, y_valid)
        coef[i, j, :] = model.coef_
        intercept[i, j] = model.intercept_

        # Partial R^2 for PDO (Nino held fixed) and for Nino (PDO held fixed)
        r2_partial[i, j, 0] = _partial_r2(y_valid, X_valid[:, 0], X_valid[:, 1])
        r2_partial[i, j, 1] = _partial_r2(y_valid, X_valid[:, 1], X_valid[:, 0])

    print(f'latitude {i+1} out of {lat_len} is complete.')

In [None]:
# Split the slope array (latitude, longitude, predictor) into one 2-D map per predictor
pdo_coef = coef[..., 0]
nino_coef = coef[..., 1]

In [None]:
# Split the partial r^2 array (latitude, longitude, predictor) into one 2-D map per predictor
pdo_r_2 = r2_partial[..., 0]
nino_r_2 = r2_partial[..., 1]

In [None]:
# Flag positive regression slopes: positive values become 1, everything else
# (negative, zero, NaN) keeps its original value.
pdo_pos_coef = pdo_coef.copy()
pdo_pos_coef[pdo_coef > 0] = 1

nino_pos_coef = nino_coef.copy()
nino_pos_coef[nino_coef > 0] = 1

In [None]:
# Sign map of the slopes: -1 where the slope is negative, +1 everywhere else
# (zero and NaN slopes also get +1).
pdo_coef_sign = 1 - 2 * (pdo_pos_coef < 0)
nino_coef_sign = 1 - 2 * (nino_pos_coef < 0)

In [None]:
# Signed correlation: magnitude from sqrt(r^2), sign from the matching regression slope
pdo_r = np.multiply(pdo_coef_sign, np.sqrt(pdo_r_2))
nino_r = np.multiply(nino_coef_sign, np.sqrt(nino_r_2))

In [None]:
# t-statistic of each correlation: t = r * sqrt(dof) / sqrt(1 - r^2), with dof = n - 2.
# n is the length of the predictor series rather than a hard-coded 255, so it follows any
# change in how the indices are sampled.
# NOTE(review): pdo_r / nino_r are partial correlations with one controlled variable; the
# textbook degrees of freedom for that case are n - 3. Left at n - 2 to match the p-value cell.
n_samples = pdo_1940_2024_mam.size
dof = n_samples - 2

pdo_t = pdo_r * (dof**0.5) / ((1 - pdo_r**2)**0.5)
nino_t = nino_r * (dof**0.5) / ((1 - nino_r**2)**0.5)

In [None]:
# Two-sided p-values from the Student-t distribution with dof = n - 2, where n is the length
# of the predictor series (previously hard-coded as 255).
# The survival function sf(|t|) equals 1 - cdf(|t|) but does not lose precision for large |t|.
dof = pdo_1940_2024_mam.size - 2

pdo_p = 2 * scipy.stats.t.sf(np.abs(pdo_t), dof)
nino_p = 2 * scipy.stats.t.sf(np.abs(nino_t), dof)

In [None]:
# Adjusted p-value thresholds controlling the False Discovery Rate.
# The grid size passed to control_FDR comes from the p-value maps themselves
# (latitude x longitude) instead of the hard-coded 101 x 237.
n_lat, n_lon = pdo_p.shape

pdo_adj_p = pdo_functions.control_FDR(pdo_p, n_lat, n_lon)
nino_adj_p = pdo_functions.control_FDR(nino_p, n_lat, n_lon)

In [None]:
# Significance flag per grid point: 3 where the p-value is below the FDR-adjusted threshold, 0 elsewhere
pdo_sig = 3 * (pdo_p < pdo_adj_p)
nino_sig = 3 * (nino_p < nino_adj_p)

In [None]:
# Land mask on the CAPE grid from Natural Earth 1:110m land polygons: 1 over land, NaN elsewhere.
# Multiplying a field by era5_land therefore blanks out non-land points.
land_110 = regionmask.defined_regions.natural_earth_v4_1_0.land_110
land_mask_3d = land_110.mask_3D(cape_conus)
era5_land = xr.where(land_mask_3d, 1, np.nan).squeeze()

In [None]:
# Two-panel CONUS map of the multiple-regression slopes (left: PDO, right: Nino3.4), land points only.
fig, ax = plt.subplots(1, 2, subplot_kw=dict(projection=ccrs.AlbersEqualArea(central_longitude=-97, central_latitude=36.5)))

# Discrete colour levels, with a narrow band around zero
bounds = [-1, -0.9, -0.8, -0.7, -0.6, -0.5, -0.4, -0.3, -0.2, -0.1, -0.025, 0.025, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
norm = c.BoundaryNorm(bounds, plt.get_cmap('cmr.fusion_r').N)

lon = cape_conus['longitude']
lat = cape_conus['latitude']

for panel, slope_map, panel_title in zip(ax, (pdo_coef, nino_coef), ('PDO', 'Nino3.4')):
    # Base map: coastlines, frame and state borders
    panel.coastlines(lw=0.25, color='xkcd:gunmetal')
    panel.spines['geo'].set_linewidth(.25)
    panel.spines['geo'].set_edgecolor('xkcd:gunmetal')
    panel.add_feature(ct.feature.STATES, lw=0.25, edgecolor='xkcd:gunmetal')

    # Slope field, masked to land
    panel.pcolormesh(lon, lat, slope_map*era5_land, transform=ccrs.PlateCarree(), shading='auto', norm=norm, cmap='cmr.fusion_r')
    panel.set_title(panel_title)

# Shared horizontal colour bar underneath both panels
cax = fig.add_axes([0.1, .2, 0.8, 0.02])
cbar = fig.colorbar(
    mpl.cm.ScalarMappable(cmap='cmr.fusion_r', norm=norm),
    cax=cax,
    orientation='horizontal',
    spacing='proportional',
    extend='both',
    ticks=[-1, -0.8, -0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6, 0.8, 1],
)
# NOTE(review): the label says "per month", but the NDSEV response is a per-season (MAM) total - confirm the units.
cbar.set_label(r'days month$^{-1}$ standard deviation$^{-1}$', size=10, fontweight='normal', color='black')
cbar.ax.tick_params(which='both', labelsize=10, width=0.5, length=0, labelcolor='black')
cbar.outline.set_linewidth(0.5)
cbar.outline.set_color('black')

fig.subplots_adjust(left=0, bottom=0.25, right=1., top=0.62, wspace=0.05, hspace=0.05)

In [None]:
# Two-panel CONUS map of the signed partial correlations (left: PDO, right: Nino3.4), land points only.
fig, ax = plt.subplots(1, 2, subplot_kw=dict(projection=ccrs.AlbersEqualArea(central_longitude=-97, central_latitude=36.5)))

# Discrete colour levels, finer near zero
bounds = [-0.42, -0.36, -0.3, -0.24, -0.18, -0.15, -0.12, -0.09, -0.06, -0.045, -0.03, -0.015, 0.015, 0.03, 0.045, 0.06, 0.09, 0.12, 0.15, 0.18, 0.24, 0.3, 0.36, 0.42]

norm = c.BoundaryNorm(bounds, plt.get_cmap('cmr.fusion_r').N)

lon = cape_conus['longitude']
lat = cape_conus['latitude']

for panel, r_map, panel_title in zip(ax, (pdo_r, nino_r), ('PDO', 'Nino3.4')):
    # Base map: coastlines, state borders and frame
    panel.coastlines(lw=0.25, color='xkcd:gunmetal')
    panel.add_feature(ct.feature.STATES, edgecolor='xkcd:gunmetal', lw=0.25)
    panel.spines['geo'].set_linewidth(0.25)
    panel.spines['geo'].set_edgecolor('xkcd:gunmetal')

    # Correlation field, masked to land
    panel.pcolormesh(lon, lat, r_map*era5_land, transform=ccrs.PlateCarree(), cmap='cmr.fusion_r', norm=norm)
    panel.set_title(panel_title)

# Shared horizontal colour bar underneath both panels
cax = fig.add_axes([0.1, .2, 0.8, 0.02])
cbar = fig.colorbar(
    mpl.cm.ScalarMappable(cmap='cmr.fusion_r', norm=norm),
    cax=cax,
    orientation='horizontal',
    spacing='proportional',
    extend='both',
    ticks=[-0.42, -0.3, -0.18, -0.09, 0, 0.09, 0.18, 0.3, 0.42],
)
cbar.set_label(r'correlation (r)', size=10, fontweight='normal', color='black')
cbar.ax.tick_params(which='both', labelsize=10, width=0.5, length=0, labelcolor='black')
cbar.outline.set_linewidth(0.5)
cbar.outline.set_color('black')

fig.subplots_adjust(left=0, bottom=0.25, right=1., top=0.62, wspace=0.05, hspace=0.05)

In [None]:
# Start from the full diverging colormap ...
full_cmap = cmr.fusion_r

# ... and keep only its upper half (positions 0.5-1.0), resampled to 256 colours, as a sequential map
upper_half_colors = full_cmap(np.linspace(0.5, 1, 256))
redside = LinearSegmentedColormap.from_list('fusion_r_red', upper_half_colors)

In [None]:
# Two-panel CONUS map of the partial r^2 (left: PDO, right: Nino3.4), land points only,
# overlaid with the significance field drawn as hatching.
fig, ax = plt.subplots(1, 2, subplot_kw=dict(projection=ccrs.AlbersEqualArea(central_longitude=-97, central_latitude=36.5)))

bounds = np.arange(0, 0.105, 0.005)
norm = c.BoundaryNorm(bounds, redside.N)

lon = cape_conus['longitude']
lat = cape_conus['latitude']
hatch_styles = [None, '\\\\\\\\\\\\\\\\\\']

panel_inputs = zip(ax, (pdo_r_2, nino_r_2), (pdo_sig, nino_sig), ('PDO', 'Nino3.4'))
for panel, r2_map, sig_map, panel_title in panel_inputs:
    # Base map: coastlines, frame and state borders
    panel.coastlines(lw=0.25, color='xkcd:gunmetal')
    panel.spines['geo'].set_linewidth(0.25)
    panel.spines['geo'].set_edgecolor('xkcd:gunmetal')
    panel.add_feature(ct.feature.STATES, edgecolor='xkcd:gunmetal', lw=0.25)

    # Partial r^2, masked to land
    panel.pcolormesh(lon, lat, r2_map*era5_land, transform=ccrs.PlateCarree(), shading='auto', norm=norm, cmap=redside)

    # Fully transparent filled contours of the significance flag; only the hatching is visible
    panel.contourf(lon, lat, sig_map*era5_land, transform=ccrs.PlateCarree(), hatches=hatch_styles, colors=None, alpha=0)

    panel.set_title(panel_title)

# Shared horizontal colour bar underneath both panels
cax = fig.add_axes([0.1, .2, 0.8, 0.02])
cbar = fig.colorbar(
    mpl.cm.ScalarMappable(cmap=redside, norm=norm),
    cax=cax,
    orientation='horizontal',
    spacing='proportional',
    extend='max',
    ticks=[0, 0.02, 0.04, 0.06, 0.08, 0.1],
)
cbar.set_label(r'r$^{2}$', size=10, fontweight='normal', color='black')
cbar.ax.tick_params(which='both', labelsize=10, width=0.5, length=0, labelcolor='black')
cbar.outline.set_linewidth(0.5)
cbar.outline.set_color('black')

fig.subplots_adjust(left=0, bottom=0.25, right=1., top=0.62, wspace=0.05, hspace=0.05)