In [3]:
# -------------------------------- Import necessary modules
%run read_redata.ipynb
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.basemap import Basemap
import matplotlib.ticker as ticker
from scipy import signal
import pandas as pd
import scipy.stats as ss
import seaborn as sns
import xesmf as xe
import xarray as xr
import sys
from datetime import datetime
from dateutil.relativedelta import relativedelta
from eofs.standard import Eof
from eofs.multivariate.standard import MultivariateEof


In [4]:
# Helper function to check if month is in NOV
def is_nov(month):
    """
    Test whether a month number is November.

    Parameters:
    -"month" is a month number, or any object that supports ``==`` against an
     integer (do_eof passes the ``time.month`` field of a DataArray).

    Returns:
    The result of comparing "month" with 11. For a plain integer this is a bool;
    for array-like inputs it is whatever ``==`` yields for that type.
    """
    november = 11
    matches = month == november
    return matches

In [5]:
# Helper function to check if month is in DEC
def is_dec(month):
    """
    Test whether a month number is December.

    Parameters:
    -"month" is a month number, or any object that supports ``==`` against an
     integer (do_eof passes the ``time.month`` field of a DataArray).

    Returns:
    The result of comparing "month" with 12. For a plain integer this is a bool;
    for array-like inputs it is whatever ``==`` yields for that type.
    """
    december = 12
    matches = month == december
    return matches

In [6]:
# Helper function to calculate standardized anomalies
def calc_std_anom(data):
    """
    Convert a time series into standardized anomalies, month by month.

    Parameters:
    -"data" is an object exposing ``groupby('time.month')`` with a "time"
     dimension (do_eof passes xarray DataArrays).

    Returns:
    The input with each calendar month's mean (over "time") subtracted and the
    result divided by that month's standard deviation (over "time").
    """
    month_key = 'time.month'

    def _standardize(values, clim_mean, clim_std):
        # (value - monthly mean) / monthly standard deviation
        return (values - clim_mean) / clim_std

    # Monthly climatology: one mean and one standard deviation per calendar month
    monthly_mean = data.groupby(month_key).mean(dim='time')
    monthly_std = data.groupby(month_key).std(dim='time')

    # Apply the standardization group-wise so each time step is paired with
    # the statistics of its own calendar month
    return xr.apply_ufunc(
        _standardize,
        data.groupby(month_key),
        monthly_mean,
        monthly_std,
    )

In [7]:
# Helper function to calculate composites by filtering by principal component (PC) values
def calc_comp(data, pcs, std_factor=0.5):
    """
    Composite (NaN-aware mean) of the time steps whose PC value is above a threshold.

    Parameters:
    -"data" is a 3-D array indexed as [time, :, :]; the first axis must have the
     same length as "pcs".
    -"pcs" is a 1-D array of principal component values, one per time step.
    -"std_factor" is the multiple of the standard deviation of "pcs" used as the
     selection threshold. The default of 0.5 reproduces the original hard-coded
     behavior (0.5 standard deviations, not 1).

    Returns:
    The mean over the selected time steps (axis 0), ignoring NaNs.
    """
    # Threshold is measured from zero, i.e. std_factor * std(pcs)
    threshold = np.std(pcs) * std_factor

    # Boolean mask over the time axis: True where the PC exceeds the threshold
    selected = pcs > threshold

    comp_p = np.nanmean(data[selected, :, :], axis=0)

    return comp_p

In [8]:
def calc_eof(lats, data, neofs, npcs, multi = False):
    """
    This function uses the Python eofs package to perform standard or multivariate EOF analysis
    on the given input data.
    
    Parameters:
    -"lats" refers to the array of latitude values (degrees) associated with the input data - should be
     the same for all input datasets
    -"data" is the data to be used in EOF analysis. For multi=False it is a single array passed to the
     Eof solver. For multi=True it is a list of two or more arrays passed to the MultivariateEof solver.
    -"neofs" refers to the number of EOFs (and variance fractions) to be returned
    -"npcs" is the number of PC time series to be returned 
    -"multi" should be set to "False" for standard and "True" for multivariate EOF

    Returns:
    A dictionary with:
    -"eofs" (standard) or "eofs1", "eofs2", ... (multivariate, one per input dataset): EOF spatial patterns
    -"pcs": PC values
    -"var": variance fraction for each of the first "neofs" EOFs
    -"north": output of the solver's northTest for the first "neofs" EOFs (vfscaled=True)

    Raises:
    ValueError if multi is True and fewer than two datasets are supplied.
    
    Author: Carolina Bieri (bieri2@illinois.edu)
    """
    
    # Validate up front so a bad call fails with a clear message instead of an
    # UnboundLocalError on the result dictionary
    if multi and len(data) < 2:
        raise ValueError(
            "Multivariate EOF analysis requires a list of at least 2 datasets, got {}".format(len(data)))
    
    # Define latitude weights: square root of cosine of latitude.
    # The trailing new axis makes the weights shape (nlat, 1) so they broadcast
    # along the last axis of the data (assumes data is [time, lat, lon] - confirm with caller).
    coslat = np.cos(np.deg2rad(lats))
    wgts   = np.sqrt(coslat)[..., np.newaxis]
        
    # EOF scaling = 1 means that eigenvectors are divided by the sq root of eigenvalue
    # EOF scaling = 2 means that eigenvectors are multiplied by the sq root of eigenvalue
    # See eofs package documentation: https://ajdawson.github.io/eofs/latest/index.html
    if multi:
        # Multivariate solver: the same latitude weights are applied to every dataset
        msolver  = MultivariateEof(data, weights = [wgts] * len(data))
        
        # One EOF array is returned per input dataset, in input order
        eof_list = msolver.eofs(neofs = neofs, eofscaling = 2)
        
        # Keys are "eofs1", "eofs2", ... matching the order of the input datasets
        calc = {"eofs{}".format(idx) : eof for idx, eof in enumerate(eof_list, start = 1)}
        calc["pcs"]   = msolver.pcs(npcs = npcs, pcscaling = 1)
        calc["var"]   = msolver.varianceFraction(neofs)
        calc["north"] = msolver.northTest(neofs, vfscaled = True)
            
    else:
        # Standard (single-field) solver
        solver = Eof(data, wgts)
        
        calc = {"eofs"  : solver.eofs(neofs = neofs, eofscaling = 2),
                "pcs"   : solver.pcs(npcs = npcs, pcscaling = 1),
                # Use the requested number of EOFs (was hard-coded to 10)
                "var"   : solver.varianceFraction(neofs),
                "north" : solver.northTest(neofs, vfscaled = True)}
        
    return calc

In [9]:
def plot_eof(lons, lats, eofs, neofs, vmin, vmax, var, sm = False, pcp = False, ticks = None):
    """
    This function will plot EOF output from the calc_eof function in a standard format.
    
    Parameters:
    -"lons" is the lon array corresponding to the input EOF arrays
    -"lats" is the lat array corresponding to the input EOF arrays 
    -"eofs" is an array containing the EOF patterns to be plotted, indexed as eofs[i, :, :]
    -"neofs" is the number of EOFs to be plotted 
    -"vmin" is the lowest value to be plotted
    -"vmax" is the highest values to be plotted
    -"var" is a sequence of variance fractions; var[i] is shown as a percentage in the title of EOF i+1
    -"sm" should be set to True if SM EOFs are being plotted
    -"pcp" should be set to True if precip EOFs are being plotted
    -"ticks" is an optional list of colorbar tick locations. Defaults to
     [-0.65, -0.35, 0, 0.35, 0.65], the values previously hard-coded.
    
    Returns:
    Nothing. One panel is created for each EOF and the figure is displayed with plt.show()
    before the function returns.
    """
    
    if ticks is None:
        ticks = [-0.65, -0.35, 0, 0.35, 0.65]
    
    # Everything below is identical for every panel, so it is computed once
    
    # Contour levels for the discrete colorbar (step of 0.1)
    bins = np.arange(vmin, vmax + 0.1, 0.1)
    
    # Colormap depends on the variable being plotted
    if sm:
        colors = ["#6c3811", "white", "#21A926"]
    elif pcp:
        colors = ["#6c3811", "white", "#179DC9"]
    else: 
        colors = ["#131DBF", "white", "#C91717"]
    cmap = matplotlib.colors.LinearSegmentedColormap.from_list("", colors)
    
    # 2-D coordinate grids for contouring
    lon_plt, lat_plt = np.meshgrid(np.asarray(lons), np.asarray(lats))
    
    # squeeze=False keeps axes 2-D so that neofs == 1 can be indexed like any other case
    fig, axes = plt.subplots(1, neofs, figsize = (8,4), squeeze = False)
    
    # Plot each EOF
    for i, ax in enumerate(axes[0]):
        # Create map covering the extent of the input coordinates
        bmap = Basemap(resolution = 'l', projection = 'cyl', llcrnrlon = min(lons), urcrnrlon = max(lons), 
                       llcrnrlat = min(lats), urcrnrlat = max(lats), lat_0 = 0, lon_0 = 0, ax = ax)
        
        # Project the coordinate grids into map coordinates
        xi, yi = bmap(lon_plt, lat_plt)
        
        # Plot the spatial pattern for the current EOF
        fill = bmap.contourf(xi, yi, eofs[i,:,:], levels=bins, cmap = cmap,vmin=vmin,vmax=vmax, extend='both')
    
        bmap.drawcoastlines()
        bmap.drawcountries()
        
        # Add a horizontal colorbar below the panel
        cb = fig.colorbar(fill, orientation='horizontal',ax=ax, pad=0.05, ticks=ticks, label='Normalized units')
        cb.ax.tick_params(labelsize=11)
        
        # Title: EOF number and percentage of variance explained
        ax.set_title('EOF {:1d} - {:2.2%}'.format(i+1, var[i]), fontsize=16)
    
    plt.show()

In [11]:
def plot_pc(time, pcs, ymin, ymax, yearbegin = 1980, yearend = 2016, lw = 3, std_factor = 0.5):
    """
    Function to plot principal component (PC) time series for EOFs. 
    
    Parameters:
    -"time" is an array which includes the time values (should be in years) to be plotted on the x-axis.
    -"pcs" is an array including the PC values to be plotted. pcs[:,0] should include the values for the first EOF
            while pcs[:,1] should include the values for the second EOF.
    -"ymin" is the minimum y-value to be plotted.
    -"ymax" is the maximum y-value to be plotted.
    -"yearbegin" is the minimum time value to be plotted.
    -"yearend" is the maximum time value to be plotted. 
    -"lw" is the line width to be used for the PC time series plots
    -"std_factor" is the multiple of each PC's standard deviation used for the dashed threshold
            line and for highlighting points. The default of 0.5 matches the previous hard-coded value.
    
    Returns:
    Nothing. Two PC time series panels are drawn, one corresponding to EOF 1 and the other to EOF 2,
    and the figure is displayed with plt.show() before the function returns.
    """
    
    # Boolean-mask indexing below requires an array
    time = np.asarray(time)
    
    # Create subplots that will have identical x axes 
    fig, axes = plt.subplots(2, 1, figsize = (8, 6), sharex = True)
    
    # (panel letter, line color) for EOF 1 and EOF 2
    panels = (('a', 'skyblue'), ('b', 'cornflowerblue'))

    for idx, (ax, (letter, color)) in enumerate(zip(axes, panels)):
        pc = pcs[:, idx]
        
        # Threshold is computed from THIS PC's own standard deviation, so the
        # highlighted points always agree with the dashed line on the same panel
        # (the EOF 2 scatter previously used the standard deviation of PC 1)
        threshold = np.std(pc) * std_factor
        above = pc > threshold
        
        ax.plot(time, pc, color = color, linewidth = lw)
        ax.set_title('{}) Principal component time series - EOF {}'.format(letter, idx + 1),
                     fontsize = 16, loc = 'left')
        ax.tick_params(axis = 'both', labelsize = 14)
        ax.set_xlim(yearbegin, yearend)
        ax.set_ylim(ymin, ymax)
        ax.set_ylabel('Normalized units')
        
        # Dashed horizontal line at the threshold (std_factor standard deviations above zero)
        ax.hlines(threshold, np.min(time), np.max(time), linestyles = 'dashed')
        # Highlight points above the threshold
        ax.scatter(time[above], pc[above], zorder = 10, color = 'black', s = 20)

    fig.tight_layout()
    # Extra vertical room between the two panels for the left-aligned titles
    fig.subplots_adjust(hspace = 0.6)
    plt.show()

In [12]:
def plot_comp(lon, lat, comp, title, cmap, eofnum, interval, ticks, filename,
              vmin, vmax, extend='both', label='Standardized anomaly'):
    """
    This function plots a spatial composite of reanalysis data. Should be used in conjunction with
    calc_comp. Input data should be in units of standardized anomalies. 
    
    Parameters:
    -lon: NumPy array. Contains the longitude values to be plotted.
    -lat: NumPy array. Contains the latitude values to be plotted. 
    -comp: NumPy array. Contains composite values returned from the calc_comp function. 
    -title: String. Desired title of the composite plot. 
    -cmap: Color map to be used (passed straight to contourf).
    -eofnum: Currently unused; kept so existing callers do not break.
    -interval: Float. The interval to be used when calculating bins for color bar.
    -ticks: Sequence. Specifies color bar tick locations.
    -filename: Name of output file, without extension ('.png' is appended).
    -vmin: Minimum value to be plotted on the color bar. Required (no default).
    -vmax: Maximum value to be plotted on the color bar. Required (no default).
    -extend: Passed to contourf; controls how out-of-range values are drawn. Default 'both'.
    -label: String. Color bar label. Default 'Standardized anomaly'.
    
    Returns:
    Nothing. Plots the composite, saves it to filename + '.png' at 300 dpi, then displays it.
    """
    
    # Contour levels used for the filled contours and the color bar
    bins = np.arange(vmin, vmax+interval, interval)

    # Create figure and add basemap covering the extent of the input coordinates
    fig, ax = plt.subplots(figsize = (8, 8))
    bmap = Basemap(resolution='l', projection='cyl', llcrnrlon=min(lon), urcrnrlon=max(lon), 
                   llcrnrlat=min(lat), urcrnrlat=max(lat), lat_0=0, lon_0=0, ax=ax)
    
    # Prepare coordinate data for plotting
    lon_plt, lat_plt = np.meshgrid(np.asarray(lon), np.asarray(lat))
    xi, yi = bmap(lon_plt, lat_plt)
    
    # Plot composite
    fill = bmap.contourf(xi, yi, comp, cmap = cmap, levels=bins,
                         vmin = vmin, vmax = vmax, extend=extend)

    bmap.drawcoastlines()
    bmap.drawcountries()
    
    # Add color bar
    cb = fig.colorbar(fill, ax = ax, orientation = 'horizontal', ticks = ticks, pad = 0.05)
    cb.set_label(label = label, fontsize = 24)
    cb.ax.tick_params(labelsize = 20)
    
    # Add title
    ax.set_title(title, fontsize = 26)
    
    # Save before showing: the figure may no longer be available after plt.show()
    fig.savefig(filename + '.png', dpi = 300)
    plt.show()

In [2]:
def do_eof(T2M, SM, pcp, resolution, beginyear, nyears, filename, merra2, LH=None,
           SH=None, meoflim1=-0.65, meoflim2=0.65, 
           clim1=-1.25,clim2=1.25, ancillary=True, opp=False):
    """
    This function makes use of helper functions to complete the necessary data processing steps and apply 
    multivariate EOF analysis to November soil moisture and December precipitation.
    
    Parameters:
    -T2M: xarray DataArray containing 2-m temp data. It is detrended and subset to November, but only its
          coordinates and number of time steps are used; it is not part of the EOF input. The caller's
          array is not modified.
    -SM: xarray DataArray containing soil moisture data.  
    -pcp: xarray DataArray containing precipitation data.  
    -resolution: String. Currently only the option "monthly" is supported.
    -beginyear: Int. Beginning year of input data. 
    -nyears: Int. Number of years included in data. 
    -filename: String. File name prefix to be used when saving figure files.
    -merra2: True or False. Set to True if MERRA-2 data are being used in EOF analysis (coordinates are
             then read from 'lon'/'lat' instead of 'longitude'/'latitude').
    -LH: xarray DataArray containing latent heat flux data. Required when ancillary = True.
    -SH: xarray DataArray containing sensible heat flux data. Required when ancillary = True.
    -meoflim1: Float. Lower bound for EOF plot colorbar.
    -meoflim2: Float. Upper bound for EOF plot colorbar. 
    -clim1: Float. Lower bound for EOF composite plot. 
    -clim2: Float. Upper bound for EOF composite plot. 
    -ancillary: True or False. Set to True if LH and SH composites should be generated. 
    -opp: True or False. Set to True to multiply EOF 2 patterns by -1. 
    
    Returns:
    Nothing. Generates multivariate EOF and PC plots. LH and SH composite plots can also be optionally
    generated. 
    
    Raises:
    ValueError if resolution is not "monthly", or if ancillary is True but LH or SH is missing.
    """
    
    # -------------------------------- Validate arguments
    # Fail early with a clear message instead of a NameError/AttributeError part-way through
    if resolution != 'monthly':
        raise ValueError('Only resolution="monthly" is supported, got {!r}'.format(resolution))
    if ancillary and (LH is None or SH is None):
        raise ValueError('LH and SH must both be provided when ancillary=True')
    
    import seaborn as sns
    sns.set_style("ticks")
    
    # -------------------------------- Calculate standardized anomalies
    SM  = calc_std_anom(SM)
    pcp = calc_std_anom(pcp)
    
    # -------------------------------- Detrend data
    
    # Remember where SM is NaN, then fill NaNs with a dummy value
    # This must be done in order to detrend using signal.detrend
    nan_mask_sm = np.isnan(SM.values)
    SM_temp_dt  = SM.fillna(0.0)
    
    # Detrend T2M, pcp, SM data along the time axis (axis 0)
    t2m_dt = signal.detrend(T2M.values, axis=0)
    pcp_dt = signal.detrend(pcp.values, axis=0)
    sm_dt  = signal.detrend(SM_temp_dt.values, axis=0)
    
    # Restore NaNs in SM array
    sm_dt[nan_mask_sm] = np.nan
    
    # Build new DataArrays holding the detrended values. Using copy(data=...)
    # instead of assigning to .values avoids modifying the caller's T2M in place.
    T2M = T2M.copy(data=t2m_dt)
    pcp = pcp.copy(data=pcp_dt)
    SM  = SM.copy(data=sm_dt)
    
    # -------------------------------- Select wanted time steps and lag data
    # November for the non-lagged fields, December for the lagged field (pcp)
    T2M = T2M.sel(time = is_nov(T2M['time.month']))
    SM  = SM.sel(time  = is_nov(SM['time.month']))
    pcp = pcp.sel(time = is_dec(pcp['time.month']))

    if ancillary:
        LH  = LH.sel(time = is_nov(LH['time.month']))
        SH  = SH.sel(time = is_nov(SH['time.month']))
        
    # Get lons and lats of domain (coordinate names differ between datasets)
    if merra2:
        lon_small = T2M.coords['lon'].values
        lat_small = T2M.coords['lat'].values
    else:
        lon_small = T2M.coords['longitude'].values
        lat_small = T2M.coords['latitude'].values
       
    # Number of selected (November) time steps
    ntime = T2M.sizes['time']
        
    # Time axis for the PC plot: ntime values evenly spaced from beginyear to beginyear + nyears
    # NOTE(review): both endpoints are included, so the spacing is exactly one year only when
    # nyears == ntime - 1 - confirm what callers pass for nyears.
    dates = np.linspace(0,nyears,ntime) + beginyear
        
    
    ###################################
    # Calculate multivariate EOFs for SM (November) and pcp (December)
    # Use detrended time series
    eofs = calc_eof(lat_small,[SM.values,pcp.values],10,2,multi=True)
    # Get EOF patterns for each variable
    eof_sm  = eofs['eofs1']
    eof_pcp = eofs['eofs2']
    
    # Print variance accounted for by each EOF
    print ('Variance')
    print(eofs['var'])
    # Print arrays with upper and lower bounds for North et al. (1982) test
    print ('North test bounds')
    print(eofs['north'])
    
    # Multiply EOF2 patterns by -1 if opp = True
    if opp:
        eof_sm[1,:,:]  = eof_sm[1,:,:]*-1
        eof_pcp[1,:,:] = eof_pcp[1,:,:]*-1
    
    # NOTE(review): plot_eof and plot_pc call plt.show() themselves. On backends that close the
    # figure on show (e.g. the inline notebook backend) the plt.savefig calls below may write an
    # empty canvas - confirm the saved files and, if needed, save inside the helpers instead.
    
    # Plot EOF 1 and 2 for SM
    plot_eof(lon_small,lat_small,eof_sm,2,meoflim1,meoflim2, sm=True, var=eofs['var'])
    plt.savefig(filename + 'sm.png', dpi=300)
    plt.show()
    
    # Plot EOF 1 and 2 for pcp
    plot_eof(lon_small,lat_small,eof_pcp,2,meoflim1,meoflim2, pcp=True, var=eofs['var'])
    plt.savefig(filename + 'pcp.png', dpi=300)
    plt.show()
    
    # Plot PCs
    plot_pc(dates,eofs['pcs'],ymin=-3.0,ymax=3.0)
    plt.savefig(filename + 'mpcs.pdf', dpi=300)
    plt.show()
    
    ###################################
    

    ###################################
    # -------------------------------- Calculate composites based on PC values
    if ancillary:
        # PCs for EOF 1 and EOF 2
        pc1 = eofs['pcs'][:,0]
        pc2 = eofs['pcs'][:,1]
        
        LH  = calc_std_anom(LH)
        SH  = calc_std_anom(SH)
        
        # (file suffix, title, EOF number, composite). EOF 2 composites are multiplied by -1,
        # as in the original code (this is independent of the "opp" flag).
        composites = (
            ('eof1_lh', 'EOF 1', 1, calc_comp(LH, pc1)),
            ('eof1_sh', 'EOF 1', 1, calc_comp(SH, pc1)),
            ('eof2_lh', 'EOF 2', 2, calc_comp(LH, pc2)*-1),
            ('eof2_sh', 'EOF 2', 2, calc_comp(SH, pc2)*-1),
        )
        
        # Blue-white-red colormap shared by all composite plots
        comp_cmap = matplotlib.colors.LinearSegmentedColormap.from_list("", ["#131DBF","white","#C91717"])
        
        # Plot composites
        for suffix, title, eofnum, comp in composites:
            plot_comp(lon_small,lat_small,comp, title=title,cmap=comp_cmap,
                      eofnum=eofnum,interval=0.1, ticks=[-1.25,-0.55,0,0.55,1.25],
                      vmin=clim1,vmax=clim2, filename=filename+suffix)
        
        # NOTE(review): the original also plotted 'br_comp_m1' / 'br_comp_m2' (files '...eof1_br' and
        # '...eof2_br'), but those variables are never computed in this function, so the calls failed
        # with NameError unless a global of that name happened to exist. They were removed; re-add
        # them once the corresponding composites are actually calculated here.
        
    ###################################