In [1]:
# Standard Python modules
import os, sys
import yaml
from pathlib import Path
import numpy as np
import pandas as pd
import xarray as xr
# matplotlib
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
from mpl_toolkits.axes_grid1 import AxesGrid
from matplotlib import rcParams
from matplotlib.colors import ListedColormap
import matplotlib.ticker as mticker
# plot styles/formatting
import seaborn as sns
import cmocean.cm as cmo
import cmocean
# cartopy
import cartopy.crs as ccrs
from cartopy.mpl.geoaxes import GeoAxes

# Extra 
from scipy.ndimage import gaussian_filter    # smoothing contour lines
from scipy import stats

import metpy.calc as mpcalc
from metpy.units import units
from IPython.display import Image, display

# import personal modules

# Path to modules
sys.path.append('../modules')

# Import my modules
from plotter import draw_basemap
from timeseries import persistence, select_months, create_list_all_dates
from teleconnections import build_teleconnection_df
from statistical_tests import build_zscore_df, ttest_1samp_new

In [2]:
# Set up paths

path_to_data = '/home/nash/DATA/data/'                            # project data -- read only
path_to_out  = '/home/nash/DATA/repositories/AR_types/out/'       # output files (numerical results, intermediate datafiles) -- read & write
path_to_figs = '/home/nash/DATA/repositories/AR_types/figs/'      # figures

# USE pandas.options TO DISPLAY FLOATS TO TWO DECIMAL PLACES
pd.options.display.float_format = "{:,.2f}".format

In [3]:
ceofid = 'HUV500'
neofs = 2 # choose number of eofs
nk = 3 # choose number of clusters
ssn = 'djfmam'

In [4]:
# import configuration file for dictionary choice
yaml_doc = '../data/config.yml'

config = yaml.load(open(yaml_doc), Loader=yaml.SafeLoader)
#select dictionaries - choose var, anom/nanom, and season
# upper_ precip_ ivt_ and non_anom anom
plot_dict_upper = config['upper_non_anom']
plot_dict_ivt = config['ivt_non_anom']
plot_dict_prec = config['precip_non_anom']

plot_dicts = [plot_dict_upper, plot_dict_ivt, plot_dict_prec]

# djf_dict mam_dict djfmam_dict
ar_dict = config[ssn]

In [5]:
def ar_daily_df(ssn, nk):
    out_path = path_to_out + ceofid + '/' + ssn + '/' + 'neof' + str(neofs) + '/k' + str(nk) + '/'
    filepath = out_path + 'AR-types_ALLDAYS.csv'
    df = pd.read_csv(filepath)

    # set up datetime index
    df = df.rename(columns={'Unnamed: 0': 'date'})
    df = df.set_index(pd.to_datetime(df.date))
    
    ## Break up columns into different AR Types
    keys = []
    for k in range(nk):
        keys.append("AR_CAT{:1d}".format(k+1,))

    values = np.zeros((len(df.index)))
    dicts = dict(zip(keys, values))

    df_cat = pd.DataFrame(dicts, index=df.index)

    for k in range(nk):
        idx = (df['AR_CAT'] == k+1)
        col = "AR_CAT{:1d}".format(k+1,)
        df_cat.loc[idx, col] = 1
        
    # get total of all AR types
    df_cat['AR_ALL'] = df_cat['AR_CAT1'] + df_cat['AR_CAT2'] + df_cat['AR_CAT3']
    df_cat['AR_CAT'] = df['AR_CAT']
    
    return df_cat

In [6]:
start_date = '1979-03-01'
end_date = '2019-05-31'

tele = build_teleconnection_df('daily', 'COND', start_date, end_date)
tele

Unnamed: 0_level_0,AO,PDO,ENSO,SH
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1979-03-01 09:00:00,0,-1,0,-1
1979-03-02 09:00:00,1,-1,0,-1
1979-03-03 09:00:00,1,-1,0,-1
1979-03-04 09:00:00,1,-1,0,-1
1979-03-05 09:00:00,1,-1,0,-1
...,...,...,...,...
2018-05-27 09:00:00,1,0,0,1
2018-05-28 09:00:00,1,0,0,1
2018-05-29 09:00:00,0,0,0,1
2018-05-30 09:00:00,0,0,0,1


In [7]:
df_index = select_months(tele, 12, 5)
df = ar_daily_df(ssn, nk)
# combine ar df with tele df
# join indices with AR count
new_df = df.join(df_index)
new_df

Unnamed: 0_level_0,AR_CAT1,AR_CAT2,AR_CAT3,AR_ALL,AR_CAT,AO,PDO,ENSO,SH
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1979-12-01 09:00:00,0.00,0.00,1.00,1.00,3,1.00,0.00,0.00,0.00
1979-12-02 09:00:00,0.00,0.00,1.00,1.00,3,1.00,0.00,0.00,0.00
1979-12-03 09:00:00,0.00,0.00,0.00,0.00,0,1.00,0.00,0.00,0.00
1979-12-04 09:00:00,0.00,0.00,0.00,0.00,0,1.00,0.00,0.00,0.00
1979-12-05 09:00:00,0.00,0.00,0.00,0.00,0,1.00,0.00,0.00,0.00
...,...,...,...,...,...,...,...,...,...
2019-05-27 09:00:00,0.00,0.00,0.00,0.00,0,,,,
2019-05-28 09:00:00,0.00,0.00,0.00,0.00,0,,,,
2019-05-29 09:00:00,0.00,0.00,0.00,0.00,0,,,,
2019-05-30 09:00:00,0.00,0.00,0.00,0.00,0,,,,


### Composite Plots

In [8]:
def preprocess(ds):
    '''keep only selected lats and lons'''
    if plot_dict['name'] == 'huv250':
        subset = ds.sel(latitude=slice(latmax, latmin), longitude=slice(lonmin, lonmax), level=plot_dict['lev'])
    else:
        subset = ds.sel(latitude=slice(latmax, latmin), longitude=slice(lonmin, lonmax))
    return subset

f = []

# Select lat/lon grid
lonmin = 0.
lonmax = 120
latmin = 10
latmax = 50

    
for p, plot_dict in enumerate(plot_dicts):

    # # open data  
    f.append(xr.open_mfdataset(path_to_data + plot_dict['fname'] , preprocess=preprocess, combine='by_coords'))


In [9]:
var_dict = {'H': (['time', 'lat', 'lon'], (f[0]['z'].values/(9.80665))), # convert to geopotential height (m)
            'U': (['time', 'lat', 'lon'], f[0]['u'].values),
            'V': (['time', 'lat', 'lon'], f[0]['v'].values),
            'ivte': (['time', 'lat', 'lon'], f[1]['p71.162'].values),
            'ivtn': (['time', 'lat', 'lon'], f[1]['p72.162'].values)}


ds1 = xr.Dataset(var_dict,
                coords={'time': (['time'], f[0]['time'].values),
                        'lat': (['lat'], f[0]['latitude'].values),
                        'lon': (['lon'], f[0]['longitude'].values)})

ds2 = xr.Dataset({'prec': (['time', 'lat', 'lon'], f[2]['mtpr'].values*86400)},
                coords={'time': (['time'], f[2]['time'].values),
                        'lat': (['lat'], f[2]['latitude'].values),
                        'lon': (['lon'], f[2]['longitude'].values)})
ds1
print('ds size in GB {:0.2f}\n'.format(ds1.nbytes / 1e9))

ds_lst = [ds1, ds2]

ds size in GB 5.85



In [10]:
df = new_df
for i, ds in enumerate(ds_lst):
    # Trim date range
    idx = slice(ar_dict['start_date'], ar_dict['end_date'])
    ds = ds.sel(time=idx)
    
    # Select months
    if ar_dict['mon_s'] > ar_dict['mon_e']:
        idx = (ds.time.dt.month >= ar_dict['mon_s']) | (ds.time.dt.month <= ar_dict['mon_e'])
    else:
        idx = (ds.time.dt.month >= ar_dict['mon_s']) & (ds.time.dt.month <= ar_dict['mon_e'])
    ds = ds.sel(time=idx)
    
    # Combine AR Cat data w/ reanalysis data
    # Add ar time series to the ERA dataset
    cols = ['AR_CAT', 'AO', 'ENSO', 'SH']
    for i, col in enumerate(cols):
        ds[col] = ('time', df[col])

    ds = ds.set_coords(tuple(cols))
    
    ds_lst[i] = ds

IndexError: list assignment index out of range

In [None]:
cols = ['AO', 'ENSO', 'SH']
popmean=np.zeros([len(ds.lat), len(ds.lon)])
data_lst = []
tval_lst = []

for i, tele in enumerate(cols):
    # all days positive teleconnection
    idx = (ds[tele] == 1)
    tmp = ds.sel(time=idx)
    # make time series of [0, 1, 0, 0] for dates where conditions are met
    date_lst = tmp.time.values
    df_tmp = create_list_all_dates(start_date, end_date, date_lst)
    # calculate number of independent events
    event_id, nevents, duration = persistence(df_tmp)
    
    # calculate t-value based on nevents
    a_mean, tval_mask = ttest_1samp_new(a=tmp, popmean=popmean, dim='time', n=nevents)
    data_lst.append(a_mean)
#     tval_lst.append(np.ones(tval_mask.shape, dtype=bool)) # make a tval mask with all True
    tval_lst.append(tval_mask)
    idx_lst = [(tmp['AR_CAT'] > 0),  (tmp['AR_CAT'] == 1),  (tmp['AR_CAT'] == 2),  (tmp['AR_CAT'] == 3)]
    
    # repeat for each AR Type Grouping
    for j, idx in enumerate(idx_lst):
        tmp_ar = tmp.sel(time=idx_lst[j])
        
        # make time series of [0, 1, 0, 0] for dates where conditions are met
        date_lst = tmp_ar.time.values
        df_tmp = create_list_all_dates(start_date, end_date, date_lst)

        # calculate number of independent events
        event_id, nevents, duration = persistence(df_tmp)

        # calculate t-value based on nevents
        a_mean, tval_mask = ttest_1samp_new(a=tmp_ar, popmean=popmean, dim='time', n=nevents)
        data_lst.append(a_mean)
        tval_lst.append(tval_mask)

In [None]:
# Set seaborn plot style
sns.set()
sns.set_style("ticks", {'patch.force_edgecolor':False})

# Set up projection
mapcrs = ccrs.NorthPolarStereo()
datacrs = ccrs.PlateCarree()

# Set tick/grid locations
dx = np.arange(lonmin,lonmax+20,20)
dy = np.arange(latmin,latmax+20,20)

# # cmap with white in the middle
# n=50
# x = 0.25
# lower = plt.cm.seismic(np.linspace(0, x, n))
# white = plt.cm.seismic(np.ones(100)*0.25)
# upper = plt.cm.seismic(np.linspace(1-x, 1, n))
# colors = np.vstack((lower, white, upper))
# tmap = mpl.colors.LinearSegmentedColormap.from_list('terrain_map_white', colors)

In [None]:
data_lst[0]

In [None]:
fig_path = path_to_figs + ceofid + '/' + ssn + '/'+ 'neof' + str(neofs) + '/k' + str(nk) + '/'
filepath = fig_path + 'composite_teleconnection_H.png'    
nrows = 5
ncols = 4

cols = ['AO', 'PDO', 'ENSO', 'SH']
plt_lbls = ['AO']+['']*4 + ['PDO']+['']*4 +['ENSO']+['']*4 + ['SH']+['']*4
row_lbl1 = ['All Days','AR Days','AR Type 1','AR Type 2', 'AR Type 3']+ ['']*15

# Create figure
fig = plt.figure(figsize=(10,15))

# Set up Axes Grid
axes_class = (GeoAxes,dict(map_projection=mapcrs))
axgr = AxesGrid(fig, 111, axes_class=axes_class,
                nrows_ncols=(nrows, ncols), axes_pad = 0.2,
                cbar_location='bottom', cbar_mode='single',
                cbar_pad=0.10, cbar_size='2%',label_mode='',
                direction='column')

for k, ax in enumerate(axgr): 
    # data for plotting
    data = data_lst[k]
    tval = tval_lst[k]
    
    # lat/lon arrays
    lats = data.lat.values
    lons = data.lon.values 
    ax = draw_basemap(ax, extent=[lonmin,lonmax,latmin,latmax], grid=True)
    
    # Contour Filled (mask==True) (only significant values)
    hgts_mask = data.H.where(tval.H == True).values #convert to decimeters
#     hgts = data
    cflevs = np.arange(-90, 95, 15)
    cf = ax.contourf(lons, lats, hgts_mask, transform=datacrs,
                     levels=cflevs, cmap='RdBu_r', alpha=0.9, extend='both')
    
#     # Contour (ALL)
#     hgts_mask = data.where(tval_mask == False)
#     cf_mask = ax.contourf(lons, lats, hgts_mask, transform=datacrs,
#                      levels=cflevs, cmap=cmap, alpha=0.25, extend='both')

    hgts = data.H.values
    clevs = np.arange(-60, 65, 15)
    cs = ax.contour(lons, lats, hgts, transform=datacrs,
                    levels=clevs, colors='k', linewidths=0.75)
#     plt.clabel(cs, fmt='%d',fontsize=8.5, inline_spacing=5) 
    
    # plot titles
    ax.set_title(plt_lbls[k], fontsize=13)
    # Row labels
    ax.text(-0.07, 0.55, row_lbl1[k], va='bottom', ha='center',
        rotation='vertical', rotation_mode='anchor', fontsize=13,
        transform=ax.transAxes)
                  
# # Colorbar (single)
cb = fig.colorbar(cf, axgr.cbar_axes[0], orientation='horizontal', drawedges=True, spacing='uniform')
cb.set_label('m')
    
# Save figure
plt.savefig(filepath, dpi=150, bbox_inches='tight')

# Show
plt.show()

In [None]:
fig_path = path_to_figs + ceofid + '/' + ssn + '/'+ 'neof' + str(neofs) + '/k' + str(nk) + '/'
filepath = fig_path + 'composite_teleconnection_UV.png'    
nrows = 5
ncols = 4

cols = ['AO', 'PDO', 'ENSO', 'SH']
plt_lbls = ['AO']+['']*4 + ['PDO']+['']*4 +['ENSO']+['']*4 + ['SH']+['']*4
row_lbl1 = ['All Days','AR Days','AR Type 1','AR Type 2', 'AR Type 3']+ ['']*15

# Create figure
fig = plt.figure(figsize=(10,15))

# Set up Axes Grid
axes_class = (GeoAxes,dict(map_projection=mapcrs))
axgr = AxesGrid(fig, 111, axes_class=axes_class,
                nrows_ncols=(nrows, ncols), axes_pad = 0.2,
                cbar_location='bottom', cbar_mode='single',
                cbar_pad=0.10, cbar_size='2%',label_mode='',
                direction='column')

for k, ax in enumerate(axgr): 
    # data for plotting
    data = data_lst[k]
    tval = tval_lst[k]
    
    # lat/lon arrays
    lats = data.lat.values
    lons = data.lon.values 
    ax = draw_basemap(ax, extent=[lonmin,lonmax,latmin,latmax], grid=True)
    
    # Contour Filled (mask==True) (only significant values)
    uwnd = data.U.values
    vwnd = data.V.values
    wnd_mag = np.sqrt(uwnd**2+vwnd**2)
    cflevs = np.arange(0, 14, 2)
    cf = ax.contourf(lons, lats, wnd_mag, transform=datacrs,
                     levels=cflevs, cmap='BuPu', alpha=0.9, extend='neither')
    
    # Wind barbs / vectors (only plot where significant)
    # accept u and v if either is significant
    uwnd_mask = data.U.where((tval.U == True) | (tval.V == True)).values
    vwnd_mask = data.V.where((tval.U == True) | (tval.V == True)).values
    Q = ax.quiver(lons, lats, uwnd_mask, vwnd_mask, transform=datacrs, 
                  color='k', regrid_shape=25
#                   ,pivot='middle',
#                   angles='xy', scale_units='xy', scale=2, units='xy'
                 )
    
    # quiver key
    qk = ax.quiverkey(Q, 0.7, 1.07, 2, '2 m s$^{-1}$', labelpos='E',
                     coordinates='axes', fontproperties={'size': 8.0})
    
    # plot titles
    ax.set_title(plt_lbls[k], fontsize=13)
    # Row labels
    ax.text(-0.07, 0.55, row_lbl1[k], va='bottom', ha='center',
        rotation='vertical', rotation_mode='anchor', fontsize=13,
        transform=ax.transAxes)
                  
# # Colorbar (single)
cb = fig.colorbar(cf, axgr.cbar_axes[0], orientation='horizontal', drawedges=True, spacing='uniform')
cb.set_label('m')
    
# Save figure
plt.savefig(filepath, dpi=150, bbox_inches='tight')

# Show
plt.show()