In [1]:
# (C) Copyright 1996- ECMWF.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.

In [2]:
from itertools import product
import multiprocessing # parallel processing
import os

import xarray as xr
import pandas as pd
import numpy as np

from xmca.xarray import xMCA
from xeofs.xarray import EOF

import xskillscore as xs
from scipy.signal import detrend

import tqdm # timing

import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import seaborn as sns
import cartopy.crs as ccrs
from mpl_toolkits.axes_grid1 import AxesGrid # for multiplots and nice mixing with cartopy
from cartopy.mpl.geoaxes import GeoAxes # for adding cartopy attributes to subplots

In [3]:
# folder prefix for the files read/written below; it is joined to file names by plain string concatenation,
# so a non-empty value has to end with a path separator
dir_loc = ''

# data for Mediterranean patterns (based on previous work)
patterns_file = 'Med_LocalizedPatterns_Composites.nc'
clusters_for_patterns = 9 # value selected on the 'ClustersNumber' coordinate of the patterns file
rolling_days = 1 # rolling days used for the Med. Patterns
use_standardised_anom = False # Standardized anomalies? If not (default) then only anomalies from Mean value
atm_var_used = ['SLP', 'Z500'] # atmospheric variables whose daily anomaly files are read

variables_eof_used = ['SST', 'SMmean', 'SML1', 'SML2'] # variables used for the eof and k-means analysis
# area_subset_eof = [60, -20, 15, 60] # boundary used for the eof analysis of the variables used for the indices
# order is [latitude start, longitude start, latitude end, longitude end], as consumed by the slices in the
# EOF cell (latitude is sliced from index 0 to 2, longitude from index 1 to 3)
area_subset_eof = [50, -15, 25, 45] # boundary used for the eof analysis of the variables used for the indices

# number of days taken before and after each date when building the climatology in clim_anom
days_shift = 4 # for climatology (use dates before/after date of interest for more robust results)

In [4]:
# label selected on the 'variable_type' coordinate of the patterns file; a plain truthiness test replaces
# the explicit comparison against True
var_type_name = 'Anomalies_Std' if use_standardised_anom else 'Anomalies'

In [5]:
# read the daily anomaly file of every atmospheric variable of interest (non-index coordinates dropped)
# and merge the resulting arrays into a single dataset
atm_var_data = xr.merge([
    xr.open_dataarray(dir_loc + 'D1_Mean_'+name+'_Anomalies.nc').reset_coords(drop=True)
    for name in atm_var_used
])

In [6]:
# load the pattern composites and keep the configured number of clusters, rolling window and anomaly type;
# the coordinates left over from the selections are dropped
patterns = (
    xr.open_dataset(patterns_file)
    .sel(ClustersNumber=clusters_for_patterns, rolling=rolling_days)
    .sel(variable_type=var_type_name)
    .reset_coords(drop=True)
)

In [7]:
# settings shared by the composites figure: data array, subplot letters, colour map and contour levels
composites = patterns.to_array()
subplot_id = [chr(code) for code in range(ord('a'), ord('z')+1)] # letters for subplots titles
colors_used = sns.color_palette('RdBu_r', n_colors=15)
colors_used = colors_used[:5]+['white']+colors_used[-5:] # white band for the class around zero
colors_used = ListedColormap(colors_used)
colors_limits = [-16, -13, -10, -7, -4, -1, 1, 4, 7, 10, 13, 16] # levels for colors (actual abs. max is 15.5)
cont_levels = np.linspace(-21, 21, 13) # act. max is 21.2

x, y = np.meshgrid(composites.longitude, composites.latitude)

# absolute maximum per variable; the variables are selected by name so that the unit divisors below are
# always applied to the matching variable, whatever the order of the variables in the patterns file
max_values = np.abs(composites).max(dim=['cluster', 'latitude', 'longitude'])
max_values = max_values.sel(variable=['SLP', 'Z500']).values
# divisors give the hPa and dam units reported in the message (presumably from Pa and m2/s2 - confirm)
max_values = np.round(max_values/np.array([100, 98.1]), 2)
print('Absolute Max values for SLP and Z500 are {} hPa and {} dam respectively'
      .format(max_values[0], max_values[1]))

In [8]:
# 3x3 figure of the pattern composites: filled contours for SLP anomalies, black contours for Z500 anomalies
output_loc = '' # folder for the figure; empty means the current working directory

pat_names = ['Atlantic Low', 'Biscay Low', 'Iberian Low', 'Sicilian Low', 'Balkan Low', 'Black Sea Low',
             'Mediterranean High', 'Minor Low', 'Minor High'] # naming of patterns
pat_names_short = ['AtlL', 'BscL', 'IbrL', 'SclL', 'BlkL', 'BlSL', 'MedH', 'MnrL', 'MnrH'] # short names

axes_class = (GeoAxes, dict(map_projection=ccrs.PlateCarree()))
fig = plt.figure(figsize=(18/2.54, 9.1/2.54)) # size given in cm
grid = AxesGrid(fig, 111, nrows_ncols=(3,3), axes_pad=.2,
                cbar_mode='single', cbar_location='right', cbar_pad=.1,
                axes_class=axes_class, cbar_size='3%', label_mode='')

for i, ax in enumerate(grid):

    ax.set_extent([x.min(), x.max(), y.min(), y.max()], crs=ccrs.PlateCarree()) # set extent
    # NOTE(review): outline_patch is not available in recent cartopy releases - confirm the installed version
    ax.outline_patch.set_linewidth(.2) # reduce the border thickness

    ax.coastlines(resolution='110m', linewidth=.5, color='grey') # add coastline

    contf = ax.contourf(x, y, composites.sel(variable='SLP', cluster=i)/100, # plot contourf for SLP anomalies
                        transform=ccrs.PlateCarree(), levels=colors_limits, cmap=colors_used)
    cont = ax.contour(x, y, composites.sel(variable='Z500', cluster=i)/98.1, # plot contour for Z500 anomalies
                      transform=ccrs.PlateCarree(),levels=cont_levels, colors='black', linewidths=1)
    ax.clabel(cont, inline=1, fontsize=6, fmt='%d')
    # draw the zero contour with a dotted line
    for line, lvl in zip(cont.collections, cont.levels):
        if lvl == 0:
            line.set_linestyle(':')

    ax.set_title('{}) {} ({})'.format(subplot_id[i], pat_names[i], pat_names_short[i]),
                 pad=4, size=7.5, loc='left',)# backgroundcolor=pat_colors[i])

# single colorbar, attached through the last axes of the grid (all panels share the same levels)
cbar = ax.cax.colorbar(contf, ticks=colors_limits, spacing='proportional') # add colorbar
cbar.ax.set_title("SLP' (hPa)", size=6.5, loc='left')
cbar.ax.yaxis.set_tick_params(width=.25, length=2, labelsize=6.5)
for spine in cbar.ax.spines.values():
    spine.set_linewidth(0.5)

plt.subplots_adjust(left=0.02, bottom=0.01, right=.95, top=.96)
# os.path.join keeps an empty output_loc relative to the working directory; the previous
# f'{output_loc}/Patterns.png' pointed at the filesystem root in that case
fig.savefig(os.path.join(output_loc, 'Patterns.png'), dpi=600, transparent=True)

del(axes_class, fig, grid, i, ax, contf, cont, line, lvl, cbar, spine,
    output_loc, pat_names, pat_names_short)

In [9]:
def clim_anom(input_data):
    """Anomalies of ``data_used`` for one calendar day, relative to a day-buffered climatology.

    The function reads the module-level names ``all_dates_atm_var_extd``, ``all_dates_atm_var``,
    ``data_used`` and ``days_shift``; they must be bound before it is called (in this notebook: before the
    multiprocessing pool that maps this function is created).

    Parameters
    ----------
    input_data : tuple
        ``(daymonth_used, standarization)``: the calendar day as a '%m%d' string, and a flag that, when
        true, divides the anomalies by the standard deviation of the climatological sample.

    Returns
    -------
    The (standardized) anomalies, cast to float32, at the dates of ``data_used`` that fall on that day.
    """
    daymonth_used, standarization = input_data

    # all occurrences of the requested calendar day within the extended date range
    is_requested_day = all_dates_atm_var_extd.strftime('%m%d').isin([daymonth_used])
    dates_used = all_dates_atm_var_extd[is_requested_day]

    # each occurrence plus its buffer days (days_shift before and after), for a more robust mean and std
    buffered_dates = []
    for i_dt in dates_used:
        buffered_dates.extend(pd.date_range(i_dt, i_dt-pd.DateOffset(days=days_shift), freq='-1D'))
        buffered_dates.extend(pd.date_range(i_dt, i_dt+pd.DateOffset(days=days_shift), freq='D')[1:])
    all_dates_used = np.array(buffered_dates)
    is_available = pd.to_datetime(all_dates_used).isin(all_dates_atm_var) # only existing dates
    all_dates_used = all_dates_used[is_available]

    # sample around the day of interest, and its part within the climatological period 1979-2019
    subset_i_date = data_used.sel(time=all_dates_used)
    subset_i_date_clim = subset_i_date.sortby('time').sel(time=slice('1979','2019'))

    # anomalies only for the exact days of interest (buffer days are used for the statistics alone)
    exact_dates = dates_used[dates_used.isin(all_dates_atm_var)]
    anom_final = subset_i_date.sel(time=exact_dates) - subset_i_date_clim.mean('time')

    if standarization:
        anom_final = anom_final/subset_i_date_clim.std('time')

    return anom_final.astype('float32')

In [10]:
# Build the daily pattern time series: area-weighted projections of the daily fields onto the pattern
# composites, in a "raw" and in a climatology-normalised version, each extended with its leading PCs.

# restrict the daily data to the grid and the variables of the patterns, keeping the 'Actual' type
atm_var_pat = atm_var_data.sel(longitude=patterns.longitude.values, 
                               latitude=patterns.latitude.values,
                               Type='Actual').drop('Type')
atm_var_pat = atm_var_pat[list(patterns.keys())].copy(deep=True)
weights = np.cos(np.deg2rad(atm_var_pat.latitude)) # weights due to areal differences of each grid
# weights_2d is only referenced by the disabled correlation code further down
weights_2d = weights.expand_dims({'longitude': atm_var_pat.longitude.values}) # weights on both lat-lon

proj_act = (atm_var_pat*patterns).weighted(weights).mean(['latitude', 'longitude']) # projections to patt

proj_pat = proj_act/proj_act.std('time') # standardize dot product
proj_pat = proj_pat.to_array('atm_var').mean('atm_var') # get mean projection considering all used variables

# EOF analysis of the projections; the retained PCs get negative 'cluster' labels (-n, ..., -1) so they can
# be concatenated with the projections along the same dimension
model = EOF(proj_pat, norm=True, dim='time')
model.solve()
expvar = model.explained_variance_ratio()
n_eofs = expvar>.01 # at least 1% of total variance
eofs = model.eofs()
pcs = model.pcs()
pcs = pcs.isel(mode=n_eofs).assign_coords({'mode': np.arange(-sum(n_eofs), 0, 1)}).rename({'mode': 'cluster'})
eofs = eofs.isel(mode=n_eofs).assign_coords({'mode': np.arange(-sum(n_eofs), 0, 1)})

proj_pat = xr.concat([pcs, proj_pat], dim='cluster')

# get the variables used for the clim_anom function
# (clim_anom reads these module-level names, so they have to be bound before the pool is created)
all_dates_atm_var = pd.to_datetime(proj_act.time.values)
all_dates_atm_var_extd = pd.date_range(all_dates_atm_var[0] - pd.DateOffset(years=1), 
                                       all_dates_atm_var[-1] + pd.DateOffset(years=1))
data_used = proj_act
unique_daymonth = all_dates_atm_var.strftime('%m%d')
unique_daymonth = sorted(set(unique_daymonth))
combs_used = list(product(unique_daymonth, [True])) # one task per calendar day, standardization on

pool = multiprocessing.Pool() # object for multiprocessing
proj_pat_norm = list(tqdm.tqdm(pool.imap(clim_anom, combs_used), total=len(unique_daymonth), position=0))
pool.close(); pool.join()

proj_pat_norm = xr.concat(proj_pat_norm, dim='time').sortby('time')
proj_pat_norm = proj_pat_norm.to_array('atm_var').mean('atm_var') # mean proj. considering all used variables

# same EOF step as above, now for the climatology-normalised projections
model = EOF(proj_pat_norm, norm=True, dim='time')
model.solve()
expvar = model.explained_variance_ratio()
n_eofs = expvar>.01 # at least 1% of total variance
eofs = model.eofs()
pcs = model.pcs()
pcs = pcs.isel(mode=n_eofs).assign_coords({'mode': np.arange(-sum(n_eofs), 0, 1)}).rename({'mode': 'cluster'})
eofs = eofs.isel(mode=n_eofs).assign_coords({'mode': np.arange(-sum(n_eofs), 0, 1)})

proj_pat_norm = xr.concat([pcs, proj_pat_norm], dim='cluster')

# only the first two labels are used, because the correlation indicator below is disabled
dim_name = pd.Index(['projection', 'projection_norm', 'correlation'], name='Indicator')[:2]
ts_patterns = xr.concat([proj_pat, proj_pat_norm], dim=dim_name) # combine

# corr_pat = xs.pearson_r(atm_var_pat, patterns, dim=['latitude', 'longitude'], weights=weights_2d) # cor to pat
# ts_patterns = xr.concat([proj_pat, proj_pat_norm, corr_pat], dim=dim_name) # combine
# ts_patterns = ts_patterns.to_array().mean('variable')

ts_patterns = ts_patterns.transpose('time', ...) # time first, as required by the detrending along axis 0

# NOTE(review): combs_used and pool are not deleted here; they are rebound by the later cells
del(atm_var_pat, weights, weights_2d, proj_act, proj_pat, model, expvar, n_eofs, eofs, pcs, 
    all_dates_atm_var, all_dates_atm_var_extd, data_used, unique_daymonth, proj_pat_norm, dim_name)

In [11]:
# Three versions of the pattern time series: actual, detrended, and detrended with seasonal means removed.

# detrend every series along time (axis 0); adding the result to ts_patterns*0 restores the labelled array
detrended_values = np.apply_along_axis(detrend, 0, ts_patterns)
ts_pat_detr = ts_patterns*0 + detrended_values

# label every time step with its quarter (quarters of a year ending in November), subtract the mean of
# each individual quarter, and then put the original time stamps back
quarter_labels = pd.PeriodIndex(ts_pat_detr.time.values, freq='Q-Nov')
ts_pat_seas = ts_pat_detr.assign_coords({'time': quarter_labels})
ts_pat_seas = ts_pat_seas.groupby('time') - ts_pat_seas.groupby('time').mean()
ts_pat_seas = ts_pat_seas.assign_coords({'time': ts_pat_detr.time.values})

# stack the three versions along a new 'Type' dimension and store them
type_labels = pd.Index(['Actual', 'Detrended', 'InterAnnRemov'], name='Type')
ts_patterns = xr.concat([ts_patterns, ts_pat_detr, ts_pat_seas], dim=type_labels)
ts_patterns.to_netcdf(dir_loc+'MedIndices.nc')

del(ts_pat_detr, ts_pat_seas, detrended_values, quarter_labels, type_labels)

### Indices timeseries acquisition and preprocessing

In [12]:
# For every variable: read the daily fields, convert them to (standardized) anomalies with clim_anom, run a
# PCA, apply a Varimax rotation on the leading modes and keep the rotated modes as indices.
eof_indices = {i_var: {} for i_var in variables_eof_used}
for i_var in variables_eof_used:
        
    print(f'Indices generation for {i_var} started.')
    # read daily data of the atmospheric variable of interest
    if i_var=='SMmean':
        file_path = dir_loc + '../Data/D1_Mean_'+i_var+'.nc'
        daily_data = xr.open_dataarray(file_path).reset_coords(drop=True) # read data
    else:
        file_path = dir_loc + '../Data/D1_Mean_'+i_var+'.grb'
        daily_data = xr.open_dataarray(file_path, engine='cfgrib').reset_coords(drop=True) # read data
    
    # area_subset_eof is ordered [lat start, lon start, lat end, lon end]
    daily_data = daily_data.sel(longitude=slice(area_subset_eof[1], area_subset_eof[3]), 
                                latitude=slice(area_subset_eof[0], area_subset_eof[2]))
    daily_data = daily_data.rename({'longitude': 'lon', 'latitude': 'lat'}) # xMCA works with lon/lat

    if i_var!='SST':
        mask_used = (daily_data.mean('time')<=0) # this is needed cause sea points (SM<=0) should be masked
        daily_data = daily_data.where(mask_used==0)
        stand_data = False
    else:
        stand_data = True    
        
    # mask data so NaNs don't consume memory
    # (the mask is taken from the first time step and re-applied after the anomalies are computed)
    mask_used = np.isnan(daily_data.isel(time=0)).reset_coords(drop=True)
    daily_data = daily_data.fillna(0)
    
    # get the variables used for the clim_anom function
    # (clim_anom reads these module-level names, so they have to be bound before the pool is created)
    all_dates_atm_var = pd.to_datetime(daily_data.time.values)
    all_dates_atm_var_extd = pd.date_range(all_dates_atm_var[0] - pd.DateOffset(years=1), 
                                           all_dates_atm_var[-1] + pd.DateOffset(years=1))
    data_used = daily_data
    unique_daymonth = all_dates_atm_var.strftime('%m%d')
    unique_daymonth = sorted(set(unique_daymonth))
    combs_used = list(product(unique_daymonth, [stand_data]))

    pool = multiprocessing.Pool() # object for multiprocessing
    daily_data = list(tqdm.tqdm(pool.imap(clim_anom, combs_used), total=len(unique_daymonth), position=0))
    pool.close(); pool.join()

    daily_data = xr.concat(daily_data, dim='time').sortby('time')
    daily_data = daily_data.where(mask_used==0) # convert back to the actual NaNs 
    
    pca = xMCA(daily_data)
    pca.apply_coslat() # apply weights based on latitude
    
    # calculate non-complex PCA
    pca.solve(complexify=False) # no complex PCA
    expvar = pca.explained_variance() # explained variance 
    pcs = pca.pcs() # Principal component scores (PCs) 
    eofs = pca.eofs() # spatial patterns (EOFs)

    # n. of EOFs for explaining >= 95% of total variance: searchsorted gives the 0-based position of the
    # first mode whose cumulative variance reaches 95, so the number of modes is that position plus one
    # (capped at the number of available modes in case the threshold is never reached)
    n_eofs = int(np.searchsorted(np.cumsum(expvar), 95)) + 1
    n_eofs = min(n_eofs, len(expvar))

    # get results after varimax rotation
    pca.rotate(n_rot=n_eofs, power=1) # Varimax rotation
    expvar_rot = pca.explained_variance() # explained variance 
    pcs_rot = pca.pcs() # Principal component scores (PCs) 
    eofs_rot = pca.eofs() # spatial patterns (EOFs)

    # keep PCs that have at least 0.9% explained variance
    # (the first len(expvar_rot) modes are taken - assumes modes are ordered by explained variance; confirm)
    expvar_rot = expvar_rot.isel(mode=expvar_rot>.90)
    pcs_rot = pcs_rot['left'].isel(mode=range(len(expvar_rot)))
    pcs_rot = pcs_rot.assign_coords({'mode': [f'{i_var}_{i}' for i in range(1, len(pcs_rot.mode)+1)]})
    eofs_rot = eofs_rot['left'].isel(mode=range(len(expvar_rot)))
    
    eof_indices[i_var] = {'ExpVars': expvar_rot, 'PCs': pcs_rot, 'EOFs': eofs_rot}
    print(f'Indices generation for {i_var} completed.\n')
    
del(i_var, stand_data, file_path, daily_data, all_dates_atm_var, all_dates_atm_var_extd, data_used, 
    unique_daymonth, combs_used, pool, pca, expvar, pcs, eofs, n_eofs, expvar_rot, pcs_rot, eofs_rot)

In [13]:
# explained variance of the retained rotated modes, stacked over the variables and stored
explvar_final = xr.concat([eof_indices[i_var]['ExpVars'] for i_var in variables_eof_used],
                          dim=pd.Index(variables_eof_used, name='atm_var'))
explvar_final.to_netcdf(dir_loc+'ExpVarAtmVars.nc')

# spatial patterns of the retained rotated modes, stacked over the variables and stored
eofs_final = xr.concat([eof_indices[i_var]['EOFs'] for i_var in variables_eof_used],
                       dim=pd.Index(variables_eof_used, name='atm_var'))
eofs_final.to_netcdf(dir_loc+'EOFAtmVars.nc')

# PC time series as one table: rows are the time steps, columns the modes of all variables
pcs_final = pd.concat(
    [eof_indices[i_var]['PCs'].to_dataframe('S').pivot_table(index='time', columns='mode', values='S')
     for i_var in variables_eof_used],
    axis=1)

In [14]:
# settings for the EOF figure: stored results of SST and SML1, first six modes
explvar_final = xr.open_dataarray(dir_loc+'ExpVarAtmVars.nc')
explvar_final = explvar_final.sel(atm_var=['SST', 'SML1'])
eof_final = xr.open_dataarray(dir_loc+'EOFAtmVars.nc')
eof_final = eof_final.sel(atm_var=['SST', 'SML1'])

# (mode, variable) pairs in plotting order: the variable changes fastest
used_combs = list(product(np.arange(1, 7), ['SST', 'SML1']))

# symmetric colour range: absolute maximum of the plotted modes rounded up to the next multiple of 0.05
max_val = np.ceil(np.abs(eof_final.sel(mode=range(1, 7))).max()*20)/20
colors_limits = list(np.linspace(-max_val, max_val, 11))

# diverging colour map built from the 5 outermost colours on each side of the 15-colour palette
colors_used = sns.color_palette('RdBu_r', n_colors=15)
colors_used = ListedColormap(colors_used[:5]+colors_used[-5:])

x, y = np.meshgrid(eof_final.lon, eof_final.lat)

In [15]:
# 6x2 figure with the first six rotated EOFs of the two selected variables (one variable per column)
output_loc = '' # folder for the figure; empty means the current working directory

axes_class = (GeoAxes, dict(map_projection=ccrs.PlateCarree()))
fig = plt.figure(figsize=(8/2.54, 10/2.54)) # size given in cm
grid = AxesGrid(fig, 111, nrows_ncols=(6,2), axes_pad=.2,
                cbar_mode='single', cbar_location='right', cbar_pad=.1,
                axes_class=axes_class, cbar_size='3%', label_mode='')


for i, ax in enumerate(grid):

    ax.set_extent([x.min(), x.max(), y.min(), y.max()], crs=ccrs.PlateCarree()) # set extent
    # NOTE(review): outline_patch is not available in recent cartopy releases - confirm the installed version
    ax.outline_patch.set_linewidth(.2) # reduce the border thickness

    ax.coastlines(resolution='110m', linewidth=.5, color='grey') # add coastline

    contf = ax.contourf(x, y, eof_final.sel(mode=used_combs[i][0], atm_var=used_combs[i][1]),
                        transform=ccrs.PlateCarree(), levels=colors_limits, cmap=colors_used)

    # title: first three letters of the variable, mode number and explained variance of the mode
    ax.set_title('{}-{} ({:.2f}%)'.format(used_combs[i][1][:3], used_combs[i][0],
                                     explvar_final.sel(mode=used_combs[i][0], atm_var=used_combs[i][1]).values),
                 pad=4, size=7.5, loc='left',)# backgroundcolor=pat_colors[i])

# single colorbar, attached through the last axes of the grid (all panels share the same levels)
cbar = ax.cax.colorbar(contf, ticks=colors_limits, spacing='proportional') # add colorbar
cbar.ax.yaxis.set_tick_params(width=.25, length=2, labelsize=6.5)

plt.subplots_adjust(left=0.02, bottom=0.01, right=.85, top=.96)
# os.path.join keeps an empty output_loc relative to the working directory; the previous
# f'{output_loc}/EOFs.png' pointed at the filesystem root in that case
fig.savefig(os.path.join(output_loc, 'EOFs.png'), dpi=600, transparent=True)

del(fig, cbar, ax, contf, i, grid, axes_class, output_loc)

In [16]:
def noaa_indices(indice_name, suffix='b500101'):
    """Download a daily index from the NOAA CPC ftp server and return it as a single-column DataFrame.

    Parameters
    ----------
    indice_name : str
        Name of the index as used in the remote file name (e.g. 'nao', 'pna', 'ao').
    suffix : str, optional
        Part of the remote file name that follows '.index.' (default 'b500101').

    Returns
    -------
    pandas.DataFrame
        One column named ``indice_name.upper()``, indexed by the dates built from the year, month and day
        columns of the file.
    """
    indice_data = f'ftp://ftp.cpc.ncep.noaa.gov/cwlinks/norm.daily.{indice_name}.index.{suffix}.current.ascii'
    indice_data = pd.read_csv(indice_data, delimiter=r"\s+", header=None)
    indice_data.columns = ['Year', 'Month', 'Day', indice_name.upper()]

    # day values may carry a '*' marker, which makes the column text; when no marker is present the column
    # is parsed as integers and has no .str accessor, so it is cast to text first and '*' is removed literally
    indice_data['Day'] = indice_data['Day'].astype(str).str.replace('*', '', regex=False).astype(int)
    indice_data.index = pd.to_datetime(indice_data[['Year', 'Month', 'Day']])
    indice_data = indice_data[[indice_name.upper()]]

    return indice_data

In [17]:
def mo_indices(mo_name):
    
    """Read a Mediterranean Oscillation index file and return it as a single-column DataFrame.

    Data from https://crudata.uea.ac.uk/cru/data/moi/

    Parameters
    ----------
    mo_name : str
        Stem of the file '<mo_name>.output.dat.txt'; also used, upper-cased, as the column name.

    Returns
    -------
    pandas.DataFrame
        One column named ``mo_name.upper()``, indexed by the dates built from the year, month and day read
        from the file.
    """
    # mo_name is either "moi1" (Algiers/Cairo) or "moi2" (Gibraltar/Israel)
    
    # NOTE(review): the path starts at the filesystem root - confirm where the files are actually stored
    ind_mo = f'/{mo_name}.output.dat.txt'
    # the file is split on '.', with the first field (the year) used as index
    ind_mo = pd.read_csv(ind_mo, delimiter=".", header=None, index_col=0,)
    ind_mo.columns = ['Month', 'Day', mo_name.upper(), 'Add']
    # splitting on '.' also cuts the index value in two; glue integer part and decimals ('Add') back together
    # NOTE(review): this relies on both halves keeping their sign and leading zeros when parsed
    # (e.g. '-0' or '05' read as integers would alter the value) - verify against the file format
    ind_mo[mo_name.upper()] = ind_mo[mo_name.upper()].astype(str)+'.'+ind_mo['Add'].astype(str)
    ind_mo = ind_mo.astype('float')
    ind_mo['Year'] = ind_mo.index
    
    ind_mo.index = pd.to_datetime(ind_mo[['Year', 'Month', 'Day']])
    ind_mo = ind_mo[[mo_name.upper()]]

    return ind_mo

In [18]:
# Calculate the West Mediterranean Oscillation index, similar to https://doi.org/10.1002/joc.1388:
# difference between the standardized SLP at the grid points nearest to Cadiz and Padua

slp_all = xr.open_dataarray('D1_Mean_SLP.grb', engine='cfgrib').reset_coords(drop=True)

# time series at the grid point nearest to each location, as single-column tables
slp_Padua = (slp_all.sel(latitude=45.24, longitude=11.47, method="nearest")
             .reset_coords(drop=True)
             .to_dataframe('Padua'))
slp_Cadiz = (slp_all.sel(latitude=36.17, longitude=-6.07, method="nearest")
             .reset_coords(drop=True)
             .to_dataframe('Cadiz'))

# standardize each series with the mean and standard deviation of its calendar month
west_MO = pd.concat([slp_Cadiz, slp_Padua], axis=1)
by_month = west_MO.groupby([west_MO.index.month])
means = by_month.transform('mean')
stds = by_month.transform('std')
west_MO = (west_MO-means)/stds

# index: standardized Cadiz minus standardized Padua
west_MO = (west_MO['Cadiz'] - west_MO['Padua']).to_frame('WeMO')

del(slp_all, slp_Padua, slp_Cadiz, means, stds, by_month)

In [19]:
# BSISO indices: two projection series read from a pickled pandas object
# NOTE(review): unpickling can execute arbitrary code - only load this file if it comes from a trusted source
ind_BSISO = pd.read_pickle(dir_loc+'../Data/daily_total_precipitation_anomaly_EOF_norm_projections')
ind_BSISO.index = pd.to_datetime(ind_BSISO.index)
ind_BSISO.columns = ['BSISO_EOF1', 'BSISO_EOF2'] # assumes exactly two columns in the file

In [20]:
# MJO (RMM) indices from a whitespace-separated text file; the first line of the file is skipped.
# The separator is a raw string: "\s" in a normal string literal is an invalid escape sequence.
mjo_BOM = pd.read_csv(dir_loc+'../Data/rmm.txt', delimiter=r"\s+", skiprows=[0])

mjo_BOM = mjo_BOM.iloc[:, :7] # keep only the actual MJO_BOM of interest
mjo_BOM.columns = ['Year', 'Month', 'Day', 'MJO_RMM1', 'MJO_RMM2', 'MJO_Phase', 'MJO_Amplitude'] # rename columns
mjo_BOM.index = pd.to_datetime(mjo_BOM[['Year', 'Month', 'Day']]) # create datetime column

# change missing value flags with NaN (Missing Value= 1.E36 or 999)
mjo_BOM[mjo_BOM==999] = np.nan
mjo_BOM.loc[mjo_BOM.MJO_RMM1>1e+35, 'MJO_RMM1'] = np.nan
mjo_BOM.loc[mjo_BOM.MJO_RMM2>1e+35, 'MJO_RMM2'] = np.nan
mjo_BOM.loc[mjo_BOM.MJO_Amplitude>1e+35, 'MJO_Amplitude'] = np.nan

mjo_BOM = mjo_BOM[['MJO_RMM1', 'MJO_RMM2', 'MJO_Amplitude', 'MJO_Phase']] # columns of interest

Sahel precipitation daily index, considering same area as http://research.jisao.washington.edu/data/sahel/

In [21]:
# daily precipitation averaged over the box 10-20 latitude, -10 to 20 longitude (cos-latitude weights)
# NOTE(review): confirm that these longitude bounds match the area of the referenced Sahel index
prpShl = xr.open_dataarray(dir_loc+'../Data/D1_Total_Precipitation.grb', engine='cfgrib')
prpShl = prpShl.reset_coords(drop=True).sel(latitude=slice(20, 10), longitude=slice(-10, 20))

weights = np.cos(np.deg2rad(prpShl.latitude))
prpShl = prpShl.weighted(weights).mean(['latitude', 'longitude'])

# move the time stamps 18 hours back
shifted_time = pd.to_datetime(prpShl.time)-np.timedelta64(18, 'h')
prpShl = prpShl.assign_coords({'time': shifted_time})

del(weights, shifted_time)

In [22]:
# Standardized anomalies of the Sahel precipitation series, one clim_anom task per calendar day.
# clim_anom reads the module-level names bound below, so they have to be set before the pool is created.
all_dates_atm_var = pd.to_datetime(prpShl.time.values)
all_dates_atm_var_extd = pd.date_range(all_dates_atm_var[0] - pd.DateOffset(years=1), 
                                       all_dates_atm_var[-1] + pd.DateOffset(years=1))
data_used = prpShl
unique_daymonth = all_dates_atm_var.strftime('%m%d')
unique_daymonth = sorted(set(unique_daymonth))
combs_used = list(product(unique_daymonth, [True])) # (calendar day, standardization flag) pairs

pool = multiprocessing.Pool() # object for multiprocessing
prpShl_norm = list(tqdm.tqdm(pool.imap(clim_anom, combs_used), total=len(unique_daymonth), position=0))
pool.close(); pool.join()

# put the per-day results back in chronological order and convert them to a single-column table
prpShl_norm = xr.concat(prpShl_norm, dim='time').sortby('time')
prpShl_norm = prpShl_norm.to_dataframe('PrecipSahel')

del(all_dates_atm_var, all_dates_atm_var_extd, data_used, unique_daymonth, combs_used, pool, prpShl)

Generate indices for sea ice area fraction over parts of Northern Hemisphere.

In [23]:
sea_ice = xr.open_dataarray(dir_loc+'../Data/D1_Mean_SIAF.grb', engine='cfgrib').reset_coords(drop=True)

# averaging domains: name -> (latitude slice, longitude slice)
areas = {'ArcticWhole': (slice(90, 70), slice(-180, 180)),
         'ArcticEurope': (slice(90, 70), slice(-30, 60)),
         'ArcticEurasia': (slice(90, 70), slice(-30, 180))}

# cos-latitude weighted mean of the sea ice field over every domain, one named series per domain
sea_ice_indices = []
for i_area, i_domain in areas.items():
    i_sea_ice = sea_ice.sel(latitude=i_domain[0], longitude=i_domain[1])
    weights = np.cos(np.deg2rad(i_sea_ice.latitude))
    i_sea_ice = i_sea_ice.weighted(weights).mean(['latitude', 'longitude']).rename(i_area)
    sea_ice_indices.append(i_sea_ice)

sea_ice_indices = xr.merge(sea_ice_indices)

In [24]:
# Standardized anomalies of the sea ice series, one clim_anom task per calendar day.
# clim_anom reads the module-level names bound below, so they have to be set before the pool is created.
all_dates_atm_var = pd.to_datetime(sea_ice_indices.time.values)
all_dates_atm_var_extd = pd.date_range(all_dates_atm_var[0] - pd.DateOffset(years=1), 
                                       all_dates_atm_var[-1] + pd.DateOffset(years=1))
data_used = sea_ice_indices
unique_daymonth = all_dates_atm_var.strftime('%m%d')
unique_daymonth = sorted(set(unique_daymonth))
combs_used = list(product(unique_daymonth, [True])) # (calendar day, standardization flag) pairs

pool = multiprocessing.Pool() # object for multiprocessing
sea_ice_norm = list(tqdm.tqdm(pool.imap(clim_anom, combs_used), total=len(unique_daymonth), position=0))
pool.close(); pool.join()

# put the per-day results back in chronological order and convert them to a table (one column per area)
sea_ice_norm = xr.concat(sea_ice_norm, dim='time').sortby('time')
sea_ice_norm = sea_ice_norm.to_dataframe()

# the loop variables of the previous cell (i_area, i_domain, weights) are cleaned up here as well
del(all_dates_atm_var,all_dates_atm_var_extd, data_used, unique_daymonth, sea_ice, areas, i_area, i_domain,
    combs_used, pool, weights)

In [25]:
# indices_Linus = !ls *csv

# ts_ind_linus = []

# for i_ind in indices_Linus:
#     ts_i = pd.read_csv(i_ind, index_col=0)
#     ts_i.index = pd.to_datetime(ts_i.index)
#     prefix_name = i_ind.replace('replace', '').replace('.csv', '') # replace section of names
#     ts_i.columns = [f'Linus_{prefix_name}_{i}' for i in ts_i.columns]
#     ts_ind_linus.append(ts_i)
    
# ts_ind_linus = pd.concat(ts_ind_linus, axis=1).dropna()

# del(indices_Linus, i_ind, ts_i, prefix_name)

In [26]:
# Combine all indices in one table (columns are the indices) and keep only the dates available in all.
# ts_ind_linus is not part of the list: the only cell that builds it is commented out, so referencing it
# raises a NameError on a fresh kernel. Re-enable that cell and append ts_ind_linus here to include it.
ts_indices = pd.concat([noaa_indices('nao'), noaa_indices('pna'),
                        noaa_indices('ao'), # noaa_indices('aao', 'b790101'), # aao file is missing lately...
                        mo_indices('moi1'), mo_indices('moi2'), west_MO,
                        ind_BSISO, mjo_BOM, prpShl_norm, sea_ice_norm, pcs_final],
                       axis=1).dropna()

del(west_MO, ind_BSISO)

In [27]:
# pairwise correlation between all assembled indices (shown as the cell output)
ts_indices.corr()

In [28]:
# Three versions of the indices: actual, detrended, and detrended with seasonal means removed.

# table -> labelled array with dimensions (time, index)
ts_indices = xr.DataArray(ts_indices).rename({'dim_0': 'time', 'dim_1': 'index'})

# detrend every index along time (axis 0); adding the result to ts_indices*0 restores the labelled array
detrended_values = np.apply_along_axis(detrend, 0, ts_indices)
ts_ind_detr = ts_indices*0 + detrended_values

# label every time step with its quarter (quarters of a year ending in November), subtract the mean of
# each individual quarter, and then put the original time stamps back
quarter_labels = pd.PeriodIndex(ts_ind_detr.time.values, freq='Q-Nov')
ts_ind_seas = ts_ind_detr.assign_coords({'time': quarter_labels})
ts_ind_seas = ts_ind_seas.groupby('time') - ts_ind_seas.groupby('time').mean()
ts_ind_seas = ts_ind_seas.assign_coords({'time': ts_ind_detr.time.values})

# stack the three versions along a new 'Type' dimension and store them
type_labels = pd.Index(['Actual', 'Detrended', 'InterAnnRemov'], name='Type')
ts_indices = xr.concat([ts_indices, ts_ind_detr, ts_ind_seas], dim=type_labels)
ts_indices.to_netcdf(dir_loc+'AtmIndices.nc')

del(ts_ind_detr, ts_ind_seas, detrended_values, quarter_labels, type_labels)