In [1]:
import warnings
from itertools import product
import glob
from datetime import datetime
from datetime import timedelta
import numpy as np
import pandas as pd
import xarray as xr

import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.path as mpath
import cartopy
import cartopy.crs as ccrs
import cartopy.feature
import cartopy.feature as cfeature
import cartopy.io.shapereader as shpreader
import cartopy.feature as cf
import shapely.geometry as sgeom
from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter


from sklearn.decomposition import PCA
from scipy import stats
from sklearn.cluster import KMeans
from sklearn import metrics
from scipy.spatial.distance import cdist
from sklearn.metrics import davies_bouldin_score

import pickle
import copy
from shapely import geometry
from sklearn.metrics.pairwise import euclidean_distances
import statsmodels.api as sm
from scipy.stats import linregress
from sklearn.metrics import accuracy_score
import math

In [2]:
# Reanalysis products handled by this notebook; each name is interpolated
# into the per-reanalysis file names read further down.
names_reanalyses = ['ERA5', 'JRA3Q', 'NCEP_NCAR']

In [3]:
# Directory prefixes (with trailing slash) that file names are appended to.
# NOTE(review): absolute, user-specific scratch paths — adjust when running elsewhere.
path_anoms = (
    '/glade/derecho/scratch/jhayron/Data4WRsClimateChange/ProcessedDataReanalyses/'
)
path_pcs = (
    '/glade/derecho/scratch/jhayron/Data4WRsClimateChange/PCs_Z500/'
)

In [4]:
# Regime label tables keyed by reanalysis name.
dic_labels = {}
for reanalysis in names_reanalyses:
    csv_path = f'../ProcessZ500/labels/df_labels_{reanalysis}.csv'
    # The file's own header row is skipped and replaced by explicit column
    # names; the first column becomes the (date-parsed) index.
    labels_temp = pd.read_csv(
        csv_path,
        parse_dates=True,
        index_col=0,
        names=['WR','distances','corr'],
        skiprows=1,
    )
    # Only the regime label column is kept for each reanalysis.
    dic_labels[reanalysis] = labels_temp[['WR']]

In [5]:
def get_average_fields_for_centroids(dataarray,labels):
    """Average `dataarray` over the time steps assigned to each weather regime.

    Parameters
    ----------
    dataarray : xarray object
        Field(s) to composite. Must support ``.sel(time=...)`` and
        ``.mean('time')``.
    labels : pandas.DataFrame
        Table whose index holds the time stamps and whose ``'WR'`` column
        holds the regime label of each time stamp. Any other columns are
        ignored.

    Returns
    -------
    xarray object
        The per-regime time means concatenated along a new ``'WR'``
        dimension, in ascending order of the label values.
    """
    regime_labels = labels['WR']
    # np.unique flattens its argument, so it must be given the 'WR' column
    # only; passing the whole frame would mix in values from other columns.
    wrs = np.unique(regime_labels)
    avgs = []
    for wr in wrs:
        # Time stamps classified as regime `wr`.
        df_wr = labels[regime_labels == wr]
        arr_selection = dataarray.sel(time=df_wr.index)
        avgs.append(arr_selection.mean('time'))
    return xr.concat(avgs,dim='WR')

In [6]:
# Build the regime composites of the Z500 anomalies (currently ERA5 only).
# for reanalysis in names_reanalyses:
for reanalysis in ['ERA5']:
    anoms = xr.open_dataset(f'{path_anoms}Z500Anoms_{reanalysis}.nc')
    labels_temp = pd.read_csv(
        f'../ProcessZ500/labels/df_labels_{reanalysis}.csv',
        parse_dates=True,
        index_col=0,
        names=['WR','distances','corr'],
        skiprows=1,
    )
    # Rows whose 'corr' is at most 0.25 are reassigned to the largest label
    # present in 'WR' (presumably the "No WR" class — confirm).
    weak_match = labels_temp['corr']<=0.25
    labels_temp.loc[weak_match,'WR']=np.unique(labels_temp['WR'])[-1]
    composites = get_average_fields_for_centroids(anoms, labels_temp[['WR']])
    # composites.to_netcdf(f'composites/composites_{reanalysis}_March23_2025.nc')

In [7]:
# Map domain, later unpacked as min_lon, max_lon, min_lat, max_lat.
region = [180, 330, 20, 80]

# Panel titles, indexed by position along the WR dimension.
# names = ["Atlantic High","Greenland High","Pacific Ridge","Pacific Trough","No WR"]
names = [
    "Polar High",
    "Pacific Trough (PT)",
    "Pacific Ridge",
    "Alaskan Ridge",
    "Atlantic Ridge",
    "No WR",
]
# plot_multiple_maps(composites,
#                    region,names=names, path_save=f'nFigures/1_Composites_ERA5.png')

In [8]:
# Results of the best-k experiment. The .npy file holds a pickled object in a
# 0-d array; indexing with [()] unwraps it so it can be used by key.
# NOTE(review): allow_pickle=True can execute arbitrary code on load — only
# use it on files you produced yourself.
dic_results_best_k = np.load('../ProcessZ500/results_best_k/dic_results_best_k_v2.npy',allow_pickle=True)[()]
classifiability_synthetic = np.load('../ProcessZ500/results_best_k/classifiability_synthetic_v2.npy').tolist()

# 10%-90% envelope of the synthetic classifiability along axis 1.
# The original assigned the 0.9 quantile to the "lower" bound and the 0.10
# quantile to the "upper" bound (and computed the pair twice); the names now
# match the values, consistent with the subsets bounds below.
lower_bound_synth_class = np.quantile(classifiability_synthetic,0.10,axis=1)
upper_bound_synth_class = np.quantile(classifiability_synthetic,0.90,axis=1)

# lower_bound_subsets_class = np.quantile(dic_results_best_k['classifiability_subsets'],0.1,axis=1)
# upper_bound_subsets_class = np.quantile(dic_results_best_k['classifiability_subsets'],0.9,axis=1)

# Interquartile range along the same axis.
q25_synth_class = np.quantile(classifiability_synthetic,0.25,axis=1)
q75_synth_class = np.quantile(classifiability_synthetic,0.75,axis=1)

In [9]:
# fig, ax = plt.subplots(1, 1, figsize=(10,3))
# # axes = axes.flatten()

# for k in range(2,11):
#     ax.scatter(np.repeat(k,k),
#              dic_results_best_k['reproducibility'][k-2],color='k',s=2)
#     ax.scatter(k,
#              np.mean(dic_results_best_k['reproducibility'][k-2]),
#                     color='orange',s=20)
# ax.scatter(k,
#          np.mean(dic_results_best_k['reproducibility'][k-2]),
#                 color='orange',s=20,label='Average clusters')
#     # axes[1].axhline(0,color='k')
# ax.set_xlabel('Number of clusters (k)')
# ax.set_ylabel('Reproducibility')
# ax.set_title('Reproducibility Index')
# ax.set_xticks(np.arange(2,11))
# ax.grid(alpha=0.2,ls='--')
# ax.legend()

In [10]:
from mpl_toolkits.axes_grid1 import make_axes_locatable

In [13]:
# --- Inputs of the composite figure ---------------------------------------
da = composites
regioncoords = region
path_save = f'nFigures/1_WR_Composites_ERA5.png'
n_cols = 3

min_lon, max_lon, min_lat, max_lat = regioncoords


def convert_lon(lon):
    """Return `lon` unchanged up to 180, otherwise `lon - 360`."""
    if lon <= 180:
        return lon
    return lon - 360


min_lon_converted, max_lon_converted = convert_lon(min_lon), convert_lon(max_lon)

# One grid slot per entry along the WR dimension, filled row by row.
n_maps = len(da.WR)
n_rows = math.ceil(n_maps / n_cols)

# Every subplot is created with the same Albers equal-area projection.
map_projection = ccrs.AlbersEqualArea(
    central_longitude=-115,
    central_latitude=50,
    standard_parallels=(30, 70),
)
fig, axes = plt.subplots(
    n_rows,
    n_cols,
    figsize=(9, 2 * (n_rows)),
    subplot_kw={'projection': map_projection},
)

# plt.subplots drops the row axis for a single row; restore it so that
# axes[row, col] indexing always works.
if n_rows == 1:
    axes = np.expand_dims(axes, axis=0)


# Lowercase letters used to tag the panels: a), b), c), ...
import string
abcd = list(string.ascii_lowercase)

# Draw one composite map per weather regime. The last entry along WR is not
# drawn here, which leaves its grid slot free for a different kind of panel.

# Contour levels shared by all maps: 21 boundaries evenly spaced on [-1, 1].
mini=-1
maxi=1
intervals = 21
bounds=np.linspace(mini,maxi,intervals)

# Rectangular lon/lat outline used to clip every panel.
# NOTE(review): hard-coded; it is the [180, 330, 20, 80] region expressed with
# longitudes in -180..180 — keep it in sync with `regioncoords`.
extent = [-180, -30, 20, 80]
verts = [
    (extent[0], extent[2]),  # Bottom-left corner
    (extent[1], extent[2]),  # Bottom-right corner
    (extent[1], extent[3]),  # Top-right corner
    (extent[0], extent[3]),  # Top-left corner
    (extent[0], extent[2])   # Closing the rectangle
]
rect = mpath.Path(verts)

for i in range(len(da.WR.values)-1):
    row = i // n_cols
    col = i % n_cols

    ax = axes[row, col]
    ax.set_extent([min_lon, max_lon, min_lat, max_lat], crs=ccrs.PlateCarree())

    # Filled contours of the i-th composite. The longitudes are passed as
    # stored (the previous per-iteration "shift" was never actually used,
    # because `.lon` returned the unshifted coordinate); the PlateCarree
    # transform handles the projection. The explicit transpose guarantees the
    # (lat, lon) layout contourf needs for x=lon, y=lat.
    field = da.Z_anom.isel(WR=i).transpose('lat', 'lon')
    mesh = ax.contourf(da.lon, da.lat, field.values, levels=bounds, vmin=mini, vmax=maxi,
                       cmap='bwr', transform=ccrs.PlateCarree(),extend='both')

    # Add coastlines for context
    ax.coastlines()
    ax.margins(x=0, y=0)
    ax.add_feature(cfeature.BORDERS, edgecolor='gray', linewidth=0.5, zorder=5)  # Add country borders
    ax.add_feature(cfeature.STATES, edgecolor='gray', linewidth=0.25, zorder=5)  # Add state/province borders

    # Set the boundary of the plot
    ax.set_boundary(rect, transform=ccrs.PlateCarree())

    # Set title for each subplot
    if names:
        ax.set_title(f'{abcd[i]}) {names[i]}',fontsize=11,loc='left')
    else:
        ax.set_title(f'Cluster {i+1}',fontsize=11,loc='left')

# # Hide any unused subplots
# for j in range(i + 1, n_rows * n_cols):
#     fig.delaxes(axes[j // n_cols, j % n_cols])


# Use the last grid slot for the reproducibility scatter plot. The map loop
# stops one short of n_maps, so slot n_maps - 1 holds no composite.
i = n_maps - 1
row = i // n_cols
col = i % n_cols

# Remove the map-projection axes
fig.delaxes(axes[row, col])

# Create a regular (non-map) axes in the same grid cell. Despite its name,
# `position` is the new Axes object.
position = fig.add_subplot(n_rows, n_cols, i + 1)

# Shrink and slightly shift the subplot to better match map subplots
box = position.get_position()
position.set_position([box.x0, box.y0+0.065, box.width*0.9, box.height*0.8])

axes[row, col] = position  # update in the axes array
ax = axes[row, col]

for k in range(2,11):
    # Entry k-2 holds the k per-cluster reproducibility values for k clusters.
    reproducibility_k = dic_results_best_k['reproducibility'][k-2]
    ax.scatter(np.repeat(k,k), reproducibility_k, color='k', s=2)
    # Mean over clusters; labelled once so a legend would show a single entry.
    ax.scatter(k, np.mean(reproducibility_k), color='orange', s=20,
               label='Average clusters' if k == 2 else None)
    # axes[1].axhline(0,color='k')
ax.set_xlabel('Number of clusters (k)',fontsize=11)
ax.set_ylabel('Reproducibility',fontsize=11)
ax.set_title(f'{abcd[i]}) Reproducibility Index',loc='left',fontsize=11)
# Text properties passed to set_xticks only apply when labels are given (and
# raise on recent matplotlib otherwise); size the tick labels via tick_params.
ax.set_xticks(np.arange(2,11))
ax.tick_params(axis='x', labelsize=11)

ax.yaxis.tick_right()
ax.yaxis.set_label_position("right")
ax.grid(alpha=0.2,ls='--')
# ax.legend()

# Dedicated axes for the colorbar, given in figure coordinates as
# [left, bottom, width, height]: a thin vertical strip near the right edge.
cax = fig.add_axes([0.92, 0.6, 0.01, 0.25])
# Vertical colorbar built from `mesh`, the contour set of the last map drawn.
ticks_1 = [-1, -0.5, 0, 0.5, 1]
cbar = fig.colorbar(mesh, cax=cax, orientation='vertical',ticks=ticks_1)
cbar.set_label(r'Z Anomaly ($\sigma$)')


# plt.tight_layout()

# Any falsy path_save (False, None, '') means "display only"; otherwise the
# figure is written to that path.
if not path_save:
    plt.show()
else:
    plt.savefig(path_save, bbox_inches='tight',dpi=100)
plt.close('all')

In [22]:
# Leftover debugging cell: echoes the current value of `i`.
# NOTE(review): the recorded output (4) does not match the `i = 5` assigned in
# the plotting cell, so it likely comes from an earlier run — consider
# deleting this cell.
i

4

In [39]:
# Leftover debugging cell: echoes the region list used by the plotting cell
# (unpacked there as min_lon, max_lon, min_lat, max_lat).
# NOTE(review): the execution count is out of sequence with the cells above —
# consider deleting this cell.
regioncoords

[180, 330, 20, 80]