In [1]:
# Standard library imports
import os
import sys
import glob
import re

# Third party imports
import numpy as np
import xarray as xr
from scipy.stats import pearsonr
import matplotlib.pyplot as plt

In [2]:
# Import dictionaries
# The project modules are not installed as a package, so the repository
# directory is appended to sys.path before importing from it.
sys.path.append('/home/users/benhutch/skill-maps')
# NOTE: the alias `dict` rebinds the builtin name `dict` for the rest of the
# notebook, so `dict(...)` can no longer be used to build dictionaries here.
import dictionaries as dict

# Import functions
import functions as func

# Import the NAO functions
from nao_skill_functions import nao_stats, \
                                plot_subplots_ind_models, plot_multi_model_mean, \
                                plot_multi_model_mean_spna, \
                                plot_subplots_ind_models_spna

# Import nao matching functions
# (these live in a sub-directory of the repository, which also has to be
# appended to sys.path)
sys.path.append('/home/users/benhutch/skill-maps/rose-suite-matching')
from nao_matching_seasons import match_variable_models, find_obs_path

# Import the bootstrapping functions
from process_bs_values import align_nao_matched_members, load_and_process_hist_data, \
                                extract_hist_models

### NAO/SPNA Matching ###

We test two questions:
1. Does NAO matching improve skill for the SPNA SSTs (tas in this case)?
2. Does SPNA matching improve skill for the NAO anomalies?

In the first case, we will consider whether NAO-matching improves the skill for SPNA SSTs. We want to create two subplots, one on the left showing the raw SPNA SST time series and the other (on the right) showing the NAO-matched SPNA SST time series. If NAO-matching improves the skill of the SPNA SST time series, this suggests that NAO may be a driver of predictability for the SPNA SSTs.

$\hat{F}_{SPNA} = \hat{F}_{NAO} + \hat{F}_{OTHER} + \hat{\epsilon}$

In the second case, we will consider whether SPNA SST matching improves the skill for NAO anomalies. By doing this we assume that some of the predictability for the NAO derives from SPNA SSTs, e.g.

$\hat{F}_{NAO} = \hat{F}_{SPNA} + \hat{F}_{OTHER} + \hat{\epsilon}$

In [3]:
# Set up the parameters
variable = "tas"
region = "global"
forecast_range = "2-9"
season = "DJFM"
start_year = 1960
end_year = 2022

# set up the base canari dir
canari_dir = "/gws/nopw/j04/canari/users/benhutch"

# Form the folder within which the data are stored:
# <canari_dir>/NAO-matching/<variable>/<region>/<season>/<forecast_range>/<start>-<end>
data_dir = os.path.join(canari_dir, "NAO-matching",
                        variable, region, season,
                        forecast_range, str(start_year) + "-" + str(end_year))

# Check that the data directory exists (and really is a directory, since it
# is listed below); include the path in the message so a typo is easy to spot
if not os.path.isdir(data_dir):
    raise ValueError("Data directory does not exist: {}".format(data_dir))

# List the files stored in the data directory.
# os.listdir returns entries in arbitrary order, so sort them to make both the
# printed listing and the file selection below deterministic.
print("Files in data directory:")
files = sorted(os.listdir(data_dir))

# Print the files
for file in files:
    print(file)

# Extract the file containing "members_lagged"
lagged_member_files = [name for name in files if "members_lagged" in name]

# Fail with an explicit message instead of an opaque IndexError when the
# expected file has not been produced yet
if not lagged_member_files:
    raise FileNotFoundError(
        'No file containing "members_lagged" found in {}'.format(data_dir)
    )

# If several files match, the first one in sorted order is used
members_lagged_file = lagged_member_files[0]

# Open the file
nao_matched_tas_members = xr.open_dataset(os.path.join(data_dir,
                                                        members_lagged_file))

Files in data directory:
tas_global_DJFM_2-9_1960-2022_matched_var_ensemble_mean.nc
tas_global_DJFM_2-9_1960-2022_matched_var_ensemble_mean_lagged.nc
tas_global_DJFM_2-9_1960-2022_matched_var_ensemble_members.nc
tas_global_DJFM_2-9_1960-2022_matched_var_ensemble_members_lagged.nc


In [4]:
# Display the opened dataset as the cell output so its contents can be
# inspected.
# variable contained here is:
# '__xarray_dataarray_variable__'
nao_matched_tas_members

In [5]:
# Read the observations for the same variable, region, forecast range,
# season and year span that were configured for the model data.
# The observations path comes from the project dictionaries module.
obs_tas_anom = func.read_obs(
    variable=variable,
    region=region,
    forecast_range=forecast_range,
    season=season,
    observations_path=dict.obs,
    start_year=start_year,
    end_year=end_year,
)

Time dimension of obs: ['1960-12-31T00:00:00.000000000' '1961-12-31T00:00:00.000000000'
 '1962-12-31T00:00:00.000000000' '1963-12-31T00:00:00.000000000'
 '1964-12-31T00:00:00.000000000' '1965-12-31T00:00:00.000000000'
 '1966-12-31T00:00:00.000000000' '1967-12-31T00:00:00.000000000'
 '1968-12-31T00:00:00.000000000' '1969-12-31T00:00:00.000000000'
 '1970-12-31T00:00:00.000000000' '1971-12-31T00:00:00.000000000'
 '1972-12-31T00:00:00.000000000' '1973-12-31T00:00:00.000000000'
 '1974-12-31T00:00:00.000000000' '1975-12-31T00:00:00.000000000'
 '1976-12-31T00:00:00.000000000' '1977-12-31T00:00:00.000000000'
 '1978-12-31T00:00:00.000000000' '1979-12-31T00:00:00.000000000'
 '1980-12-31T00:00:00.000000000' '1981-12-31T00:00:00.000000000'
 '1982-12-31T00:00:00.000000000' '1983-12-31T00:00:00.000000000'
 '1984-12-31T00:00:00.000000000' '1985-12-31T00:00:00.000000000'
 '1986-12-31T00:00:00.000000000' '1987-12-31T00:00:00.000000000'
 '1988-12-31T00:00:00.000000000' '1989-12-31T00:00:00.000000000'
 '



In [6]:
# The alignment function needs the historical runs as well, so load and
# process those first, for every model listed for this variable.
hist_data = load_and_process_hist_data(
    base_dir="/home/users/benhutch/skill-maps-processed-data/historical",
    hist_models=extract_hist_models(variable, dict),
    variable=variable,
    region=region,
    forecast_range=forecast_range,
    season=season,
)

# Restrict the historical data with func.constrain_years, using the same
# list of historical models
constrained_hist_data = func.constrain_years(
    hist_data,
    extract_hist_models(variable, dict),
)

processing model:  BCC-CSM2-MR
files_path:  /home/users/benhutch/skill-maps-processed-data/historical/tas/BCC-CSM2-MR/global/years_2-9/DJFM/outputs/processed/*.nc
processing model:  MPI-ESM1-2-HR
files_path:  /home/users/benhutch/skill-maps-processed-data/historical/tas/MPI-ESM1-2-HR/global/years_2-9/DJFM/outputs/processed/*.nc
processing model:  CanESM5
files_path:  /home/users/benhutch/skill-maps-processed-data/historical/tas/CanESM5/global/years_2-9/DJFM/outputs/processed/*.nc
processing model:  CMCC-CM2-SR5
files_path:  /home/users/benhutch/skill-maps-processed-data/historical/tas/CMCC-CM2-SR5/global/years_2-9/DJFM/outputs/processed/*.nc
processing model:  HadGEM3-GC31-MM
files_path:  /home/users/benhutch/skill-maps-processed-data/historical/tas/HadGEM3-GC31-MM/global/years_2-9/DJFM/outputs/processed/*.nc
processing model:  MPI-ESM1-2-LR
files_path:  /home/users/benhutch/skill-maps-processed-data/historical/tas/MPI-ESM1-2-LR/global/years_2-9/DJFM/outputs/processed/*.nc
processing m

In [7]:
# Line up the observations, the NAO-matched members and the constrained
# historical data using the project alignment helper.
# The result is indexed positionally by the cells that follow.
aligned_data = align_nao_matched_members(
    obs=obs_tas_anom,
    nao_matched_members=nao_matched_tas_members,
    constrained_hist_data=constrained_hist_data,
    hist_models=extract_hist_models(variable, dict),
)

there are NaN values in the observations for 1960
all values are NaN for 1960
there are NaN values in the observations for 1961
all values are NaN for 1961
there are NaN values in the observations for 1962
all values are NaN for 1962
there are NaN values in the observations for 1963
all values are NaN for 1963
there are no NaN values in the observations for 1964
there are no NaN values in the observations for 1965
there are no NaN values in the observations for 1966
there are no NaN values in the observations for 1967
there are no NaN values in the observations for 1968
there are no NaN values in the observations for 1969
there are no NaN values in the observations for 1970
there are no NaN values in the observations for 1971
there are no NaN values in the observations for 1972
there are no NaN values in the observations for 1973
there are no NaN values in the observations for 1974
there are no NaN values in the observations for 1975
there are no NaN values in the observations for 1976

In [14]:
# Import importlib
import importlib

# Reload every project module so that edits made on disk since the first
# import are picked up. The order is kept as: dictionaries, functions,
# NAO functions, NAO matching functions, bootstrapping functions.
for module_name in ('dictionaries',
                    'functions',
                    'nao_skill_functions',
                    'nao_matching_seasons',
                    'process_bs_values'):
    importlib.reload(sys.modules[module_name])

# Re-bind the module aliases
import dictionaries as dict
import functions as func

# Names pulled in with `from ... import ...` still point at the objects from
# before the reload, so they have to be imported again.
from nao_skill_functions import (
    nao_stats,
    plot_subplots_ind_models,
    plot_multi_model_mean,
    plot_multi_model_mean_spna,
    plot_subplots_ind_models_spna,
)

# The NAO matching functions live in a sub-directory of the repository
sys.path.append('/home/users/benhutch/skill-maps/rose-suite-matching')
from nao_matching_seasons import match_variable_models, find_obs_path

# Bootstrapping functions
from process_bs_values import (
    align_nao_matched_members,
    load_and_process_hist_data,
    extract_hist_models,
)

In [15]:
# Pull the pieces needed from the aligned data: item 0 (NAO-matched members),
# item 2 (observations) and item 3 (common years). Item 1 is not used here.
# NOTE: this rebinds obs_tas_anom, which previously held the output of
# func.read_obs, so re-running the alignment cell after this one would pass
# the aligned observations instead of the original ones.
nao_matched_tas_members_array, obs_tas_anom, common_years = (
    aligned_data[position] for position in (0, 2, 3)
)

# Show the shape of each array (one per line), then the years themselves
print(nao_matched_tas_members_array.shape,
      obs_tas_anom.shape,
      common_years.shape,
      sep="\n")
print(common_years)

(20, 43, 72, 144)
(43, 72, 144)
(43,)
[1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982
 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996
 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010
 2011]


In [23]:
# We want to find the indexes for lat and lon
# which correspond to the SPNA region for the regridded data
lats = nao_matched_tas_members.lat.values
lons = nao_matched_tas_members.lon.values

# Print the lats and lons
print(lats)
print(lons)

# Extract the bounds of the SPNA box from the project dictionaries
lat1, lat2 = dict.spna_grid_strommen['lat1'], dict.spna_grid_strommen['lat2']
lon1, lon2 = dict.spna_grid_strommen['lon1'], dict.spna_grid_strommen['lon2']

# Print the bounds
print(lat1, lat2)
print(lon1, lon2)

# Find the indexes of the grid points closest to each bound
lat1_idx = np.argmin(np.abs(lats - lat1))
lat2_idx = np.argmin(np.abs(lats - lat2))
lon1_idx = np.argmin(np.abs(lons - lon1))
lon2_idx = np.argmin(np.abs(lons - lon2))

# Print the indexes
print(lat1_idx, lat2_idx, lon1_idx, lon2_idx)

# Constrain the lats and lons to the SPNA region
# (the + 1 makes the upper bound inclusive)
lats_spna = lats[lat1_idx:lat2_idx + 1]
lons_spna = lons[lon1_idx:lon2_idx + 1]

# If a pair of bounds is ordered the opposite way round to the coordinate
# array, the slice above is silently empty and any later mean over the region
# would be NaN. Fail here instead, where the cause is obvious.
if lats_spna.size == 0 or lons_spna.size == 0:
    raise ValueError(
        "SPNA selection is empty (lat idx {}-{}, lon idx {}-{}): check that "
        "the bounds are ordered like the lat/lon coordinates".format(
            lat1_idx, lat2_idx, lon1_idx, lon2_idx)
    )

# Print the selected lats and lons
print(lats_spna)
print(lons_spna)

[-90.  -87.5 -85.  -82.5 -80.  -77.5 -75.  -72.5 -70.  -67.5 -65.  -62.5
 -60.  -57.5 -55.  -52.5 -50.  -47.5 -45.  -42.5 -40.  -37.5 -35.  -32.5
 -30.  -27.5 -25.  -22.5 -20.  -17.5 -15.  -12.5 -10.   -7.5  -5.   -2.5
   0.    2.5   5.    7.5  10.   12.5  15.   17.5  20.   22.5  25.   27.5
  30.   32.5  35.   37.5  40.   42.5  45.   47.5  50.   52.5  55.   57.5
  60.   62.5  65.   67.5  70.   72.5  75.   77.5  80.   82.5  85.   87.5]
[-180.  -177.5 -175.  -172.5 -170.  -167.5 -165.  -162.5 -160.  -157.5
 -155.  -152.5 -150.  -147.5 -145.  -142.5 -140.  -137.5 -135.  -132.5
 -130.  -127.5 -125.  -122.5 -120.  -117.5 -115.  -112.5 -110.  -107.5
 -105.  -102.5 -100.   -97.5  -95.   -92.5  -90.   -87.5  -85.   -82.5
  -80.   -77.5  -75.   -72.5  -70.   -67.5  -65.   -62.5  -60.   -57.5
  -55.   -52.5  -50.   -47.5  -45.   -42.5  -40.   -37.5  -35.   -32.5
  -30.   -27.5  -25.   -22.5  -20.   -17.5  -15.   -12.5  -10.    -7.5
   -5.    -2.5    0.     2.5    5.     7.5   10.    12.5   15.  

In [24]:
# Index ranges for the SPNA box; the + 1 keeps the upper bound inclusive
spna_lat_slice = slice(lat1_idx, lat2_idx + 1)
spna_lon_slice = slice(lon1_idx, lon2_idx + 1)

# NAO-matched members: cut out the SPNA box from the last two axes and
# average over them, leaving the first two axes untouched.
# NOTE(review): this is a plain mean over grid points with no latitude
# weighting - confirm that this is intended.
nao_matched_tas_members_box = nao_matched_tas_members_array[
    :, :, spna_lat_slice, spna_lon_slice
]
nao_matched_tas_members_spna = nao_matched_tas_members_box.mean(axis=(2, 3))

# Print the shape of the data
print(nao_matched_tas_members_spna.shape)

# Observations: same box and same plain mean, but the array has one fewer
# leading axis, so the spatial axes are 1 and 2
obs_tas_anom_box = obs_tas_anom[:, spna_lat_slice, spna_lon_slice]
obs_tas_anom_spna = obs_tas_anom_box.mean(axis=(1, 2))

# Print the shape of the data
print(obs_tas_anom_spna.shape)

(20, 43)
(43,)
