In [1]:
# Import local modules
import argparse
import os
import sys
import glob

# Import third-party modules
import xarray as xr
import numpy as np
import pandas as pd

In [2]:
# Add the skill-maps directory to the module search path, then import the
# project's dictionaries module (presumably located there - confirm)
sys.path.append("/home/users/benhutch/skill-maps")
import dictionaries as dicts

# Add the skill-maps/python directory to the module search path, then import
# the project's helper functions module
sys.path.append("/home/users/benhutch/skill-maps/python")
import functions as fnc

# Add the rose-suite-matching directory to the module search path, then import
# the two helpers used from nao_matching_seasons
sys.path.append("/home/users/benhutch/skill-maps/rose-suite-matching")
from nao_matching_seasons import match_variable_models, find_obs_path

In [3]:
# Experiment configuration: what to analyse and over which period
variable, region, season = "psl", "global", "DJFM"
forecast_range = "2-3"
start_year, end_year = 1961, 2014

# Lag settings
lag = 4
method = "alternate_lag"

# Number of bootstraps (kept small while testing)
no_bootstraps = 10

# Directory searched for the processed alternate-lag .npy files
alt_lag_dir = "/gws/nopw/j04/canari/users/benhutch/alternate-lag-processed-data"

In [4]:
# Years used in the alternate-lag file name: the first year is shifted forward
# by (lag - 1) relative to the configured start year; the last year is unchanged
alt_lag_first_year = start_year + lag - 1
alt_lag_last_year = end_year

# Glob pattern for the alternate-lag files; the wildcard covers whatever sits
# between the lag field and the "alternate_lag.npy" suffix (a timestamp in the
# file names seen so far)
filename = f"{variable}_{season}_{region}_{alt_lag_first_year}_{alt_lag_last_year}_{forecast_range}_{lag}_*alternate_lag.npy"

# Glob pattern for the raw files. The stem is kept separately so the matches
# can be validated below.
raw_stem = f"{variable}_{season}_{region}_{start_year}_{alt_lag_last_year}_{forecast_range}_{lag}"
raw_filename = raw_stem + "*.npy"

# Find files matching the alternate-lag pattern (sorted so the order is
# deterministic rather than filesystem-dependent)
alt_lag_files = sorted(glob.glob(os.path.join(alt_lag_dir, filename)))

# Find files matching the raw pattern. The pattern has no separator after the
# lag, so on its own it would also pick up:
#   * files for a different lag sharing the same leading digits (4 -> 40, 41)
#   * the "*_alternate_lag.npy" files whenever lag == 1 (same first year)
# Keep only names where the lag field really ends after the stem, and drop any
# alternate-lag files.
raw_files = sorted(
    path
    for path in glob.glob(os.path.join(alt_lag_dir, raw_filename))
    if os.path.basename(path)[len(raw_stem)] in "_."
    and not path.endswith("alternate_lag.npy")
)

In [5]:
# Position of the timestamp in the underscore-separated file name, e.g.
# psl_DJFM_global_1962_1980_2-9_2_1706281292.628301_alternate_lag.npy
#   0    1     2     3    4    5  6         7
_TIMESTAMP_FIELD = 7


def _file_timestamp(path):
    """Return the timestamp embedded in a processed-data file name as a float.

    Only the base name is parsed, so underscores in the directory path cannot
    shift the field position. The full fractional value is kept so that two
    files written within the same second are still ordered correctly.

    Raises IndexError / ValueError if the name does not follow the expected
    layout.
    """
    name = os.path.basename(path)
    if name.endswith(".npy"):
        name = name[: -len(".npy")]
    return float(name.split("_")[_TIMESTAMP_FIELD])


def _load_latest(files, description):
    """Load the most recently timestamped .npy file from ``files``.

    Parameters
    ----------
    files : list of str
        Candidate file paths (typically the result of a glob).
    description : str
        Human-readable label used in the error message.

    Returns
    -------
    numpy.ndarray
        The array stored in the selected file.

    Raises
    ------
    FileNotFoundError
        If ``files`` is empty.
    """
    if not files:
        raise FileNotFoundError(f"No {description} files found to load")

    if len(files) > 1:
        print("More than one file found")

        # max() returns the first of any tied entries
        chosen = max(files, key=_file_timestamp)

        # Print that we are using the latest datetime file
        print("Using the latest datetime file:", chosen)
    else:
        chosen = files[0]

    return np.load(chosen)


# Load the alternate-lag data (latest file if several match)
alt_lag_data = _load_latest(alt_lag_files, "alternate lag")

# Load the raw data (latest file if several match)
raw_data = _load_latest(raw_files, "raw")

More than one file found
Using the latest datetime file: /gws/nopw/j04/canari/users/benhutch/alternate-lag-processed-data/psl_DJFM_global_1964_2014_2-3_4_1706785429.0699768_alternate_lag.npy
More than one file found
Using the latest datetime file: /gws/nopw/j04/canari/users/benhutch/alternate-lag-processed-data/psl_DJFM_global_1961_2014_2-3_4_1706785429.0699768.npy


In [6]:
# Collect the keyword arguments for the observation processing helper
obs_kwargs = dict(
    variable=variable,
    region=region,
    region_grid=dicts.gridspec_global,
    forecast_range=forecast_range,
    season=season,
    observations_path=find_obs_path(variable),
    obs_var_name=variable,
)

# Run the helper for the configured variable and keep the result as `obs`
obs = fnc.process_observations(**obs_kwargs)

Gridspec file: /home/users/benhutch/gridspec/gridspec-global.txt
Variable is not ua or va, creating new file name
File already exists
Loading ERA5 data
Dataset loaded:  [[[[100441.25  100441.25  100441.25  ... 100441.25  100441.25
    100441.25 ]
   [ 99956.96   99949.89   99938.24  ... 100008.12   99992.734
     99973.734]
   [ 99372.84   99429.414  99346.5   ...  99315.445  99202.03
     99230.18 ]
   ...
   [103048.07  103076.63  103104.36  ... 102961.27  102991.36
    103020.336]
   [102941.17  102960.586 102981.38  ... 102875.04  102897.77
    102917.19 ]
   [102773.83  102784.78  102795.32  ... 102740.555 102752.75
    102763.43 ]]

  [[       nan        nan        nan ...        nan        nan
           nan]
   [       nan        nan        nan ...        nan        nan
           nan]
   [       nan        nan        nan ...        nan        nan
           nan]
   ...
   [       nan        nan        nan ...        nan        nan
           nan]
   [       nan        nan     

In [7]:
# Number of years added to the first/last years for each supported
# forecast range
_YEAR_OFFSETS = {"2-9": 5, "2-5": 3, "2-3": 2}

if forecast_range not in _YEAR_OFFSETS:
    raise ValueError("Forecast range not recognised. Please try again.")

year_offset = _YEAR_OFFSETS[forecast_range]

# Set up the alt lag first and last years.
# These are recomputed from the configuration (start_year, end_year, lag)
# instead of being incremented in place, so re-running this cell does not
# shift the years a second time.
alt_lag_first_year = start_year + lag - 1 + year_offset
alt_lag_last_year = end_year + year_offset

# Set up the raw first and last years
raw_first_year = int(start_year) + year_offset
raw_last_year = int(end_year) + year_offset

In [8]:
# Build the inclusive year ranges for the alternate-lag and raw data in one
# pass, collected in a list in that order
common_years = [
    np.arange(first_year, last_year + 1)
    for first_year, last_year in (
        (alt_lag_first_year, alt_lag_last_year),
        (raw_first_year, raw_last_year),
    )
]

# Expose each range under its own name as well
common_years_alt_lag, common_years_raw = common_years

In [9]:
def _drop_all_nan_years(data, years):
    """Report NaNs per year and drop years whose values are all NaN.

    Parameters
    ----------
    data : xarray object with a ``time`` coordinate
        The observations to check.
    years : iterable of int
        Calendar years to inspect.

    Returns
    -------
    The input with every entirely-NaN year removed.

    Years are selected with a boolean mask rather than a string label, so a
    year that has already been removed (or that appears more than once in
    ``years``) yields an empty selection instead of raising KeyError.
    """
    for year in years:
        # Extract the data for the year
        year_data = data.sel(time=data.time.dt.year == year)

        # Report any NaNs; remove the year only if every value is NaN
        if np.isnan(year_data).any():
            print("Nans found in obs for year:", year)
            if np.isnan(year_data).all():
                print("All values are nan")
                print("Removing year:", year, "from obs")
                data = data.sel(time=data.time.dt.year != year)

    return data


# Process the obs to be in line with the model data
# Create a copy of the obs
obs_copy = obs.copy()

# Constrain the observations to the common years of the alternate-lag data
obs_lag = obs_copy.sel(time=slice(f"{alt_lag_first_year}-01-01", f"{alt_lag_last_year}-12-31"))

# Constrain the observations to the common years of the raw data
obs_raw = obs_copy.sel(time=slice(f"{raw_first_year}-01-01", f"{raw_last_year}-12-31"))

# Check the obs for NaNs over the years covered by each slice.
# NOTE(review): all-NaN years are removed from `obs` only; `obs_lag` and
# `obs_raw` were sliced above and are not affected - confirm this is intended.
obs = _drop_all_nan_years(obs, obs_lag.time.dt.year.values)
obs = _drop_all_nan_years(obs, obs_raw.time.dt.year.values)

# Print the first and last years of the observations
print("First year obs post-slice:", obs_lag.time[0].dt.year.values)
print("Last year obs post-slice:", obs_lag.time[-1].dt.year.values)

# Verify that the number of obs time steps matches the first axis of the
# alternate-lag data
assert len(obs_lag.time.dt.year.values) == alt_lag_data.shape[0], (
    "Length of observations is incorrect"
)

# Verify that the number of obs time steps matches the first axis of the
# raw data
assert len(obs_raw.time.dt.year.values) == raw_data.shape[0], (
    "Length of observations is incorrect"
)

First year obs post-slice: 1966
Last year obs post-slice: 2016


In [10]:
# Exchange the two leading axes (0 and 1) of the alternate-lag array.
# NOTE(review): the result is bound to the same name, so running this cell
# twice swaps the axes back again.
alt_lag_data = alt_lag_data.swapaxes(0, 1)
print("Shape of alt_lag_data:", alt_lag_data.shape)

# Same exchange of axes 0 and 1 for the raw array
raw_data = raw_data.swapaxes(0, 1)
print("Shape of raw_data:", raw_data.shape)

Shape of alt_lag_data: (712, 51, 72, 144)
Shape of raw_data: (178, 54, 9, 72, 144)


In [11]:
# Number of leading entries along axis 2 of the raw data that are averaged
# for each supported forecast range
n_lead_by_range = {"2-3": 2, "2-5": 4, "2-9": 8}

if forecast_range not in n_lead_by_range:
    raise ValueError("Forecast range not recognised. Please try again.")

n_lead = n_lead_by_range[forecast_range]

# Average over those entries, collapsing axis 2
raw_data_mean = raw_data[:, :, :n_lead, :, :].mean(axis=2)

In [12]:
# # Extract the values for the obs
# obs_lag_values = obs_lag.values

# # Print the shape of the obs_values
# print("Shape of obs_values:", obs_lag_values.shape)

# # Print the shape of the alt lag data
# print("Shape of alt_lag_data:", alt_lag_data.shape)

# # Print the shape of the raw data
# print("Shape of raw_data_mean:", raw_data_mean.shape)

# # FIXME: Exits in forecast_stats?
# # Run the function to calculate the forecast stats
# forecast_stats_alt_lag = fnc.forecast_stats(obs=obs_lag_values,
#                                             forecast1=alt_lag_data,
#                                             forecast2=alt_lag_data,
#                                             no_boot=1)

In [13]:
# Extract the underlying array of the raw-period observations
obs_raw_values = obs_raw.values

# Calculate the forecast stats for the raw data. The same array is passed as
# both forecast1 and forecast2. The bootstrap count comes from the
# configuration cell (no_bootstraps) rather than a second hard-coded value.
forecast_stats_raw = fnc.forecast_stats(obs=obs_raw_values,
                                        forecast1=raw_data_mean,
                                        forecast2=raw_data_mean,
                                        no_boot=no_bootstraps)

bootstrap index 0
shape of obs_boot (54, 72, 144)
value of obs_boot [[[ -75.99023438  -75.99023438  -75.99023438 ...  -75.99023438
    -75.99023438  -75.99023438]
  [ -81.36621094  -77.30273438  -72.16210938 ...  -98.96484375
    -93.64648438  -87.20800781]
  [ -21.42773438  -32.09472656  -14.82714844 ...  -15.10351562
     -4.49023438   -8.01074219]
  ...
  [-127.5703125  -115.44140625 -103.33496094 ... -164.59863281
   -151.85839844 -139.65429688]
  [-101.78808594  -93.07617188  -84.63476562 ... -128.48535156
   -119.3515625  -110.58691406]
  [ -54.98242188  -51.65332031  -48.16210938 ...  -65.59863281
    -61.86230469  -58.37988281]]

 [[ 438.109375    438.109375    438.109375   ...  438.109375
    438.109375    438.109375  ]
  [ 419.04199219  421.56347656  424.34667969 ...  414.14746094
    415.14941406  416.94238281]
  [ 426.22460938  417.39453125  423.76074219 ...  416.8125
    423.87304688  426.66113281]
  ...
  [  69.32421875   74.71191406   79.78125    ...   50.1640625
     57

  r_partial_boot[iboot, lat, lon] = num / np.sqrt(denom_sq)


bootstrap index 1
shape of obs_boot (54, 72, 144)
value of obs_boot [[[-7.59902344e+01 -7.59902344e+01 -7.59902344e+01 ... -7.59902344e+01
   -7.59902344e+01 -7.59902344e+01]
  [-8.13662109e+01 -7.73027344e+01 -7.21621094e+01 ... -9.89648438e+01
   -9.36464844e+01 -8.72080078e+01]
  [-2.14277344e+01 -3.20947266e+01 -1.48271484e+01 ... -1.51035156e+01
   -4.49023438e+00 -8.01074219e+00]
  ...
  [-1.27570312e+02 -1.15441406e+02 -1.03334961e+02 ... -1.64598633e+02
   -1.51858398e+02 -1.39654297e+02]
  [-1.01788086e+02 -9.30761719e+01 -8.46347656e+01 ... -1.28485352e+02
   -1.19351562e+02 -1.10586914e+02]
  [-5.49824219e+01 -5.16533203e+01 -4.81621094e+01 ... -6.55986328e+01
   -6.18623047e+01 -5.83798828e+01]]

 [[ 4.38109375e+02  4.38109375e+02  4.38109375e+02 ...  4.38109375e+02
    4.38109375e+02  4.38109375e+02]
  [ 4.19041992e+02  4.21563477e+02  4.24346680e+02 ...  4.14147461e+02
    4.15149414e+02  4.16942383e+02]
  [ 4.26224609e+02  4.17394531e+02  4.23760742e+02 ...  4.16812500e+