### Process daily NAO ###

Load the daily ERA5 mean sea level pressure observations, compute the NAO index and the delta P index, and store both in a dataframe.

In [1]:
# Local imports
import os
import sys
import time
import argparse

# Third-party imports
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import pandas as pd
import shapely.geometry
import cartopy.io.shapereader as shpreader
import iris

# Specific imports
from tqdm import tqdm
from datetime import datetime, timedelta

  _set_context_ca_bundle_path(ca_bundle_path)


In [2]:
# Absolute path of the netCDF file that holds the daily "msl" pressure field
psl_data_path = (
    "/gws/nopw/j04/canari/users/benhutch/ERA5/ERA5_msl_daily_1960_2020_daymean.nc"
)

In [3]:
%%time

# Read the single cube matching the "msl" constraint from the file
psl_cube = iris.load_cube(psl_data_path, constraint="msl")

CPU times: user 1.62 s, sys: 1.76 s, total: 3.39 s
Wall time: 32.9 s


In [4]:
# Display the cube summary (name, units, shape, coordinates, attributes)
psl_cube

Air Pressure At Mean Sea Level (Pa),time,latitude,longitude
Shape,22281,214,304
Dimension coordinates,,,
time,x,-,-
latitude,-,x,-
longitude,-,-,x
Cell methods,,,
0,time: mean,time: mean,time: mean
Attributes,,,
CDI,'Climate Data Interface version 2.4.1 (https://mpimet.mpg.de/cdi)','Climate Data Interface version 2.4.1 (https://mpimet.mpg.de/cdi)','Climate Data Interface version 2.4.1 (https://mpimet.mpg.de/cdi)'
CDO,'Climate Data Operators version 2.4.1 (https://mpimet.mpg.de/cdo)','Climate Data Operators version 2.4.1 (https://mpimet.mpg.de/cdo)','Climate Data Operators version 2.4.1 (https://mpimet.mpg.de/cdo)'


In [5]:
def _in_study_period(cell):
    """Return True when the time cell's year lies within 1975-2015 inclusive."""
    return 1975 <= cell.point.year <= 2015


# Restrict the cube to the time steps that fall inside the study period
psl_cube = psl_cube.extract(iris.Constraint(time=_in_study_period))

In [6]:
%%time

# Pull the pressure values out of the cube as an array
data = psl_cube.data

# Coordinate values along the two horizontal axes
lat_coord = psl_cube.coord("latitude")
lon_coord = psl_cube.coord("longitude")
lats = lat_coord.points
lons = lon_coord.points

# Numeric time coordinate values (converted to dates further down)
times = psl_cube.coord("time").points

CPU times: user 2.16 s, sys: 11.8 s, total: 13.9 s
Wall time: 35.4 s


In [7]:
# Check the array dimensions after the time subset
# (the cube summary lists the axes as time, latitude, longitude)
data.shape

(14975, 214, 304)

In [8]:
# Lon/lat bounds of the gridbox for the Azores
azores_grid_corrected = {"lon1": -28, "lon2": -20, "lat1": 36, "lat2": 40}

# Lon/lat bounds of the gridbox for Iceland
iceland_grid_corrected = {"lon1": -25, "lon2": -16, "lat1": 63, "lat2": 70}

# Lon/lat bounds of the northern UK box (corrected version)
uk_n_box_corrected = {"lon1": -27, "lon2": 21, "lat1": 57, "lat2": 70}

# Lon/lat bounds of the southern UK box (corrected version)
uk_s_box_corrected = {"lon1": -27, "lon2": 21, "lat1": 38, "lat2": 51}


def _nearest_box_indices(box):
    """Locate the grid points closest to each edge of ``box``.

    ``box`` is a dict with the keys "lat1", "lat2", "lon1" and "lon2".
    Returns the tuple ``(lat1_idx, lat2_idx, lon1_idx, lon2_idx)`` of
    positions in the notebook-level ``lats`` / ``lons`` arrays.
    """
    return (
        np.argmin(np.abs(lats - box["lat1"])),
        np.argmin(np.abs(lats - box["lat2"])),
        np.argmin(np.abs(lons - box["lon1"])),
        np.argmin(np.abs(lons - box["lon2"])),
    )


# Indices of the Azores region
(
    azores_lat1_idx,
    azores_lat2_idx,
    azores_lon1_idx,
    azores_lon2_idx,
) = _nearest_box_indices(azores_grid_corrected)

# Indices of the Iceland region
(
    iceland_lat1_idx,
    iceland_lat2_idx,
    iceland_lon1_idx,
    iceland_lon2_idx,
) = _nearest_box_indices(iceland_grid_corrected)

# Indices of the northern UK box
(
    uk_n_lat1_idx,
    uk_n_lat2_idx,
    uk_n_lon1_idx,
    uk_n_lon2_idx,
) = _nearest_box_indices(uk_n_box_corrected)

# Indices of the southern UK box
(
    uk_s_lat1_idx,
    uk_s_lat2_idx,
    uk_s_lon1_idx,
    uk_s_lon2_idx,
) = _nearest_box_indices(uk_s_box_corrected)

In [9]:
# Inspect the longitude coordinate values used for the index lookups
lons

array([-4.50000000e+01, -4.47187500e+01, -4.44375000e+01, -4.41562462e+01,
       -4.38749962e+01, -4.35937462e+01, -4.33124962e+01, -4.30312424e+01,
       -4.27499924e+01, -4.24687424e+01, -4.21874924e+01, -4.19062424e+01,
       -4.16249886e+01, -4.13437386e+01, -4.10624886e+01, -4.07812386e+01,
       -4.04999886e+01, -4.02187347e+01, -3.99374847e+01, -3.96562347e+01,
       -3.93749847e+01, -3.90937309e+01, -3.88124809e+01, -3.85312309e+01,
       -3.82499809e+01, -3.79687309e+01, -3.76874771e+01, -3.74062271e+01,
       -3.71249771e+01, -3.68437271e+01, -3.65624771e+01, -3.62812233e+01,
       -3.59999733e+01, -3.57187233e+01, -3.54374733e+01, -3.51562195e+01,
       -3.48749695e+01, -3.45937195e+01, -3.43124695e+01, -3.40312195e+01,
       -3.37499657e+01, -3.34687157e+01, -3.31874657e+01, -3.29062157e+01,
       -3.26249619e+01, -3.23437119e+01, -3.20624619e+01, -3.17812119e+01,
       -3.14999599e+01, -3.12187099e+01, -3.09374580e+01, -3.06562080e+01,
       -3.03749580e+01, -

In [10]:
def _box_mean(field, lat_a_idx, lat_b_idx, lon_a_idx, lon_b_idx):
    """Average ``field`` over a lat/lon index box for every step along axis 0.

    The two indices given for each axis may be in either order: they are
    sorted before slicing, so the box is non-empty whether the coordinate is
    stored ascending or descending. (Slicing with start > stop would give an
    empty selection and a NaN mean.) As with any Python slice, the larger
    index on each axis is excluded.

    Returns a 1-D array of unweighted means, one per entry along axis 0.
    """
    lat_lo, lat_hi = sorted((lat_a_idx, lat_b_idx))
    lon_lo, lon_hi = sorted((lon_a_idx, lon_b_idx))
    return field[:, lat_lo:lat_hi, lon_lo:lon_hi].mean(axis=(1, 2))


# Box-mean pressure time series for the two NAO centres
azores_box = _box_mean(data, azores_lat1_idx, azores_lat2_idx, azores_lon1_idx, azores_lon2_idx)
iceland_box = _box_mean(data, iceland_lat1_idx, iceland_lat2_idx, iceland_lon1_idx, iceland_lon2_idx)

# Box-mean pressure time series for the northern and southern UK boxes
uk_n_box = _box_mean(data, uk_n_lat1_idx, uk_n_lat2_idx, uk_n_lon1_idx, uk_n_lon2_idx)
uk_s_box = _box_mean(data, uk_s_lat1_idx, uk_s_lat2_idx, uk_s_lon1_idx, uk_s_lon2_idx)

In [21]:
# Pressure differences between the paired boxes, divided by 100 (Pa -> hPa)
nao_hpa = (azores_box - iceland_box) / 100
delta_p_hpa = (uk_n_box - uk_s_box) / 100

# Collect both indices alongside the raw time values
indices_df = pd.DataFrame(
    {"time": times, "nao_index": nao_hpa, "delta_p_index": delta_p_hpa}
)

In [22]:
# Summary statistics (count, mean, std, min, quartiles, max) for each column
indices_df.describe()

Unnamed: 0,time,nao_index,delta_p_index
count,14975.0,14975.0,14975.0
mean,837131.0,13.410758,-8.586423
std,103753.3,16.701416,12.193858
min,657443.0,-48.421406,-52.878672
25%,747287.0,2.902539,-15.756133
50%,837131.0,12.781641,-7.522344
75%,926975.0,23.729219,-0.21125
max,1016819.0,72.429766,30.282891


In [23]:
# Preview the first rows; in the recorded output the time column still holds
# the raw numeric coordinate values
indices_df.head()

Unnamed: 0,time,nao_index,delta_p_index
0,657443,49.856172,-32.213047
1,657467,56.215625,-36.983906
2,657491,43.575859,-35.995234
3,657515,27.980391,-32.392578
4,657539,28.766641,-39.533594


In [16]:
import cftime

# Disabled alternative: convert the numeric time column with cftime.
# Kept for reference only; nothing below is executed.
# Time coordinate metadata noted for the source file:
# # time:calendar = "gregorian" ;
# # time:axis = "T" ;
# # time:units = "hours since 1900-01-01" ;
# indices_df["time"] = cftime.num2date(indices_df["time"], units="hours since 1900-01-01", calendar="gregorian")

In [24]:
# The raw time values are assumed to be hours since 1900-01-01.
# Convert only while the column is still numeric: this cell overwrites the
# column it reads, so without the guard a second execution would raise
# because the column no longer holds hour counts.
if pd.api.types.is_numeric_dtype(indices_df["time"]):
    indices_df["time"] = pd.to_datetime("1900-01-01") + pd.to_timedelta(indices_df["time"], unit='h')

In [27]:
# Preview the first rows again to check the converted time column
indices_df.head()

Unnamed: 0,time,nao_index,delta_p_index
0,1975-01-01,49.856172,-32.213047
1,1975-01-02,56.215625,-36.983906
2,1975-01-03,43.575859,-35.995234
3,1975-01-04,27.980391,-32.392578
4,1975-01-05,28.766641,-39.533594


In [26]:
# Replace each datetime with its "YYYY-MM-DD" string form
time_as_datetime = indices_df["time"]
indices_df["time"] = time_as_datetime.dt.strftime("%Y-%m-%d")

In [28]:
# File name of the output CSV
fname = "nao_delta_p_indices_1975_2015.csv"

# Directory the CSV is written to
save_dir = "/home/users/benhutch/unseen_multi_year/dfs"

# to_csv does not create missing directories, so make sure the target exists
os.makedirs(save_dir, exist_ok=True)

# Write the dataframe without the integer row index
indices_df.to_csv(os.path.join(save_dir, fname), index=False)