In [1]:
# import the relevant modules
import os
from netCDF4 import Dataset
import xarray as xr
import numpy as np
import dask

In [2]:
# processing the observations
# trying to do it correctly

# Path to the full observations .nc file.
# It will likely need to be loaded in chunks with dask.
path_obs_nc = "/home/users/benhutch/ERA5_psl/ERA5-full-data.nc"
# Path to a second observations file. Going by its name this is presumably the
# same data regridded to 2.5 degrees (TODO confirm). This is the path that is
# passed to main() further down the notebook.
path_obs_nc_regrid = "/home/users/benhutch/ERA5_psl/ERA5-full-data-2.5-regrid.nc"

In [3]:
# write functions to select the iceland and azores gridboxes
# also remap the obs data to a 2.5x2.5 degree grid
# first define the new grid
def create_new_grid():
    """Build the target 2.5 x 2.5 grid as an xarray Dataset.

    Returns
    -------
    xr.Dataset
        Dataset built from two 1-D arrays named ``lon`` and ``lat``, each laid
        out along a dimension of the same name. ``lon`` starts at -180 and
        ``lat`` at -90, both stepping by 2.5. ``np.arange`` leaves out its stop
        value, so the last points are 177.5 and 87.5 respectively.
    """
    step = 2.5
    grid_axes = {
        'lon': (['lon'], np.arange(-180, 180, step)),
        'lat': (['lat'], np.arange(-90, 90, step)),
    }
    return xr.Dataset(grid_axes)

# define the gridspec for azores and iceland
def get_grid_spec(location):
    """Return the lon/lat bounds of a named gridbox.

    Parameters
    ----------
    location : str
        Either ``'azores'`` or ``'iceland'``.

    Returns
    -------
    dict
        Bounds stored under the keys ``'lon1'``, ``'lon2'``, ``'lat1'`` and
        ``'lat2'``.

    Raises
    ------
    ValueError
        If ``location`` is neither of the two supported names.
    """
    # Built on every call so each caller receives its own dictionaries.
    gridboxes = (
        ('azores', {'lon1': -28, 'lon2': -20, 'lat1': 36, 'lat2': 40}),
        ('iceland', {'lon1': -25, 'lon2': -16, 'lat1': 63, 'lat2': 70}),
    )
    for name, bounds in gridboxes:
        if location == name:
            return bounds
    raise ValueError('Location must be azores or iceland')

def select_data_within_grid(dataset, grid):
    """Subset ``dataset`` to the box described by ``grid``.

    Parameters
    ----------
    dataset
        Object exposing ``.sel`` with ``lon`` and ``lat`` coordinates.
    grid : dict
        Bounds under the keys ``'lon1'``, ``'lon2'``, ``'lat1'``, ``'lat2'``
        (the layout returned by ``get_grid_spec``).

    Returns
    -------
    The result of ``dataset.sel`` with a ``lon1 -> lon2`` slice on ``lon`` and
    a ``lat1 -> lat2`` slice on ``lat``.

    NOTE(review): presumably this requires both coordinates to be stored in
    ascending order for the slices to match anything -- confirm for any file
    other than the regridded one.
    """
    lon_bounds = slice(grid['lon1'], grid['lon2'])
    lat_bounds = slice(grid['lat1'], grid['lat2'])
    return dataset.sel(lon=lon_bounds, lat=lat_bounds)

# define a function to select the months DJFM
def select_months(dataset):
    """Keep only the time steps that fall in December, January, February or March.

    Parameters
    ----------
    dataset
        Object with a datetime-like ``time`` coordinate and a ``.sel`` method.

    Returns
    -------
    ``dataset`` selected along ``time`` with the month-membership mask.
    """
    djfm = [12, 1, 2, 3]
    month_of_step = dataset.time.dt.month
    in_djfm = month_of_step.isin(djfm)
    return dataset.sel(time=in_djfm)

# define a function which calculates the model mean state
def calculate_model_mean_state(dataset):
    """Average ``dataset`` over its ``time`` dimension.

    Parameters
    ----------
    dataset
        Object exposing ``.mean(dim=...)`` with a ``time`` dimension.

    Returns
    -------
    The time-mean of ``dataset``.
    """
    return dataset.mean(dim='time')

# define a function which calculates the model anomalies
def calculate_model_anomalies(dataset, model_mean_state):
    """Subtract the mean state from the data.

    Parameters
    ----------
    dataset
        The full data.
    model_mean_state
        The mean state to remove, e.g. the output of
        ``calculate_model_mean_state``.

    Returns
    -------
    ``dataset - model_mean_state``.
    """
    return dataset - model_mean_state

# define a function which first shifts the data back by 3 months
# then calculates the annual mean anomalies
def calculate_annual_mean_anomalies(dataset):
    """Shift the data three steps earlier in time, then average per year.

    Parameters
    ----------
    dataset
        Object with a ``time`` dimension supporting ``.shift`` and
        ``.resample``.

    Returns
    -------
    The shifted data resampled along ``time`` at ``'Y'`` frequency and
    averaged within each resampled group.

    Notes
    -----
    ``shift(time=-3)`` moves values by three positions along ``time``, not by
    a calendar offset. Assumes monthly steps, so that three positions equal
    three months -- TODO confirm against the input file.
    """
    shifted = dataset.shift(time=-3)
    yearly_groups = shifted.resample(time='Y')
    return yearly_groups.mean(dim='time')

# define a function which takes the azores and iceland anomalies
# and calculates the NAO index
def calculate_NAO_index(azores_anomalies, iceland_anomalies, output_file):
    """Compute the NAO index from the two gridbox anomalies and save it.

    Parameters
    ----------
    azores_anomalies
        Anomalies over the azores gridbox, with ``lat`` and ``lon`` dimensions.
    iceland_anomalies
        Anomalies over the iceland gridbox, with ``lat`` and ``lon`` dimensions.
    output_file
        Destination handed to ``to_netcdf`` for the resulting index.

    Returns
    -------
    The azores box-mean minus the iceland box-mean. The same object is also
    written to ``output_file`` before being returned.
    """
    # Collapse each box to its spatial mean, then take azores minus iceland.
    nao = (
        azores_anomalies.mean(dim=['lat', 'lon'])
        - iceland_anomalies.mean(dim=['lat', 'lon'])
    )

    # Write the index to disk as netCDF before returning it.
    nao.to_netcdf(output_file)
    return nao


# define a function which takes a forward running mean of the NAO index
# for 8 years
def calculate_NAO_index_running_mean(NAO_index):
    """Apply an 8-step rolling mean along ``time``.

    Parameters
    ----------
    NAO_index
        Object with a ``time`` dimension supporting ``.rolling``.

    Returns
    -------
    The mean over a rolling window of 8 time steps, using the default
    ``rolling`` options (no ``center`` or ``min_periods`` is passed).

    NOTE(review): with those defaults each value is presumably labelled at the
    end of its window rather than the start -- confirm this matches the
    "forward" running mean that is intended.
    """
    window = NAO_index.rolling(time=8)
    return window.mean()


In [4]:
# define the function for processing the observations
# for a single location (azores or iceland)
def process_observations(path_obs_nc, location):
    """Turn the observations file into annual-mean anomalies for one gridbox.

    Parameters
    ----------
    path_obs_nc : str
        Path of the observations netCDF file to open.
    location : str
        Gridbox name understood by ``get_grid_spec`` (``'azores'`` or
        ``'iceland'``).

    Returns
    -------
    The output of ``calculate_annual_mean_anomalies`` applied to the
    anomalies of the selected gridbox.

    Raises
    ------
    ValueError
        Propagated from ``get_grid_spec`` for an unknown ``location``.
    """
    # Open with dask chunks of 500 time steps. The file is not closed here.
    obs = xr.open_dataset(path_obs_nc, chunks={'time': 500})

    # Restrict the data to the requested gridbox.
    box = get_grid_spec(location)
    obs_in_box = select_data_within_grid(obs, box)

    # The DJFM selection (select_months) is deliberately not applied:
    # it is not necessary for the updated dataset.

    # Anomalies are taken relative to the time-mean of the boxed data.
    mean_state = calculate_model_mean_state(obs_in_box)
    anomalies = calculate_model_anomalies(obs_in_box, mean_state)

    return calculate_annual_mean_anomalies(anomalies)

# define the main function which processes the observations
# for both azores and iceland and then calculates the NAO index
def main(path_obs_nc,
         output_file="/home/users/benhutch/multi-model/multi-model-jasmin/NAO_index_raw.nc"):
    """Compute the raw NAO index from the observations and save it.

    Parameters
    ----------
    path_obs_nc : str
        Path of the observations netCDF file, passed to
        ``process_observations`` once per gridbox.
    output_file : str, optional
        Where ``calculate_NAO_index`` writes the index. Defaults to the
        absolute path that was previously hard-coded in this function.

    Returns
    -------
    The NAO index returned by ``calculate_NAO_index``. Previously nothing was
    returned, so the caller's variable was always ``None``.
    """
    # Annual-mean anomalies for each of the two gridboxes.
    azores_annual_mean_anomalies = process_observations(path_obs_nc, 'azores')
    iceland_annual_mean_anomalies = process_observations(path_obs_nc, 'iceland')

    # Calculate the NAO index; calculate_NAO_index also writes it to output_file.
    NAO_index = calculate_NAO_index(
        azores_annual_mean_anomalies,
        iceland_annual_mean_anomalies,
        output_file,
    )

    return NAO_index

In [5]:
# run the main function on the regridded observations file; this writes the
# raw NAO index to NAO_index_raw.nc
# NOTE(review): despite the variable name, main() does not apply a running
# mean -- the 8-step running mean is taken with cdo in a later cell
NAO_index_running_mean = main(path_obs_nc_regrid)
#NAO_index_running_mean.compute()

<xarray.DataArray 'var151' (time: 708, lat: 2, lon: 4)>
array([[[101482.18 , 101580.94 , 101674.4  , 101762.18 ],
        [101194.484, 101318.914, 101432.41 , 101537.016]],

       [[100662.99 , 100734.44 , 100833.82 , 100949.15 ],
        [100517.13 , 100554.34 , 100624.72 , 100723.4  ]],

       [[101600.375, 101606.36 , 101587.72 , 101548.73 ],
        [101294.13 , 101308.14 , 101298.18 , 101267.92 ]],

       ...,

       [[102039.05 , 101972.94 , 101918.234, 101875.6  ],
        [102121.875, 102066.22 , 102004.78 , 101948.05 ]],

       [[101856.4  , 101820.48 , 101775.555, 101731.39 ],
        [101545.95 , 101523.13 , 101494.58 , 101463.58 ]],

       [[101984.35 , 102110.93 , 102242.82 , 102371.12 ],
        [101742.01 , 101902.35 , 102064.39 , 102217.35 ]]], dtype=float32)
Coordinates:
  * time     (time) datetime64[ns] 1960-01-01 1960-02-01 ... 2018-12-01
  * lon      (lon) float64 -27.5 -25.0 -22.5 -20.0
  * lat      (lat) float64 37.5 40.0
Attributes:
    code:     151
    t

In [7]:
# take the 8-timestep running mean of the raw NAO index using cdo,
# reading NAO_index_raw.nc (written by main) and writing NAO_index_8yrRM.nc
#! module load jaspy
! cdo runmean,8 "/home/users/benhutch/multi-model/multi-model-jasmin/NAO_index_raw.nc" "/home/users/benhutch/multi-model/multi-model-jasmin/NAO_index_8yrRM.nc"

[32mcdo    runmean: [0mProcessed 59 values from 1 variable over 59 timesteps [0.48s 148MB].


In [8]:
# open the newly generated file and have a look
# (this is the output path of the cdo runmean command)
path_NAO_index_8yrRM = "/home/users/benhutch/multi-model/multi-model-jasmin/NAO_index_8yrRM.nc"

# open with dask chunks of 10 time steps; the bare name on the last line
# displays the dataset as the cell output
test = xr.open_dataset(path_NAO_index_8yrRM, chunks={'time': 10})
test

Unnamed: 0,Array,Chunk
Bytes,832 B,160 B
Shape,"(52, 2)","(10, 2)"
Count,7 Tasks,6 Chunks
Type,datetime64[ns],numpy.ndarray
"Array Chunk Bytes 832 B 160 B Shape (52, 2) (10, 2) Count 7 Tasks 6 Chunks Type datetime64[ns] numpy.ndarray",2  52,

Unnamed: 0,Array,Chunk
Bytes,832 B,160 B
Shape,"(52, 2)","(10, 2)"
Count,7 Tasks,6 Chunks
Type,datetime64[ns],numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,208 B,40 B
Shape,"(52,)","(10,)"
Count,7 Tasks,6 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 208 B 40 B Shape (52,) (10,) Count 7 Tasks 6 Chunks Type float32 numpy.ndarray",52  1,

Unnamed: 0,Array,Chunk
Bytes,208 B,40 B
Shape,"(52,)","(10,)"
Count,7 Tasks,6 Chunks
Type,float32,numpy.ndarray
