In [1]:
# import the relevant modules
import os
from netCDF4 import Dataset
import xarray as xr
import numpy as np
import dask

In [2]:
# preprocess the updated data from CDS
# specify the path
updated_long_obs_path = "/home/users/benhutch/ERA5_psl/adaptor.mars.internal-1684935647.8168426-1420-8-a5a68040-1786-409f-975b-df151208f15d.grib"

# first convert to a .nc file
! cdo -f nc copy "/home/users/benhutch/ERA5_psl/adaptor.mars.internal-1684935647.8168426-1420-8-a5a68040-1786-409f-975b-df151208f15d.grib" "/home/users/benhutch/ERA5_psl/long-ERA5-full.nc"

# then remap to the right grid specs
! cdo remapcon,"/home/users/benhutch/multi-model/gridspec-global.txt" "/home/users/benhutch/ERA5_psl/long-ERA5-full.nc" "/home/users/benhutch/ERA5_psl/long-ERA5-full-remap.nc"

cgribexGetTsteptype: Time range indicator 123 unsupported, set to 0!
cdo    copy:   1[32mcdo    copy: [0mProcessed 261636480 values from 1 variable over 252 timesteps [3.59s 134MB].
[32mcdo    remapcon: [0mYAC first order conservative weights from lonlat (1440x721) to lonlat (144x72) grid
cdo    remapcon:                        1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 8 8 8 8 8 8 8 8 8 8 9 9 9 9 9 9 9 9 9 91[32mcdo    remapcon: [0mProcessed 261636480 values from 1 variable over 252 timesteps [6.63s 134MB].


In [3]:
# inspect the remapped file: open it lazily with dask
# (50 time steps per chunk) and display its time coordinate
ds_long = xr.open_dataset(
    "/home/users/benhutch/ERA5_psl/long-ERA5-full-remap.nc",
    chunks=dict(time=50),
)

ds_long["time"]

In [4]:
# processing the observations
# all observation files live in one directory, so it is named once here
# and the individual file paths are built from it
ERA5_PSL_DIR = "/home/users/benhutch/ERA5_psl"

# path for the full .nc file
# will likely need to load this in chunks with dask
path_obs_nc = ERA5_PSL_DIR + "/ERA5-full-data.nc"
# the same data regridded to 2.5 degrees
path_obs_nc_regrid = ERA5_PSL_DIR + "/ERA5-full-data-2.5-regrid.nc"
# the long record produced by the cdo copy + remapcon steps
path_obs_nc_regrid_long = ERA5_PSL_DIR + "/long-ERA5-full-remap.nc"

In [5]:
# write a function to first select the iceland and azores //
# gridboxes
# also remap the obs data to a 2.5x2.5 degree grid
# first define the new grid
def create_new_grid():
    """Build an empty 2.5 x 2.5 degree target grid.

    Returns
    -------
    xarray.Dataset
        Dataset holding two variables, each along its own dimension:
        ``lon`` from -180 up to (but excluding) 180 and ``lat`` from -90
        up to (but excluding) 90, both in steps of 2.5.
    """
    step = 2.5
    grid_axes = {
        'lon': (['lon'], np.arange(-180, 180, step)),
        'lat': (['lat'], np.arange(-90, 90, step)),
    }
    return xr.Dataset(grid_axes)

# define the gridspec for azores and iceland
def get_grid_spec(location):
    """Return the lon/lat bounds of the box used for one NAO centre.

    Parameters
    ----------
    location : str
        Either ``'azores'`` or ``'iceland'``.

    Returns
    -------
    dict
        Keys ``'lon1'``, ``'lon2'``, ``'lat1'``, ``'lat2'`` giving the
        edges of the box.

    Raises
    ------
    ValueError
        If ``location`` is neither of the two known names.
    """
    known_boxes = (
        ('azores', {'lon1': -28, 'lon2': -20, 'lat1': 36, 'lat2': 40}),
        ('iceland', {'lon1': -25, 'lon2': -16, 'lat1': 63, 'lat2': 70}),
    )
    for name, box in known_boxes:
        if location == name:
            return box
    raise ValueError('Location must be azores or iceland')

def select_data_within_grid(dataset, grid):
    """Crop ``dataset`` to the lon/lat box described by ``grid``.

    ``grid`` must provide the keys ``'lon1'``, ``'lon2'``, ``'lat1'`` and
    ``'lat2'``; they are passed to ``dataset.sel`` as label slices along
    ``lon`` and ``lat``.
    """
    lon_bounds = slice(grid['lon1'], grid['lon2'])
    lat_bounds = slice(grid['lat1'], grid['lat2'])
    return dataset.sel(lon=lon_bounds, lat=lat_bounds)

# define a function to select the months DJFM
def select_months(dataset):
    """Keep only the time steps whose month is December, January,
    February or March (DJFM)."""
    djfm = [12, 1, 2, 3]
    in_winter = dataset.time.dt.month.isin(djfm)
    return dataset.sel(time=in_winter)

# define a function which calculates the model mean state
def calculate_model_mean_state(dataset):
    """Return the mean of ``dataset`` taken over its ``time`` dimension."""
    return dataset.mean(dim='time')

# define a function which calculates the model anomalies
def calculate_model_anomalies(dataset, model_mean_state):
    """Return ``dataset`` with ``model_mean_state`` subtracted from it."""
    return dataset - model_mean_state

# define a function which first shifts the data back by 3 time steps
# (shift works on positions along time, not on calendar months)
# then calculates the annual mean anomalies
def calculate_annual_mean_anomalies(dataset):
    """Shift the data three steps earlier along ``time``, then average
    it within each ``'Y'`` resampling bin.

    The shift is positional: each value moves to the time label three
    positions before its own, whatever the spacing of the time axis is.
    """
    shifted = dataset.shift(time=-3)
    yearly_bins = shifted.resample(time='Y')
    return yearly_bins.mean(dim='time')

# define a function which takes the azores and iceland anomalies
# and calculates the NAO index
def calculate_NAO_index(azores_anomalies, iceland_anomalies, output_file):
    """Compute the NAO index as azores minus iceland and save it.

    Parameters
    ----------
    azores_anomalies, iceland_anomalies
        Anomalies for the two boxes; each is averaged over its ``lat``
        and ``lon`` dimensions before the difference is taken.
    output_file
        Path handed to ``to_netcdf`` to store the index.

    Returns
    -------
    The difference of the two box means (azores - iceland).
    """
    # collapse each box to a single series by averaging over space
    azores_box_mean = azores_anomalies.mean(dim=['lat', 'lon'])
    iceland_box_mean = iceland_anomalies.mean(dim=['lat', 'lon'])

    # the index is the south-minus-north difference
    NAO_index = azores_box_mean - iceland_box_mean

    # write the index to disk before handing it back
    NAO_index.to_netcdf(output_file)
    return NAO_index


# define a function which takes an 8-step rolling mean of the NAO index
# (with xarray's defaults each window is labelled at its last time step)
def calculate_NAO_index_running_mean(NAO_index):
    """Return the mean of ``NAO_index`` over rolling windows of 8 steps
    along ``time``."""
    eight_step_windows = NAO_index.rolling(time=8)
    return eight_step_windows.mean()


In [18]:
# define the main function for processing the observations
# for azores and iceland
def process_observations(path_obs_nc, location):
    """Turn the observations file into annual mean anomalies for one box.

    Parameters
    ----------
    path_obs_nc
        Path of the netCDF file to open.
    location : str
        ``'azores'`` or ``'iceland'``; anything else makes
        ``get_grid_spec`` raise ``ValueError``.

    Returns
    -------
    The box anomalies (data minus its time mean) after
    ``calculate_annual_mean_anomalies`` has been applied.
    """
    # open lazily with dask, 500 time steps per chunk
    obs = xr.open_dataset(path_obs_nc, chunks={'time': 500})

    # crop to the box belonging to this location
    obs_box = select_data_within_grid(obs, get_grid_spec(location))

    # no DJFM month selection here: not necessary for the updated dataset

    # anomalies relative to the time mean of the cropped data
    box_mean_state = calculate_model_mean_state(obs_box)
    box_anomalies = calculate_model_anomalies(obs_box, box_mean_state)

    # shift and average into annual values
    return calculate_annual_mean_anomalies(box_anomalies)

# define the entry point: build the azores and iceland anomalies,
# then compute and save the NAO index
def main(
    path_obs_nc,
    output_file="/home/users/benhutch/multi-model/multi-model-jasmin/NAO_index_raw.nc",
):
    """Compute, save, print and return the NAO index for one observations file.

    Parameters
    ----------
    path_obs_nc
        Path of the netCDF observations file; it is processed once for the
        azores box and once for the iceland box.
    output_file
        Where the raw NAO index is written as netCDF. Defaults to the path
        this notebook has always used, so existing calls are unaffected.

    Returns
    -------
    The NAO index produced by ``calculate_NAO_index``. Previously nothing
    was returned, so assigning the result of ``main(...)`` bound ``None``.
    """
    # annual mean anomalies for each of the two boxes
    azores_annual_mean_anomalies = process_observations(path_obs_nc, 'azores')
    iceland_annual_mean_anomalies = process_observations(path_obs_nc, 'iceland')

    # calculate the NAO index and save it to output_file
    NAO_index = calculate_NAO_index(
        azores_annual_mean_anomalies,
        iceland_annual_mean_anomalies,
        output_file,
    )

    # force the lazy computation and show the values of the 'var151' variable
    print(NAO_index['var151'].compute())

    return NAO_index

In [19]:
# run the NAO pipeline on the long, regridded ERA5 record
# (main also writes the raw index to disk and prints it)
# NOTE(review): despite the variable name, no running mean is applied here;
# the 8-year running mean is taken with cdo in a later cell
NAO_index_running_mean = main(path_obs_nc=path_obs_nc_regrid_long)

<xarray.DataArray 'var151' (time: 63)>
array([  547.66345 ,  -904.8264  , -1362.615   , -1122.0385  ,
       -1009.49664 , -1277.3301  ,   378.5083  ,  -334.24545 ,
       -1989.828   ,  -519.35364 ,  -462.9961  ,   293.88364 ,
         765.45807 ,   403.9149  ,   179.39429 ,   668.71387 ,
       -1219.0999  ,  -351.36874 , -1023.2671  ,  -124.45044 ,
         190.59677 ,   -62.390057,   939.1576  ,   547.05493 ,
        -635.07874 ,  -103.92798 ,  -467.65533 ,  -288.98813 ,
        1576.6603  ,  1033.6198  ,   381.33072 ,   768.244   ,
         937.40186 ,   965.4193  ,  1169.8403  , -1052.2946  ,
        -164.94849 ,   -96.76692 ,   701.9907  ,   859.0314  ,
       -1020.01953 ,  -134.64436 ,   -99.35116 ,  -303.45654 ,
          92.3418  ,  -453.07983 ,   798.7523  ,   633.23706 ,
         175.2758  , -2053.5938  ,  -542.1264  ,  1262.3513  ,
       -1078.7349  ,   938.4829  ,  1858.1763  ,   585.9868  ,
         493.56357 ,   265.1542  ,   293.53427 ,  1295.4287  ,
        -362.090

In [12]:
# take the running mean using cdo
# runmean,8 averages over windows of 8 consecutive time steps of the raw
# NAO index file and writes the result to the *_8yrRM_long.nc file
# (63 time steps go in; the file opened in the next cell has 56)
# NOTE(review): the disabled line below presumably loads the environment
# that provides cdo — confirm whether it is still needed
#! module load jaspy
! cdo runmean,8 "/home/users/benhutch/multi-model/multi-model-jasmin/NAO_index_raw.nc" "/home/users/benhutch/multi-model/multi-model-jasmin/NAO_index_8yrRM_long.nc"

[32mcdo    runmean: [0mProcessed 63 values from 1 variable over 63 timesteps [0.04s 153MB].


In [13]:
# inspect the running-mean file written by cdo
path_NAO_index_8yrRM = "/home/users/benhutch/multi-model/multi-model-jasmin/NAO_index_8yrRM_long.nc"

# lazy open with dask, 10 time steps per chunk; the bare name on the last
# line makes the notebook render the dataset
test = xr.open_dataset(
    path_NAO_index_8yrRM,
    chunks=dict(time=10),
)
test

Unnamed: 0,Array,Chunk
Bytes,896 B,160 B
Shape,"(56, 2)","(10, 2)"
Count,7 Tasks,6 Chunks
Type,datetime64[ns],numpy.ndarray
"Array Chunk Bytes 896 B 160 B Shape (56, 2) (10, 2) Count 7 Tasks 6 Chunks Type datetime64[ns] numpy.ndarray",2  56,

Unnamed: 0,Array,Chunk
Bytes,896 B,160 B
Shape,"(56, 2)","(10, 2)"
Count,7 Tasks,6 Chunks
Type,datetime64[ns],numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,224 B,40 B
Shape,"(56,)","(10,)"
Count,7 Tasks,6 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 224 B 40 B Shape (56,) (10,) Count 7 Tasks 6 Chunks Type float32 numpy.ndarray",56  1,

Unnamed: 0,Array,Chunk
Bytes,224 B,40 B
Shape,"(56,)","(10,)"
Count,7 Tasks,6 Chunks
Type,float32,numpy.ndarray
