# Prepare atmospheric and ocean data

In [8]:
from dask.distributed import Client,LocalCluster
from dask_jobqueue import PBSCluster

In [9]:
# A Gadi node provides 48 cores: fill a whole node (raise `cores`) before
# spreading the work over several nodes, i.e. several PBS jobs.

# Per-job resource request.
walltime = '00:30:00'
cores = 4
memory = f'{4 * cores}GB'  # 4 GB of memory for every core requested

cluster = PBSCluster(
    walltime=walltime,
    cores=cores,
    memory=memory,
    processes=cores,  # split each job into as many worker processes as cores
    job_extra_directives=[
        '-q normal',
        '-P w42',
        f'-l ncpus={cores}',
        f'-l mem={memory}',
        '-l storage=gdata/w42+gdata/rt52',
    ],
    local_directory='$TMPDIR',
    job_directives_skip=["select"],
    # python=os.environ["DASK_PYTHON"],
)

In [10]:
# Scale the cluster to one job, then connect a client to it.
cluster.scale(jobs=1)
client = Client(cluster)

In [11]:
# Display the client/cluster summary (dashboard link, workers, threads, memory).
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: http://10.6.65.70:8787/status,

0,1
Dashboard: http://10.6.65.70:8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.6.65.70:42317,Workers: 0
Dashboard: http://10.6.65.70:8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [5]:
# client.close()
# cluster.close()

In [6]:
# %load_ext autoreload
# %autoreload 2

In [7]:
# %reload_ext autoreload
# %autoreload 2

In [1]:
import xarray as xr
import pandas as pd

# import numpy as np
# from xhistogram.xarray import histogram

# %matplotlib inline
# import matplotlib
# import matplotlib.pyplot as plt
# from mpl_toolkits.axes_grid1.inset_locator import inset_axes

# import cartopy.crs as ccrs
# import cartopy
# cartopy.config['pre_existing_data_dir'] = '/g/data/w42/dr6273/work/data/cartopy-data/'
# cartopy.config['data_dir'] = '/g/data/w42/dr6273/work/data/cartopy-data/'

In [2]:
# import functions as fn

# plt_params = fn.get_plot_params()
# prop_cycle = plt.rcParams['axes.prop_cycle']
# colors = prop_cycle.by_key()['color']

In [8]:
# Root directory under which the per-dataset files are stored.
data_fp = '/g/data/w42/dr6273/work/data/'
# Analysis period: 1959 to 2020 inclusive (the end of `range` is exclusive).
years = range(1959, 2021)

# SST data

In [13]:
dataset = 'hadisst'
# Zarr store holding the SST anomalies; the file name encodes the dataset
# and the first/last year of the analysis period.
sst_fp = f'{data_fp}{dataset}/sst/sst_anom_{dataset}_moda_sfc_{years[0]}-{years[-1]}.zarr'
print(sst_fp)

/g/data/w42/dr6273/work/data/hadisst/sst/sst_anom_hadisst_moda_sfc_1959-2020.zarr


In [14]:
# True: open the anomalies already stored at `sst_fp`.
# False: recompute them from the raw SST data and overwrite that store.
load_SST_anoms = True

In [15]:
if load_SST_anoms:
    # Anomalies were written previously: open the existing Zarr store.
    sst_anoms = xr.open_zarr(sst_fp, consolidated=True)
else:
    # Rebuild the anomalies from the raw SST fields and (over)write `sst_fp`.
    if dataset == 'era5':
        # NOTE(review): `fn` is undefined on a fresh kernel because
        # `import functions as fn` is commented out earlier in this notebook;
        # re-enable that import before running this branch.
        era_root_path = '/g/data/rt52/era5/single-levels/monthly-averaged/'
        sst = fn.open_era_data(root_path=era_root_path,
                               variable='sst',
                               years=years)
    else:
        sst = xr.open_zarr('/g/data/xv83/reanalyses/HadISST/ocean_month.zarr', consolidated=True)
        sst = sst.sel(time=slice(str(years[0]), str(years[-1])))
        # Replace the time axis with month-start stamps spanning the selected years.
        sst['time'] = pd.date_range(str(years[0]), str(years[-1])+'-12-01', freq='1MS')

    # Use short coordinate names and keep only the SST variable.
    # NOTE(review): assumes both sources expose `latitude`/`longitude`;
    # `rename` raises if they are absent - confirm for each dataset.
    sst = sst.rename({'latitude': 'lat',
                      'longitude': 'lon'})
    sst = sst['sst']
    sst = sst.assign_attrs({'short_name': 'sst'})

    # 12 time steps per chunk; each chunk spans the full lat/lon grid.
    sst = sst.chunk({'time': 12,
                     'lat': -1,
                     'lon': -1})

    # Anomaly = value minus the mean of its calendar month over the whole
    # period. `map` is used instead of `apply`, which xarray keeps only as a
    # backward-compatibility alias of `map`.
    sst_anoms = sst.groupby('time.month').map(lambda x: x - x.mean('time'))

    # write attrs
    sst_anoms = sst_anoms.assign_attrs({'long_name': 'Sea-surface temperature',
                                        'short_name': 'sst',
                                        'units': 'K'})

    # Store as a single-variable dataset, encoded as float32, replacing any
    # existing store at `sst_fp` (mode='w').
    sst_anoms = sst_anoms.to_dataset(name='sst_anom')
    sst_anoms_encoding = {'sst_anom': {'dtype': 'float32'}}
    sst_anoms.to_zarr(
        sst_fp,
        mode='w',
        consolidated=True,
        encoding=sst_anoms_encoding
    )