In [15]:
import glob
import re
import matplotlib as plt
import numpy as np
import xarray as xr
import pandas as pd

In [16]:
# !pip install dask_jobqueue

In [17]:
import dask 
from dask_jobqueue import PBSCluster
from dask.distributed import Client
from dask.distributed import performance_report

In [18]:
######## File paths ################
lustre_scratch    = "/glade/work/dpanta"
era5_surface_data = "/gdex/data/special_projects/harshah/ARCO/e5.oper.an.sfc"

In [19]:
cluster = PBSCluster(
        job_name = 'dask-osdf-25',
        cores = 1,
        memory = '4GiB',
        processes = 1,
        local_directory = lustre_scratch + '/dask/spill',
        log_directory = lustre_scratch + '/dask/logs/',
        resource_spec = 'select=1:ncpus=1:mem=4GB',
        queue = 'casper',
        account='P43713000',
        walltime = '3:00:00',
        #interface = 'ib0'
        interface = 'ext'
    )

Perhaps you already have a cluster running?
Hosting the HTTP server on port 43303 instead


In [20]:
client = Client(cluster)
# Scale the cluster and display cluster dashboard URL
n_workers =5
cluster.scale(n_workers)
client.wait_for_workers(n_workers = n_workers)
cluster

0,1
Dashboard: http://128.117.211.221:43303/status,Workers: 5
Total threads: 5,Total memory: 20.00 GiB

0,1
Comm: tcp://128.117.211.221:37517,Workers: 5
Dashboard: http://128.117.211.221:43303/status,Total threads: 5
Started: Just now,Total memory: 20.00 GiB

0,1
Comm: tcp://128.117.208.176:45939,Total threads: 1
Dashboard: http://128.117.208.176:34553/status,Memory: 4.00 GiB
Nanny: tcp://128.117.208.176:43229,
Local directory: /glade/work/dpanta/dask/spill/dask-scratch-space/worker-08mjy5rl,Local directory: /glade/work/dpanta/dask/spill/dask-scratch-space/worker-08mjy5rl
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 136.00 MiB,Spilled bytes: 0 B
Read bytes: 438.76 MiB,Write bytes: 26.67 MiB

0,1
Comm: tcp://128.117.208.176:34279,Total threads: 1
Dashboard: http://128.117.208.176:43873/status,Memory: 4.00 GiB
Nanny: tcp://128.117.208.176:42785,
Local directory: /glade/work/dpanta/dask/spill/dask-scratch-space/worker-0bd13ui3,Local directory: /glade/work/dpanta/dask/spill/dask-scratch-space/worker-0bd13ui3
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 136.03 MiB,Spilled bytes: 0 B
Read bytes: 439.58 MiB,Write bytes: 26.72 MiB

0,1
Comm: tcp://128.117.208.179:39655,Total threads: 1
Dashboard: http://128.117.208.179:34825/status,Memory: 4.00 GiB
Nanny: tcp://128.117.208.179:43447,
Local directory: /glade/work/dpanta/dask/spill/dask-scratch-space/worker-iksl6zl7,Local directory: /glade/work/dpanta/dask/spill/dask-scratch-space/worker-iksl6zl7
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 0.0%,Last seen: Just now
Memory usage: 57.95 MiB,Spilled bytes: 0 B
Read bytes: 829.23 kiB,Write bytes: 0.97 MiB

0,1
Comm: tcp://128.117.208.179:39787,Total threads: 1
Dashboard: http://128.117.208.179:45907/status,Memory: 4.00 GiB
Nanny: tcp://128.117.208.179:33567,
Local directory: /glade/work/dpanta/dask/spill/dask-scratch-space/worker-_64ayuft,Local directory: /glade/work/dpanta/dask/spill/dask-scratch-space/worker-_64ayuft
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 0.0%,Last seen: Just now
Memory usage: 57.99 MiB,Spilled bytes: 0 B
Read bytes: 1.19 MiB,Write bytes: 853.22 kiB

0,1
Comm: tcp://128.117.208.176:37705,Total threads: 1
Dashboard: http://128.117.208.176:34201/status,Memory: 4.00 GiB
Nanny: tcp://128.117.208.176:40135,
Local directory: /glade/work/dpanta/dask/spill/dask-scratch-space/worker-p54z5nkx,Local directory: /glade/work/dpanta/dask/spill/dask-scratch-space/worker-p54z5nkx
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 0.0%,Last seen: Just now
Memory usage: 134.06 MiB,Spilled bytes: 0 B
Read bytes: 440.09 MiB,Write bytes: 26.75 MiB


## GMST functions

In [21]:
# GMST function ###
# calculate global means
def get_lat_name(ds):
    for lat_name in ['lat', 'latitude']:
        if lat_name in ds.coords:
            return lat_name
    raise RuntimeError("Couldn't find a latitude coordinate")

def global_mean(ds):
    lat = ds[get_lat_name(ds)]
    weight = np.cos(np.deg2rad(lat))
    weight /= weight.mean()
    #other_dims = set(ds.dims) - {'time'}
    other_dims = set(ds.dims)
    return (ds * weight).mean(other_dims)




## Load data and compute GMST

In [22]:
# %%time
temp_2m = xr.open_zarr(era5_surface_data + '/e5.oper.an.sfc.2t.zarr')  
temp_2m = temp_2m.VAR_2T
# temp_2m

In [23]:
temp_2m_1950 = temp_2m.sel(time=temp_2m.time.dt.year==1949)
temp_2m_1950

Unnamed: 0,Array,Chunk
Bytes,33.88 GiB,11.47 MiB
Shape,"(8760, 721, 1440)","(26, 480, 241)"
Dask graph,4044 chunks in 3 graph layers,4044 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 33.88 GiB 11.47 MiB Shape (8760, 721, 1440) (26, 480, 241) Dask graph 4044 chunks in 3 graph layers Data type float32 numpy.ndarray",1440  721  8760,

Unnamed: 0,Array,Chunk
Bytes,33.88 GiB,11.47 MiB
Shape,"(8760, 721, 1440)","(26, 480, 241)"
Dask graph,4044 chunks in 3 graph layers,4044 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [24]:
np.sum(temp_2m_1950[0,:,:].values)

286054140.0

In [25]:
# %%time
# # Grid cells are spatially weighted and each time step gets the same weight 
gmst_1950 = global_mean(temp_2m_1950)
print(gmst_1950.values)

286.87586025340266


- This is sizeable dask graph!
- Let us try to compute it!

In [None]:
%%timeit -r 3 
gmst_copy = gmst_1950.copy()
gmst_copy.values


In [None]:
client.close()

In [None]:
# 1950:array(286.76467424)
# 1945: 286.85706282

SyntaxError: illegal target for annotation (2689628681.py, line 2)