In [1]:
import glob
import re
import matplotlib as plt
import numpy as np
import xarray as xr
import pandas as pd

In [4]:
import dask 
from dask_jobqueue import PBSCluster
from dask.distributed import Client
from dask.distributed import performance_report

In [10]:
######## File paths ################
lustre_scratch    = "/lustre/desc1/scratch/harshah"
era5_surface_data = "/gdex/data/special_projects/harshah/ARCO/e5.oper.an.sfc"

In [5]:
cluster = PBSCluster(
        job_name = 'dask-osdf-25',
        cores = 1,
        memory = '4GiB',
        processes = 1,
        local_directory = lustre_scratch + '/dask/spill',
        log_directory = lustre_scratch + '/dask/logs/',
        resource_spec = 'select=1:ncpus=1:mem=4GB',
        queue = 'casper',
        walltime = '3:00:00',
        #interface = 'ib0'
        interface = 'ext'
    )

Perhaps you already have a cluster running?
Hosting the HTTP server on port 42483 instead


In [7]:
client = Client(cluster)
# Scale the cluster and display cluster dashboard URL
n_workers =5
cluster.scale(n_workers)
client.wait_for_workers(n_workers = n_workers)
cluster

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/42483/status,Workers: 5
Total threads: 5,Total memory: 20.00 GiB

0,1
Comm: tcp://128.117.208.96:37129,Workers: 5
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/42483/status,Total threads: 5
Started: 1 minute ago,Total memory: 20.00 GiB

0,1
Comm: tcp://128.117.208.174:40161,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/45835/status,Memory: 4.00 GiB
Nanny: tcp://128.117.208.174:38575,
Local directory: /lustre/desc1/scratch/harshah/dask/spill/dask-scratch-space/worker-zckdop3q,Local directory: /lustre/desc1/scratch/harshah/dask/spill/dask-scratch-space/worker-zckdop3q
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 0.0%,Last seen: Just now
Memory usage: 53.52 MiB,Spilled bytes: 0 B
Read bytes: 4.92 MiB,Write bytes: 4.19 MiB

0,1
Comm: tcp://128.117.208.175:42115,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/38639/status,Memory: 4.00 GiB
Nanny: tcp://128.117.208.175:33529,
Local directory: /lustre/desc1/scratch/harshah/dask/spill/dask-scratch-space/worker-39on7xbn,Local directory: /lustre/desc1/scratch/harshah/dask/spill/dask-scratch-space/worker-39on7xbn
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 0.0%,Last seen: Just now
Memory usage: 51.49 MiB,Spilled bytes: 0 B
Read bytes: 498.39 MiB,Write bytes: 1.92 GiB

0,1
Comm: tcp://128.117.208.174:45405,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/34513/status,Memory: 4.00 GiB
Nanny: tcp://128.117.208.174:45193,
Local directory: /lustre/desc1/scratch/harshah/dask/spill/dask-scratch-space/worker-ilh904wb,Local directory: /lustre/desc1/scratch/harshah/dask/spill/dask-scratch-space/worker-ilh904wb
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 0.0%,Last seen: Just now
Memory usage: 51.57 MiB,Spilled bytes: 0 B
Read bytes: 552.98 MiB,Write bytes: 9.70 MiB

0,1
Comm: tcp://128.117.208.173:46291,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/40133/status,Memory: 4.00 GiB
Nanny: tcp://128.117.208.173:33657,
Local directory: /lustre/desc1/scratch/harshah/dask/spill/dask-scratch-space/worker-bmht5ara,Local directory: /lustre/desc1/scratch/harshah/dask/spill/dask-scratch-space/worker-bmht5ara
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 0.0%,Last seen: Just now
Memory usage: 51.54 MiB,Spilled bytes: 0 B
Read bytes: 6.59 MiB,Write bytes: 4.04 MiB

0,1
Comm: tcp://128.117.208.173:41361,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/33503/status,Memory: 4.00 GiB
Nanny: tcp://128.117.208.173:41409,
Local directory: /lustre/desc1/scratch/harshah/dask/spill/dask-scratch-space/worker-j701km0d,Local directory: /lustre/desc1/scratch/harshah/dask/spill/dask-scratch-space/worker-j701km0d
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 0.0%,Last seen: Just now
Memory usage: 51.49 MiB,Spilled bytes: 0 B
Read bytes: 6.63 MiB,Write bytes: 4.97 MiB


## GMST functions

In [18]:
# GMST function ###
# calculate global means
def get_lat_name(ds):
    for lat_name in ['lat', 'latitude']:
        if lat_name in ds.coords:
            return lat_name
    raise RuntimeError("Couldn't find a latitude coordinate")

def global_mean(ds):
    lat = ds[get_lat_name(ds)]
    weight = np.cos(np.deg2rad(lat))
    weight /= weight.mean()
    #other_dims = set(ds.dims) - {'time'}
    other_dims = set(ds.dims)
    return (ds * weight).mean(other_dims)

## Load data and compute GMST

In [12]:
%%time
temp_2m = xr.open_zarr(era5_surface_data + '/e5.oper.an.sfc.2t.zarr')  
temp_2m = temp_2m.VAR_2T
temp_2m

CPU times: user 91.4 ms, sys: 66.4 ms, total: 158 ms
Wall time: 193 ms


Unnamed: 0,Array,Chunk
Bytes,2.83 TiB,11.91 MiB
Shape,"(749472, 721, 1440)","(27, 480, 241)"
Dask graph,333108 chunks in 2 graph layers,333108 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.83 TiB 11.91 MiB Shape (749472, 721, 1440) (27, 480, 241) Dask graph 333108 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  749472,

Unnamed: 0,Array,Chunk
Bytes,2.83 TiB,11.91 MiB
Shape,"(749472, 721, 1440)","(27, 480, 241)"
Dask graph,333108 chunks in 2 graph layers,333108 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [16]:
temp_2m_1950 = temp_2m.sel(time=temp_2m.time.dt.year==1950)
temp_2m_1950

Unnamed: 0,Array,Chunk
Bytes,33.88 GiB,11.47 MiB
Shape,"(8760, 721, 1440)","(26, 480, 241)"
Dask graph,4044 chunks in 3 graph layers,4044 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 33.88 GiB 11.47 MiB Shape (8760, 721, 1440) (26, 480, 241) Dask graph 4044 chunks in 3 graph layers Data type float32 numpy.ndarray",1440  721  8760,

Unnamed: 0,Array,Chunk
Bytes,33.88 GiB,11.47 MiB
Shape,"(8760, 721, 1440)","(26, 480, 241)"
Dask graph,4044 chunks in 3 graph layers,4044 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [19]:
%%time
# Grid cells are spatially weighted and each time step gets the same weight 
gmst_1950 = global_mean(temp_2m_1950)
gmst_1950

CPU times: user 18.1 ms, sys: 0 ns, total: 18.1 ms
Wall time: 30.6 ms


Unnamed: 0,Array,Chunk
Bytes,8 B,8 B
Shape,(),()
Dask graph,1 chunks in 16 graph layers,1 chunks in 16 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
Array Chunk Bytes 8 B 8 B Shape () () Dask graph 1 chunks in 16 graph layers Data type float64 numpy.ndarray,,

Unnamed: 0,Array,Chunk
Bytes,8 B,8 B
Shape,(),()
Dask graph,1 chunks in 16 graph layers,1 chunks in 16 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


- This is sizeable dask graph!
- Let us try to compute it!

In [24]:
%%timeit -n 2 -r 3 
gmst_copy = gmst_1950.copy()
gmst_copy.values

1min 6s ± 525 ms per loop (mean ± std. dev. of 3 runs, 2 loops each)
