In [1]:
# Python core
from typing import Optional, Callable, TypedDict, Union, Iterable, Tuple, NamedTuple, List
from dataclasses import dataclass
import datetime
from itertools import product
from concurrent import futures

# Scientific python
import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt

# PyTorch
# import torch
# from torch import nn
# import torch.nn.functional as F
# from torchvision import transforms
# import pytorch_lightning as pl

In [2]:
import bokeh
# Record the installed bokeh release in the cell output so the environment
# this notebook was run in can be identified later.
print('bokeh version: ', bokeh.__version__)

bokeh version:  3.2.1


In [13]:
# Tear down a Dask client left over from an earlier run of this notebook.
# The check is purely by name: when no `client` variable exists yet (fresh
# kernel), the cell only prints a notice and does nothing else.
if 'client' not in locals():
    print('client does not exist yet')
else:
    client.shutdown()
    print('...shutdown client...')

...shutdown client...


In [4]:
from distributed import Client
from dask_jobqueue import PBSCluster

# Describe the PBS job used for each Dask worker. One keyword per line so the
# account, queue and resource settings are easy to scan and to diff.
cluster = PBSCluster(
    account='NAML0001',
    walltime='12:00:00',
    cores=1,
    memory='100GB',
    shared_temp_directory='/glade/scratch/wchapman/tmp',
    queue='casper',
    interface='ext',
)

# Ask the scheduler for 40 such jobs, then attach a client to the cluster.
cluster.scale(jobs=40)
client = Client(cluster)

# Bare expression: renders the client summary (dashboard link, worker count).
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/wchapman/Dingo/proxy/8787/status,

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/wchapman/Dingo/proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://128.117.208.63:36725,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/wchapman/Dingo/proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


## Load the Zarr file

In [3]:
%%time
ZARR = '/glade/derecho/scratch/wchapman/STAGING/All_2010_staged.zarr'

plt.rcParams['figure.figsize'] = (5, 5)
plt.rcParams['image.interpolation'] = 'none'

def get_forward_data(filename: str=ZARR) -> xr.Dataset:
    """Lazily open a Zarr store on the GLADE filesystem as an xarray Dataset.

    Parameters
    ----------
    filename : str
        Path to the Zarr store. Defaults to the module-level ``ZARR`` path
        (the default is bound when this function is defined).

    Returns
    -------
    xr.Dataset
        The object returned by ``xr.open_zarr`` for ``filename``, opened with
        ``consolidated=True`` so the store's consolidated metadata is used.
    """
    return xr.open_zarr(filename, consolidated=True)

# Open the default store (ZARR) and unify chunk sizes across the dataset's
# variables before any reductions are built on top of it.
forcing_data = get_forward_data().unify_chunks()
# Bare expression: renders the dataset repr (variables, shapes, chunking).
forcing_data

CPU times: user 2.53 s, sys: 237 ms, total: 2.77 s
Wall time: 27 s


Unnamed: 0,Array,Chunk
Bytes,403.20 GiB,13.94 MiB
Shape,"(8808, 15, 640, 1280)","(10, 4, 214, 427)"
Dask graph,56384 chunks in 3 graph layers,56384 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 403.20 GiB 13.94 MiB Shape (8808, 15, 640, 1280) (10, 4, 214, 427) Dask graph 56384 chunks in 3 graph layers Data type float32 numpy.ndarray",8808  1  1280  640  15,

Unnamed: 0,Array,Chunk
Bytes,403.20 GiB,13.94 MiB
Shape,"(8808, 15, 640, 1280)","(10, 4, 214, 427)"
Dask graph,56384 chunks in 3 graph layers,56384 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,26.88 GiB,3.49 MiB
Shape,"(8808, 640, 1280)","(10, 214, 427)"
Dask graph,14096 chunks in 3 graph layers,14096 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 26.88 GiB 3.49 MiB Shape (8808, 640, 1280) (10, 214, 427) Dask graph 14096 chunks in 3 graph layers Data type float32 numpy.ndarray",1280  640  8808,

Unnamed: 0,Array,Chunk
Bytes,26.88 GiB,3.49 MiB
Shape,"(8808, 640, 1280)","(10, 214, 427)"
Dask graph,14096 chunks in 3 graph layers,14096 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,403.20 GiB,13.94 MiB
Shape,"(8808, 15, 640, 1280)","(10, 4, 214, 427)"
Dask graph,56384 chunks in 3 graph layers,56384 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 403.20 GiB 13.94 MiB Shape (8808, 15, 640, 1280) (10, 4, 214, 427) Dask graph 56384 chunks in 3 graph layers Data type float32 numpy.ndarray",8808  1  1280  640  15,

Unnamed: 0,Array,Chunk
Bytes,403.20 GiB,13.94 MiB
Shape,"(8808, 15, 640, 1280)","(10, 4, 214, 427)"
Dask graph,56384 chunks in 3 graph layers,56384 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,403.20 GiB,13.94 MiB
Shape,"(8808, 15, 640, 1280)","(10, 4, 214, 427)"
Dask graph,56384 chunks in 3 graph layers,56384 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 403.20 GiB 13.94 MiB Shape (8808, 15, 640, 1280) (10, 4, 214, 427) Dask graph 56384 chunks in 3 graph layers Data type float32 numpy.ndarray",8808  1  1280  640  15,

Unnamed: 0,Array,Chunk
Bytes,403.20 GiB,13.94 MiB
Shape,"(8808, 15, 640, 1280)","(10, 4, 214, 427)"
Dask graph,56384 chunks in 3 graph layers,56384 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,403.20 GiB,13.94 MiB
Shape,"(8808, 15, 640, 1280)","(10, 4, 214, 427)"
Dask graph,56384 chunks in 3 graph layers,56384 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 403.20 GiB 13.94 MiB Shape (8808, 15, 640, 1280) (10, 4, 214, 427) Dask graph 56384 chunks in 3 graph layers Data type float32 numpy.ndarray",8808  1  1280  640  15,

Unnamed: 0,Array,Chunk
Bytes,403.20 GiB,13.94 MiB
Shape,"(8808, 15, 640, 1280)","(10, 4, 214, 427)"
Dask graph,56384 chunks in 3 graph layers,56384 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


## Take a Variety of Means/STDs and Save them
- You need a massive amount of compute to do this; try it in the morning before every other NCAR employee wakes up.

In [11]:
%%time
Mean_latlonlev = forcing_data.mean(['time','latitude','longitude','level']).persist()
print('...moving on to load...')
Mean_latlonlev = Mean_latlonlev.load()
print('...mean done...')
print('...saving...')
Mean_latlonlev.to_netcdf('/glade/derecho/scratch/wchapman/STAGING/All_2010_staged.mean.nc')


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


...moving on to load...




...mean done...
...saving...
CPU times: user 10min 54s, sys: 20.4 s, total: 11min 14s
Wall time: 29min 39s


In [12]:
# Standard deviation of every variable over time, latitude, longitude and
# level. persist() hands the reduction to the Dask cluster; load() then brings
# the finished result into local memory so it can be written out.
STD_latlonlev = (
    forcing_data
    .std(['time','latitude','longitude','level'])
    .persist()
)
STD_latlonlev = STD_latlonlev.load()

# Save the reduced dataset next to the staged Zarr store.
print('...saving...')
STD_latlonlev.to_netcdf('/glade/derecho/scratch/wchapman/STAGING/All_2010_staged.std.nc')

This may cause some slowdown.
Consider scattering data ahead of time and using futures.


...moving on to load...




...saving...


## Testing Below here

In [5]:
# Scratch/testing: open a single NetCDF file from the RDA ds633.5 collection.
# NOTE(review): the path suggests a monthly pressure-level "u" field for 1950 -
# confirm against the dataset documentation before relying on it.
DS = xr.open_dataset('/glade/campaign/collections/rda/data/ds633.5/e5p.moda.an.pl/1950/e5p.moda.an.pl.128_131_u.ll025uv.1950010100_1950120100.nc')

In [6]:
# Bare expression: renders the repr of the test dataset for inspection.
DS