# Rechunk variables
This notebook rechunks each variable's anomaly dataset and writes it to a new zarr store, in order to optimize operations along the time dimension.

In [None]:
import os
import sys
import shutil
import warnings
import json
from datetime import datetime
from itertools import product
import numpy as np
import xarray as xr
from rechunker import rechunk
import dask
from dask_jobqueue import PBSCluster
from dask.distributed import Client

sys.path.append('..')
import utils

In [None]:
## Globals
# Load the project path table and global settings from JSON files that sit
# one directory above this notebook.
with open("../paths.json") as paths_json: 
    PATHS = json.load(paths_json)
with open("../globals.json") as globals_json:
    GLOBALS = json.load(globals_json)
    
# Absolute path to this notebook, resolved against the current working
# directory (so it is only correct when the kernel runs from this folder).
FILE  = os.path.join(os.path.abspath('.'), 'rechunk.ipynb')

# Names of the variables whose anomaly datasets main() will rechunk.
VARIABLES = ['zos', 'SST', 'UBOT', 'VBOT']

Dask

In [3]:
# Create a PBS-backed Dask cluster, passing a walltime of '02:00:00'.
cluster = PBSCluster(walltime='02:00:00')
# Connect a distributed client to that cluster; main() later calls
# client.restart() on this module-level object.
client = Client(cluster)

Perhaps you already have a cluster running?
Hosting the HTTP server on port 46026 instead


In [4]:
# Ask the cluster to scale to 16 workers.
cluster.scale(16)
# Bare expression so the notebook displays the cluster summary.
cluster

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/abrettin/proxy/46026/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.12.206.46:41647,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/abrettin/proxy/46026/status,Total threads: 0
Started: Just now,Total memory: 0 B


Functions

In [5]:
def rm_stores(*stores):
    """Delete each given store path from disk if it is present.

    Parameters
    ----------
    *stores : str or path-like
        Paths of the stores to remove. Paths that do not exist are skipped
        silently.

    Notes
    -----
    Directories are removed recursively with ``shutil.rmtree``. Regular files
    and symbolic links are unlinked with ``os.remove`` instead, because
    ``shutil.rmtree`` raises when it is handed either of them.
    """
    for store in stores:
        # Check for links first: a link to a directory also satisfies
        # os.path.isdir, but rmtree refuses to operate on symbolic links.
        if os.path.islink(store) or os.path.isfile(store):
            os.remove(store)
        elif os.path.isdir(store):
            shutil.rmtree(store)

def execute_rechunk(ds, target_store, temp_store,
                    target_chunks=None, max_mem='12GB'):
    """Build a rechunker plan for ``ds`` and execute it.

    Parameters
    ----------
    ds
        Source data handed to ``rechunker.rechunk``; ``main`` passes the
        object returned by ``utils.data.load_anomalies``.
    target_store
        Destination store for the rechunked output.
    temp_store
        Store used by rechunker for intermediate data.
    target_chunks : dict, optional
        Chunk sizes forwarded to ``rechunk``. Defaults to
        ``{'time': 3650, 'lat': 192, 'lon': 288}``.
    max_mem : str, optional
        Memory limit forwarded to ``rechunk``. Defaults to ``'12GB'``.

    Returns
    -------
    None
    """
    if target_chunks is None:
        # Build the default inside the call so no mutable object is shared
        # between invocations.
        target_chunks = {
            'time': 3650,
            'lat': 192,
            'lon': 288,
        }

    plan = rechunk(
        ds, target_chunks, max_mem, target_store, temp_store=temp_store
    )
    plan.execute()

In [6]:
def main():
    """Rechunk every (variable, init year, member) anomaly dataset to zarr.

    Iterates over the product of ``VARIABLES``, ``GLOBALS['init_years']`` and
    ``GLOBALS['members']``. For each combination it

    1. loads the anomalies with ``utils.data.load_anomalies``,
    2. deletes any existing target store and the shared temporary store,
    3. rechunks into
       ``PATHS['anom_spatial']/LE2-<init_year>.<member>.<var>_anom.zarr``,
    4. restarts the module-level Dask ``client``.

    Progress messages and the elapsed wall-clock time are printed at each
    step.

    Returns
    -------
    int
        Always 0.
    """
    start_time = datetime.now()

    def elapsed():
        # Wall-clock time since main() started; always non-negative.
        return datetime.now() - start_time

    for var, init_year, member in product(
            VARIABLES, GLOBALS['init_years'], GLOBALS['members']):
        # Single source of truth for the store name, so the log lines match
        # the directory that is actually written.
        store_name = f'LE2-{init_year}.{member}.{var}_anom.zarr'
        print(store_name)
        print(elapsed())

        # Load data as a dataset
        ds = utils.data.load_anomalies(
            var, init_year, member, full_ds=True, chunkedby='time')

        # Prepare paths for rechunking
        print("Preparing zarr stores")
        print(elapsed())
        target_store = os.path.join(PATHS['anom_spatial'], store_name)
        # The same temporary store path is reused for every dataset.
        temp_store = os.path.join(PATHS['tmp'], 'temp.zarr')
        # Destructive: any previous output for this dataset is removed
        # before the new rechunk starts.
        rm_stores(target_store, temp_store)

        # Rechunk
        print("Rechunking")
        print(elapsed())
        execute_rechunk(ds, target_store, temp_store)

        # Repeat
        print(f"Completed rechunk for {store_name}")
        print(elapsed(), '\n')
        # Restart the workers between datasets.
        client.restart()

    print("PROCESS_COMPLETED")
    # Total elapsed time (now - start; the reverse order prints a negative
    # timedelta).
    print(elapsed())

    return 0

---

In [10]:
# Run the full rechunking loop; main() returns 0 on completion.
main()

LE-1251.011.SSH_anom.zarr
0:00:00.000038
Preparing zarr stores
0:00:00.414282
Rechunking
0:00:00.699488
Completed rechunk for LE-1251.011.SSH_anom.zarr
0:00:42.472053 

LE-1251.012.SSH_anom.zarr
0:00:46.032769
Preparing zarr stores
0:00:46.413161
Rechunking
0:00:46.841917
Completed rechunk for LE-1251.012.SSH_anom.zarr
0:01:25.784396 

LE-1251.013.SSH_anom.zarr
0:01:29.049445
Preparing zarr stores
0:01:29.267358
Rechunking
0:01:29.679693
Completed rechunk for LE-1251.013.SSH_anom.zarr
0:02:13.802062 

LE-1281.011.SSH_anom.zarr
0:02:16.050455
Preparing zarr stores
0:02:16.373001
Rechunking
0:02:16.789501
Completed rechunk for LE-1281.011.SSH_anom.zarr
0:02:58.301318 

LE-1281.012.SSH_anom.zarr
0:03:01.190001
Preparing zarr stores
0:03:01.421324
Rechunking
0:03:01.831687
Completed rechunk for LE-1281.012.SSH_anom.zarr
0:03:43.010779 

LE-1281.013.SSH_anom.zarr
0:03:45.651803
Preparing zarr stores
0:03:46.003697
Rechunking
0:03:46.405765
Completed rechunk for LE-1281.013.SSH_anom.zarr
0:0

0

In [7]:
# Run the rechunking loop again.
# NOTE(review): the saved outputs of the main() cells list SSH and SHF, which
# are not in VARIABLES as defined in this notebook — presumably they come
# from earlier runs with a different VARIABLES list; confirm before relying
# on them.
main()

LE-1251.011.SHF_anom.zarr
0:00:00.000034
Preparing zarr stores
0:00:00.245781
Rechunking
0:00:00.248815
Completed rechunk for LE-1251.011.SHF_anom.zarr
0:01:36.384921 

LE-1251.012.SHF_anom.zarr
0:01:46.928961
Preparing zarr stores
0:01:47.181998
Rechunking
0:01:47.376214
Completed rechunk for LE-1251.012.SHF_anom.zarr
0:03:03.891730 

LE-1251.013.SHF_anom.zarr
0:03:07.058789
Preparing zarr stores
0:03:07.478973
Rechunking
0:03:07.720086
Completed rechunk for LE-1251.013.SHF_anom.zarr
0:04:29.964921 

LE-1281.011.SHF_anom.zarr
0:04:33.585093
Preparing zarr stores
0:04:34.037130
Rechunking
0:04:34.264015
Completed rechunk for LE-1281.011.SHF_anom.zarr
0:05:36.999961 

LE-1281.012.SHF_anom.zarr
0:05:40.057150
Preparing zarr stores
0:05:40.297015
Rechunking
0:05:40.502850
Completed rechunk for LE-1281.012.SHF_anom.zarr
0:06:54.243032 

LE-1281.013.SHF_anom.zarr
0:06:57.430819
Preparing zarr stores
0:06:57.697340
Rechunking
0:06:57.916552
Completed rechunk for LE-1281.013.SHF_anom.zarr
0:0

0

In [8]:
# Tear down Dask: close the client connection first, then the cluster.
client.close()
cluster.close()