In [1]:
import os
import sys
import shutil
import warnings
import json
from datetime import datetime
from itertools import product
import numpy as np
import xarray as xr
from rechunker import rechunk
import dask
from dask_jobqueue import PBSCluster
from dask.distributed import Client

import src
from src import utils
from src.data import loading
from src.attrs import PATHS, GLOBALS

# Variable names to rechunk; main() iterates over each of them.
VARIABLES = ['PSL', 'UBOT', 'VBOT']
# Timestamp captured once, when this cell runs. main() passes it to
# utils.log (presumably as the reference point for the elapsed-time stamps
# seen in the log output -- confirm in utils.log).
START_TIME = datetime.now()

In [2]:
# Dask setup: create a PBS-backed cluster, attach a client, request workers.
# Only the walltime is set here; other job settings (queue, cores, memory)
# are presumably taken from the dask-jobqueue configuration -- TODO confirm.
cluster = PBSCluster(walltime='06:00:00')
# Module-level client; main() calls client.restart() after every rechunk.
client = Client(cluster)
# Request a scale of 8. NOTE(review): dask-jobqueue interprets a bare
# scale(n) as a worker count, not a job count -- confirm which was intended.
cluster.scale(8)

In [5]:
# Display the client summary (dashboard link, worker count, memory).
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/abrettin/proxy/8787/status,

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/abrettin/proxy/8787/status,Workers: 4
Total threads: 8,Total memory: 93.12 GiB

0,1
Comm: tcp://128.117.208.103:41995,Workers: 4
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/abrettin/proxy/8787/status,Total threads: 8
Started: Just now,Total memory: 93.12 GiB

0,1
Comm: tcp://128.117.208.103:33593,Total threads: 2
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/abrettin/proxy/32999/status,Memory: 23.28 GiB
Nanny: tcp://128.117.208.103:43021,
Local directory: /glade/scratch/abrettin/dask/dask-scratch-space/worker-66xfwdkg,Local directory: /glade/scratch/abrettin/dask/dask-scratch-space/worker-66xfwdkg

0,1
Comm: tcp://128.117.208.83:37625,Total threads: 2
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/abrettin/proxy/42755/status,Memory: 23.28 GiB
Nanny: tcp://128.117.208.83:35915,
Local directory: /glade/scratch/abrettin/dask/dask-scratch-space/worker-bvgt8ect,Local directory: /glade/scratch/abrettin/dask/dask-scratch-space/worker-bvgt8ect

0,1
Comm: tcp://128.117.208.89:42729,Total threads: 2
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/abrettin/proxy/42531/status,Memory: 23.28 GiB
Nanny: tcp://128.117.208.89:46821,
Local directory: /glade/scratch/abrettin/dask/dask-scratch-space/worker-0262vwov,Local directory: /glade/scratch/abrettin/dask/dask-scratch-space/worker-0262vwov

0,1
Comm: tcp://128.117.208.89:39889,Total threads: 2
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/abrettin/proxy/45839/status,Memory: 23.28 GiB
Nanny: tcp://128.117.208.89:45321,
Local directory: /glade/scratch/abrettin/dask/dask-scratch-space/worker-6p6999mk,Local directory: /glade/scratch/abrettin/dask/dask-scratch-space/worker-6p6999mk


In [3]:
def rm_stores(*stores):
    """Recursively delete each of the given store paths that exists.

    Paths that do not exist are skipped without error.

    Parameters
    ----------
    *stores
        Filesystem paths of the stores to remove.
    """
    # Lazy filter: each path is checked for existence immediately before
    # it is removed, one path at a time.
    present = (path for path in stores if os.path.exists(path))
    for path in present:
        shutil.rmtree(path)

def execute_rechunk(ds, target_store, temp_store, target_chunks=None,
                    max_mem='8GB'):
    """Build a rechunker plan for ``ds`` and execute it immediately.

    Parameters
    ----------
    ds
        Source data handed to ``rechunk`` (main() passes an xarray Dataset).
    target_store
        Destination store for the rechunked output.
    temp_store
        Store passed to ``rechunk`` as ``temp_store`` for intermediate data.
    target_chunks : dict, optional
        Mapping of dimension name to chunk size. Defaults to
        ``{'time': -1, 'lat': 48, 'lon': 48}``.
    max_mem : str, optional
        Memory limit passed to ``rechunk``. Defaults to ``'8GB'``.

    Returns
    -------
    None
    """
    if target_chunks is None:
        # Build the default per call so no mutable default is shared.
        # NOTE(review): -1 for 'time' presumably requests a single chunk
        # spanning the whole axis -- confirm against the installed
        # rechunker version.
        target_chunks = {
            'time': -1,
            'lat': 48,
            'lon': 48
        }

    array_plan = rechunk(
        ds, target_chunks, max_mem, target_store, temp_store=temp_store
    )

    array_plan.execute()

In [6]:
def main():
    """Rechunk every (variable, init year, member) combination to zarr.

    For each combination of ``VARIABLES``, ``GLOBALS['init_years']`` and
    ``GLOBALS['members']`` this loads the data, wraps it in a Dataset,
    clears any existing target/temp stores, rechunks into a per-combination
    store under ``PATHS['rechunked']`` and then restarts the Dask client.

    Relies on the module-level ``client`` created in the Dask setup cell.

    Returns
    -------
    int
        Always 0 once every combination has been processed.
    """
    utils.log("Beginning script")

    for var, init_year, member in product(
            VARIABLES, GLOBALS['init_years'], GLOBALS['members']):
        utils.log(f"LE-{init_year}.{member}.{var}", START_TIME)

        # Load data as a dataset
        array = loading.load_dataset(
            var, init_year, member, chunkedby='space')
        ds = xr.Dataset({var: array})
        ds = ds.chunk({'time': 3650})

        # Prepare paths for rechunking
        utils.log("Preparing zarr stores", START_TIME)
        target_store = os.path.join(
            PATHS['rechunked'],
            f'LE2-{init_year}.{member}.{var}_rechunked.zarr'
        )
        # The same temp path is reused by every iteration.
        temp_store = os.path.join(PATHS['tmp'], 'temp.zarr')
        # Start from a clean slate: any previous output for this
        # combination is overwritten.
        rm_stores(target_store, temp_store)

        # Rechunk
        utils.log("Rechunking", START_TIME)
        try:
            execute_rechunk(ds, target_store, temp_store)
        finally:
            # Remove the intermediate store right away so it is not left
            # behind after the last iteration or after a failed rechunk.
            rm_stores(temp_store)

        # Repeat
        utils.log(f"Completed rechunk for LE-{init_year}.{member}.{var}", START_TIME)
        # Restart the workers between combinations.
        client.restart()
    utils.log("PROCESS_COMPLETED", START_TIME)

    return 0

In [7]:
# Run the full rechunking loop; the trailing 0 in the cell output is
# main()'s return value.
main()

2024-02-11 22:03:27.091376	 Beginning script
0:01:49.073148	 LE-1251.011.PSL
0:01:59.240235	 Preparing zarr stores
0:01:59.301369	 Rechunking


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


0:08:50.091265	 Completed rechunk for LE-1251.011.PSL
0:08:52.723642	 LE-1251.012.PSL
0:09:02.486306	 Preparing zarr stores
0:09:02.621546	 Rechunking


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


0:12:35.734371	 Completed rechunk for LE-1251.012.PSL
0:12:37.890321	 LE-1251.013.PSL
0:12:47.988712	 Preparing zarr stores
0:12:48.141618	 Rechunking


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


0:17:08.391624	 Completed rechunk for LE-1251.013.PSL
0:17:10.645924	 LE-1281.011.PSL
0:17:20.565912	 Preparing zarr stores
0:17:20.715615	 Rechunking


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


0:21:43.721906	 Completed rechunk for LE-1281.011.PSL
0:21:46.974802	 LE-1281.012.PSL
0:21:56.609312	 Preparing zarr stores
0:21:57.490404	 Rechunking


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


0:26:38.903001	 Completed rechunk for LE-1281.012.PSL
0:26:41.014171	 LE-1281.013.PSL
0:26:51.518245	 Preparing zarr stores
0:26:51.661703	 Rechunking


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


0:32:41.076058	 Completed rechunk for LE-1281.013.PSL
0:32:43.169619	 LE-1301.011.PSL
0:32:53.852243	 Preparing zarr stores
0:32:53.960420	 Rechunking


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


0:38:22.659876	 Completed rechunk for LE-1301.011.PSL
0:38:24.987487	 LE-1301.012.PSL
0:38:35.034824	 Preparing zarr stores
0:38:35.823395	 Rechunking


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


0:42:55.666096	 Completed rechunk for LE-1301.012.PSL
0:42:58.113014	 LE-1301.013.PSL
0:43:08.674523	 Preparing zarr stores
0:43:08.791113	 Rechunking


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


0:46:23.901456	 Completed rechunk for LE-1301.013.PSL
0:46:25.955330	 LE-1251.011.UBOT
0:46:36.216221	 Preparing zarr stores
0:46:37.542360	 Rechunking


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


0:57:02.551296	 Completed rechunk for LE-1251.011.UBOT
0:57:04.698662	 LE-1251.012.UBOT
0:57:14.116960	 Preparing zarr stores
0:57:14.247817	 Rechunking


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


1:06:33.578498	 Completed rechunk for LE-1251.012.UBOT
1:06:35.823120	 LE-1251.013.UBOT
1:06:46.140576	 Preparing zarr stores
1:06:46.284943	 Rechunking


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


1:12:20.532648	 Completed rechunk for LE-1251.013.UBOT
1:12:22.833876	 LE-1281.011.UBOT
1:12:33.388863	 Preparing zarr stores
1:12:34.404467	 Rechunking


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


1:18:32.025601	 Completed rechunk for LE-1281.011.UBOT
1:18:34.254563	 LE-1281.012.UBOT
1:18:44.771867	 Preparing zarr stores
1:18:44.924504	 Rechunking


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


1:30:47.430158	 Completed rechunk for LE-1281.012.UBOT
1:30:49.581965	 LE-1281.013.UBOT
1:31:00.319761	 Preparing zarr stores
1:31:00.472007	 Rechunking


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


1:36:22.060435	 Completed rechunk for LE-1281.013.UBOT
1:36:24.281201	 LE-1301.011.UBOT
1:36:33.697883	 Preparing zarr stores
1:36:33.839474	 Rechunking


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


1:41:55.683406	 Completed rechunk for LE-1301.011.UBOT
1:41:57.820709	 LE-1301.012.UBOT
1:42:08.015479	 Preparing zarr stores
1:42:09.090353	 Rechunking


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


1:47:15.942501	 Completed rechunk for LE-1301.012.UBOT
1:47:18.087833	 LE-1301.013.UBOT
1:47:27.940070	 Preparing zarr stores
1:47:29.092191	 Rechunking


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


1:57:49.806602	 Completed rechunk for LE-1301.013.UBOT
1:57:52.311571	 LE-1251.011.VBOT
1:58:06.721024	 Preparing zarr stores
1:58:07.023017	 Rechunking


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


2:09:03.243700	 Completed rechunk for LE-1251.011.VBOT
2:09:13.061760	 LE-1251.012.VBOT
2:09:24.316926	 Preparing zarr stores
2:09:24.456250	 Rechunking


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


2:13:58.330540	 Completed rechunk for LE-1251.012.VBOT
2:14:00.622716	 LE-1251.013.VBOT
2:14:09.276859	 Preparing zarr stores
2:14:09.414020	 Rechunking


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


2:23:17.051392	 Completed rechunk for LE-1251.013.VBOT
2:23:19.298257	 LE-1281.011.VBOT
2:23:29.461575	 Preparing zarr stores
2:23:29.921672	 Rechunking


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


2:28:19.292288	 Completed rechunk for LE-1281.011.VBOT
2:28:21.577445	 LE-1281.012.VBOT
2:28:32.101951	 Preparing zarr stores
2:28:32.209633	 Rechunking


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


2:39:20.535170	 Completed rechunk for LE-1281.012.VBOT
2:39:23.040364	 LE-1281.013.VBOT
2:39:33.833067	 Preparing zarr stores
2:39:33.961036	 Rechunking


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


2:57:18.324512	 Completed rechunk for LE-1281.013.VBOT
2:57:20.588994	 LE-1301.011.VBOT
2:57:30.565654	 Preparing zarr stores
2:57:30.701702	 Rechunking


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


3:03:18.627546	 Completed rechunk for LE-1301.011.VBOT
3:03:21.570847	 LE-1301.012.VBOT
3:03:32.149383	 Preparing zarr stores
3:03:32.271578	 Rechunking


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


3:08:13.500443	 Completed rechunk for LE-1301.012.VBOT
3:08:15.559725	 LE-1301.013.VBOT
3:08:27.116386	 Preparing zarr stores
3:08:27.266720	 Rechunking


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


3:14:01.024470	 Completed rechunk for LE-1301.013.VBOT
3:14:03.992384	 PROCESS_COMPLETED


0

In [8]:
# Close the client first, then the cluster object it was connected to.
client.close()
cluster.close()