In [1]:
import xarray as xr
import numpy as np
import os
import time
from functools import partial

def _preprocess(x,level):
    return x.sel(level=level)

import sys
# adding Folder_2/subfolder to the system path
sys.path.insert(0, '/glade/u/home/acheung/TC_Genesis_Index/Scripts')
from useful_functions import era_5_datestrings,generate_pathstrs

from ncar_jobqueue import NCARCluster
from dask.distributed import Client
import dask

import warnings
warnings.filterwarnings('ignore')

cluster = NCARCluster(project='UMCP0022')
cluster.adapt(minimum_jobs=1, maximum_jobs=10)
client = Client(cluster)
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/acheung/proxy/34843/status,

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/acheung/proxy/34843/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.12.206.47:43930,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/acheung/proxy/34843/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [None]:
data_interval = 7 # days
date_range_list = era_5_datestrings(data_interval,'pl')

# Generate path strings for u
variable_id_1= '131_u' # for relative vorticity
all_path_strs_1 = generate_pathstrs(date_range_list,variable_id_1,'uv','pl')

# Generate path strings for v
variable_id_2= '132_v' # for relative vorticity
all_path_strs_2 = generate_pathstrs(date_range_list,variable_id_2,'uv','pl')

# Choose pressure levels (must be small to large)
level = [200,850] # hPa
partial_func = partial(_preprocess, level=level)
iteration = 1

for begin_ind in np.arange(0,len(all_path_strs_1),data_interval):
    start = time.time()

    current_path_strs_1 = all_path_strs_1[begin_ind:begin_ind+data_interval] # loop this!
    current_path_strs_2 = all_path_strs_2[begin_ind:begin_ind+data_interval]

    # Open u and v together
    datasets = xr.open_mfdataset(current_path_strs_1 + current_path_strs_2,preprocess=partial_func, parallel=True)
    datasets_mean = datasets.mean('time').load()
    u_shear = datasets_mean['U'].sel(level = 200) - datasets_mean['U'].sel(level = 850)
    v_shear = datasets_mean['V'].sel(level = 200) - datasets_mean['V'].sel(level = 850)
    mean_shear = np.sqrt((u_shear**2) + (v_shear**2))
    mean_shear = mean_shear.assign_coords({"beg":np.asarray(datasets['time'][0])})
    mean_shear = mean_shear.assign_coords({"end":np.asarray(datasets['time'][-1])})
    mean_shear = mean_shear.assign_coords({"lower_level":np.asarray(datasets['level'][-1])})
    mean_shear = mean_shear.assign_coords({"upper_level":np.asarray(datasets['level'][0])})


    # Save deep-layer shear
    path = "/glade/scratch/acheung/dl_shear/"
    # Check whether the specified path exists or not
    isExist = os.path.exists(path)
    if not isExist:
    # Create a new directory because it does not exist
        os.makedirs(path) 


    variable_file_name_base = current_path_strs_1[0][57:79] + 'DL_shear.'+current_path_strs_1[0][81:89]+str(data_interval) + 'd_mean.'
    variable_file_name_start_time = current_path_strs_1[0][89:101]
    variable_file_name_end_time = current_path_strs_1[-1][101:]

    var_file_name_full = variable_file_name_base + variable_file_name_start_time + variable_file_name_end_time

    mean_shear.to_dataset(name='Deep-Layer Shear').to_netcdf(path+'/'+var_file_name_full)
    
    end = time.time()
    print(f"Runtime of week {iteration} of {len(np.arange(0,len(all_path_strs_1),data_interval))} is {round(end - start,2)} s")
    print(f"Predicted Time Remaining: {round((end - start) * (len(np.arange(0,len(all_path_strs_1),data_interval)) - iteration)/3600,2)} h")
    iteration = iteration + 1

    

Runtime of week 1 of 4331 is 67.02 s
Predicted Time Remaining: 80.6 h
Runtime of week 2 of 4331 is 57.04 s
Predicted Time Remaining: 68.59 h
Runtime of week 3 of 4331 is 55.74 s
Predicted Time Remaining: 67.01 h
Runtime of week 4 of 4331 is 56.14 s
Predicted Time Remaining: 67.47 h
Runtime of week 5 of 4331 is 55.32 s
Predicted Time Remaining: 66.47 h
Runtime of week 6 of 4331 is 56.41 s
Predicted Time Remaining: 67.77 h
Runtime of week 7 of 4331 is 58.76 s
Predicted Time Remaining: 70.58 h
Runtime of week 8 of 4331 is 54.73 s
Predicted Time Remaining: 65.72 h
Runtime of week 9 of 4331 is 55.33 s
Predicted Time Remaining: 66.42 h
Runtime of week 10 of 4331 is 73.51 s
Predicted Time Remaining: 88.24 h
Runtime of week 11 of 4331 is 71.3 s
Predicted Time Remaining: 85.56 h
Runtime of week 12 of 4331 is 59.19 s
Predicted Time Remaining: 71.01 h
Runtime of week 13 of 4331 is 68.31 s
Predicted Time Remaining: 81.93 h
Runtime of week 14 of 4331 is 103.09 s
Predicted Time Remaining: 123.62 h
R

2023-07-24 13:45:35,639 - distributed.scheduler - ERROR - Couldn't gather keys {"('mean_agg-aggregate-09a02accffeb5734d64f46a75e5d1466', 0, 0, 0)": []} state: ['waiting'] workers: []
NoneType: None
2023-07-24 13:45:35,642 - distributed.scheduler - ERROR - Shut down workers that don't have promised key: [], ('mean_agg-aggregate-09a02accffeb5734d64f46a75e5d1466', 0, 0, 0)
NoneType: None


Runtime of week 34 of 4331 is 3681.41 s
Predicted Time Remaining: 4394.17 h
Runtime of week 35 of 4331 is 55.55 s
Predicted Time Remaining: 66.29 h
Runtime of week 36 of 4331 is 52.33 s
Predicted Time Remaining: 62.43 h
Runtime of week 37 of 4331 is 62.93 s
Predicted Time Remaining: 75.06 h
Runtime of week 38 of 4331 is 62.8 s
Predicted Time Remaining: 74.89 h
Runtime of week 39 of 4331 is 55.69 s
Predicted Time Remaining: 66.4 h
Runtime of week 40 of 4331 is 65.73 s
Predicted Time Remaining: 78.34 h
Runtime of week 41 of 4331 is 57.72 s
Predicted Time Remaining: 68.78 h
Runtime of week 42 of 4331 is 55.84 s
Predicted Time Remaining: 66.53 h
Runtime of week 43 of 4331 is 53.89 s
Predicted Time Remaining: 64.19 h
Runtime of week 44 of 4331 is 55.88 s
Predicted Time Remaining: 66.54 h
Runtime of week 45 of 4331 is 44.86 s
Predicted Time Remaining: 53.41 h
Runtime of week 46 of 4331 is 87.99 s
Predicted Time Remaining: 104.74 h
Runtime of week 47 of 4331 is 71.07 s
Predicted Time Remainin