In [15]:
import zarr
import pyarrow.parquet as pq
import numpy as np
import glob
import json
import itertools
import sys
from collections import defaultdict

In [2]:
def process_pq(pq_list, the_type):
    '''
    function to process a list of pq files and return appropriate
    3D coordinates
    '''

    keys = {
        "condensed": 0,
        "condensed_edge": 1,
        "condensed_env": 2,
        "condensed_shell": 3,
        "core": 4,
        "core_edge": 5,
        "core_env": 6,
        "core_shell": 7,
        "plume": 8,
        }

    # to get both the full domain of x and y along with the x and y coordinates
    # for the given type (condensed, core, etc.)

    full = defaultdict(list)
    sub = defaultdict(list)
    extrema = defaultdict(list)
    for f in pq_list:
        table = pq.read_table(f).to_pandas()
        c_id = table['cloud_id'].values[0]
        if the_type == 'full' or the_type == 'base':
            tablerows = table['type'] == keys["condensed"]
        else:
            tablerows = table['type'] == keys[the_type]
        df_thetype = table[tablerows]
        for dimension in ['x', 'y', 'z']:
            for suffix in ['full', 'sub']:
                if suffix == 'full':
                    full[(dimension, suffix)].append(table[dimension].values)
                else:
                    sub[(dimension, suffix)].append(df_thetype[dimension].values)
            for suffix in ['min', 'max']:
                if suffix == 'min':
                    extrema[(dimension, suffix)].append(np.amin(table[dimension].values))
                    sub[(dimension, suffix)].append(np.amin(df_thetype[dimension].values))
                else:
                    extrema[(dimension, suffix)].append(np.amax(table[dimension].values))
                    sub[(dimension, suffix)].append(np.amax(table[dimension].values))
    return full, sub, c_id, extrema

In [7]:
filename = '/mnt/datatmp/visualize/andrew/bomex_json_files/16936.json'
tracktype = 'condensed'
varname = 'QN'
meters2km = 1.e-3
print(filename)
with open(filename, 'r') as f:
    files = json.load(f)
num_ts = len(files['pq_filenames'])
pq_filelist = sorted(files['pq_filenames'])

fulldict, subdict, cloud_id, extdict = process_pq(pq_filelist, tracktype)

print(cloud_id)

min_x, max_x = np.amin(extdict[('x', 'min')]), np.amax(extdict[('x', 'max')])
min_y, max_y = np.amax(extdict[('y', 'min')]), np.amax(extdict[('x', 'max')])

# to handle the cases where the cloud crosses a boundary
domain = 256

x = np.array(list(itertools.chain.from_iterable(fulldict[('x', 'full')])))
y = np.array(list(itertools.chain.from_iterable(fulldict[('y', 'full')])))
# hardcoded for bomex currently
off_x = 0
off_y = 0
if (max_x - min_x) > (domain / 2):
    off_x = domain - np.min(x[(x > domain / 2)])
if (max_y - min_y) > (domain / 2):
    off_y = domain - np.min(y[(y > domain / 2)])

/mnt/datatmp/visualize/andrew/bomex_json_files/16936.json
16936


In [16]:
for t_step, the_file in enumerate(files['var_filenames']):
    the_in = zarr.open_group(the_file, mode='r')
    try:
        # startx, stopx = x_indices[0], x_indices[1]
        # starty, stopy = y_indices[0], y_indices[1]
        # extra slice 0 is there to remove the time dimension from the zarr data
        var_data = the_in[varname][:][0]
        x_r = subdict[('x', 'sub')][t_step]
        y_r = subdict[('y', 'sub')][t_step]
        z = subdict[('z', 'sub')][t_step]
        if off_x > 0:
            var_data = np.roll(var_data, off_x, axis=2)
            x_r = x_r + off_x
            x_r[x_r > domain - 1] = x_r[x_r > domain - 1] - domain - 1
        if off_y > 0:
            var_data = np.roll(var_data, off_y, axis=1)
            y_r = y_r + off_y
            y_r[y_r > domain - 1] = y_r[y_r > domain - 1] - domain - 1
        # only map the values that are valid for the given type
        indices = np.array((z, y_r, x_r))
        b_map = np.zeros_like(var_data, dtype=bool)
        if tracktype == 'full':
            b_map[:] = True
        elif tracktype == 'base':
            z_base = subdict[('z', 'min')][t_step]
            b_map[z_base, y_r, x_r] = True
        else:
            b_map[tuple(indices)] = True
        var_data[~b_map] = 0
        var_data = np.ma.masked_values(var_data, 0)
        var_data = var_data[:, :, :]
        print(var_data.shape)
        sys.exit(0)
    except KeyError:
        print('variable names are: ', write_error(the_in))
        sys.exit(1)

(128, 256, 256)


SystemExit: 0

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
