In [2]:
import asyncio
import xarray as xr
from glob import iglob
import os

def get_n_dims(nc_filepath):
    with xr.open_dataset(nc_filepath) as ds:
        n_dims = max(
            [len(ds[v].dims) for v in ds.variables if v != 'options_database']
        )
    return n_dims

def chunked_file_parser(num_chunks):
    semaphore = asyncio.Semaphore(num_chunks)
    @asyncio.coroutine
    def parse_dims(filepath):
        nonlocal semaphore
        with (yield from semaphore):
            n_dims = yield from get_n_dims(filepath)
        return n_dims
    return parse_dims

def categorise_by_dims(directory):
    dim_dict = {
        '0+1d': [],
        '2d': [],
        '3d': []
    }
    file_parser = chunked_file_parser(10)
    tasks = [(f, file_parser(f)) for f in iglob(
        pathname=f'{directory}/*.nc', recursive=False
    )]
    for future in asyncio.as_completed(tasks):
        data = yield from future
        if data[1] < 3:
            dim_dict['0+1d'].append(data[0])
        elif data[1] < 4:
            dim_dict['2d'].append(data[0])
        else:
            dim_dict['3d'].append(data[0])
    return dim_dict


In [3]:
test_dir = '/home/earcwi/OneDrive/EUREC4A/Code/CFizer/test_data'
get_n_dims(os.path.join(test_dir, 'd20200128_diagnostic_2d_172800.nc'))

3

In [4]:
loop = asyncio.get_event_loop()
file_categories = loop.run_until_complete(categorise_by_dims(directory=test_dir))
print(file_categories)

RuntimeError: This event loop is already running