In [1]:
using cfgrib, DataStructures

┌ Info: Precompiling cfgrib [cb67cb4b-e5c8-45d4-aff0-3ae3657ca610]
└ @ Base loading.jl:1273


In [2]:
#  Resolve the `tests/sample-data` directory relative to the installed package source
const dir_tests = abspath(dirname(pathof(cfgrib)), "..", "tests")
const dir_testfiles = abspath(dir_tests, "sample-data")

test_file = joinpath(dir_testfiles, "era5-levels-members.grib")

#  Build a file index over `cfgrib.ALL_KEYS`, then filter it in place on paramId=130
index = cfgrib.FileIndex(test_file, cfgrib.ALL_KEYS)

cfgrib.filter!(index; paramId=130);

In [19]:
#  TODO: Add filter_by_keys
"""
    build_variable_components(index; encode_cf, log, errors, squeeze, read_keys, time_dims)

Assemble the components of a single data variable from a GRIB file `index`:
its dimensions, the on-disk data variable and its coordinate variables.

# Keywords
- `encode_cf`: collection of encoding group names, forwarded to
  `cfgrib.encode_cf_first` and `cfgrib.build_geography_coordinates`. When it
  contains `"vertical"`, the `"level"` coordinate is renamed to the value of the
  `GRIB_typeOfLevel` attribute (if that attribute is present).
- `log`: forwarded to `cfgrib.build_geography_coordinates`. The default is the
  global `LOG`. NOTE(review): `LOG` is not defined in the visible part of this
  file, so pass `log` explicitly unless it is defined elsewhere.
- `errors`: forwarded to `cfgrib.build_geography_coordinates`.
- `squeeze`: when `true`, coordinates holding a single value get empty
  dimensions and are left out of the data variable's dimensions.
- `read_keys`: extra attribute keys appended to the default ones.
- `time_dims`: forwarded to `cfgrib.encode_cf_first`.

# Returns
A tuple `(dims, data_var, coord_vars)` where `dims` is an `OrderedDict` pairing
each dimension name with the matching entry of `size(data_var.data)`, `data_var`
is a `cfgrib.Variable` wrapping a `cfgrib.OnDiskArray`, and `coord_vars` is an
`OrderedDict` from coordinate name to `cfgrib.Variable`.
"""
function build_variable_components(
        index; encode_cf=("parameter", "time", "geography", "vertical"),
        log=LOG, errors="warn", squeeze=true, read_keys=[],
        time_dims=("time", "step")
    )
    #  Attribute keys: package defaults, then the ones registered for this grid
    #  type (if any), then the ones requested by the caller
    data_var_attrs_keys = cfgrib.DATA_ATTRIBUTES_KEYS
    data_var_attrs_keys = [
        data_var_attrs_keys;
        get(cfgrib.GRID_TYPE_MAP, index["gridType"][1], [])
    ]
    data_var_attrs_keys = [data_var_attrs_keys; read_keys]

    data_var_attrs = cfgrib.enforce_unique_attributes(index, data_var_attrs_keys)

    coords_map = cfgrib.encode_cf_first(data_var_attrs, encode_cf, time_dims)

    #  Maps a renamed coordinate back to the index key it was read from
    coord_name_key_map = Dict()
    coord_vars = OrderedDict()

    for coord_key in coords_map
        values = index[coord_key]
        if length(values) == 1 && ismissing(values[1])
            #  TODO: Add logging
            #  @warn "Missing from GRIB Stream $(coord_key)"
            continue
        end

        coord_name = coord_key

        renames_level = "vertical" in encode_cf && coord_key == "level" &&
            haskey(data_var_attrs, "GRIB_typeOfLevel")
        if renames_level
            coord_name = data_var_attrs["GRIB_typeOfLevel"]
            coord_name_key_map[coord_name] = coord_key
        end

        attributes = Dict(
            "long_name" => "original GRIB coordinate for key:" *
                           "$(coord_key)($(coord_name))",
            "units"     => "1",
        )

        #  Known coordinates override the generic attributes above; the table
        #  entry is copied so the package-level table is never aliased
        merge!(attributes, copy(get(cfgrib.COORD_ATTRS, coord_name, Dict())))

        data = sort(
            values,
            rev=get(attributes, "stored_direction", "none") == "decreasing"
        )
        dimensions = (coord_name, )

        if squeeze && length(values) == 1
            data = data[1]
            #  `typeof(data) == Array` can never hold (typeof returns a concrete
            #  type such as Array{Int64,1}), so test with `isa` instead: wrap
            #  the single value unless it already is an array
            data isa Array || (data = [data])
            dimensions = ()
        end

        coord_vars[coord_name] = cfgrib.Variable(dimensions, data, attributes)
    end

    #  Header dimensions are the coordinates that were not squeezed away
    header_dimensions = Tuple(
        d for (d, c) in pairs(coord_vars) if !squeeze || length(c.data) > 1
    )
    #  Loses information on which shape belongs to which dimension
    #  doesn't seem to matter though
    header_shape = Iterators.flatten(
        Tuple(size(coord_vars[d].data) for d in header_dimensions)
    )

    geo_dims, geo_shape, geo_coord_vars = cfgrib.build_geography_coordinates(
        index, encode_cf, errors; log=log
    )

    dimensions = (header_dimensions..., geo_dims...)
    shape = (header_shape..., geo_shape...)

    merge!(coord_vars, geo_coord_vars)

    #  Position of each header dimension's key inside `index.index_keys`. It does
    #  not depend on the message, so it is looked up once instead of per offset.
    key_positions = [
        findfirst(index.index_keys .== get(coord_name_key_map, dim, dim))
        for dim in header_dimensions
    ]

    #  Map every message's position along the header dimensions to its offset
    offsets = OrderedDict{NTuple{length(header_dimensions), Int64}, Int}()
    for (header_values, offset) in index.offsets
        header_indexes = Array{Int}(undef, length(header_dimensions))
        for (i, dim) in enumerate(header_dimensions)
            header_value = header_values[key_positions[i]]
            header_indexes[i] = findfirst(coord_vars[dim].data .== header_value)
        end

        offsets[Tuple(header_indexes)] = offset
    end

    data = cfgrib.OnDiskArray(
        index.grib_path,
        shape,
        offsets,
        missing,
        length(geo_dims),
        Float32
    )

    if haskey(coord_vars, "time") && haskey(coord_vars, "step")
        # add the 'valid_time' secondary coordinate
        dims, time_data = cfgrib.build_valid_time(
            coord_vars["time"].data,
            coord_vars["step"].data
        )
        #  Copied, like the other coordinate attributes, so the returned
        #  variable does not share the package-level attribute table
        attrs = copy(cfgrib.COORD_ATTRS["valid_time"])
        coord_vars["valid_time"] = cfgrib.Variable(dims, time_data, attrs)
    end

    data_var_attrs["coordinates"] = join(keys(coord_vars), " ")
    data_var = cfgrib.Variable(dimensions, data, data_var_attrs)
    dims = OrderedDict(
        (d => s) for (d, s) in zip(dimensions, size(data_var.data))
    )

    return dims, data_var, coord_vars
end

build_variable_components (generic function with 1 method)

In [20]:
#  `log` is passed explicitly, so the function's default for it is never evaluated
_d = build_variable_components(index; log=missing);

In [24]:
#  Third (last) element of the returned tuple: the coordinate variables
_d[end]

OrderedDict{Any,Any} with 7 entries:
  "number"        => Variable(("number",), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], Dict…
  "time"          => Variable(("time",), [1483228800, 1483272000, 1483315200, 1…
  "step"          => Variable((), [0], Dict{String,Any}("units"=>"hours","long_…
  "isobaricInhPa" => Variable(("isobaricInhPa",), [850, 500], Dict{String,Any}(…
  "latitude"      => Variable(("latitude",), [90.0, 87.0, 84.0, 81.0, 78.0, 75.…
  "longitude"     => Variable(("longitude",), [0.0, 3.0, 6.0, 9.0, 12.0, 15.0, …
  "valid_time"    => Variable(("time",), [1483228800, 1483272000, 1483315200, 1…

In [6]:
#  `_d` is the (dims, data_var, coord_vars) tuple, so a tuple cannot be indexed by
#  name; the coordinates live in its third element
_d[3]["step"].data

1-element Array{Int64,1}:
 0

In [7]:
#  `_d` is the (dims, data_var, coord_vars) tuple; the coordinates are its third
#  element, so index that rather than the tuple itself
cfgrib.build_valid_time(
    _d[3]["time"].data,
    _d[3]["step"].data
)

(("time",), [1483228800, 1483272000, 1483315200, 1483358400])

In [40]:
#  Number of distinct step values; the coordinates are the third element of `_d`
length(unique(_d[3]["step"].data))

1