From 80a2d1f14850e2fbdd6b7eb0d80765a23eb1e0b6 Mon Sep 17 00:00:00 2001
From: Hugo Oliveira
Date: Tue, 27 Oct 2020 13:10:32 +1100
Subject: [PATCH] feat(netcdf): nc_get_var & nc_flat metadata lookup

`nc_get_var` is just a wrapper to obtain the variable
of a netcdf by its name.

`nc_flat` flattens the `ncinfo` metadata output into a named
structure, recursively. The function turns the access to all
netcdf derived fields into a much easier ordeal, by using
name referencing, tree pruning (e.g. Groups/Attributes), all
with recursive resolution.

The typical usage of `nc_flat` is to avoid index searching, and
allow dictionary like access patterns (a-la python:xarray).

For example, to get the size and units of the TIME variable without
nc_flat you would use:

```
info = ncinfo(ncfile);
vnames = {info.Variables.Name};
[has_var,var_ind] = inCell(vnames,'TIME');
vsize = info.Variables(var_ind).Size;
vattrs = {info.Variables(var_ind).Attributes.Name};
[has_units,unit_ind] = inCell(vattrs,'units');
vunits = info.Variables(var_ind).Attributes(unit_ind).Value;
```

with nc_flat:

```
info = nc_flat(ncinfo(ncfile));
vsize = info.Variables.TIME.Size;
vunits = info.Variables.TIME.Attributes.units;
```
---
 Util/NetCDF/nc_flat.m    | 174 +++++++++++++++++++++++++++++++++++++++
 Util/NetCDF/nc_get_var.m |  35 ++++++++
 2 files changed, 209 insertions(+)
 create mode 100644 Util/NetCDF/nc_flat.m
 create mode 100644 Util/NetCDF/nc_get_var.m

diff --git a/Util/NetCDF/nc_flat.m b/Util/NetCDF/nc_flat.m
new file mode 100644
index 000000000..2dd1dfb37
--- /dev/null
+++ b/Util/NetCDF/nc_flat.m
@@ -0,0 +1,174 @@
+function [flat_struct] = nc_flat(ncstruct, keep_empty)
+% function [flat_struct] = nc_flat(ncstruct, keep_empty)
+%
+% Flatten the ncinfo structure, recursively,
+% into a form with named/dictionary like access.
+% Pruning of empty entries is also allowed.
+%
+% Inputs:
+%
+% ncstruct [struct] - a ncinfo like structure.
+% keep_empty [bool] - flag to keep or prune empty entries.
+%                     Default: true (keep, as empty structs).
+%
+% Outputs:
+%
+% flat_struct [struct] - the flattened structure, where every
+%                        named entry is a fieldname.
+%                        Names that are invalid fieldnames are
+%                        converted by matlab.lang.makeValidName
+%                        (e.g. '_FillValue' -> 'x_FillValue').
+%
+% Example:
+%
+% % basic
+% ncstruct = struct('Filename','x.nc','Name','/','Dimensions',[],'Variables',[]);
+% ncstruct.Attributes = struct('Name','one','Value',1);
+% ncstruct.Attributes(2) = struct('Name','two','Value',2);
+% ncstruct.Groups = [];
+% ncstruct.Format = 'netcdf4';
+% [flat_struct] = nc_flat(ncstruct,true);
+% assert(flat_struct.Attributes.one==1)
+% assert(flat_struct.Attributes.two==2)
+% assert(isstruct(flat_struct.Dimensions))
+% assert(isempty(flat_struct.Dimensions))
+%
+% % recursion
+% ncstruct.Groups = rmfield(ncstruct,{'Filename','Format'});
+% ncstruct.Groups.Name = 'Group_A';
+% ncstruct.Groups.Attributes(1).Name = 'three';
+% ncstruct.Groups.Attributes(1).Value = 3;
+% ncstruct.Groups(2) = rmfield(ncstruct,{'Filename','Format'});
+% ncstruct.Groups(2).Name = 'Group_B';
+% ncstruct.Groups(2).Attributes(1).Name = 'four';
+% ncstruct.Groups(2).Attributes(1).Value = 4;
+% [flat_struct] = nc_flat(ncstruct);
+% assert(flat_struct.Attributes.one==1)
+% assert(flat_struct.Attributes.two==2)
+% assert(flat_struct.Groups.Group_A.Attributes.three==3)
+% assert(flat_struct.Groups.Group_B.Attributes.four==4)
+%
+% % pruning
+% [flat_struct] = nc_flat(ncstruct,false);
+% assert(isequal(fieldnames(flat_struct),{'Filename','Attributes','Groups','Format'}'));
+%
+% % invalid fieldnames
+% attrs = struct('Name',{'_FillValue','units'},'Value',{-1,'m'});
+% [flat_struct] = nc_flat(attrs);
+% assert(flat_struct.x_FillValue==-1)
+% assert(strcmp(flat_struct.units,'m'))
+%
+%
+% author: hugo.oliveira@utas.edu.au
+%
+narginchk(1, 2)
+
+if nargin < 2
+    keep_empty = true;
+end
+
+% Fail explicitly on anything that cannot be flattened.
+% clean_prune relies on this error to keep/prune empty entries.
+if ~isstruct(ncstruct) || isempty(ncstruct) || ~isfield(ncstruct, 'Name')
+    error('nc_flat:invalidInput', '%s: input must be a non-empty struct with a Name field.', mfilename);
+end
+
+% Entry names become fieldnames, so they must be valid and unique.
+names = matlab.lang.makeUniqueStrings(matlab.lang.makeValidName({ncstruct.Name}'));
+ncstruct = rmfield(ncstruct, 'Name');
+fnames = fieldnames(ncstruct);
+
+root_fields = {'Filename', 'Dimensions', 'Variables', 'Attributes', 'Groups', 'Format'};
+dims_fields = {'Length', 'Unlimited'};
+vars_fields = {'Dimensions', 'Size', 'Datatype', 'Attributes'};
+attrs_fields = {'Value'};
+group_fields = {'Dimensions', 'Variables', 'Attributes', 'Groups'};
+
+% The level is detected by exact fieldname matching.
+at_root_level = all(ismember(root_fields, fnames));
+at_dims_level = all(ismember(dims_fields, fnames));
+at_vars_level = all(ismember(vars_fields, fnames));
+at_attrs_level = all(ismember(attrs_fields, fnames));
+at_group_level = all(ismember(fnames, group_fields));
+
+if at_attrs_level
+
+    flat_struct = cell2struct({ncstruct.Value}, names, 2);
+
+elseif at_dims_level
+
+    % force a row shape, since fieldnames are matched along dimension 2.
+    flat_struct = cell2struct(num2cell(ncstruct(:)'), names', 2);
+
+elseif at_vars_level
+
+    for k = 1:numel(names)
+        flat_struct.(names{k}) = clean_prune(ncstruct(k), {'Attributes', 'Dimensions'}, keep_empty);
+    end
+
+elseif at_group_level
+
+    for k = 1:numel(names)
+        flat_struct.(names{k}) = clean_prune(ncstruct(k), {'Attributes', 'Dimensions', 'Variables', 'Groups'}, keep_empty);
+    end
+
+elseif at_root_level
+
+    % the root is a single entry and its Name is not used as a fieldname.
+    flat_struct = clean_prune(ncstruct(end), {'Attributes', 'Dimensions', 'Variables', 'Groups'}, keep_empty);
+
+else
+
+    error('nc_flat:unknownLevel', '%s: unknown ncinfo level for fields: %s.', mfilename, strjoin(fnames', ', '));
+
+end
+
+end
+
+function [s] = clean_prune(s, fnames, keep_flag)
+%
+% Try to prune/flat the fieldnames of a ncinfo structure.
+%
+% If keep_flag is true and the flattening fails,
+% the fieldname is kept as an empty struct.
+% Otherwise, the fieldname is removed
+% from the structure.
+%
+
+narginchk(3, 3);
+
+total = numel(fnames);
+remove_list = cell(1, total);
+
+c = 0;
+
+for n = 1:total
+    name = fnames{n};
+
+    try
+        s.(name) = nc_flat(s.(name), keep_flag);
+    catch
+
+        if keep_flag
+            s.(name) = struct([]);
+        else
+            c = c + 1;
+            remove_list{c} = name;
+        end
+
+    end
+
+end
+
+if c > 0
+    % rmfield errors on missing fieldnames, so only remove the ones present.
+    remove_list = remove_list(1:c);
+    remove_list = remove_list(ismember(remove_list, fieldnames(s)));
+
+    if ~isempty(remove_list)
+        s = rmfield(s, remove_list);
+    end
+
+end
+
+end
diff --git a/Util/NetCDF/nc_get_var.m b/Util/NetCDF/nc_get_var.m
new file mode 100644
index 000000000..8dab2c745
--- /dev/null
+++ b/Util/NetCDF/nc_get_var.m
@@ -0,0 +1,35 @@
+function [var] = nc_get_var(id,varname,varargin)
+% function [var] = nc_get_var(id,varname,varargin)
+%
+% A wrapper to read a variable by its name
+% with the netcdf matlab library.
+%
+% Inputs:
+%
+% id [int] - the netcdf file id.
+% varname [str] - the variable name.
+% varargin [int] - Other netcdf.getVar args, like
+%                  start,count,stride.
+%
+% Outputs:
+%
+% var - the array.
+%
+%
+% author: hugo.oliveira@utas.edu.au
+%
+try
+    varid = netcdf.inqVarID(id,varname);
+catch err
+    err = addCause(err,MException(err.identifier,'%s: %s Variable name not found.',mfilename,varname));
+    throw(err);
+end
+
+try
+    var = netcdf.getVar(id,varid,varargin{:});
+catch err
+    err = addCause(err,MException(err.identifier,'%s: invalid getVar arguments for %s Variable',mfilename,varname));
+    throw(err);
+end
+
+end