Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow the read/write of JSON-valued Zarr attributes. #2278

Merged
merged 15 commits into from May 17, 2022
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions RELEASE_NOTES.md
Expand Up @@ -7,6 +7,8 @@ This file contains a high-level description of this package's evolution. Release

## 4.8.2 - TBD

* [Enhancement] Allow the read/write of JSON-valued Zarr attributes to support
domain-specific info such as that used by GDAL/Zarr. See [Github #2278](https://github.com/Unidata/netcdf-c/pull/2278).
* [Enhancement] Update the documentation to match the current filter capabilities. See [Github #2249](https://github.com/Unidata/netcdf-c/pull/2249).
* [Enhancement] Support installation of pre-built standard filters into user-specified location. See [Github #2318](https://github.com/Unidata/netcdf-c/pull/2318).
* [Enhancement] Improve filter support. More specifically (1) add nc_inq_filter_avail to check if a filter is available, (2) add the notion of standard filters, (3) cleanup szip support to fix interaction with NCZarr. See [Github #2245](https://github.com/Unidata/netcdf-c/pull/2245).
Expand Down
5 changes: 4 additions & 1 deletion include/ncjson.h
Expand Up @@ -56,9 +56,12 @@ struct NCJconst {int bval; long long ival; double dval; char* sval;};
extern "C" {
#endif

/* Parse a JSON string */
/* Parse a string to NCjson*/
DLLEXPORT int NCJparse(const char* text, unsigned flags, NCjson** jsonp);

/* Parse a counted string to NCjson*/
DLLEXPORT int NCJparsen(size_t len, const char* text, unsigned flags, NCjson** jsonp);

/* Reclaim a JSON tree */
DLLEXPORT extern void NCJreclaim(NCjson* json);

Expand Down
43 changes: 34 additions & 9 deletions libdispatch/ncjson.c
Expand Up @@ -94,6 +94,7 @@ static int NCJyytext(NCJparser*, char* start, size_t pdlen);
static void NCJreclaimArray(struct NCjlist*);
static void NCJreclaimDict(struct NCjlist*);
static int NCJunescape(NCJparser* parser);
static int unescape1(int c);
static int listappend(struct NCjlist* list, NCjson* element);

#ifndef NETCDF_JSON_H
Expand All @@ -109,24 +110,28 @@ static int bytesappendc(NCJbuf* bufp, const char c);

/**
 * Parse a NUL-terminated string into an NCjson tree.
 *
 * Thin wrapper over NCJparsen() that computes the text length with strlen().
 *
 * @param text  NUL-terminated JSON text; may be NULL
 * @param flags passed through unchanged to NCJparsen()
 * @param jsonp passed through unchanged to NCJparsen()
 * @return whatever NCJparsen() returns
 */
int
NCJparse(const char* text, unsigned flags, NCjson** jsonp)
{
    /* strlen(NULL) is undefined behavior, so hand NCJparsen() a zero
       length for a NULL text and let it report the missing input. */
    size_t len = (text == NULL ? 0 : strlen(text));
    return NCJparsen(len,text,flags,jsonp);
}

int
NCJparsen(size_t len, const char* text, unsigned flags, NCjson** jsonp)
{
int stat = NCJ_OK;
size_t len;
NCJparser* parser = NULL;
NCjson* json = NULL;

/* Need at least 1 character of input */
if(text == NULL || text[0] == '\0')
if(len == 0 || text == NULL)
{stat = NCJTHROW(NCJ_ERR); goto done;}
if(jsonp == NULL) goto done;
parser = calloc(1,sizeof(NCJparser));
if(parser == NULL)
{stat = NCJTHROW(NCJ_ERR); goto done;}
len = strlen(text);
parser->text = (char*)malloc(len+1+1);
if(parser->text == NULL)
{stat = NCJTHROW(NCJ_ERR); goto done;}
strcpy(parser->text,text);
memcpy(parser->text,text,len);
parser->text[len] = '\0';
parser->text[len+1] = '\0';
parser->pos = &parser->text[0];
Expand Down Expand Up @@ -334,16 +339,21 @@ NCJlex(NCJparser* parser)
c = *parser->pos;
if(c == '\0') {
token = NCJ_EOF;
} else if(c <= ' ' || c == '\177') {
} else if(c <= ' ' || c == '\177') {/* ignore whitespace */
parser->pos++;
continue;
} else if(c == NCJ_ESCAPE) {
parser->pos++;
continue; /* ignore whitespace */
c = *parser->pos;
*parser->pos = unescape1(c);
continue;
} else if(strchr(JSON_WORD, c) != NULL) {
start = parser->pos;
for(;;) {
c = *parser->pos++;
if(c == '\0' || strchr(JSON_WORD,c) == NULL) break; /* end of word */
}
/* Pushback c if not whitespace */
/* Pushback c */
parser->pos--;
count = ((parser->pos) - start);
if(NCJyytext(parser,start,count)) goto done;
Expand Down Expand Up @@ -604,6 +614,21 @@ NCJunescape(NCJparser* parser)
return NCJTHROW(NCJ_OK);
}

/* Translate the letter of a single-character escape into the control
   character it stands for. Only b, f, n, r and t are translated; every
   other value is handed back unchanged (technically not Json conformant). */
static int
unescape1(int c)
{
    if(c == 'b') return '\b';
    if(c == 'f') return '\f';
    if(c == 'n') return '\n';
    if(c == 'r') return '\r';
    if(c == 't') return '\t';
    return c;
}

#ifdef NCJDEBUG
static char*
tokenname(int token)
Expand Down Expand Up @@ -896,7 +921,7 @@ NCJunparseR(const NCjson* json, NCJbuf* buf, unsigned flags)
if(json->list.len > 0 && json->list.contents != NULL) {
int shortlist = 0;
for(i=0;!shortlist && i < json->list.len;i+=2) {
if(i > 0) bytesappendc(buf,NCJ_COMMA);
if(i > 0) {bytesappendc(buf,NCJ_COMMA);bytesappendc(buf,' ');};
NCJunparseR(json->list.contents[i],buf,flags); /* key */
bytesappendc(buf,NCJ_COLON);
bytesappendc(buf,' ');
Expand Down Expand Up @@ -945,7 +970,7 @@ escape(const char* text, NCJbuf* buf)
case '\n': replace = 'n'; break;
case '\r': replace = 'r'; break;
case '\t': replace = 't'; break;
case NCJ_QUOTE: replace = '\''; break;
case NCJ_QUOTE: replace = '\"'; break;
case NCJ_ESCAPE: replace = '\\'; break;
default: break;
}
Expand Down
123 changes: 77 additions & 46 deletions libnczarr/zsync.c
Expand Up @@ -12,13 +12,17 @@

#undef FILLONCLOSE

/*mnemonics*/
#define DICTOPEN '{'
#define DICTCLOSE '}'

/* Forward */
static int ncz_collect_dims(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NCjson** jdimsp);
static int ncz_sync_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int isclose);

static int ncz_jsonize_atts(NCindex* attlist, NCjson** jattrsp);
static int load_jatts(NCZMAP* map, NC_OBJ* container, int nczarrv1, NCjson** jattrsp, NClist** atypes);
static int zconvert(nc_type typeid, size_t typelen, void* dst, NCjson* src);
static int zconvert(nc_type typeid, size_t typelen, NCjson* src, void* dst);
static int computeattrinfo(const char* name, NClist* atypes, NCjson* values,
nc_type* typeidp, size_t* typelenp, size_t* lenp, void** datap);
static int parse_group_content(NCjson* jcontent, NClist* dimdefs, NClist* varnames, NClist* subgrps);
Expand All @@ -37,6 +41,8 @@ static int computeattrdata(nc_type* typeidp, NCjson* values, size_t* typelenp, s
static int inferattrtype(NCjson* values, nc_type* typeidp);
static int mininttype(unsigned long long u64, int negative);
static int computedimrefs(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int purezarr, int xarray, int ndims, NClist* dimnames, size64_t* shapes, NC_DIM_INFO_T** dims);
static int read_dict(NCjson* jdict, NCjson** jtextp);
static int write_dict(size_t len, const void* data, NCjson** jsonp);

/**************************************************/
/**************************************************/
Expand Down Expand Up @@ -776,6 +782,7 @@ ncz_sync_atts(NC_FILE_INFO_T* file, NC_OBJ* container, NCindex* attlist, int isc
Note that this does not push to the file.
Also note that attributes of length 1 are stored as singletons, not arrays.
This is to be more consistent with pure zarr.
Also implements the JSON dictionary convention.
@param attlist - [in] the attributes to dictify
@param jattrsp - [out] the json'ized att list
@return NC_NOERR
Expand All @@ -785,7 +792,7 @@ static int
ncz_jsonize_atts(NCindex* attlist, NCjson** jattrsp)
{
int stat = NC_NOERR;
int i;
int i, isdict;
NCjson* jattrs = NULL;
NCjson* akey = NULL;
NCjson* jdata = NULL;
Expand All @@ -795,9 +802,18 @@ ncz_jsonize_atts(NCindex* attlist, NCjson** jattrsp)
/* Iterate over the attribute list */
for(i=0;i<ncindexsize(attlist);i++) {
NC_ATT_INFO_T* att = (NC_ATT_INFO_T*)ncindexith(attlist,i);
isdict = 0;
/* Create the attribute dict value*/
if((stat = NCZ_stringconvert(att->nc_typeid,att->len,att->data,&jdata)))
goto done;
if(att->nc_typeid == NC_CHAR
&& ((char*)att->data)[0] == DICTOPEN
&& ((char*)att->data)[att->len-1] == DICTCLOSE) {
/* this is subject to the JSON dictionary convention? */
if(write_dict(att->len,att->data,&jdata)==NC_NOERR) isdict=1;
}
if(!isdict) {
if((stat = NCZ_stringconvert(att->nc_typeid,att->len,att->data,&jdata)))
goto done;
}
if((stat = NCJinsert(jattrs,att->hdr.name,jdata))) goto done;
jdata = NULL;
}
Expand Down Expand Up @@ -918,7 +934,7 @@ load_jatts(NCZMAP* map, NC_OBJ* container, int nczarrv1, NCjson** jattrsp, NClis

/* Convert a json value to actual data values of an attribute. */
static int
zconvert(nc_type typeid, size_t typelen, void* dst0, NCjson* src)
zconvert(nc_type typeid, size_t typelen, NCjson* src, void* dst0)
{
int stat = NC_NOERR;
int i;
Expand Down Expand Up @@ -1004,19 +1020,28 @@ computeattrdata(nc_type* typeidp, NCjson* values, size_t* typelenp, size_t* lenp
void* data = NULL;
size_t typelen;
nc_type typeid = NC_NAT;
NCjson* jtext = NULL;
int reclaimvalues = 0;

/* Get assumed type */
if(typeidp) typeid = *typeidp;
if(typeid == NC_NAT) if((stat = inferattrtype(values,&typeid))) goto done;
if(typeid == NC_NAT) {stat = NC_EBADTYPE; goto done;}

if((stat = NC4_inq_atomic_type(typeid, NULL, &typelen)))
goto done;

/* Collect the length of the attribute; might be a singleton */
switch (NCJsort(values)) {
case NCJ_DICT: stat = NC_ENCZARR; goto done;
case NCJ_ARRAY:
count = NCJlength(values);
break;
case NCJ_DICT:
/* Apply the JSON dictionary convention and convert to string */
if((stat = read_dict(values,&jtext))) goto done;
values = jtext; jtext = NULL;
reclaimvalues = 1;
/* fall thru */
case NCJ_STRING: /* requires special handling as an array of characters; also look out for empty string */
if(typeid == NC_CHAR) {
count = strlen(NCJstring(values));
Expand All @@ -1029,18 +1054,16 @@ computeattrdata(nc_type* typeidp, NCjson* values, size_t* typelenp, size_t* lenp
break;
}

if(count > 0) {
if(count > 0 && data == NULL) {
/* Allocate data space */
if((stat = NC4_inq_atomic_type(typeid, NULL, &typelen)))
goto done;
if(typeid == NC_CHAR)
data = malloc(typelen*(count+1));
else
data = malloc(typelen*count);
if(data == NULL)
{stat = NC_ENOMEM; goto done;}
/* convert to target type */
if((stat = zconvert(typeid, typelen, data, values)))
if((stat = zconvert(typeid, typelen, values, data)))
goto done;
}
if(lenp) *lenp = count;
Expand Down Expand Up @@ -1079,7 +1102,9 @@ inferattrtype(NCjson* value, nc_type* typeidp)
case NCJ_NULL:
typeid = NC_CHAR;
return NC_NOERR;
case NCJ_DICT: /* fall thru */
case NCJ_DICT:
typeid = NC_CHAR;
goto done;
case NCJ_UNDEF:
return NC_EINVAL;
default: /* atomic */
Expand Down Expand Up @@ -2289,42 +2314,48 @@ computedimrefs(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int purezarr, int xarra
return THROW(stat);
}

#if 0
Not currently used
Special compatibility case:
if the value of the attribute is a dictionary,
or an array with non-atomic values, then
then stringify it and pretend it is of char type.
/* Return 1 if this json is not an
atomic value or an array of atomic values.
That is, it does not look like valid
attribute data.
/**
Implement the JSON convention for dictionaries.

Reading: If the value of the attribute is a dictionary, then stringify
it as the value and make the attribute be of type "char".

Writing: if the attribute is of type char and looks like a JSON dictionary,
then parse it as JSON and use that as its value in .zattrs.
*/

static int
iscomplexjson(NCjson* j)
read_dict(NCjson* jdict, NCjson** jtextp)
{
int i;
switch(NCJsort(j)) {
case NCJ_ARRAY:
/* verify that the elements of the array are not complex */
for(i=0;i<NCJlength(j);i++) {
switch (NCJith(j,NCJsort(i)))) {
case NCJ_DICT:
case NCJ_ARRAY:
case NCJ_UNDEF:
case NCJ_NULL:
return 1;
default: break;
}
}
return 0;
case NCJ_DICT:
case NCJ_UNDEF:
case NCJ_NULL:
break;
default:
return 0;
}
return 1;
int stat = NC_NOERR;
NCjson* jtext = NULL;
char* text = NULL;

if(jdict == NULL) {stat = NC_EINVAL; goto done;}
if(NCJsort(jdict) != NCJ_DICT) {stat = NC_EINVAL; goto done;}
if(NCJunparse(jdict,0,&text)) {stat = NC_EINVAL; goto done;}
if(NCJnewstring(NCJ_STRING,text,&jtext)) {stat = NC_EINVAL; goto done;}
*jtextp = jtext; jtext = NULL;
done:
NCJreclaim(jtext);
nullfree(text);
return stat;
}
#endif

/**
 * Parse counted text as JSON and accept the result only if its
 * top-level value is a dictionary.
 *
 * @param len   number of bytes of text in data
 * @param data  the candidate JSON text
 * @param jsonp [out] receives the parsed dictionary on success; must not
 *              be NULL; not written on failure
 * @return NC_NOERR on success, NC_EINVAL if the text does not parse or
 *         does not parse to a dictionary
 */
static int
write_dict(size_t len, const void* data, NCjson** jsonp)
{
    int stat = NC_NOERR;
    NCjson* jdict = NULL;

    assert(jsonp != NULL);
    /* NCJparsen takes a const char*, so keep the const qualifier on data */
    if(NCJparsen(len,(const char*)data,0,&jdict))
        {stat = NC_EINVAL; goto done;}
    if(NCJsort(jdict) != NCJ_DICT)
        {stat = NC_EINVAL; goto done;}
    *jsonp = jdict; jdict = NULL; /* hand the tree to the caller; nothing left to reclaim */
done:
    NCJreclaim(jdict); /* still set only on the failure paths */
    return stat;
}

3 changes: 2 additions & 1 deletion nc_test4/tst_broken_files.c
Expand Up @@ -32,7 +32,8 @@ main() {
fclose(fp);

int ncid;
if (nc_open(FILE_NAME, 0, &ncid) != NC_EHDFERR) ERR;
int stat=nc_open(FILE_NAME, 0, &ncid);
if (stat != NC_EHDFERR && stat != NC_ENOTNC) ERR;
}

{
Expand Down
1 change: 0 additions & 1 deletion ncdump/tst_nccopy3.sh
Expand Up @@ -8,7 +8,6 @@
if test "x$srcdir" = x ; then srcdir=`pwd`; fi
. ../test_common.sh

set -x
set -e

echo ""
Expand Down
1 change: 0 additions & 1 deletion ncdump/tst_output.sh
Expand Up @@ -4,7 +4,6 @@ if test "x$srcdir" = x ; then srcdir=`pwd`; fi
. ../test_common.sh

# This shell script tests the output from several previous tests.
set -x
set -e

echo ""
Expand Down
3 changes: 2 additions & 1 deletion nczarr_test/CMakeLists.txt
Expand Up @@ -73,7 +73,7 @@ IF(ENABLE_TESTS)
BUILD_BIN_TEST(zmapio ${COMMONSRC})
TARGET_INCLUDE_DIRECTORIES(zmapio PUBLIC ../libnczarr)
BUILD_BIN_TEST(zhex)
BUILD_BIN_TEST(zisjson)
BUILD_BIN_TEST(zisjson ${COMMONSRC})
TARGET_INCLUDE_DIRECTORIES(zisjson PUBLIC ../libnczarr)
BUILD_BIN_TEST(zs3parse ${COMMONSRC})
TARGET_INCLUDE_DIRECTORIES(zs3parse PUBLIC ../libnczarr)
Expand Down Expand Up @@ -108,6 +108,7 @@ IF(ENABLE_TESTS)
add_sh_test(nczarr_test run_interop)
add_sh_test(nczarr_test run_misc)
add_sh_test(nczarr_test run_nczarr_fill)
add_sh_test(nczarr_test run_jsonconvention)

BUILD_BIN_TEST(test_quantize ${TSTCOMMONSRC})
add_sh_test(nczarr_test run_quantize)
Expand Down