Unidata · WardF · May 17, 2022 · Apr 6, 2022 · Apr 6, 2022 · Apr 6, 2022
@@ -7,6 +7,8 @@ This file contains a high-level description of this package's evolution. Release
 
 ## 4.8.2 - TBD
 
+* [Enhancement] Support reading and writing JSON-valued Zarr attributes so that
+domain-specific metadata, such as that used by GDAL/Zarr, can be stored. See [Github #????](https://github.com/Unidata/netcdf-c/pull/????).
 * [Enhancement] Update the documentation to match the current filter capabilities. See [Github #2249](https://github.com/Unidata/netcdf-c/pull/2249).
 * [Enhancement] Support installation of pre-built standard filters into user-specified location. See [Github #2318](https://github.com/Unidata/netcdf-c/pull/2318).
 * [Enhancement] Improve filter support. More specifically (1) add nc_inq_filter_avail to check if a filter is available, (2) add the notion of standard filters, (3) cleanup szip support to fix interaction with NCZarr. See [Github #2245](https://github.com/Unidata/netcdf-c/pull/2245).

@@ -56,9 +56,12 @@ struct NCJconst {int bval; long long ival; double dval; char* sval;};
 extern "C" {
 #endif
 
-/* Parse a JSON string */
+/* Parse a NUL-terminated string into an NCjson tree */
 DLLEXPORT int NCJparse(const char* text, unsigned flags, NCjson** jsonp);
 
+/* Parse a counted string (len bytes; need not be NUL-terminated) into an NCjson tree */
+DLLEXPORT int NCJparsen(size_t len, const char* text, unsigned flags, NCjson** jsonp);
+
 /* Reclaim a JSON tree */
 DLLEXPORT extern void NCJreclaim(NCjson* json);
 

@@ -94,6 +94,7 @@ static int NCJyytext(NCJparser*, char* start, size_t pdlen);
 static void NCJreclaimArray(struct NCjlist*);
 static void NCJreclaimDict(struct NCjlist*);
 static int NCJunescape(NCJparser* parser);
+static int unescape1(int c);
 static int listappend(struct NCjlist* list, NCjson* element);
 
 #ifndef NETCDF_JSON_H
@@ -109,24 +110,28 @@ static int bytesappendc(NCJbuf* bufp, const char c);
 
 int
 NCJparse(const char* text, unsigned flags, NCjson** jsonp)
+{
+    return NCJparsen(strlen(text),text,flags,jsonp);
+}
+
+int
+NCJparsen(size_t len, const char* text, unsigned flags, NCjson** jsonp)
 {
     int stat = NCJ_OK;
-    size_t len;
     NCJparser* parser = NULL;
     NCjson* json = NULL;
 
     /* Need at least 1 character of input */
-    if(text == NULL || text[0] == '\0')
+    if(len == 0 || text == NULL)
 	{stat = NCJTHROW(NCJ_ERR); goto done;}
     if(jsonp == NULL) goto done;
     parser = calloc(1,sizeof(NCJparser));
     if(parser == NULL)
 	{stat = NCJTHROW(NCJ_ERR); goto done;}
-    len = strlen(text);
     parser->text = (char*)malloc(len+1+1);
     if(parser->text == NULL)
 	{stat = NCJTHROW(NCJ_ERR); goto done;}
-    strcpy(parser->text,text);
+    memcpy(parser->text,text,len);
     parser->text[len] = '\0';
     parser->text[len+1] = '\0';
     parser->pos = &parser->text[0];
@@ -334,16 +339,21 @@ NCJlex(NCJparser* parser)
 	c = *parser->pos;
 	if(c == '\0') {
 	    token = NCJ_EOF;
-	} else if(c <= ' ' || c == '\177') {
+	} else if(c <= ' ' || c == '\177') {/* ignore whitespace */
+	    parser->pos++;
+	    continue;
+	} else if(c == NCJ_ESCAPE) {
 	    parser->pos++;
-	    continue; /* ignore whitespace */
+	    c = *parser->pos;
+	    *parser->pos = unescape1(c);
+	    continue;
 	} else if(strchr(JSON_WORD, c) != NULL) {
 	    start = parser->pos;
 	    for(;;) {
 		c = *parser->pos++;
 		if(c == '\0' || strchr(JSON_WORD,c) == NULL) break; /* end of word */
 	    }
-	    /* Pushback c if not whitespace */
+	    /* Pushback c */
 	    parser->pos--;
 	    count = ((parser->pos) - start);
 	    if(NCJyytext(parser,start,count)) goto done;
@@ -604,6 +614,21 @@ NCJunescape(NCJparser* parser)
     return NCJTHROW(NCJ_OK);    
 }
 
+/* Unescape a single character: translate the letter of a one-character
+   escape sequence into the character that it denotes.
+   @param c - [in] the character that followed the escape character
+   @return the control character for b, f, n, r, or t; any other
+           value of c is returned unchanged
+*/
+static int
+unescape1(int c)
+{
+    switch (c) {
+    case 'b': c = '\b'; break; /* backspace */
+    case 'f': c = '\f'; break; /* form feed */
+    case 'n': c = '\n'; break; /* newline */
+    case 'r': c = '\r'; break; /* carriage return */
+    case 't': c = '\t'; break; /* horizontal tab */
+    default: c = c; break;/* every other character stands for itself (the self-assignment is a no-op); there is no \u handling here, so this is technically not Json conformant */
+    }
+    return c;
+}
+
 #ifdef NCJDEBUG
 static char*
 tokenname(int token)
@@ -896,7 +921,7 @@ NCJunparseR(const NCjson* json, NCJbuf* buf, unsigned flags)
 	if(json->list.len > 0 && json->list.contents != NULL) {
 	    int shortlist = 0;
 	    for(i=0;!shortlist && i < json->list.len;i+=2) {
-		if(i > 0) bytesappendc(buf,NCJ_COMMA);
+		if(i > 0) {bytesappendc(buf,NCJ_COMMA);bytesappendc(buf,' ');};
 		NCJunparseR(json->list.contents[i],buf,flags); /* key */
 		bytesappendc(buf,NCJ_COLON);
 		bytesappendc(buf,' ');
@@ -945,7 +970,7 @@ escape(const char* text, NCJbuf* buf)
 	case '\n': replace = 'n'; break;
 	case '\r': replace = 'r'; break;
 	case '\t': replace = 't'; break;
-	case NCJ_QUOTE: replace = '\''; break;
+	case NCJ_QUOTE: replace = '\"'; break;
 	case NCJ_ESCAPE: replace = '\\'; break;
 	default: break;
 	}

@@ -12,13 +12,17 @@
 
 #undef FILLONCLOSE
 
+/* Mnemonics for the characters that open and close a JSON dictionary */
+#define DICTOPEN '{'
+#define DICTCLOSE '}'
+
 /* Forward */
 static int ncz_collect_dims(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NCjson** jdimsp);
 static int ncz_sync_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int isclose);
 
 static int ncz_jsonize_atts(NCindex* attlist, NCjson** jattrsp);
 static int load_jatts(NCZMAP* map, NC_OBJ* container, int nczarrv1, NCjson** jattrsp, NClist** atypes);
-static int zconvert(nc_type typeid, size_t typelen, void* dst, NCjson* src);
+static int zconvert(nc_type typeid, size_t typelen, NCjson* src, void* dst);
 static int computeattrinfo(const char* name, NClist* atypes, NCjson* values,
 		nc_type* typeidp, size_t* typelenp, size_t* lenp, void** datap);
 static int parse_group_content(NCjson* jcontent, NClist* dimdefs, NClist* varnames, NClist* subgrps);
@@ -37,6 +41,8 @@ static int computeattrdata(nc_type* typeidp, NCjson* values, size_t* typelenp, s
 static int inferattrtype(NCjson* values, nc_type* typeidp);
 static int mininttype(unsigned long long u64, int negative);
 static int computedimrefs(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int purezarr, int xarray, int ndims, NClist* dimnames, size64_t* shapes, NC_DIM_INFO_T** dims);
+static int read_dict(NCjson* jdict, NCjson** jtextp);
+static int write_dict(size_t len, const void* data, NCjson** jsonp);
 
 /**************************************************/
 /**************************************************/
@@ -776,6 +782,7 @@ ncz_sync_atts(NC_FILE_INFO_T* file, NC_OBJ* container, NCindex* attlist, int isc
 Note that this does not push to the file.
 Also note that attributes of length 1 are stored as singletons, not arrays.
 This is to be more consistent with pure zarr.
+Also implements the JSON dictionary convention.
 @param attlist - [in] the attributes to dictify
 @param jattrsp - [out] the json'ized att list
 @return NC_NOERR
@@ -785,7 +792,7 @@ static int
 ncz_jsonize_atts(NCindex* attlist, NCjson** jattrsp)
 {
     int stat = NC_NOERR;
-    int i;
+    int i, isdict;
     NCjson* jattrs = NULL;
     NCjson* akey = NULL;
     NCjson* jdata = NULL;
@@ -795,9 +802,18 @@ ncz_jsonize_atts(NCindex* attlist, NCjson** jattrsp)
     /* Iterate over the attribute list */
     for(i=0;i<ncindexsize(attlist);i++) {
 	NC_ATT_INFO_T* att = (NC_ATT_INFO_T*)ncindexith(attlist,i);
+	isdict = 0;
 	/* Create the attribute dict value*/
-	if((stat = NCZ_stringconvert(att->nc_typeid,att->len,att->data,&jdata)))
-	    goto done;
+	if(att->nc_typeid == NC_CHAR
+	   && ((char*)att->data)[0] == DICTOPEN
+   	   && ((char*)att->data)[att->len-1] == DICTCLOSE) {
+	    /* this is subject to the JSON dictionary convention? */
+	    if(write_dict(att->len,att->data,&jdata)==NC_NOERR) isdict=1;
+	}
+	if(!isdict) {
+	    if((stat = NCZ_stringconvert(att->nc_typeid,att->len,att->data,&jdata)))
+	        goto done;
+	}
 	if((stat = NCJinsert(jattrs,att->hdr.name,jdata))) goto done;
 	jdata = NULL;
     }
@@ -918,7 +934,7 @@ load_jatts(NCZMAP* map, NC_OBJ* container, int nczarrv1, NCjson** jattrsp, NClis
 
 /* Convert a json value to actual data values of an attribute. */
 static int
-zconvert(nc_type typeid, size_t typelen, void* dst0, NCjson* src)
+zconvert(nc_type typeid, size_t typelen, NCjson* src, void* dst0)
 {
     int stat = NC_NOERR;
     int i;
@@ -1004,19 +1020,28 @@ computeattrdata(nc_type* typeidp, NCjson* values, size_t* typelenp, size_t* lenp
     void* data = NULL;
     size_t typelen;
     nc_type typeid = NC_NAT;
+    NCjson* jtext = NULL;
     int reclaimvalues = 0;
 
     /* Get assumed type */
     if(typeidp) typeid = *typeidp;
     if(typeid == NC_NAT) if((stat = inferattrtype(values,&typeid))) goto done;
     if(typeid == NC_NAT) {stat = NC_EBADTYPE; goto done;}
 
+    if((stat = NC4_inq_atomic_type(typeid, NULL, &typelen)))
+        goto done;
+
     /* Collect the length of the attribute; might be a singleton  */
     switch (NCJsort(values)) {
-    case NCJ_DICT: stat = NC_ENCZARR; goto done;
     case NCJ_ARRAY:
 	count = NCJlength(values);
 	break;
+    case NCJ_DICT:
+	/* Apply the JSON dictionary convention and convert to string */
+	if((stat = read_dict(values,&jtext))) goto done;
+	values = jtext; jtext = NULL;
+	reclaimvalues = 1;
+	/* fall thru */
     case NCJ_STRING: /* requires special handling as an array of characters; also look out for empty string */
 	if(typeid == NC_CHAR) {
 	    count = strlen(NCJstring(values));
@@ -1029,18 +1054,16 @@ computeattrdata(nc_type* typeidp, NCjson* values, size_t* typelenp, size_t* lenp
 	break;
     }
 
-    if(count > 0) {
+    if(count > 0 && data == NULL) {
         /* Allocate data space */
-        if((stat = NC4_inq_atomic_type(typeid, NULL, &typelen)))
-	    goto done;
         if(typeid == NC_CHAR)
             data = malloc(typelen*(count+1));
         else
             data = malloc(typelen*count);
         if(data == NULL)
 	    {stat = NC_ENOMEM; goto done;}
         /* convert to target type */
-        if((stat = zconvert(typeid, typelen, data, values)))
+        if((stat = zconvert(typeid, typelen, values, data)))
    	    goto done;
     }
     if(lenp) *lenp = count;
@@ -1079,7 +1102,9 @@ inferattrtype(NCjson* value, nc_type* typeidp)
     case NCJ_NULL:
         typeid = NC_CHAR;
 	return NC_NOERR;
-    case NCJ_DICT: /* fall thru */
+    case NCJ_DICT:
+    	typeid = NC_CHAR;
+	goto done;
     case NCJ_UNDEF:
 	return NC_EINVAL;
     default: /* atomic */
@@ -2289,42 +2314,48 @@ computedimrefs(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int purezarr, int xarra
     return THROW(stat);
 }
 
-#if 0
-Not currently used
-Special compatibility case:
-       if the value of the attribute is a dictionary,
-       or an array with non-atomic values, then
-       then stringify it and pretend it is of char type.
-/* Return 1 if this json is not an
-atomic value or an array of atomic values.
-That is, it does not look like valid
-attribute data.
+/**
+Implement the JSON convention for dictionaries.
+
+Reading: If the value of the attribute is a dictionary, then stringify
+         it as the value and make the attribute be of type "char".
+
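+Writing: If the attribute is of type char and its text starts with '{' and
+	 ends with '}', then try to parse it as JSON; if that yields a
+	 dictionary, use the dictionary as the attribute's value in .zattrs.
+	 Otherwise the attribute is written as an ordinary char attribute.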
 */
+
 static int
-iscomplexjson(NCjson* j)
+read_dict(NCjson* jdict, NCjson** jtextp) /* reading half of the convention: turn a JSON dictionary into a JSON string node holding its text */
 {
-    int i;
-    switch(NCJsort(j)) {
-    case NCJ_ARRAY:
-	/* verify that the elements of the array are not complex */
-	for(i=0;i<NCJlength(j);i++) {
-	    switch (NCJith(j,NCJsort(i)))) {
-	    case NCJ_DICT:
-    	    case NCJ_ARRAY:
-	    case NCJ_UNDEF:
-	    case NCJ_NULL:
-		return 1;
-	    default: break;
-	    }
-	}
-	return 0;
-    case NCJ_DICT:
-    case NCJ_UNDEF:
-    case NCJ_NULL:
-	break;
-    default:
-        return 0;
-    }
-    return 1;
+    int stat = NC_NOERR;
+    NCjson* jtext = NULL; /* string node to hand back; reclaimed at done unless handed to the caller */
+    char* text = NULL; /* serialized form of jdict; always freed at done */
+
+    if(jdict == NULL) {stat = NC_EINVAL; goto done;}
+    if(NCJsort(jdict) != NCJ_DICT)  {stat = NC_EINVAL; goto done;} /* only dictionaries are converted */
+    if(NCJunparse(jdict,0,&text)) {stat = NC_EINVAL; goto done;} /* dictionary -> JSON text; any failure is reported as NC_EINVAL */
+    if(NCJnewstring(NCJ_STRING,text,&jtext)) {stat = NC_EINVAL; goto done;} /* wrap the text in an NCJ_STRING node; presumably copies text, since text is still freed below -- TODO confirm */
+    *jtextp = jtext; jtext = NULL; /* hand the node to the caller; clearing jtext keeps the cleanup below from reclaiming it. NOTE(review): jtextp is dereferenced without a NULL check */
+done:
+    NCJreclaim(jtext);
+    nullfree(text);
+    return stat;
 }
-#endif
+/* Writing half of the JSON dictionary convention: parse the text of an
+   attribute and accept the result only if it is a JSON dictionary.
+   @param len - [in] number of bytes in data
+   @param data - [in] the attribute text; handed to NCJparsen as a counted string
+   @param jsonp - [out] receives the parsed dictionary; must not be NULL
+   @return NC_NOERR on success; NC_EINVAL if the text does not parse or the
+           parsed value is not a dictionary (*jsonp is then left untouched)
+*/
+static int
+write_dict(size_t len, const void* data, NCjson** jsonp)
+{
+    int stat = NC_NOERR;
+    NCjson* jdict = NULL; /* parse result; reclaimed at done unless handed to the caller */
+
+    assert(jsonp != NULL);
+    if(NCJparsen(len,(char*)data,0,&jdict)) /* any parse failure is reported as NC_EINVAL */
+        {stat = NC_EINVAL; goto done;}
+    if(NCJsort(jdict) != NCJ_DICT) /* valid JSON that is not a dictionary is rejected as well */
+        {stat = NC_EINVAL; goto done;}
+    *jsonp = jdict; jdict = NULL; /* hand the tree to the caller; clearing jdict keeps NCJreclaim below from freeing it */
+done:
+    NCJreclaim(jdict);
+    return stat;
+}
+
@@ -32,7 +32,8 @@ main() {
         fclose(fp);
 
         int  ncid;
-        if (nc_open(FILE_NAME, 0, &ncid) != NC_EHDFERR) ERR;
+        int stat=nc_open(FILE_NAME, 0, &ncid);
+        if (stat != NC_EHDFERR && stat != NC_ENOTNC) ERR;
     }
 
     {

@@ -8,7 +8,6 @@
 if test "x$srcdir" = x ; then srcdir=`pwd`; fi
 . ../test_common.sh
 
-set -x
 set -e
 
 echo ""

@@ -4,7 +4,6 @@ if test "x$srcdir" = x ; then srcdir=`pwd`; fi
 . ../test_common.sh
 
 # This shell script tests the output from several previous tests.
-set -x
 set -e
 
 echo ""

@@ -73,7 +73,7 @@ IF(ENABLE_TESTS)
   BUILD_BIN_TEST(zmapio ${COMMONSRC})
   TARGET_INCLUDE_DIRECTORIES(zmapio PUBLIC ../libnczarr)
   BUILD_BIN_TEST(zhex)
-  BUILD_BIN_TEST(zisjson)
+  BUILD_BIN_TEST(zisjson ${COMMONSRC})
   TARGET_INCLUDE_DIRECTORIES(zisjson PUBLIC ../libnczarr)
   BUILD_BIN_TEST(zs3parse ${COMMONSRC})
   TARGET_INCLUDE_DIRECTORIES(zs3parse PUBLIC ../libnczarr)
@@ -108,6 +108,7 @@ IF(ENABLE_TESTS)
     add_sh_test(nczarr_test run_interop)
     add_sh_test(nczarr_test run_misc)
     add_sh_test(nczarr_test run_nczarr_fill)
+    add_sh_test(nczarr_test run_jsonconvention)
 
     BUILD_BIN_TEST(test_quantize ${TSTCOMMONSRC})
     add_sh_test(nczarr_test run_quantize)