Merge pull request #1984 from DennisHeimbigner/kwaliasfix.dmh

Fix the handling of certain alias types in CDL files.
Unidata · Apr 14, 2021 · 67c640f · 67c640f
2 parents 1e99bb4 + 30ee991
commit 67c640f
Show file tree

Hide file tree

Showing 12 changed files with 1,564 additions and 1,324 deletions.
diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md
@@ -6,6 +6,7 @@ Release Notes       {#RELEASE_NOTES}
 This file contains a high-level description of this package's evolution. Releases are in reverse chronological order (most recent first). Note that, as of netcdf 4.2, the `netcdf-c++` and `netcdf-fortran` libraries have been separated into their own libraries.
 ## 4.8.1 - TBD
 
+* [Bug Fix] Allow some primitive type names to be used as identifiers depending on the file format. See [Github #1984](https://github.com/Unidata/netcdf-c/pull/1984).  
 * [Enhancement] Add support for reading/writing pure Zarr storage format that supports the XArray _ARRAY_DIMENSIONS attribute. See [Github #1952](https://github.com/Unidata/netcdf-c/pull/1952) for more information.
 * [Update] Updated version of bzip2 used in filter testing/functionality, in support of [Github #1969](https://github.com/Unidata/netcdf-c/issues/1969).
 * [Bug Fix] Corrected HDF5 version detection logic as described in [Github #1962](https://github.com/Unidata/netcdf-c/issues/1962).  

diff --git a/ncdump/Makefile.am b/ncdump/Makefile.am
@@ -162,8 +162,9 @@ ref_provenance_v1.nc ref_tst_radix.cdl tst_radix.cdl test_radix.sh	\
 ref_nccopy_w.cdl tst_nccopy_w3.sh tst_nccopy_w4.sh			\
 ref_no_ncproperty.nc test_unicode_directory.sh				\
 ref_roman_szip_simple.cdl ref_roman_szip_unlim.cdl ref_tst_perdimspecs.cdl \
-test_keywords.sh ref_keyword1.cdl ref_keyword2.cdl ref_tst_nofilters.cdl \
-test_unicode_path.sh
+test_keywords.sh ref_keyword1.cdl ref_keyword2.cdl                      \
+ref_keyword3.cdl ref_keyword4.cdl                                       \
+ref_tst_nofilters.cdl test_unicode_path.sh
 
 # The L512.bin file is file containing exactly 512 bytes each of value 0.
 # It is used for creating hdf5 files with varying offsets for testing.
@@ -199,5 +200,7 @@ tst_ncf199.cdl tst_tst_gattenum.cdl tst_tst_usuffix.cdl ctest.c		\
 ctest64.c nccopy3_subset_out.nc camrun.c tst_ncf213.cdl tst_ncf213.nc	\
 tst_radix.nc tmp_radix.cdl ctest_small_3.c ctest_small_4.c		\
 ctest_special_atts_4.c tst_roman_szip_simple.cdl			\
-tst_roman_szip_unlim.cdl tst_perdimpspecs.nc tmppds.* \
-keyword1.nc keyword2.nc tmp_keyword1.cdl tmp_keyword2.cdl
+tst_roman_szip_unlim.cdl tst_perdimpspecs.nc tmppds.*                   \
+keyword1.nc keyword2.nc keyword3.nc keyword4.nc                         \
+tmp_keyword1.cdl tmp_keyword2.cdl tmp_keyword3.cdl tmp_keyword4.cdl
+
diff --git a/ncdump/ref_keyword3.cdl b/ncdump/ref_keyword3.cdl
@@ -0,0 +1,5 @@
+netcdf keyword3 {
+variables:
+    real f;
+    long l;
+}
diff --git a/ncdump/ref_keyword4.cdl b/ncdump/ref_keyword4.cdl
@@ -0,0 +1,9 @@
+netcdf x {
+dimensions:
+    string = 17;
+variables:
+int string;
+  int string:x = 17;
+int :string = 17;
+data: string = 1;
+}
diff --git a/ncdump/test_keywords.sh b/ncdump/test_keywords.sh
@@ -20,4 +20,25 @@ ${NCDUMP} -h keyword2.nc > tmp_keyword2.cdl
 echo "*** comparing tmp_keyword2.cdl to ref_keyword2.cdl..."
 diff -b -w tmp_keyword2.cdl $srcdir/ref_keyword2.cdl
 
+echo "*** Test use of type aliases such as 'long' or 'real'..."
+echo "*** classic: creating keyword3.nc from ref_keyword3.cdl..."
+${NCGEN} -3 -lb -o keyword3.nc $srcdir/ref_keyword3.cdl
+echo "*** creating tmp_keyword3.cdl from keyword3.nc..."
+# ncdump prints float and int, so map them back to the real and long aliases used in the reference file
+${NCDUMP} -h keyword3.nc | sed -e 's/float/real/g' -e 's/int/long/g' >tmp_keyword3.cdl
+echo "*** comparing tmp_keyword3.cdl to ref_keyword3.cdl..."
+diff -b -w tmp_keyword3.cdl $srcdir/ref_keyword3.cdl
+
+echo "*** Test use of keywords both pass and fail"
+# This should succeed
+${NCGEN} -3 -lb -o keyword4.nc $srcdir/ref_keyword4.cdl
+echo "***pass: ncgen -3 X ref_keyword4"
+# This should (x)fail
+if ${NCGEN} -4 -lb -o keyword4.nc $srcdir/ref_keyword4.cdl ; then
+echo "***erroneous pass: ncgen -4 X ref_keyword4"
+exit 1
+else
+echo "***xfail: ncgen -4 X ref_keyword4"
+fi
+
 exit 0
diff --git a/ncgen/generate.c b/ncgen/generate.c
@@ -449,8 +449,10 @@ generate_array(Symbol* vsym, Bytebuffer* code, Datalist* filler, Generator* gene
     }
     nunlimited = countunlimited(args.dimset);
 
-    if(vsym->var.special._Storage == NC_CHUNKED)
-        memcpy(args.chunksizes,vsym->var.special._ChunkSizes,sizeof(size_t)*args.rank);
+    if(vsym->var.special._Storage == NC_CHUNKED) {
+	if(vsym->var.special._ChunkSizes)
+            memcpy(args.chunksizes,vsym->var.special._ChunkSizes,sizeof(size_t)*args.rank);
+    }	
 
     memset(index,0,sizeof(index));
 

diff --git a/ncgen/genlib.h b/ncgen/genlib.h
@@ -82,6 +82,7 @@ extern struct Datalist* getfiller(Symbol*); /* symbol isa variable|type */
 
 /* from: ncgen.y */
 extern Symbol* install(const char *sname);
+extern Symbol* installin(const char *sname,Symbol* grp);
 extern void freesymbol(Symbol*);
 extern Symbol* basetypefor(nc_type nctype);/* Convert nctype to a Symbol*/
 extern Symbol* makearraytype(Symbol*, Dimset*);

diff --git a/ncgen/ncgen.l b/ncgen/ncgen.l
@@ -115,13 +115,15 @@ char char_val;                 /* last char value read */
 signed char byte_val;                 /* last byte value read */
 unsigned char ubyte_val;       /* last byte value read */
 
+/* Forward declarations */
 static Symbol* makepath(char* text);
 static int lexdebug(int);
 static unsigned long long parseULL(int radix, char* text, int*);
 static nc_type downconvert(unsigned long long uint64, int*, int, int);
 static int tagmatch(nc_type nct, int tag);
 static int nct2lexeme(nc_type nct);
 static int collecttag(char* text, char** stagp);
+static int identcheck(int token);
 
 struct Specialtoken specials[] = {
 {"_FillValue",_FILLVALUE,_FILLVALUE_FLAG},
@@ -140,35 +142,6 @@ struct Specialtoken specials[] = {
 {NULL,0} /* null terminate */
 };
 
-/* Track keywords that may be identifiers depending on
-   format being produced */
-/* Define the possible format classes */
-#define KWALL (1<<NC_FORMAT_CLASSIC|1<<NC_FORMAT_64BIT_OFFSET|1<<NC_FORMAT_NETCDF4|1<<NC_FORMAT_NETCDF4_CLASSIC|1<<NC_FORMAT_64BIT_DATA) /* Used in all formats */
-#define KWCDF5 (1<<NC_FORMAT_64BIT_DATA) /* Used in cdf5 */
-#define KWNC4 (1<<NC_FORMAT_NETCDF4) /* Used in netcdf-4 */
-
-#define NKWIDENT 12
-struct KWIDENT {
-    int token;
-    const char* keyword;
-    int formats; /* Which formats use this keyword */
-} kwident[NKWIDENT] = {
-/* Order by token for binary search */
-{CHAR_K, "char", KWALL},
-{BYTE_K, "byte", KWALL},
-{SHORT_K, "short", KWALL},
-{INT_K, "int", KWALL},
-{FLOAT_K, "float", KWALL},
-{DOUBLE_K, "double", KWALL},
-{UBYTE_K, "ubyte", KWCDF5|KWNC4},
-{USHORT_K, "ushort", KWCDF5|KWNC4},
-{UINT_K, "uint", KWCDF5|KWNC4},
-{INT64_K, "int64", KWCDF5|KWNC4},
-{UINT64_K, "uint64", KWCDF5|KWNC4},
-{STRING_K, "string", KWNC4}
-};
-static int identorkw(int token);
-
 %}
 %x ST_C_COMMENT
 %x TEXT
@@ -290,27 +263,33 @@ yytext[MAXTRST-1] = '\0';
 		return lexdebug(OPAQUESTRING);
 		}
 
-compound|struct|structure {return lexdebug(identorkw(COMPOUND));}
-enum				{return lexdebug(identorkw(ENUM));}
+compound|struct|structure {return lexdebug(COMPOUND);}
+enum				{return lexdebug(ENUM);}
 opaque				{return lexdebug(OPAQUE_);}
 
-float|real		{return lexdebug(identorkw(FLOAT_K));}
-char			{return lexdebug(identorkw(CHAR_K));}
-byte			{return lexdebug(identorkw(BYTE_K));}
-ubyte			{return lexdebug(identorkw(UBYTE_K));}
-short			{return lexdebug(identorkw(SHORT_K));}
-ushort			{return lexdebug(identorkw(USHORT_K));}
-long|int|integer	{return lexdebug(identorkw(INT_K));}
-ulong|uint|uinteger	{return lexdebug(identorkw(UINT_K));}
-int64			{return lexdebug(identorkw(INT64_K));}
-uint64			{return lexdebug(identorkw(UINT64_K));}
-double			{return lexdebug(identorkw(DOUBLE_K));}
-string                  {return lexdebug(identorkw(STRING_K));}
+float			{return lexdebug(FLOAT_K);}
+double			{return lexdebug(DOUBLE_K);}
+char			{return lexdebug(CHAR_K);}
+byte			{return lexdebug(BYTE_K);}
+short			{return lexdebug(SHORT_K);}
+int			{return lexdebug(INT_K);}
+ubyte			{return lexdebug(identcheck(UBYTE_K));}
+ushort			{return lexdebug(identcheck(USHORT_K));}
+uint			{return lexdebug(identcheck(UINT_K));}
+int64			{return lexdebug(identcheck(INT64_K));}
+uint64			{return lexdebug(identcheck(UINT64_K));}
+string                  {return lexdebug(identcheck(STRING_K));}
+
+real			{return lexdebug(FLOAT_K);}
+long			{return lexdebug(INT_K);}
+integer			{return lexdebug(INT_K);}
+ulong			{return lexdebug(identcheck(UINT_K));}
+uinteger		{return lexdebug(identcheck(UINT_K));}
+
 
 unlimited|UNLIMITED	{int32_val = -1;
-			 return lexdebug(identorkw(NC_UNLIMITED_K));}
+			 return lexdebug(NC_UNLIMITED_K);}
 
-			/* These are currently only keywords */
 types:			{return lexdebug(TYPES);}
 dimensions:		{return lexdebug(DIMENSIONS);}
 variables:		{return lexdebug(VARIABLES);}
@@ -620,9 +599,7 @@ lexdebug(int token)
 {
     if(debug >= 2)
     {
-	char* text = yytext;
-	text[yyleng] = 0;
-        fprintf(stderr,"Token=%d |%s| line=%d\n",token,text,lineno);
+        fprintf(stderr,"Token=%d |%s| line=%d\n",token,yytext,lineno);
     }
     return token;
 }
@@ -907,38 +884,28 @@ collecttag(char* text, char** stagp)
     return tag;
 }
 
-/* Depending on the format, a name may be a keword or an ident */
 static int
-identorkw(int token)
+identcheck(int token)
 {
-    /* Binary search for yytext */
-    int n = NKWIDENT;
-    int L = 0;
-    int R = (n - 1);
-    int m, cmp;
-    struct KWIDENT* p;
-    int found = 0;
-    size_t len;
-    char* id = NULL;
-
-    for(;;) {
-	if(L > R) break;
-        m = (L + R) / 2;
-	p = &kwident[m];
-	cmp = (p->token - token);
-	if(cmp == 0) {found = 1; break;}
-	if(cmp < 0)
-	    L = (m + 1);
-	else /*cmp > 0*/
-	    R = (m - 1);
+    switch (token) {
+    case UBYTE_K:
+    case USHORT_K:
+    case UINT_K:
+    case INT64_K:
+    case UINT64_K:
+	if(k_flag != NC_FORMAT_NETCDF4 	&& k_flag != NC_FORMAT_64BIT_DATA) {
+	    yylval.sym = install(yytext);
+   	    token = IDENT;
+	}
+	break;
+    case STRING_K:
+	if(k_flag != NC_FORMAT_NETCDF4) {
+	    yylval.sym = install(yytext);
+   	    token = IDENT;
+	}
+	break;
+    default:
+         break;    
     }
-    if(!found) return token; /* Not a keyword of interest */
-    /* See if the format applies */
-    if(p->formats & ((int)1<<k_flag)) return token;
-    /* Need to convert a non-ident token to an ident symbol */
-    len = strlen(yytext);
-    len = unescape(yytext,len,ISIDENT,&id);
-    yylval.sym = install(id);
-    efree(id);
-    return IDENT; /* treat as identifier */
+    return token;
 }
diff --git a/ncgen/ncgen.y b/ncgen/ncgen.y
@@ -26,6 +26,8 @@ static char SccsId[] = "$Id: ncgen.y,v 1.42 2010/05/18 21:32:46 dmh Exp $";
 #define ncvar_t void
 #include "nctime.h"
 
+#undef GENLIB1
+
 /* parser controls */
 #define YY_NO_INPUT 1
 
@@ -966,14 +968,19 @@ makeprimitivetype(nc_type nctype)
 /* install sname in symbol table even if it is already there */
 Symbol*
 install(const char *sname)
+{
+    return installin(sname,currentgroup());
+}
+
+Symbol*
+installin(const char *sname, Symbol* grp)
 {
     Symbol* sp;
     sp = (Symbol*) ecalloc (sizeof (struct Symbol));
     sp->name = nulldup(sname);
     sp->lineno = lineno;
-    sp->location = currentgroup();
-    sp->container = currentgroup();
-    sp->var.special._Storage = NC_CONTIGUOUS;
+    sp->location = grp;
+    sp->container = grp;
     listpush(symlist,sp);
     return sp;
 }
@@ -1245,8 +1252,6 @@ makespecial(int tag, Symbol* vsym, Symbol* tsym, void* data, int isconst)
               globalspecials._Format = kvalue->k_flag;
 	      /*Main.*/format_attribute = 1;
               found = 1;
-	      if(kvalue->deprecated)
-		 fprintf(stderr,"_Format=%s is deprecated; use corresponding _Format=<name>\n",sdata);
               break;
             }
           }
@@ -1569,21 +1574,21 @@ done:
 
 #ifdef GENDEBUG1
 static void
-printfilters(int nfilters, NC_Filterspec** filters)
+printfilters(int nfilters, NC_FilterSpec** filters)
 {
     int i;
     fprintf(stderr,"xxx: nfilters=%lu: ",(unsigned long)nfilters);
     for(i=0;i<nfilters;i++) {
 	int k;
 	NC_Filterspec* sp = filters[i];
         fprintf(stderr,"{");
-        fprintf(stderr,"filterid=%s format=%s nparams=%lu params=%p",
+        fprintf(stderr,"filterid=%llu format=%d nparams=%lu params=%p",
 		sp->filterid,sp->format,(unsigned long)sp->nparams,sp->params);
 	if(sp->nparams > 0 && sp->params != NULL) {
             fprintf(stderr," params={");
             for(k=0;k<sp->nparams;k++) {
 	        if(i==0) fprintf(stderr,",");
-	        fprintf(stderr,"%s",sp->params[i]);
+	        fprintf(stderr,"%u",sp->params[i]);
 	    }
             fprintf(stderr,"}");
 	} else