Skip to content

Commit

Permalink
[NCF-293]
Browse files Browse the repository at this point in the history
Allow .cdl files to have a leading utf-8 BOM.
Also add test.
  • Loading branch information
dmh committed Mar 8, 2014
1 parent deeca5f commit baade3e
Show file tree
Hide file tree
Showing 6 changed files with 124 additions and 5 deletions.
2 changes: 1 addition & 1 deletion configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -624,7 +624,7 @@ AC_HEADER_STDBOOL
# Check for these functions...
AC_CHECK_FUNCS([strlcat strerror snprintf strchr strrchr strcat strcpy \
strdup strcasecmp strtod strtoll strtoull strstr \
mkstemp rand \
mkstemp rand memcmp \
getrlimit gettimeofday fsync MPI_Comm_f2c])

# Does the user want to use NC_DISKLESS?
Expand Down
4 changes: 2 additions & 2 deletions ncdump/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ TARGET_LINK_LIBRARIES(ncdump netcdf ${ALL_TLL_LIBS})
TARGET_LINK_LIBRARIES(nccopy netcdf ${ALL_TLL_LIBS})

IF(ENABLE_TESTS)
# rewrite-scalar.c and bom.c each define their own main(), so they must be
# built as two separate executables; putting both sources in one target
# fails at link time with "multiple definition of `main'".
ADD_EXECUTABLE(rewrite-scalar rewrite-scalar.c)
TARGET_LINK_LIBRARIES(rewrite-scalar netcdf)
# bom writes a byte-order mark to stdout; tst_bom.sh runs it as ./bom.
ADD_EXECUTABLE(bom bom.c)
# Base tests
# The tests are set up as a combination of shell scripts and executables that
Expand All @@ -58,8 +58,8 @@ IF(ENABLE_TESTS)
add_sh_test(ncdump tst_charfill)
add_sh_test(ncdump tst_iter)
add_sh_test(ncdump tst_formatx3)
add_sh_test(ncdump tst_bom)


IF(EXTRA_TESTS)
add_sh_test(ncdump run_back_comp_tests)
ENDIF()
Expand Down
4 changes: 2 additions & 2 deletions ncdump/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@ man_MANS = ncdump.1 nccopy.1
if BUILD_TESTSETS
#if !BUILD_DLL
# These tests are run for both netCDF-4 and non-netCDF-4 builds.
check_PROGRAMS = rewrite-scalar ctest ctest64 ncdump tst_utf8
check_PROGRAMS = rewrite-scalar ctest ctest64 ncdump tst_utf8 bom
TESTS = run_tests.sh tst_64bit.sh ctest ctest64 tst_output.sh \
tst_lengths.sh tst_calendars.sh tst_utf8 run_utf8_tests.sh \
tst_nccopy3.sh tst_charfill.sh tst_iter.sh tst_formatx3.sh
tst_nccopy3.sh tst_charfill.sh tst_iter.sh tst_formatx3.sh tst_bom.sh

if LARGE_FILE_TESTS
TESTS += tst_iter.sh
Expand Down
33 changes: 33 additions & 0 deletions ncdump/bom.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*********************************************************************
* Copyright 1993, UCAR/Unidata
* See netcdf/COPYRIGHT file for copying and redistribution conditions.
*********************************************************************/

#include <config.h>
#include <stdlib.h>
#include <stdio.h>

/* BOM sequences; re: http://www.unicode.org/faq/utf_bom.html#BOM
   Some of these contain NUL bytes, so they must always be used together
   with an explicit byte count, never with strlen(). */
static char* U8 = "\xEF\xBB\xBF"; /* UTF-8; 3 bytes */
static char* BE32 = "\x00\x00\xFE\xFF"; /* UTF-32; big-endian; 4 bytes */
static char* LE32 = "\xFF\xFE\x00\x00"; /* UTF-32; little-endian; 4 bytes */
static char* BE16 = "\xFE\xFF"; /* UTF-16; big-endian; 2 bytes */
static char* LE16 = "\xFF\xFE"; /* UTF-16; little-endian; 2 bytes */

/*
 * Write a byte-order mark (BOM) to stdout.
 *
 * The first character of the optional first argument selects the BOM:
 *   '1' -> UTF-16 big-endian (2 bytes)
 *   '3' -> UTF-32 big-endian (4 bytes)
 *   anything else, or no argument -> UTF-8 (3 bytes)
 *
 * Returns 0 on success, 1 if the BOM could not be written.
 */
int
main(int argc, char** argv)
{
    char* bom = U8;
    size_t bomlen = 3;

    if(argc > 1) {
        /* Only the first character is examined; an empty argument has
           '\0' there and falls through to the default (UTF-8). */
        switch (argv[1][0]) {
        case '1': bom = BE16; bomlen = 2; break;
        case '3': bom = BE32; bomlen = 4; break; /* all 4 bytes, not just the leading NULs */
        default: break;
        }
    }
    /* Report a short write or failed flush instead of silently emitting
       a truncated BOM. */
    if(fwrite(bom,1,bomlen,stdout) != bomlen || fflush(stdout) != 0)
        return 1;
    return 0;
}

54 changes: 54 additions & 0 deletions ncdump/tst_bom.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!/bin/sh
# This shell script tests BOM support in ncgen:
#  1. a .cdl file with a leading UTF-8 BOM must be accepted and must
#     produce the same data as the BOM-less file;
#  2. a .cdl file with a leading UTF-16 (big-endian) BOM must be rejected.
# The BOM bytes are produced by the ./bom helper program.

set -e

if test "x$srcdir" = "x"; then
    # Quote $0 so a path containing spaces is not split.
    srcdir=`dirname "$0"`
fi
# add hack for sunos
export srcdir

echo ""

rm -f tst_bom.cdl tmp.cdl tst_bom8.* tst_bom16.*

# Reference CDL without a BOM; it is also the expected ncdump output.
# Truncate (>) rather than append so the content never depends on leftovers.
cat <<EOF >tst_bom.cdl
netcdf tst_bom {
variables:
float f;
data:
f = 1;
}
EOF

echo "*** Generate a cdl file with leading UTF-8 BOM."
./bom 8 >tst_bom8.cdl
cat tst_bom.cdl >> tst_bom8.cdl

echo "*** Verify .nc file"
../ncgen/ncgen -k1 -o tst_bom8.nc tst_bom8.cdl
# -n forces the dataset name so the dump can be compared with the reference.
../ncdump/ncdump -n tst_bom tst_bom8.nc > tmp.cdl
diff -w tst_bom.cdl tmp.cdl

# Do it again but with Big-Endian 16; should fail

rm -f tmp.cdl tst_bom8.* tst_bom16.*

echo "*** Generate a cdl file with leading UTF-16 BOM."
./bom 16 >tst_bom16.cdl
cat tst_bom.cdl >> tst_bom16.cdl

echo "*** Verify UTF-16 file fails"
# Running ncgen as the if-condition keeps its expected failure from
# tripping "set -e".
if ../ncgen/ncgen -k1 -o tst_bom16.nc tst_bom16.cdl ; then
    echo 'BOM Big Endian 16 succeeded, but should not'
    exit 1
else
    echo '***XFAIL: BOM Big Endian 16'
fi

# Cleanup
rm -f tst_bom.cdl tmp.cdl tst_bom8.* tst_bom16.*

exit 0
32 changes: 32 additions & 0 deletions ncgen/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,13 @@ struct Languages legallanguages[] = {
};
#endif

/* BOM sequences; re: http://www.unicode.org/faq/utf_bom.html#BOM
   Some of these contain NUL bytes, so they must always be used together
   with an explicit byte count, never with strlen(). */
static char* U8 = "\xEF\xBB\xBF"; /* UTF-8; 3 bytes */
static char* BE32 = "\x00\x00\xFE\xFF"; /* UTF-32; big-endian; 4 bytes */
static char* LE32 = "\xFF\xFE\x00\x00"; /* UTF-32; little-endian; 4 bytes */
static char* BE16 = "\xFE\xFF"; /* UTF-16; big-endian; 2 bytes */
static char* LE16 = "\xFF\xFE"; /* UTF-16; little-endian; 2 bytes */

/* The default minimum iterator size depends
on whether we are doing binary or language
based output.
Expand Down Expand Up @@ -371,11 +378,36 @@ main(

fp = stdin;
if (argc > 0 && strcmp(argv[0], "-") != 0) {
char bom[4];
size_t count;
if ((fp = fopen(argv[0], "r")) == NULL) {
derror ("can't open file %s for reading: ", argv[0]);
perror("");
return(7);
}
/* Check the leading bytes for an occurrence of a BOM */
/* re: http://www.unicode.org/faq/utf_bom.html#BOM */
/* Attempt to read the first four bytes */
memset(bom,0,sizeof(bom));
count = fread(bom,1,2,fp);
if(count == 2) {
switch (bom[0]) {
case '\x00':
case '\xFF':
case '\xFE':
/* Only UTF-* is allowed; complain and exit */
fprintf(stderr,"Input file contains a BOM indicating a non-UTF8 encoding\n");
return 1;
case '\xEF':
/* skip the BOM */
fread(bom,1,1,fp);
break;
default: /* legal printable char, presumably; rewind */
rewind(fp);
break;
}
}

cdlname = (char*)emalloc(NC_MAX_NAME);
cdlname = nulldup(argv[0]);
if(strlen(cdlname) > NC_MAX_NAME) cdlname[NC_MAX_NAME] = '\0';
Expand Down

3 comments on commit baade3e

@nschloe
Copy link
Contributor

@nschloe nschloe commented on baade3e Mar 8, 2014

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This commit breaks tests,

Linking C executable rewrite-scalar
CMakeFiles/rewrite-scalar.dir/bom.c.o: In function `main':
/home/nschloe/software/netcdf/dev/ncdump/bom.c:19: multiple definition of `main'
CMakeFiles/rewrite-scalar.dir/rewrite-scalar.c.o:/home/nschloe/software/netcdf/dev/ncdump/rewrite-scalar.c:19: first defined here

(How about setting up a travis-ci instance and hooking it up with Github? http://docs.travis-ci.com/user/getting-started/)

@DennisHeimbigner
Copy link
Collaborator

@DennisHeimbigner DennisHeimbigner commented on baade3e Mar 8, 2014 via email

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@nschloe
Copy link
Contributor

@nschloe nschloe commented on baade3e Mar 8, 2014

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice.
Setting up a travis-ci file would have the additional benefit of being tightly integrated with GitHub by, e.g., showing green or red lights for pull requests. Also, since you already maintain a rather concise testing framework, adding a .travis.yml file would be rather trivial. In fact, I think I might start playing around with it now. :)

Please sign in to comment.