Skip to content

Commit

Permalink
Merge pull request #478 from wkliao/cdf5_var_len
Browse files Browse the repository at this point in the history
CDF-5 fix: let NC_var.len be the true size of variable
  • Loading branch information
WardF committed Feb 4, 2018
2 parents 8c6defe + be87124 commit b4a2947
Show file tree
Hide file tree
Showing 10 changed files with 243 additions and 72 deletions.
46 changes: 27 additions & 19 deletions configure.ac
Expand Up @@ -438,22 +438,6 @@ fi

AM_CONDITIONAL(USE_STRICT_NULL_BYTE_HEADER_PADDING, [test x$enable_strict_null_byte_header_padding = xyes ])

# Check whether we want to enable CDF5 support.
AC_MSG_CHECKING([whether CDF5 support should be enabled (default off)])
AC_ARG_ENABLE([cdf5],
[AS_HELP_STRING([--enable-cdf5],
[build with CDF5 support.])])
test "x$enable_cdf5" = xyes || enable_cdf5=no
AC_MSG_RESULT($enable_cdf5)

if test "x$enable_cdf5" = xyes; then
AC_DEFINE([USE_CDF5], [1], [if true, enable CDF5 Support])
AC_DEFINE([ENABLE_CDF5], [1], [if true, enable CDF5 Support])
fi

AM_CONDITIONAL(USE_CDF5, [test x$enable_cdf5 = xyes ])
AM_CONDITIONAL(ENABLE_CDF5, [test x$enable_cdf5 = xyes ])

# Does the user want to use the ffio module?
AC_MSG_CHECKING([whether FFIO will be used])
AC_ARG_ENABLE([ffio],
Expand Down Expand Up @@ -882,8 +866,32 @@ $SLEEPCMD
AC_CHECK_SIZEOF(size_t)
$SLEEPCMD
AC_CHECK_SIZEOF(unsigned long long)
$SLEEPCMD
AC_CHECK_SIZEOF(unsigned long long)

# Check whether we want to enable CDF5 support.
AC_MSG_CHECKING([whether CDF5 support should be enabled])
AC_ARG_ENABLE([cdf5],
[AS_HELP_STRING([--disable-cdf5],
[build without CDF5 support.])],
[enable_cdf5=${enableval}], [enable_cdf5=auto]
)
if test "x${enable_cdf5}" = xyes && test "$ac_cv_sizeof_size_t" -lt "8" ; then
dnl unable to support CDF5, but --enable-cdf5 is explicitly set
AC_MSG_ERROR([Unable to support CDF5 feature because size_t is less than 4 bytes])
fi
if test "$ac_cv_sizeof_size_t" -lt "8" ; then
enable_cdf5=no
else
enable_cdf5=yes
fi
AC_MSG_RESULT($enable_cdf5)

if test "x${enable_cdf5}" = xyes; then
AC_DEFINE([USE_CDF5], [1], [if true, enable CDF5 Support])
AC_DEFINE([ENABLE_CDF5], [1], [if true, enable CDF5 Support])
fi

AM_CONDITIONAL(USE_CDF5, [test x$enable_cdf5 = xyes ])
AM_CONDITIONAL(ENABLE_CDF5, [test x$enable_cdf5 = xyes ])

$SLEEPCMD
if test "$ac_cv_type_uchar" = yes ; then
Expand Down Expand Up @@ -1371,7 +1379,7 @@ AC_SUBST(HAS_DISKLESS,[$enable_diskless])
AC_SUBST(HAS_MMAP,[$enable_mmap])
AC_SUBST(HAS_JNA,[$enable_jna])
AC_SUBST(RELAX_COORD_BOUND,[$enable_relax_coord_bound])
AC_SUBST(ENABLE_ERANGE_FILL,[$enable_erange_fill])
AC_SUBST(HAS_ERANGE_FILL,[$enable_erange_fill])

# Include some specifics for netcdf on windows.
#AH_VERBATIM([_WIN32_STRICMP],
Expand Down
3 changes: 3 additions & 0 deletions include/nc3internal.h
Expand Up @@ -428,6 +428,9 @@ nc_put_rec(int ncid, size_t recnum, void *const *datap);
extern int
NC_check_vlens(NC3_INFO *ncp);

extern int
NC_check_voffs(NC3_INFO *ncp);

/* Define accessors for the dispatchdata */
#define NC3_DATA(nc) ((NC3_INFO*)(nc)->dispatchdata)
#define NC3_DATA_SET(nc,data) ((nc)->dispatchdata = (void*)(data))
Expand Down
99 changes: 79 additions & 20 deletions libsrc/nc3internal.c
Expand Up @@ -312,21 +312,23 @@ fprintf(stderr, " REC %d %s: %ld\n", ii, (*vpp)->name->cp, (long)index);
return NC_EVARSIZE;
}
#endif
if((*vpp)->len != UINT32_MAX) /* flag for vars >= 2**32 bytes */
ncp->recsize += (*vpp)->len;
ncp->recsize += (*vpp)->len;
last = (*vpp);
}

/*
* for special case of
*/
if(last != NULL) {
if(ncp->recsize == last->len) { /* exactly one record variable, pack value */
ncp->recsize = *last->dsizes * last->xsz;
} else if(last->len == UINT32_MAX) { /* huge last record variable */
ncp->recsize += *last->dsizes * last->xsz;
}
}
/*
* for special case (Check CDF-1 and CDF-2 file format specifications.)
* "A special case: Where there is exactly one record variable, we drop the
* requirement that each record be four-byte aligned, so in this case there
* is no record padding."
*/
if (last != NULL) {
if (ncp->recsize == last->len) {
/* exactly one record variable, pack value */
ncp->recsize = *last->dsizes * last->xsz;
}
}

if(NC_IsNew(ncp))
NC_set_numrecs(ncp, 0);
return NC_NOERR;
Expand Down Expand Up @@ -709,17 +711,14 @@ NC_check_vlens(NC3_INFO *ncp)
if(ncp->vars.nelems == 0)
return NC_NOERR;

if (fIsSet(ncp->flags,NC_64BIT_DATA)) {
/* CDF5 format allows many large vars */
return NC_NOERR;
}
if (fIsSet(ncp->flags,NC_64BIT_OFFSET) && sizeof(off_t) > 4) {
if (fIsSet(ncp->flags,NC_64BIT_DATA)) /* CDF-5 */
vlen_max = X_INT64_MAX - 3; /* "- 3" handles rounded-up size */
else if (fIsSet(ncp->flags,NC_64BIT_OFFSET) && sizeof(off_t) > 4)
/* CDF2 format and LFS */
vlen_max = X_UINT_MAX - 3; /* "- 3" handles rounded-up size */
} else {
/* CDF1 format */
else /* CDF1 format */
vlen_max = X_INT_MAX - 3;
}

/* Loop through vars, first pass is for non-record variables. */
large_vars_count = 0;
rec_vars_count = 0;
Expand All @@ -728,6 +727,8 @@ NC_check_vlens(NC3_INFO *ncp)
if( !IS_RECVAR(*vpp) ) {
last = 0;
if( NC_check_vlen(*vpp, vlen_max) == 0 ) {
if (fIsSet(ncp->flags,NC_64BIT_DATA)) /* too big for CDF-5 */
return NC_EVARSIZE;
large_vars_count++;
last = 1;
}
Expand Down Expand Up @@ -756,6 +757,8 @@ NC_check_vlens(NC3_INFO *ncp)
if( IS_RECVAR(*vpp) ) {
last = 0;
if( NC_check_vlen(*vpp, vlen_max) == 0 ) {
if (fIsSet(ncp->flags,NC_64BIT_DATA)) /* too big for CDF-5 */
return NC_EVARSIZE;
large_vars_count++;
last = 1;
}
Expand All @@ -774,6 +777,59 @@ NC_check_vlens(NC3_INFO *ncp)
return NC_NOERR;
}

/*----< NC_check_voffs() >---------------------------------------------------*/
/*
* Given a valid ncp, check whether the file starting offsets (begin) of all
* variables follows the same increasing order as they were defined.
*/
int
NC_check_voffs(NC3_INFO *ncp)
{
size_t i;
off_t prev_off;
NC_var *varp;

if (ncp->vars.nelems == 0) return NC_NOERR;

/* Loop through vars, first pass is for non-record variables */
prev_off = ncp->begin_var;
for (i=0; i<ncp->vars.nelems; i++) {
varp = ncp->vars.value[i];
if (IS_RECVAR(varp)) continue;

if (varp->begin < prev_off) {
#if 0
fprintf(stderr,"Variable \"%s\" begin offset (%lld) is less than previous variable end offset (%lld)\n", varp->name->cp, varp->begin, prev_off);
#endif
return NC_ENOTNC;
}
prev_off = varp->begin + varp->len;
}

if (ncp->begin_rec < prev_off) {
#if 0
fprintf(stderr,"Record variable section begin offset (%lld) is less than fix-sized variable section end offset (%lld)\n", varp->begin, prev_off);
#endif
return NC_ENOTNC;
}

/* Loop through vars, second pass is for record variables */
prev_off = ncp->begin_rec;
for (i=0; i<ncp->vars.nelems; i++) {
varp = ncp->vars.value[i];
if (!IS_RECVAR(varp)) continue;

if (varp->begin < prev_off) {
#if 0
fprintf(stderr,"Variable \"%s\" begin offset (%lld) is less than previous variable end offset (%lld)\n", varp->name->cp, varp->begin, prev_off);
#endif
return NC_ENOTNC;
}
prev_off = varp->begin + varp->len;
}

return NC_NOERR;
}

/*
* End define mode.
Expand All @@ -794,6 +850,9 @@ NC_endef(NC3_INFO *ncp,
if(status != NC_NOERR)
return status;
status = NC_begins(ncp, h_minfree, v_align, v_minfree, r_align);
if(status != NC_NOERR)
return status;
status = NC_check_voffs(ncp);
if(status != NC_NOERR)
return status;

Expand Down
26 changes: 18 additions & 8 deletions libsrc/v1hpg.c
Expand Up @@ -939,6 +939,7 @@ static int
v1h_put_NC_var(v1hs *psp, const NC_var *varp)
{
int status;
size_t vsize;

status = v1h_put_NC_string(psp, varp->name);
if(status != NC_NOERR)
Expand Down Expand Up @@ -975,9 +976,19 @@ v1h_put_NC_var(v1hs *psp, const NC_var *varp)
if(status != NC_NOERR)
return status;

status = v1h_put_size_t(psp, &varp->len);
if(status != NC_NOERR)
return status;
/* write vsize to header.
* CDF format specification: The vsize field is actually redundant, because
* its value may be computed from other information in the header. The
* 32-bit vsize field is not large enough to contain the size of variables
* that require more than 2^32 - 4 bytes, so 2^32 - 1 is used in the vsize
* field for such variables.
*/
vsize = varp->len;
if (varp->len > 4294967292UL && (psp->version == NC_FORMAT_CLASSIC ||
psp->version == NC_FORMAT_64BIT_OFFSET))
vsize = 4294967295UL; /* 2^32-1 */
status = v1h_put_size_t(psp, &vsize);
if(status != NC_NOERR) return status;

status = check_v1hs(psp, psp->version == 1 ? 4 : 8); /*begin*/
if(status != NC_NOERR)
Expand Down Expand Up @@ -1231,11 +1242,6 @@ NC_computeshapes(NC3_INFO* ncp)
{
if(first_rec == NULL)
first_rec = *vpp;
if((*vpp)->len == UINT32_MAX &&
(fIsSet(ncp->flags, NC_64BIT_OFFSET) ||
fIsSet(ncp->flags, NC_64BIT_DATA))) /* Flag for large last record */
ncp->recsize += (*vpp)->dsizes[0] * (*vpp)->xsz;
else
ncp->recsize += (*vpp)->len;
}
else
Expand Down Expand Up @@ -1538,6 +1544,10 @@ nc_get_NC(NC3_INFO* ncp)
if(status != NC_NOERR)
goto unwind_get;

status = NC_check_voffs(ncp);
if(status != NC_NOERR)
goto unwind_get;

unwind_get:
(void) rel_v1hs(&gs);
return status;
Expand Down
38 changes: 15 additions & 23 deletions libsrc/var.c
Expand Up @@ -485,29 +485,21 @@ NC_var_shape(NC_var *varp, const NC_dimarray *dims)


out :
if( varp->xsz <= (X_UINT_MAX - 1) / product ) /* if integer multiply will not overflow */
{
varp->len = product * varp->xsz;
switch(varp->type) {
case NC_BYTE :
case NC_CHAR :
case NC_UBYTE :
case NC_SHORT :
case NC_USHORT :
if( varp->len%4 != 0 )
{
varp->len += 4 - varp->len%4; /* round up */
/* *dsp += 4 - *dsp%4; */
}
break;
default:
/* already aligned */
break;
}
} else
{ /* OK for last var to be "too big", indicated by this special len */
varp->len = X_UINT_MAX;
}

/* No variable size can be > X_INT64_MAX - 3 */
if (0 == NC_check_vlen(varp, X_INT64_MAX-3)) return NC_EVARSIZE;

/*
* For CDF-1 and CDF-2 formats, the total number of array elements
* cannot exceed 2^32, unless this variable is the last fixed-size
* variable, there is no record variable, and the file starting
* offset of this variable is less than 2GiB.
* This will be checked in NC_check_vlens() during NC_endef()
*/
varp->len = product * varp->xsz;
if (varp->len % 4 > 0)
varp->len += 4 - varp->len % 4; /* round up */

#if 0
arrayp("\tshape", varp->ndims, varp->shape);
arrayp("\tdsizes", varp->ndims, varp->dsizes);
Expand Down
16 changes: 14 additions & 2 deletions nc_test/Makefile.am
Expand Up @@ -12,7 +12,6 @@ AM_CPPFLAGS += -DTOPBINDIR=${abs_top_bindir}
LDADD = ${top_builddir}/liblib/libnetcdf.la
AM_CPPFLAGS += -I$(top_builddir)/liblib -I$(top_builddir)/include -I$(top_srcdir)/libsrc


# Note which tests depend on other tests. necessary for make -j check
TEST_EXTENSIONS = .sh

Expand Down Expand Up @@ -46,7 +45,7 @@ test_write.c util.c error.h tests.h
# If the user asked for large file tests, then add them.
if LARGE_FILE_TESTS
TESTPROGRAMS += quick_large_files tst_big_var6 tst_big_var2 \
tst_big_rvar tst_big_var tst_large large_files tst_large_cdf5
tst_big_rvar tst_big_var tst_large large_files
endif # LARGE_FILE_TESTS

if BUILD_BENCHMARKS
Expand Down Expand Up @@ -111,6 +110,19 @@ tst_formatx.nc nc_test_cdf5.nc unlim.nc tst_inq_type.nc \
tst_elatefill.nc tst_global_fillval.nc tst_large_cdf5.nc \
tst_max_var_dims.nc benchmark.nc tst_def_var_fill.nc

EXTRA_DIST += bad_cdf5_begin.nc run_cdf5.sh
if ENABLE_CDF5
# bad_cdf5_begin.nc is a corrupted CDF-5 file with bad variable starting
# file offsets. It is to be used by tst_open_cdf5.c to check if it can
# detect and report error code NC_ENOTNC.
TESTS += run_cdf5.sh
check_PROGRAMS += tst_open_cdf5
if LARGE_FILE_TESTS
TESTPROGRAMS += tst_large_cdf5 tst_cdf5_begin
CLEANFILES += tst_large_cdf5.nc tst_cdf5_begin.nc
endif
endif

# Only clean these on maintainer-clean, because they require m4 to
# regenerate.
#MAINTAINERCLEANFILES = test_get.c test_put.c
Expand Down
Binary file added nc_test/bad_cdf5_begin.nc
Binary file not shown.
6 changes: 6 additions & 0 deletions nc_test/run_cdf5.sh
@@ -0,0 +1,6 @@
#!/bin/sh

set -e

./tst_open_cdf5 ${srcdir}/bad_cdf5_begin.nc

0 comments on commit b4a2947

Please sign in to comment.