Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve provenance information #1124

Merged
merged 5 commits into from
Sep 6, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,7 @@ SET(CHUNK_CACHE_PREEMPTION 0.75 CACHE STRING "Default file chunk cache preemptio
SET(MAX_DEFAULT_CACHE_SIZE 67108864 CACHE STRING "Default maximum cache size.")
SET(NETCDF_LIB_NAME "" CACHE STRING "Default name of the netcdf library.")
SET(TEMP_LARGE "." CACHE STRING "Where to put large temp files if large file tests are run.")
# Extra pairs for the _NCProperties attribute; empty by default.
# The cache entry type must be one of CMake's known types (STRING here).
SET(NCPROPERTIES_EXTRA "" CACHE STRING "Specify extra pairs for _NCProperties.")
SET(MPIEXEC "mpiexec" CACHE STRING "Command to run MPI programs if parallel tests are run.")

IF(NOT NETCDF_LIB_NAME STREQUAL "")
Expand Down
2 changes: 1 addition & 1 deletion RELEASE_NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ This file contains a high-level description of this package's evolution. Release

## 4.7.0 - TBD


* [Enhancement] Create a new version of _NCProperties provenance attribute. This version (version 2) supports arbitrary key-value pairs. It is the default when new files are created. Version 1 continues to be accepted.
* [Enhancement] Allow user to set http read buffersize for DAP2 and DAP4 using the tag HTTP.READ.BUFFERSIZE in the .daprc file.
* [Enhancement] Allow user to set http keepalive for DAP2 and DAP4 using the tag HTTP.KEEPALIVE in the .daprc file (see the OPeNDAP documentation for details).
* [Enhancement] Support DAP4 remote tests using a new remote test server located on the Unidata JetStream project.
Expand Down
14 changes: 4 additions & 10 deletions cf
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
DB=1
#X=-x

#FAST=1
FAST=1
#PROF=1

HDF5=1
Expand Down Expand Up @@ -120,6 +120,7 @@ FLAGS="$FLAGS --enable-logging"
#FLAGS="$FLAGS --disable-silent-rules"
#FLAGS="$FLAGS --disable-filter-testing"
#FLAGS="$FLAGS --enable-metadata-perf"
#FLAGS="$FLAGS --with-ncproperties-extra=key1=value1,key2=value2"

if test "x$TESTSERVERS" != x ; then
FLAGS="$FLAGS --with-testservers=$TESTSERVERS"
Expand Down Expand Up @@ -184,9 +185,6 @@ CFLAGS="${CFLAGS} -pg"
LDFLAGS="${LDFLAGS} -pg"
fi


#FLAGS="${FLAGS} --enable-stdio"

export PATH
export CC
export CPPFLAGS
Expand All @@ -199,17 +197,13 @@ export CXXFLAGS
DISTCHECK_CONFIGURE_FLAGS="$FLAGS"
export DISTCHECK_CONFIGURE_FLAGS

if test "x$NB" != x -o "x$FAST" = x ; then
if test "x$NB" != x1 && test "x$FAST" != x1 ; then
${MAKE} distclean >/dev/null 2>&1
fi
if test -z "$NB" ; then
if test "x$NB" != x1 ; then
if autoreconf -i --force ; then ok=1; else exit ; fi
fi

if test -z "$FAST" ; then
if test -f Makefile ; then ${MAKE} distclean >/dev/null 2>&1 ; fi
fi

sh $X ./configure ${FLAGS}
for c in $cmds; do
printenv LD_LIBRARY_PATH
Expand Down
7 changes: 6 additions & 1 deletion cf.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ DAP=1
#CDF5=1
#HDF4=1

export SETX=1

for arg in "$@" ; do
case "$arg" in
vs|VS) VS=1 ;;
Expand Down Expand Up @@ -60,14 +62,17 @@ FLAGS="$FLAGS -DENABLE_DAP_REMOTE_TESTS=true"
FLAGS="$FLAGS -DENABLE_LOGGING=true"
#FLAGS="$FLAGS -DENABLE_DOXYGEN=true -DENABLE_INTERNAL_DOCS=true"
#FLAGS="$FLAGS -DENABLE_LARGE_FILE_TESTS=true"
FLAGS="$FLAGS -DENABLE_FILTER_TESTING=true"
#FLAGS="$FLAGS -DENABLE_FILTER_TESTING=true"

# Disables
FLAGS="$FLAGS -DENABLE_EXAMPLES=false"
FLAGS="$FLAGS -DENABLE_CONVERSION_WARNINGS=false"
#FLAGS="$FLAGS -DENABLE_TESTS=false"
#FLAGS="$FLAGS -DENABLE_DISKLESS=false"

# Withs
FLAGS="$FLAGS -DNCPROPERTIES_EXTRA=\"key1=value1|key2=value2\""

rm -fr build
mkdir build
cd build
Expand Down
3 changes: 3 additions & 0 deletions config.h.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,9 @@ are set when opening a binary file on Windows. */
/* min blocksize for posixio. */
#cmakedefine NCIO_MINBLOCKSIZE ${NCIO_MINBLOCKSIZE}

/* Add extra properties to _NCProperties attribute */
#cmakedefine NCPROPERTIES_EXTRA ${NCPROPERTIES_EXTRA}

/* no IEEE float on this platform */
#cmakedefine NO_IEEE_FLOAT 1

Expand Down
12 changes: 12 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,18 @@ AC_MSG_RESULT($TEMP_LARGE)
#AC_SUBST(TEMP_LARGE)
AC_DEFINE_UNQUOTED([TEMP_LARGE], ["$TEMP_LARGE"], [Place to put very large netCDF test files.])

# Specify extra values to add to the _NCProperties attribute:
#   --with-ncproperties-extra="<name>=<value>,..."
# The value given by the user is exported as the C string macro
# NCPROPERTIES_EXTRA; when the option is not given the macro is "".
# Note: need to figure out a way to do this programmatically also
AC_MSG_CHECKING([Extra values for _NCProperties])
AC_ARG_WITH([ncproperties-extra],
[AS_HELP_STRING([--with-ncproperties-extra="<name>=<value>,..."],
[specify extra pairs for _NCProperties])],
[NCPROPERTIES_EXTRA=$with_ncproperties_extra],
[NCPROPERTIES_EXTRA=""])
AC_MSG_RESULT([$NCPROPERTIES_EXTRA])
AC_DEFINE_UNQUOTED([NCPROPERTIES_EXTRA], ["$NCPROPERTIES_EXTRA"], [Extra pairs for _NCProperties])

# Did the user specify a user-defined format 0?
AC_MSG_CHECKING([whether user-defined format 0 was specified])
AC_ARG_WITH([udf0],
Expand Down
28 changes: 17 additions & 11 deletions docs/attribute_conventions.md
Original file line number Diff line number Diff line change
Expand Up @@ -161,18 +161,24 @@ Using the following API calls will fail.

`_NCProperties`

> This attribute is persistent in the file, but hidden. It is inserted in the file at creation time and is never modified after that point. It specifies the following.
> - The version for the netcdf library used at creation time.
> - The version for the HDF5 library used at creation time.
> - The type of this attribute is NC_CHAR.

> Its format is: `name=value|name=value ...`<br>
> Occurrences of '|' in the name or value are disallowed.
> This attribute is persistent in the file, but hidden. It is inserted in the file at creation time and is never modified after that point. The type of this attribute is currently NC_CHAR. There are two versions of this property, but both have the general form
>> version=n,key=value,key=value...,key=value
> where the version number n is either 1 or 2.

> The current set of known names is as follows.
> - version=... The current format version for the _NCProperties file, currently 1.
> - netcdflibversion=... The version of the netcdf library used to create the file. The value is, for example, 4.4.1-rc1-development or 4.4.1.
> - hdf5libversion=... The version of the HDF5 library used to create the file. The value is, for example, 1.8.16 or 1.10.0.
> Version 1 has two (key,value) pairs (after the initial version pair)
> - netcdflibversion={netcdfversion} where the version number is the version for the netcdf library used at creation time.
> - hdf5libversion={hdf5version} where the version number is the version for the hdf5 library used at creation time.
> - The version for the HDF5 library used at creation time.
> - Note that for version 1, the separator character is '|' instead of ','
> - Occurrences of '=' or '|' in the name or value are disallowed.

> Version 2 has an unlimited set of (key,value) pairs (after the initial version pair). By convention (but unenforced), the first pair is the name and version of the primary library used to create this file. For netcdf, it has the form _netcdf={version}_. The remaining fields are by convention as follows.
> - If the primary build library is netcdf, then the second pair is of the form _hdf5={version}_. The remaining pairs consist of a combination of the name and version of important supporting libraries -- the libcurl version, for example -- plus an arbitrary set of pairs as specified by the _--with-ncproperties-extra_ option to the ./configure command. Note that the argument to --with-ncproperties-extra should be wrapped with double quotes, like this.
>> _./configure ... --with-ncproperties-extra="key1=value,key2=value"_
>
> Note the following for version 2.
> - The pair separator character was changed from '|' to ',' because of problems with bash, which did not like '|' in the --with-ncproperties-extra value.
> - It is possible to include escaped characters using the standard '\' escape convention.

`_SuperblockVersion`

Expand Down
6 changes: 6 additions & 0 deletions include/hdf5internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,15 @@ typedef struct NC_HDF5_FILE_INFO
int rec_detach_scales(NC_GRP_INFO_T *grp, int dimid, hid_t dimscaleid);
int rec_reattach_scales(NC_GRP_INFO_T *grp, int dimid, hid_t dimscaleid);


/* Used by NC4_set_provenance */
int nc4_put_att(NC_GRP_INFO_T* grp, int varid, const char *name, nc_type file_type,
size_t len, const void *data, nc_type mem_type, int force);

/* In-memory functions */
extern hid_t NC4_image_init(NC_FILE_INFO_T* h5);
extern void NC4_image_finalize(void*);

/* These functions are internal to the libhdf5 directory. */
int nc4_detect_preserve_dimids(NC_GRP_INFO_T *grp, nc_bool_t *bad_coord_orderp);
int hdf5_set_log_level();
Expand Down
77 changes: 57 additions & 20 deletions include/nc4internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ typedef enum {NCNAT, NCVAR, NCDIM, NCATT, NCTYP, NCFLD, NCGRP} NC_SORT;
typedef enum {NC_FALSE = 0, NC_TRUE = 1} nc_bool_t;

/*Forward*/
struct NCFILEINFO;
struct NCPROVENANCE;
struct NC_GRP_INFO;
struct NC_TYPE_INFO;

Expand Down Expand Up @@ -299,7 +299,7 @@ typedef struct NC_FILE_INFO
NClist* alltypes;
NClist* allgroups; /* including root group */
void *format_file_info;
struct NCFILEINFO* fileinfo;
struct NCPROVENANCE* provenance;
struct NC4_Memio {
NC_memio memio; /* What we sent to image_init and what comes back*/
int locked; /* do not copy and do not free */
Expand Down Expand Up @@ -441,8 +441,10 @@ typedef struct NC_reservedatt {
#define DIMSCALEFLAG 1
/* Readonly global attributes; readable, but immutable thru the API */
#define READONLYFLAG 2
/* Subset of readonly flags; readable by name only thru the API*/
/* Subset of readonly flags; readable by name only thru the API */
#define NAMEONLYFLAG 4
/* Subset of readonly flags; Value is actually in file */
#define MATERIALIZEDFLAG 8

/* Binary searcher for reserved attributes */
extern const NC_reservedatt* NC_findreserved(const char* name);
Expand All @@ -465,38 +467,73 @@ For netcdf4 files, capture state information about the following:
5. Per file: _NCProperties attribute
*/

#define NCPROPS "_NCProperties"
#define NCPROPS_VERSION (1)
#define NCPROPSSEP '|'
/* Most of this needs to be moved to hdf5internal.h */

/* Currently used properties */
#define NCPROPS "_NCProperties"
#define NCPVERSION "version" /* Of the properties format */
#define NCPHDF5LIBVERSION "hdf5libversion"
#define NCPNCLIBVERSION "netcdflibversion"
#define NCPHDF5LIB1 "hdf5libversion"
#define NCPNCLIB1 "netcdflibversion"
#define NCPHDF5LIB2 "hdf5"
#define NCPNCLIB2 "netcdf"
#define NCPROPS_VERSION (2)
/* Version 2 changes this because '|' was causing bash problems */
#define NCPROPSSEP1 '|'
#define NCPROPSSEP2 ','


/* Other hidden attributes */
#define ISNETCDF4ATT "_IsNetcdf4"
#define SUPERBLOCKATT "_SuperblockVersion"

struct NCFILEINFO {
struct NCPROVENANCE {
int superblockversion;
/* Following is filled from NCPROPS attribute or from global version */
struct NCPROPINFO {
int version; /* 0 => not defined */
char hdf5ver[NC_MAX_NAME+1];
char netcdfver[NC_MAX_NAME+1];
} propattr;
struct NCPROPINFO {
int version; /* 0 => not defined */
/* Following is filled from NCPROPS attribute or from global version */
/* Version 1 format is:
"netcdflibversion=<version>|hdf5libversion=<version>"
Version 2 format is:
"<mainbuildlib>=<version>,<supportlib1>=<version>...,<other>=..."
*/
/* The _NCProperties values are stored as an arbitrary
set of (key,value) pairs */
/* It is assumed that the first entry is the primary library
used to build the file, and it is followed by other libraries
used in the build, and finally an arbitrary list of other
(key,value) pairs. */
NClist* properties;
} propattr;
};

/* Provenance Initialization */
extern struct NCPROPINFO globalpropinfo;

extern int NC4_fileinfo_init(void); /*libsrc4/ncinfo.c*/
extern int NC4_get_fileinfo(struct NC_FILE_INFO* info, struct NCPROPINFO*); /*libsrc4/ncinfo.c*/
extern int NC4_put_propattr(struct NC_FILE_INFO* info); /*libsrc4/ncinfo.c*/
extern int NC4_buildpropinfo(struct NCPROPINFO* info,char** propdatap);
/* Initialize the fileinfo global state */
extern int NC4_provenance_init();

/* Write the properties attribute to file. */
extern int NC4_put_ncproperties(NC_FILE_INFO_T* file);

/* Extract the provenance from a file, using dfalt as default */
extern int NC4_get_provenance(NC_FILE_INFO_T* file, const char* propstring, const struct NCPROPINFO* dfalt);

/* Set the provenance for a created file using dfalt as default */
extern int NC4_set_provenance(NC_FILE_INFO_T* file, const struct NCPROPINFO* dfalt);

/* Recover memory of an NCPROVENANCE object */
extern int NC4_free_provenance(struct NCPROVENANCE* prov);

extern int NC4_hdf5get_libversion(unsigned*,unsigned*,unsigned*);/*libsrc4/nc4hdf.c*/
extern int NC4_hdf5get_superblock(struct NC_FILE_INFO*, int*);/*libsrc4/nc4hdf.c*/
extern int NC4_isnetcdf4(struct NC_FILE_INFO*); /*libsrc4/nc4hdf.c*/

/* Convert a NCPROPINFO instance to a single string. */
extern int NC4_buildpropinfo(struct NCPROPINFO* info, char** propdatap);

/* Use HDF5 API to read the _NCProperties attribute */
extern int NC4_read_ncproperties(NC_FILE_INFO_T*);

/* Use HDF5 API to write the _NCProperties attribute */
extern int NC4_write_ncproperties(NC_FILE_INFO_T*);

#endif /* _NC4INTERNAL_ */
Loading