-
Notifications
You must be signed in to change notification settings - Fork 261
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
enable compact storage for netcdf-4 vars #1570
Changes from 8 commits
82df287
bb1f5e1
1a665b6
1a1f537
8599484
06896f4
89b8981
e43a5d9
90324df
66a2b4c
e75c248
fb4a209
19fef32
fd604dd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,6 +22,9 @@ | |
* order. */ | ||
#define NC_TEMP_NAME "_netcdf4_temporary_variable_name_for_rename" | ||
|
||
/** Number of bytes in 64 MB. */ | ||
#define SIXTY_FOUR_MB (67108864) | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The limit for a compact data set is 64 KiB, not 64 MiB There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks, I will fix. |
||
#ifdef LOGGING | ||
/** | ||
* Report the chunksizes selected for a variable. | ||
|
@@ -707,41 +710,64 @@ nc_def_var_extra(int ncid, int varid, int *shuffle, int *deflate, | |
var->contiguous = NC_FALSE; | ||
} | ||
|
||
/* Does the user want a contiguous dataset? Not so fast! Make sure | ||
* that there are no unlimited dimensions, and no filters in use | ||
* for this data. */ | ||
if (contiguous && *contiguous) | ||
/* Handle storage settings. */ | ||
if (contiguous) | ||
{ | ||
if (var->deflate || var->fletcher32 || var->shuffle) | ||
return NC_EINVAL; | ||
|
||
for (d = 0; d < var->ndims; d++) | ||
if (var->dim[d]->unlimited) | ||
/* Does the user want a contiguous or compact dataset? Not so | ||
* fast! Make sure that there are no unlimited dimensions, and | ||
* no filters in use for this data. */ | ||
if (*contiguous) | ||
{ | ||
if (var->deflate || var->fletcher32 || var->shuffle) | ||
return NC_EINVAL; | ||
var->contiguous = NC_TRUE; | ||
} | ||
|
||
/* Chunksizes anyone? */ | ||
if (contiguous && *contiguous == NC_CHUNKED) | ||
{ | ||
var->contiguous = NC_FALSE; | ||
for (d = 0; d < var->ndims; d++) | ||
if (var->dim[d]->unlimited) | ||
return NC_EINVAL; | ||
} | ||
|
||
/* If the user provided chunksizes, check that they are not too | ||
* big, and that their total size of chunk is less than 4 GB. */ | ||
if (chunksizes) | ||
/* Handle chunked storage settings. */ | ||
if (*contiguous == NC_CHUNKED) | ||
{ | ||
var->contiguous = NC_FALSE; | ||
|
||
if ((retval = check_chunksizes(grp, var, chunksizes))) | ||
return retval; | ||
/* If the user provided chunksizes, check that they are not too | ||
* big, and that their total size of chunk is less than 4 GB. */ | ||
if (chunksizes) | ||
{ | ||
/* Check the chunksizes for validity. */ | ||
if ((retval = check_chunksizes(grp, var, chunksizes))) | ||
return retval; | ||
|
||
/* Ensure chunksize is smaller than dimension size */ | ||
for (d = 0; d < var->ndims; d++) | ||
if(!var->dim[d]->unlimited && var->dim[d]->len > 0 && chunksizes[d] > var->dim[d]->len) | ||
return NC_EBADCHUNK; | ||
/* Ensure chunksize is smaller than dimension size */ | ||
for (d = 0; d < var->ndims; d++) | ||
if (!var->dim[d]->unlimited && var->dim[d]->len > 0 && | ||
chunksizes[d] > var->dim[d]->len) | ||
return NC_EBADCHUNK; | ||
|
||
/* Set the chunksizes for this variable. */ | ||
/* Set the chunksizes for this variable. */ | ||
for (d = 0; d < var->ndims; d++) | ||
var->chunksizes[d] = chunksizes[d]; | ||
} | ||
} | ||
else if (*contiguous == NC_CONTIGUOUS) | ||
{ | ||
var->contiguous = NC_TRUE; | ||
} | ||
else if (*contiguous == NC_COMPACT) | ||
{ | ||
size_t ndata = 1; | ||
|
||
/* Ensure that total var is < 64 MB. */ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Another 'kb not mb', I can fix downstream easily enough. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good catch! I will fix... |
||
for (d = 0; d < var->ndims; d++) | ||
var->chunksizes[d] = chunksizes[d]; | ||
ndata *= var->dim[d]->len; | ||
|
||
/* Ensure var is small enough to fit in compact storage. */ | ||
if (ndata * var->type_info->size > SIXTY_FOUR_MB) | ||
return NC_EINVAL; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this error out if the dataset is too large, or should it fall back to NC_CONTIGUOUS. In CGNS, we fall back instead of erroring out; not sure which is best behavior. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In the netCDF API, when you ask for something specifically, and you can't have it, we give you an error. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That makes sense. In CGNS the library was in control and not the client. Would There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes NC_VARSIZE is a much better choice. I will change to that. |
||
|
||
var->contiguous = NC_FALSE; | ||
var->compact = NC_TRUE; | ||
} | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure if this matters in C, but if this were C++, it would save space in the struct (due to alignment concerns) to have all the
nc_bool_t
together instead of having theint parallel_access
in between. I haven't measured whether it makes any difference in C.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It looks like the
nc_bool_t
size is 4 bytes which is the same size as an int, so intermingling int and nc_bool_t doesn't change the size of the struct in this case. If thenc_bool_t
were changed to the stdbool-definedbool
, the size of the struct would drop by 40-bytes (something for the future...)There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nc_bool_t is just an int. If it ever changes, then whoever makes the change will be in charge of making the correct change. I don't generally code based on this kind of thinking. I can only code correctly, and hope that future netCDF programmers will do the same. ;-)