Permalink
Browse files

Merge pull request #509 from terrelln/dict-builder-32

Handle cover dictionary builder maximum input size for 32-bit mode
  • Loading branch information...
2 parents 5695850 + 8d98469 commit b8cdc169692137b2fdfbe4e98c66b433dd67c01f @Cyan4973 Cyan4973 committed on GitHub Jan 10, 2017
Showing with 9 additions and 4 deletions.
  1. +5 −3 lib/dictBuilder/cover.c
  2. +2 −0 lib/dictBuilder/zdict.h
  3. +2 −1 programs/dibio.c
@@ -28,7 +28,7 @@
/*-*************************************
* Constants
***************************************/
-#define COVER_MAX_SAMPLES_SIZE ((U32)-1)
+#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB))
/*-*************************************
* Console display
@@ -500,7 +500,9 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
/* Checks */
if (totalSamplesSize < d ||
- totalSamplesSize > (size_t)COVER_MAX_SAMPLES_SIZE) {
+ totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
+ DISPLAYLEVEL(1, "Total samples size is too large, maximum size is %u MB\n",
+ (COVER_MAX_SAMPLES_SIZE >> 20));
return 0;
}
/* Zero the context */
@@ -518,6 +520,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
/* The offsets of each file */
ctx->offsets = (size_t *)malloc((nbSamples + 1) * sizeof(size_t));
if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) {
+ DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n");
COVER_ctx_destroy(ctx);
return 0;
}
@@ -651,7 +654,6 @@ ZDICTLIB_API size_t COVER_trainFromBuffer(
/* Initialize context and activeDmers */
if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
parameters.d)) {
- DISPLAYLEVEL(1, "Failed to initialize context\n");
return ERROR(GENERIC);
}
if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
@@ -108,6 +108,7 @@ typedef struct {
The resulting dictionary will be saved into `dictBuffer`.
@return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
or an error code, which can be tested with ZDICT_isError().
+ Note : COVER_trainFromBuffer() requires about 9 bytes of memory for each input byte.
Tips : In general, a reasonable dictionary has a size of ~ 100 KB.
It's obviously possible to target smaller or larger ones, just by specifying a different `dictBufferCapacity`.
In general, it's recommended to provide a few thousand samples, but this can vary a lot.
@@ -131,6 +132,7 @@ ZDICTLIB_API size_t COVER_trainFromBuffer(void* dictBuffer, size_t dictBufferCap
@return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
or an error code, which can be tested with ZDICT_isError().
On success `*parameters` contains the parameters selected.
+ Note : COVER_optimizeTrainFromBuffer() requires about 9 bytes of memory for each input byte.
*/
ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
const void* samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
View
@@ -235,7 +235,8 @@ int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
void* const dictBuffer = malloc(maxDictSize);
size_t* const fileSizes = (size_t*)malloc(nbFiles * sizeof(size_t));
unsigned long long const totalSizeToLoad = DiB_getTotalCappedFileSize(fileNamesTable, nbFiles);
- size_t const maxMem = DiB_findMaxMem(totalSizeToLoad * MEMMULT) / MEMMULT;
+ size_t const memMult = params ? MEMMULT : COVER_MEMMULT;
+ size_t const maxMem = DiB_findMaxMem(totalSizeToLoad * memMult) / memMult;
size_t benchedSize = (size_t) MIN ((unsigned long long)maxMem, totalSizeToLoad);
void* const srcBuffer = malloc(benchedSize+NOISELENGTH);
int result = 0;

0 comments on commit b8cdc16

Please sign in to comment.