Skip to content

Commit

Permalink
Increased convert2bed intermediate buffer size and sort-bed stack all…
Browse files Browse the repository at this point in the history
…ocation for converting and sorting very-long reads from Nanopore inputs
  • Loading branch information
alexpreynolds committed May 4, 2017
1 parent 08884fa commit d45974c
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 5 deletions.
6 changes: 3 additions & 3 deletions applications/bed/conversion/src/convert2bed.c
Expand Up @@ -4985,7 +4985,7 @@ c2b_process_intermediate_bytes_by_lines(void *arg)
c2b_pipeline_stage_t *stage = (c2b_pipeline_stage_t *) arg;
c2b_pipeset_t *pipes = stage->pipeset;
char *src_buffer = NULL;
ssize_t src_buffer_size = C2B_MAX_LONGER_LINE_LENGTH_VALUE;
ssize_t src_buffer_size = C2B_THREAD_IO_BUFFER_SIZE;
ssize_t src_bytes_read = 0;
ssize_t remainder_length = 0;
ssize_t remainder_offset = 0;
Expand All @@ -4994,7 +4994,7 @@ c2b_process_intermediate_bytes_by_lines(void *arg)
ssize_t start_offset = 0;
ssize_t end_offset = 0;
char *dest_buffer = NULL;
ssize_t dest_buffer_size = C2B_MAX_LONGER_LINE_LENGTH_VALUE * C2B_MAX_LINES_VALUE;
ssize_t dest_buffer_size = C2B_THREAD_IO_BUFFER_SIZE * C2B_MAX_LINES_VALUE;
ssize_t dest_bytes_written = 0;
void (*line_functor)(char *, ssize_t *, char *, ssize_t) = stage->line_functor;
int exit_status = 0;
Expand Down Expand Up @@ -5078,7 +5078,7 @@ c2b_process_intermediate_bytes_by_lines(void *arg)

if (remainder_offset == -1) {
if (src_bytes_read + remainder_length == src_buffer_size) {
fprintf(stderr, "Error: Could not find newline in intermediate buffer; check input\n");
fprintf(stderr, "Error: Could not find newline in intermediate buffer; check input [%zu | %zu | %zu]\n", (unsigned long) src_bytes_read, (unsigned long) remainder_length, (unsigned long) src_buffer_size);
c2b_print_usage(stderr);
exit(EINVAL); /* Invalid argument (POSIX.1) */
}
Expand Down
16 changes: 14 additions & 2 deletions interfaces/general-headers/suite/BEDOPS.Constants.hpp
Expand Up @@ -26,12 +26,24 @@
#ifndef CONSTANTS_BEDOPS_H
#define CONSTANTS_BEDOPS_H


//
// Don't use these directly; they just synchronize C and C++'s uses below
// - just want to utilize C++'s type system explicitly
//
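// Running 'ulimit -s' on CentOS 7.2 and Mac OS X 10.12.4 shows that the smaller of
// the two stack size limits is 8192 kB (8388608 bytes). We size TOKENS_MAX_LENGTH to
// this minimum value minus 4 MB, to leave some stack space for other parts of a
// running binary.
//
// The literal 8372074 used for TOKEN_REST_MAX_LENGTH is 8388608 less the other terms
// of TOKENS_MAX_LENGTH (127 + 16383 + 2*12 = 16534), so that -- assuming the TOKEN_*
// names mirror the INT_* values below -- TOKENS_MAX_LENGTH sums to exactly
// 8388608 - 4 MB = 4194304 bytes.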
//
// This change is specifically so that sort-bed can sort very-long reads obtained from
// converting BAM to BED via convert2bed (e.g., Nanopore or PacBio sequencing reads), which
// easily blow up line sizes past what we've generally been used to with short-read sequencers.
// If read lengths get longer, we may need to explore some changes to sort-bed memory usage.
//

#define INT_TOKEN_CHR_MAX_LENGTH 127
#define INT_TOKEN_ID_MAX_LENGTH 16383
#define INT_TOKEN_REST_MAX_LENGTH 8*131072
/* Maximum length of the "rest" token. The expansion is parenthesized so the
   subtraction is evaluated as a unit when the macro is used inside a larger
   expression (e.g. 2 * INT_TOKEN_REST_MAX_LENGTH). The value is unchanged. */
#define INT_TOKEN_REST_MAX_LENGTH (8372074 - 4*(1024*1024))
#define INT_MAX_DEC_INTEGERS 12
#define INT_MAX_COORD_VALUE 999999999999 /* MAX_DEC_INTEGERS decimal integers; we assume >= 64-bit systems */
#define INT_TOKENS_MAX_LENGTH (TOKEN_CHR_MAX_LENGTH + TOKEN_ID_MAX_LENGTH + TOKEN_REST_MAX_LENGTH + 2*MAX_DEC_INTEGERS)
Expand Down

0 comments on commit d45974c

Please sign in to comment.