Skip to content

Commit

Permalink
Fix for multiset union output upon input of one Starch archive with variable ID content
Browse files Browse the repository at this point in the history
  • Loading branch information
alexpreynolds committed Jun 20, 2017
1 parent 343ec7c commit 51a8967
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 28 deletions.
2 changes: 1 addition & 1 deletion docs/content/revision-history.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ This revision of BEDOPS includes significant performance improvements for defaul

* Performance of :code:`bedops` tool increased by **1.3x** (or **23%**).

* Performance of :code:`-u`/`--everything` with large numbers of inputs is improved by approximately **1.8x**.
* Performance of :code:`-u`/:code:`--everything` with large numbers of inputs is improved by approximately **1.8x**.

* :code:`megarow` build target is available to compile a worst-case interval-optimized version of :code:`bedops` that preserves speed improvements at the cost of increased memory usage.

Expand Down
36 changes: 25 additions & 11 deletions interfaces/general-headers/data/starch/starchApi.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -865,8 +865,10 @@ namespace starch
archMdIter = archMdIter->next;
if (archMdIter)
setCurrentChromosome(archMdIter->chromosome);
else
free(currentChromosome), currentChromosome = NULL;
else {
free(currentChromosome);
currentChromosome = NULL;
}
}
void setCurrentChromosome(char *_cC) {
if (currentChromosome) {
Expand All @@ -880,7 +882,16 @@ namespace starch
}
/* Record _cS as the current element's start coordinate. */
void setCurrentStart(Bed::SignedCoordType _cS)
{
    currentStart = _cS;
}
/* Record _cS as the current element's stop coordinate. */
void setCurrentStop(Bed::SignedCoordType _cS)
{
    currentStop = _cS;
}
void setCurrentRemainder(char * _remainder) { currentRemainder = _remainder; }
/*
 * Replace the stored remainder with a privately owned heap copy of _remainder.
 *
 * - The copy is made BEFORE the previously stored buffer is released, so it is
 *   safe for a caller to pass the currently stored pointer back in.
 * - A NULL _remainder clears the stored remainder instead of being handed to
 *   strlen(), which would be undefined behavior.
 * - If allocation fails, the old buffer is still released and
 *   currentRemainder is left NULL (same outcome as before this change).
 */
void setCurrentRemainder(char * _remainder) {
    char *remainderCopy = NULL;

    if (_remainder) {
        /* measure once; the +1 carries the terminating NUL along with the copy */
        const size_t remainderSize = strlen(_remainder) + 1;
        remainderCopy = static_cast<char *>( malloc(remainderSize) );
        if (remainderCopy) {
            memcpy(remainderCopy, _remainder, remainderSize);
        }
    }

    /* free(NULL) is a no-op, so no guard is needed around the old buffer */
    free(currentRemainder);
    currentRemainder = remainderCopy;
}
/* Store a copy of _selChr as the selected chromosome name. */
void setSelectedChromosome(const std::string& _selChr)
{
    selectedChromosome = _selChr;
}
};

Expand Down Expand Up @@ -1462,7 +1473,7 @@ namespace starch
extractLine(line);
firstPass = false;
}

if ((t_firstInputToken[0] == 'p') && (archMdIter)) {
#ifdef DEBUG
std::fprintf(stderr, "--> prefix is 'p'\n");
Expand Down Expand Up @@ -1566,7 +1577,7 @@ namespace starch
&_currRemainder,
&_currRemainderLen);
#ifdef DEBUG
std::fprintf(stderr,"t_ [ %s | %s] _curr [ %s | %" PRId64 " | %" PRId64 " ]\n", t_firstInputToken, t_secondInputToken, _currChr, _currStart, _currStop);
std::fprintf(stderr,"t_ [ %s | %s] _curr [ %s | %" PRId64 " | %" PRId64 " | remlen: %zu]\n", t_firstInputToken, t_secondInputToken, _currChr, _currStart, _currStop, _currRemainderLen);
#endif
if (res != 0)
break;
Expand Down Expand Up @@ -1772,7 +1783,7 @@ namespace starch
case kUndefined: {
throw(std::string("ERROR: backend compression type is undefined"));
}
}
}
}

if ((_currChr && archType == kGzip && !postBreakdownZValuesIdentical) || (_currChr && archType == kBzip2)) {
Expand All @@ -1781,14 +1792,17 @@ namespace starch
#endif
setCurrentStart(_currStart);
setCurrentStop(_currStop);
setCurrentRemainder(_currRemainder);
if (_currRemainder)
if (_currRemainder) {
setCurrentRemainder(_currRemainder);
}

if (_currRemainder && (_currRemainderLen > 0)) {
std::sprintf(out, "%s\t%" PRId64 "\t%" PRId64 "\t%s", _currChr, _currStart, _currStop, _currRemainder);
else
}
else {
std::sprintf(out, "%s\t%" PRId64 "\t%" PRId64, _currChr, _currStart, _currStop);
}
line = out;
_currRemainder[0] = '\0';
_currRemainderLen = 0;

if (archType == kGzip)
postBreakdownZValuesIdentical = (zOutBufIdx == zHave);
Expand Down
40 changes: 24 additions & 16 deletions interfaces/src/data/starch/unstarchHelpers.c
Original file line number Diff line number Diff line change
Expand Up @@ -971,13 +971,13 @@ UNSTARCH_sReverseTransformIgnoringHeaderedInput(const char *chr, const unsigned
*currentChr = currentChrCopy;
*currentChrLen = strlen(chr) * 2;
}
strncpy(*currentChr, chr, strlen(chr) + 1);
strncpy(*currentChr, chr, strlen(chr) + 1);
if (!*currentChr) {
fprintf(stderr, "ERROR: Current chromosome name is not set\n");
return UNSTARCH_FATAL_ERROR;
return UNSTARCH_FATAL_ERROR;
}
*currentStart = *start;
*currentStop = *lastEnd;
*currentStop = *lastEnd;
if (! *currentRemainder) {
#ifdef __cplusplus
*currentRemainder = static_cast<char *>( malloc(strlen(elemTok2) + 1) );
Expand Down Expand Up @@ -1054,7 +1054,7 @@ UNSTARCH_sReverseTransformIgnoringHeaderedInput(const char *chr, const unsigned
/* strncpy(*currentChr, chr, strlen(chr) + 1); */
if (! *currentChr) {
fprintf(stderr, "ERROR: Current chromosome name could not be copied\n");
return UNSTARCH_FATAL_ERROR;
return UNSTARCH_FATAL_ERROR;
}
#ifdef __cplusplus
*currentStart = static_cast<SignedCoordType>( strtoull(elemTok1, NULL, UNSTARCH_RADIX) );
Expand Down Expand Up @@ -1086,7 +1086,7 @@ UNSTARCH_sReverseTransformIgnoringHeaderedInput(const char *chr, const unsigned
strncpy(*currentRemainder, elemTok2, strlen(elemTok2) + 1);
if (!*currentRemainder) {
fprintf(stderr, "ERROR: Current remainder token could not be copied\n");
return UNSTARCH_FATAL_ERROR;
return UNSTARCH_FATAL_ERROR;
}
}
}
Expand Down Expand Up @@ -1119,7 +1119,7 @@ UNSTARCH_sReverseTransformIgnoringHeaderedInput(const char *chr, const unsigned
strncpy(*currentChr, chr, strlen(chr) + 1);
if (! *currentChr) {
fprintf(stderr, "ERROR: Current chromosome name could not be copied\n");
return UNSTARCH_FATAL_ERROR;
return UNSTARCH_FATAL_ERROR;
}

if (elemTok1[0] == 'p') {
Expand All @@ -1143,6 +1143,11 @@ UNSTARCH_sReverseTransformIgnoringHeaderedInput(const char *chr, const unsigned
*currentStart = *start;
*currentStop = *lastEnd;
}
if (*currentRemainder) {
free(*currentRemainder);
*currentRemainder = NULL;
*currentRemainderLen = 0;
}
}
}
else {
Expand Down Expand Up @@ -1458,7 +1463,7 @@ UNSTARCH_sReverseTransformHeaderlessInput(const char *chr, const unsigned char *
strncpy(*currentRemainder, elemTok2, strlen(elemTok2) + 1);
if (!*currentRemainder) {
fprintf(stderr, "ERROR: Current remainder token could not be copied\n");
return UNSTARCH_FATAL_ERROR;
return UNSTARCH_FATAL_ERROR;
}
}
}
Expand Down Expand Up @@ -1519,7 +1524,7 @@ UNSTARCH_sReverseTransformHeaderlessInput(const char *chr, const unsigned char *
strncpy(*currentChr, chr, strlen(chr) + 1);
if (!*currentChr) {
fprintf(stderr, "ERROR: Current chromosome name could not be copied\n");
return UNSTARCH_FATAL_ERROR;
return UNSTARCH_FATAL_ERROR;
}
*currentStart = *start;
*currentStop = *lastEnd;
Expand All @@ -1534,8 +1539,8 @@ UNSTARCH_sReverseTransformHeaderlessInput(const char *chr, const unsigned char *
return 0;
}

int
UNSTARCH_createInverseTransformTokens(const unsigned char *s, const char delim, char elemTok1[], char elemTok2[])
int
UNSTARCH_createInverseTransformTokens(const unsigned char *s, const char delim, char elemTok1[], char elemTok2[])
{
#ifdef DEBUG
fprintf(stderr, "\n--- UNSTARCH_createInverseTransformTokens() ---\n");
Expand All @@ -1546,18 +1551,21 @@ UNSTARCH_createInverseTransformTokens(const unsigned char *s, const char delim,
charCnt = 0;
sCnt = 0;
elemCnt = 0;


elemTok1[0] = '\0';
elemTok2[0] = '\0';

do {
buffer[charCnt++] = s[sCnt];
if (buffer[(charCnt - 1)] == delim) {
if (elemCnt == 0) {
if (elemCnt == 0) {
buffer[(charCnt - 1)] = '\0';
#ifdef __cplusplus
strncpy(elemTok1, reinterpret_cast<const char *>( buffer ), strlen(reinterpret_cast<const char *>( buffer )) + 1);
strncpy(elemTok1, reinterpret_cast<const char *>( buffer ), strlen(reinterpret_cast<const char *>( buffer )) + 1);
#else
strncpy(elemTok1, (const char *) buffer, strlen((const char *) buffer) + 1);
strncpy(elemTok1, (const char *) buffer, strlen((const char *) buffer) + 1);
#endif
elemCnt++;
elemCnt++;
charCnt = 0;
}
}
Expand Down Expand Up @@ -1593,7 +1601,7 @@ UNSTARCH_strnstr(const char *haystack, const char *needle, size_t haystackLen)
size_t pLen;
size_t len = strlen(needle);

if (*needle == '\0') {
if (*needle == '\0') {
/* everything matches empty string */
#ifdef __cplusplus
return const_cast<char *>( haystack );
Expand Down

0 comments on commit 51a8967

Please sign in to comment.