Skip to content

Commit

Permalink
More 3.0 tidies.
Browse files Browse the repository at this point in the history
git-svn-id: https://regexkit.svn.sourceforge.net/svnroot/regexkit/RegexKitLite@46 83f0d1e4-963b-0410-a2a1-d3bd3f33e299
  • Loading branch information
jengelhart committed Apr 30, 2009
1 parent 4b80992 commit 23744fa
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 9 deletions.
9 changes: 5 additions & 4 deletions RegexKitLite.html
Expand Up @@ -4,7 +4,6 @@
<meta http-equiv="Content-Type" content="text/html;charset=utf-8">
<meta http-equiv="Content-Language" content="EN-US">
<meta name = "format-detection" content = "telephone=no">
<meta name = "viewport" content = "width = 480">
<style type="text/css">
body { font: 12px "lucida grande", geneva, helvetica, arial, sans-serif; color: #000; background-color: #FFF; }
h1 { margin-top: 1.0em; margin-bottom: 0.8334em; font-size: 2.5000em; }
Expand Down Expand Up @@ -428,7 +427,7 @@
.softNobr { white-space: nowrap; }

.syntax > .specification .optional { -moz-box-shadow: 2px 3px 5px #888; -webkit-box-shadow: 2px 3px 5px #888; }
.XXX { -moz-box-shadow: 8px 8px 10px #444; -webkit-box-shadow: 3px 3px 8px #666; }
.XXX { -moz-box-shadow: 8px 8px 10px #666; -webkit-box-shadow: 3px 3px 8px #666; }
.box { -moz-box-shadow: 3px 3px 4px #999; -webkit-box-shadow: 3px 3px 4px #999; }
.box.caution { -moz-box-shadow: 0px 0px 5px #000; -webkit-box-shadow: 0px 0px 5px #000; }
.box .metainfo { -moz-box-shadow: 2px 2px 3px #999; -webkit-box-shadow: 2px 2px 3px #999; }
Expand Down Expand Up @@ -4871,15 +4870,17 @@ <h2 id="ReleaseInformation">Release Information</h2>
// { @&quot;Bob&quot;, @&quot;Tom&quot;, @&quot;Sam&quot; } &lt;- RegexKitLite &ge; v3.0.</div>
</li>
<li>
<p>The results returned by <a href="#NSString_RegexKitLiteAdditions__-componentsSeparatedByRegex:" class="code">componentsSeparatedByRegex:</a> were found to differ from the expected results when zero-width assertions, such as <span class="regex">\b</span>, were used. Prior to v3.0, regular expressions that matched zero characters would be considered a 'match', and a zero length string would be added to the results array.
The expected results are the results that are returned by the <span class="quotedText">perl</span> <span class="code">split()</span> function, which does not create a zero length string in such cases.</p>
<p>The results returned by <a href="#NSString_RegexKitLiteAdditions__-componentsSeparatedByRegex:" class="code">componentsSeparatedByRegex:</a> were found to differ from the expected results when zero-width assertions, such as <span class="regex">\b</span>, were used. Prior to v3.0, regular expressions that matched zero characters would be considered a 'match', and a zero length string would be added to the results array. The expected results are those returned by the <span class="quotedText">perl</span> <span class="code">split()</span> function, which does not create a zero length string in such cases. See ICU ticket #<a href="http://bugs.icu-project.org/trac/ticket/6826">6826</a> for details.</p>

<div class="box sourcecode">NSArray *splitArray = [@&quot;I|at|ice I eat rice&quot; componentsSeparatedByRegex:@&quot;\\b\\s*&quot;];

// ICU : { @&quot;&quot;, @&quot;I&quot;, @&quot;|&quot;, @&quot;at&quot;, @&quot;|&quot;, @&quot;ice&quot;, @&quot;&quot;, @&quot;I&quot;, @&quot;&quot;, @&quot;eat&quot;, @&quot;&quot;, @&quot;rice&quot; } &lt;- RegexKitLite &le; v2.2.
// perl: { @&quot;I&quot;, @&quot;|&quot;, @&quot;at&quot;, @&quot;|&quot;, @&quot;ice&quot;, @&quot;I&quot;, @&quot;eat&quot;, @&quot;rice&quot; } &lt;- RegexKitLite &ge; v3.0.</div>

</li>
<li>
<p>As part of the <span class="hardNobr">64-bit</span> tidy, a check of the length of a string that is passed to <span class="rkl">RegexKit<i>Lite</i></span>, and therefore ICU, was added to ensure that the length is less than <span class="code">INT_MAX</span>. If the length of the string <span class="hardNobr">is &ge; <span class="code">INT_MAX</span>,</span> then <span class="rkl">RegexKit<i>Lite</i></span> will raise <a href="http://developer.apple.com/documentation/Cocoa/Reference/Foundation/Miscellaneous/Foundation_Constants/Reference/reference.html#//apple_ref/doc/c_ref/NSRangeException" class="code">NSRangeException</a>. The value of <span class="hardNobr"><span class="code">INT_MAX</span> is 2<sup>31</sup>-1 (<span class="code">0x7fffffff</span>).</span> This was done because ICU uses the <span class="code">int</span> type for representing offset values.</p>
</li>

</ul>

Expand Down
14 changes: 9 additions & 5 deletions RegexKitLite.m
Expand Up @@ -623,6 +623,8 @@ static void rkl_dtrace_getRegexUTF8(CFStringRef str, char *utf8Buffer) {
UParseError parseError = (UParseError){-1, -1, {0}, {0}};
const UniChar *regexUniChar = NULL;

if(RKL_EXPECTED(regexStringU16Length >= (CFIndex)INT_MAX, 0L)) { *exception = [NSException exceptionWithName:NSRangeException reason:@"Regex string length exceeds INT_MAX" userInfo:NULL]; goto exitNow; }

// Try to quickly obtain regexString in UTF16 format.
if((regexUniChar = CFStringGetCharactersPtr(cacheSlot->regexString)) == NULL) { // We didn't get the UTF16 pointer quickly and need to perform a full conversion in a temp buffer.
UniChar *uniCharBuffer = NULL;
Expand Down Expand Up @@ -825,9 +827,9 @@ static id rkl_performRegexOp(id self, SEL _cmd, RKLRegexOp regexOp, NSString *re

if(RKL_EXPECTED((cacheSlot = rkl_getCachedRegexSetToString(regexString, options, matchString, &stringU16Length, matchRange, error, &exception, &status)) == NULL, 0L)) { stringU16Length = (NSUInteger)CFStringGetLength((CFStringRef)matchString); }
if(RKL_EXPECTED(matchRange->length == NSUIntegerMax, 1L)) { matchRange->length = stringU16Length; } // For convenience.
if(RKL_EXPECTED(stringU16Length < NSMaxRange(*matchRange), 0L) && RKL_EXPECTED(exception == NULL, 1L)) { exception = (id)RKL_EXCEPTION(NSRangeException, @"Range or index out of bounds"); goto exitNow; }
if(RKL_EXPECTED(stringU16Length >= INT_MAX, 0L) && RKL_EXPECTED(exception == NULL, 1L)) { exception = (id)RKL_EXCEPTION(NSRangeException, @"String length exceeds INT_MAX"); goto exitNow; }
if(((maskedRegexOp == RKLRangeOp) || (maskedRegexOp == RKLArrayOfStringsOp)) && RKL_EXPECTED(cacheSlot != NULL, 1L) && (RKL_EXPECTED(capture < 0L, 0L) || RKL_EXPECTED(capture > cacheSlot->captureCount, 0L)) && RKL_EXPECTED(exception == NULL, 1L)) { exception = (id)RKL_EXCEPTION(NSInvalidArgumentException, @"The capture argument is not valid."); }
if(RKL_EXPECTED(stringU16Length < NSMaxRange(*matchRange), 0L) && RKL_EXPECTED(exception == NULL, 1L)) { exception = (id)RKL_EXCEPTION(NSRangeException, @"Range or index out of bounds"); goto exitNow; }
if(RKL_EXPECTED(stringU16Length >= (NSUInteger)INT_MAX, 0L) && RKL_EXPECTED(exception == NULL, 1L)) { exception = (id)RKL_EXCEPTION(NSRangeException, @"String length exceeds INT_MAX"); goto exitNow; }
if(((maskedRegexOp == RKLRangeOp) || (maskedRegexOp == RKLArrayOfStringsOp)) && RKL_EXPECTED(cacheSlot != NULL, 1L) && (RKL_EXPECTED(capture < 0L, 0L) || RKL_EXPECTED(capture > cacheSlot->captureCount, 0L)) && RKL_EXPECTED(exception == NULL, 1L)) { exception = (id)RKL_EXCEPTION(NSInvalidArgumentException, @"The capture argument is not valid."); goto exitNow; }
if(RKL_EXPECTED(cacheSlot == NULL, 0L) || RKL_EXPECTED(status > U_ZERO_ERROR, 0L) || RKL_EXPECTED(exception != NULL, 0L)) { goto exitNow; }

RKLCDelayedAssert((cacheSlot->icu_regex != NULL) && (cacheSlot->regexString != NULL) && (cacheSlot->captureCount >= 0L) && (cacheSlot->setToString != NULL) && (cacheSlot->setToLength >= 0L) && (cacheSlot->setToUniChar != NULL) && ((CFIndex)NSMaxRange(cacheSlot->setToRange) <= cacheSlot->setToLength), &exception, exitNow);
Expand Down Expand Up @@ -1106,15 +1108,17 @@ static NSUInteger rkl_growFindRanges(RKLCacheSlot *cacheSlot, NSUInteger lastLoc
// ----------

static NSString *rkl_replaceString(RKLCacheSlot *cacheSlot, id searchString, NSUInteger searchU16Length, NSString *replacementString, NSUInteger replacementU16Length, NSUInteger *replacedCountPtr, NSUInteger replaceMutable, id *exception, int32_t *status) {
uint64_t searchU16Length64 = (uint64_t)searchU16Length, replacementU16Length64 = (uint64_t)replacementU16Length;
int32_t resultU16Length = 0, tempUniCharBufferU16Capacity = 0;
UniChar *tempUniCharBuffer = NULL;
const UniChar *replacementUniChar = NULL;
id resultObject = NULL;
NSUInteger replacedCount = 0UL;

if((RKL_EXPECTED(replacementU16Length >= INT_MAX, 0L) || RKL_EXPECTED((((uint64_t)searchU16Length / 2ULL) + ((uint64_t)replacementU16Length * 2ULL)) >= (uint64_t)INT_MAX, 0L))) { *exception = [NSException exceptionWithName:NSRangeException reason:@"String length exceeds INT_MAX" userInfo:NULL]; }
if((RKL_EXPECTED(replacementU16Length64 >= (uint64_t)INT_MAX, 0L) || RKL_EXPECTED(((searchU16Length64 / 2ULL) + (replacementU16Length64 * 2ULL)) >= (uint64_t)INT_MAX, 0L))) { *exception = [NSException exceptionWithName:NSRangeException reason:@"Replacement string length exceeds INT_MAX" userInfo:NULL]; goto exitNow; }

RKLCDelayedAssert((searchU16Length < INT_MAX) && (replacementU16Length < INT_MAX) && ((16UL + (searchU16Length + (searchU16Length / 2UL)) + (replacementU16Length * 2UL)) < INT_MAX), exception, exitNow);
RKLCDelayedAssert((searchU16Length64 < (uint64_t)INT_MAX) && (replacementU16Length64 < (uint64_t)INT_MAX) && (((searchU16Length64 / 2ULL) + (replacementU16Length64 * 2ULL)) < (uint64_t)INT_MAX), exception, exitNow);

// Zero order approximation of the buffer sizes for holding the replaced string or split strings and split strings pointer offsets. As UTF16 code units.
tempUniCharBufferU16Capacity = (int32_t)(16UL + (searchU16Length + (searchU16Length / 2UL)) + (replacementU16Length * 2UL));

Expand Down

0 comments on commit 23744fa

Please sign in to comment.