Permalink
Browse files

Whitespace around fields is significant (unless field sanitization is enabled)
  • Loading branch information...
1 parent 753f689 commit 741bf3a46f4c46ee186f387aaf57cc8c6895d107 @davedelong committed Dec 22, 2012
Showing with 60 additions and 32 deletions.
  1. +29 −9 CHCSVParser/CHCSVParser.m
  2. +2 −1 Test.csv
  3. +19 −18 UnitTests.m
  4. +10 −4 main.m
@@ -101,8 +101,8 @@ - (id)initWithInputStream:(NSInputStream *)stream usedEncoding:(NSStringEncoding
_nextIndex = 0;
_recognizesComments = NO;
- _recognizesBackslashesAsEscapes = NO;
- _sanitizesFields = NO;
+ _recognizesBackslashesAsEscapes = YES;
+ _sanitizesFields = YES;
_sanitizedField = [[NSMutableString alloc] init];
NSMutableCharacterSet *m = [[NSCharacterSet newlineCharacterSet] mutableCopy];
@@ -312,19 +312,40 @@ - (BOOL)_parseComment {
return [self _parseNewline];
}
+- (void)_parseFieldWhitespace {
+ NSCharacterSet *whitespace = [NSCharacterSet whitespaceCharacterSet];
+ while ([self _peekCharacter] != '\0' &&
+ [whitespace characterIsMember:[self _peekCharacter]] &&
+ [self _peekCharacter] != _delimiter) {
+ // if we're sanitizing fields, then these characters would be stripped (because they're not appended to _sanitizedField)
+ // if we're not sanitizing fields, then they'll be included in the -substringWithRange:
+ [self _advance];
+ }
+}
+
- (BOOL)_parseField {
if (_cancelled) { return NO; }
- [_sanitizedField setString:@""];
+ BOOL parsedField = NO;
+ [self _beginField];
+ // consume leading whitespace
+ [self _parseFieldWhitespace];
+
if ([self _peekCharacter] == DOUBLE_QUOTE) {
- return [self _parseEscapedField];
+ parsedField = [self _parseEscapedField];
} else {
- return [self _parseUnescapedField];
+ parsedField = [self _parseUnescapedField];
}
+
+ if (parsedField) {
+ // consume trailing whitespace
+ [self _parseFieldWhitespace];
+ [self _endField];
+ }
+ return parsedField;
}
- (BOOL)_parseEscapedField {
- [self _beginField];
[self _advance]; // consume the opening double quote
NSCharacterSet *newlines = [NSCharacterSet newlineCharacterSet];
@@ -359,15 +380,13 @@ - (BOOL)_parseEscapedField {
if ([self _peekCharacter] == DOUBLE_QUOTE) {
[self _advance];
- [self _endField];
return YES;
}
return NO;
}
- (BOOL)_parseUnescapedField {
- [self _beginField];
BOOL isBackslashEscaped = NO;
while (1) {
@@ -391,7 +410,6 @@ - (BOOL)_parseUnescapedField {
}
}
- [self _endField];
return YES;
}
@@ -441,6 +459,7 @@ - (void)_endRecord {
- (void)_beginField {
if (_cancelled) { return; }
+ [_sanitizedField setString:@""];
_fieldRange.location = _nextIndex;
}
@@ -452,6 +471,7 @@ - (void)_endField {
if (_sanitizesFields) {
field = CHCSV_AUTORELEASE([_sanitizedField copy]);
+ field = [field stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]];
} else {
field = [_string substringWithRange:_fieldRange];
}
View
@@ -16,5 +16,6 @@ This,has,quoted,"commas,"
This,has,empty,quoted,fields,"",""
This,has,mixed,"""escaped quotes\""
#empty line
+ This , line , has , significant , whitespace
This,is,the,last,line
-\
+,
View
@@ -38,23 +38,24 @@ - (void) tearDown {
- (NSArray *) expectedFields {
return @[
- @[@"This",@"is",@"a",@"simple",@"line"],
- @[@"This",@"is",@"a",@"quoted",@"line"],
- @[@"This",@"is",@"a",@"mixed",@"line"],
- @[@"This",@"has",@"a\nmultiline\nfield"],
- @[@"This",@"has",@"single",@"apostrophes",@"ma'am"],
- @[@"#This",@"line",@"should",@"not",@"be",@"ignored"],
- @[@"This",@"has",@"\"escaped\"",@"quotes"],
- @[@"This",@"has",@"\"escaped\"",@"quotes"],
- @[@"This",@"has",@"empty",@"fields",@"",@"",@""],
- @[@"This",@"has",@"escaped",@"escapes\\"],
- @[@"This",@"has",@"escaped",@"commas,"],
- @[@"This",@"has",@"quoted",@"commas,"],
- @[@"This",@"has",@"empty",@"quoted",@"fields",@"",@""],
- @[@"This",@"has",@"mixed",@"\"escaped quotes\""],
- @[@"This",@"is",@"the",@"last",@"line"],
- @[@""]
- ];
+ @[@"This",@"is",@"a",@"simple",@"line"],
+ @[@"This",@"is",@"a",@"quoted",@"line"],
+ @[@"This",@"is",@"a",@"mixed",@"line"],
+ @[@"This",@"has",@"a\nmultiline\nfield"],
+ @[@"This",@"has",@"single",@"apostrophes",@"ma'am"],
+ @[@"#This",@"line",@"should",@"not",@"be",@"ignored"],
+ @[@"This",@"has",@"\"escaped\"",@"quotes"],
+ @[@"This",@"has",@"\"escaped\"",@"quotes"],
+ @[@"This",@"has",@"empty",@"fields",@"",@"",@""],
+ @[@"This",@"has",@"escaped",@"escapes\\"],
+ @[@"This",@"has",@"escaped",@"commas,"],
+ @[@"This",@"has",@"quoted",@"commas,"],
+ @[@"This",@"has",@"empty",@"quoted",@"fields",@"",@""],
+ @[@"This",@"has",@"mixed",@"\"escaped quotes\""],
+ @[@" This ",@" line ",@" has ",@" significant ",@" whitespace "],
+ @[@"This",@"is",@"the",@"last",@"line"],
+ @[@""]
+ ];
}
- (void) testCSV {
@@ -78,7 +79,7 @@ - (void) testCSV {
NSString *tempFileName = [NSString stringWithFormat:@"%d-test.csv", arc4random()];
NSString *tempFile = [NSTemporaryDirectory() stringByAppendingPathComponent:tempFileName];
NSLog(@"Writing to file: %@", tempFile);
-
+
NSOutputStream *output = [NSOutputStream outputStreamToFileAtPath:tempFile append:NO];
CHCSVWriter *writer = [[CHCSVWriter alloc] initWithOutputStream:output encoding:NSUTF8StringEncoding delimiter:','];
for (NSArray *line in expectedFields) {
View
@@ -11,11 +11,16 @@ - (void) parser:(CHCSVParser *)parser didStartDocument:(NSString *)csvFile {
- (void) parser:(CHCSVParser *)parser didStartLine:(NSUInteger)lineNumber {
// NSLog(@"Starting line: %lu", lineNumber);
}
-- (void) parser:(CHCSVParser *)parser didReadField:(NSString *)field {
-// NSLog(@" field: %@", field);
+- (void)parser:(CHCSVParser *)parser didReadField:(NSString *)field atIndex:(NSInteger)fieldIndex {
+ if (fieldIndex == 0) {
+ printf("\t%s", [field UTF8String]);
+ } else {
+ printf(",%s", [field UTF8String]);
+ }
}
- (void) parser:(CHCSVParser *)parser didEndLine:(NSUInteger)lineNumber {
// NSLog(@"Ending line: %lu", lineNumber);
+ printf("\n");
}
- (void) parser:(CHCSVParser *)parser didEndDocument:(NSString *)csvFile {
// NSLog(@"parser ended: %@", csvFile);
@@ -30,12 +35,13 @@ - (void) parser:(CHCSVParser *)parser didFailWithError:(NSError *)error {
int main (int argc, const char * argv[]) {
NSAutoreleasePool * pool = [[NSAutoreleasePool alloc] init];
NSString * file = @"/Users/dave/Developer/Open Source/Git Projects/CHCSVParser/Test.csv";
- file = @"/Users/dave/Downloads/test.csv";
NSArray *a = [NSArray arrayWithContentsOfCSVFile:file];
NSLog(@"%@", a);
CHCSVParser *newP = [[CHCSVParser alloc] initWithContentsOfCSVFile:file];
+// [newP setDelegate:[[[Delegate alloc] init] autorelease]];
// [newP setRecognizesBackslashesAsEscapes:NO];
- [newP setSanitizesFields:YES];
+// [newP setSanitizesFields:YES];
+
[newP parse];
[newP release];

0 comments on commit 741bf3a

Please sign in to comment.