Skip to content

Commit

Permalink
Allow fields to have a leading equal sign. Fixes #66.
Browse files Browse the repository at this point in the history
  • Loading branch information
davedelong committed Jul 12, 2014
1 parent 556f4f0 commit 0375a42
Show file tree
Hide file tree
Showing 4 changed files with 120 additions and 9 deletions.
30 changes: 25 additions & 5 deletions CHCSVParser/CHCSVParser/CHCSVParser.h
Expand Up @@ -60,10 +60,13 @@ typedef NS_ENUM(NSInteger, CHCSVErrorCode) {
@interface CHCSVParser : NSObject

@property (assign) id<CHCSVParserDelegate> delegate;
@property (assign) BOOL recognizesBackslashesAsEscapes; // default is NO
@property (assign) BOOL sanitizesFields; // default is NO
@property (assign) BOOL recognizesComments; // default is NO
@property (assign) BOOL stripsLeadingAndTrailingWhitespace; // default is NO

// For a description of these properties, see the CHCSVParserOptions below
@property (nonatomic, assign) BOOL recognizesBackslashesAsEscapes; // default is NO
@property (nonatomic, assign) BOOL sanitizesFields; // default is NO
@property (nonatomic, assign) BOOL recognizesComments; // default is NO
@property (nonatomic, assign) BOOL stripsLeadingAndTrailingWhitespace; // default is NO
@property (nonatomic, assign) BOOL recognizesLeadingEqualSign; // default is NO

@property (readonly) NSUInteger totalBytesRead;

Expand Down Expand Up @@ -99,14 +102,31 @@ typedef NS_ENUM(NSInteger, CHCSVErrorCode) {
#pragma mark - Convenience Categories

// Bitmask of parsing behaviors accepted by the convenience parsing methods.
// Each flag corresponds to one of the BOOL properties on CHCSVParser.
typedef NS_OPTIONS(NSUInteger, CHCSVParserOptions) {
    // Allows you to have delimiters and other control characters in a field without quoting the field
    // a,b\,c,d
    // If you use this option, you may not use "\" as the delimiter
    CHCSVParserOptionsRecognizesBackslashesAsEscapes = 1 << 0,

    // Cleans the field before reporting it (unescaping characters, stripping leading/trailing whitespace, etc)
    CHCSVParserOptionsSanitizesFields = 1 << 1,

    // Fields that begin with a "#" will be reported as comments. Comments are ended by unescaped newlines
    // If you use this option, you may not use "#" as the delimiter
    CHCSVParserOptionsRecognizesComments = 1 << 2,

    // Trims whitespace around a field
    CHCSVParserOptionsTrimsWhitespace = 1 << 3,

    // When you specify this option, instead of getting an Array of Arrays of Strings,
    // you get an Array of CHCSVOrderedDictionaries
    // If the CSV only contains a single line, then an empty array is returned
    CHCSVParserOptionsUsesFirstLineAsKeys = 1 << 4,

    // Some CSV files contain fields that look like:
    // a,="001234",b
    // http://edoceo.com/utilitas/csv-file-format
    // If you specify this option, you may not use "=" as the delimiter
    CHCSVParserOptionsRecognizesLeadingEqualSign = 1 << 5
};

@interface CHCSVOrderedDictionary : NSDictionary
Expand Down
30 changes: 29 additions & 1 deletion CHCSVParser/CHCSVParser/CHCSVParser.m
Expand Up @@ -35,6 +35,7 @@ of this software and associated documentation files (the "Software"), to deal
#define DOUBLE_QUOTE '"'
#define COMMA ','
#define OCTOTHORPE '#'
#define EQUAL '='
#define BACKSLASH '\\'
#define NULLCHAR '\0'

Expand Down Expand Up @@ -91,7 +92,6 @@ - (id)initWithInputStream:(NSInputStream *)stream usedEncoding:(NSStringEncoding
NSParameterAssert(delimiter);
NSAssert([[NSCharacterSet newlineCharacterSet] characterIsMember:delimiter] == NO, @"The field delimiter may not be a newline");
NSAssert(delimiter != DOUBLE_QUOTE, @"The field delimiter may not be a double quote");
NSAssert(delimiter != OCTOTHORPE, @"The field delimiter may not be an octothorpe");

self = [super init];
if (self) {
Expand All @@ -109,6 +109,7 @@ - (id)initWithInputStream:(NSInputStream *)stream usedEncoding:(NSStringEncoding
_sanitizesFields = NO;
_sanitizedField = [[NSMutableString alloc] init];
_stripsLeadingAndTrailingWhitespace = NO;
_recognizesLeadingEqualSign = NO;

NSMutableCharacterSet *m = [[NSCharacterSet newlineCharacterSet] mutableCopy];
NSString *invalid = [NSString stringWithFormat:@"%c%C", DOUBLE_QUOTE, _delimiter];
Expand All @@ -134,6 +135,29 @@ - (void)dealloc {

#pragma mark -

// Setter for recognizesBackslashesAsEscapes.
// Raises NSInternalInconsistencyException when the option is being enabled while
// the delimiter is '\'. The check runs before the ivar is written, so a rejected
// request leaves the previously stored value untouched.
- (void)setRecognizesBackslashesAsEscapes:(BOOL)recognizesBackslashesAsEscapes {
    if (recognizesBackslashesAsEscapes && _delimiter == BACKSLASH) {
        [NSException raise:NSInternalInconsistencyException format:@"Cannot recognize backslashes as escapes when using '\\' as the delimiter"];
    }
    _recognizesBackslashesAsEscapes = recognizesBackslashesAsEscapes;
}

// Setter for recognizesComments.
// Raises NSInternalInconsistencyException when the option is being enabled while
// the delimiter is '#'. The check runs before the ivar is written, so a rejected
// request leaves the previously stored value untouched.
- (void)setRecognizesComments:(BOOL)recognizesComments {
    if (recognizesComments && _delimiter == OCTOTHORPE) {
        [NSException raise:NSInternalInconsistencyException format:@"Cannot recognize comments when using '#' as the delimiter"];
    }
    _recognizesComments = recognizesComments;
}

// Setter for recognizesLeadingEqualSign.
// Raises NSInternalInconsistencyException when the option is being enabled while
// the delimiter is '='. The check runs before the ivar is written, so a rejected
// request leaves the previously stored value untouched.
- (void)setRecognizesLeadingEqualSign:(BOOL)recognizesLeadingEqualSign {
    if (recognizesLeadingEqualSign && _delimiter == EQUAL) {
        [NSException raise:NSInternalInconsistencyException format:@"Cannot recognize leading equal sign when using '=' as the delimiter"];
    }
    _recognizesLeadingEqualSign = recognizesLeadingEqualSign;
}

#pragma mark -

- (void)_sniffEncoding {
NSStringEncoding encoding = NSUTF8StringEncoding;

Expand Down Expand Up @@ -353,6 +377,9 @@ - (BOOL)_parseField {

if ([self _peekCharacter] == DOUBLE_QUOTE) {
parsedField = [self _parseEscapedField];
} else if (_recognizesLeadingEqualSign && [self _peekCharacter] == EQUAL && [self _peekPeekCharacter] == DOUBLE_QUOTE) {
[self _advance]; // consume the equal sign
parsedField = [self _parseEscapedField];
} else {
parsedField = [self _parseUnescapedField];
if (_stripsLeadingAndTrailingWhitespace) {
Expand Down Expand Up @@ -739,6 +766,7 @@ - (void)parser:(CHCSVParser *)parser didEndLine:(NSUInteger)recordNumber {
parser.sanitizesFields = !!(options & CHCSVParserOptionsSanitizesFields);
parser.recognizesComments = !!(options & CHCSVParserOptionsRecognizesComments);
parser.stripsLeadingAndTrailingWhitespace = !!(options & CHCSVParserOptionsTrimsWhitespace);
parser.recognizesLeadingEqualSign = !!(options & CHCSVParserOptionsRecognizesLeadingEqualSign);

[parser parse];

Expand Down
1 change: 1 addition & 0 deletions Unit Tests/UnitTestContent.h
Expand Up @@ -14,6 +14,7 @@
#define SPACE @" "
#define BACKSLASH @"\\"
#define OCTOTHORPE @"#"
#define EQUAL @"="

#define FIELD1 @"field1"
#define FIELD2 @"field2"
Expand Down
68 changes: 65 additions & 3 deletions Unit Tests/UnitTests.m
Expand Up @@ -27,8 +27,8 @@ of this software and associated documentation files (the "Software"), to deal
#import "UnitTestContent.h"
#import "CHCSVParser.h"

// Asserts that _actual and _expected compare equal via XCTAssertEqualObjects.
// Wrapped in do { } while(0) so the macro behaves as a single statement.
#define TEST_ARRAYS(_actual, _expected) do {\
XCTAssertEqualObjects(_actual, _expected, @"failed"); \
} while(0)

#define TEST(_csv, _expected, ...) do {\
Expand All @@ -53,7 +53,7 @@ - (void)testSimpleUTF8 {
}

// Regression test for GitHub issue 38: the input is one line of three fields,
// a newline, then a lone "#". With comment recognition enabled, only the single
// three-field record is expected.
- (void)testGithubIssue38 {
    NSString *csv = FIELD1 COMMA FIELD2 COMMA FIELD3 NEWLINE OCTOTHORPE;
    NSArray *expected = @[@[FIELD1, FIELD2, FIELD3]];
    TEST(csv, expected, CHCSVParserOptionsRecognizesComments);
}
Expand Down Expand Up @@ -289,4 +289,66 @@ - (void)testFirstLineAsKeys_MismatchedFieldCount {
XCTAssertEqual(error.code, CHCSVErrorCodeIncorrectNumberOfFields, @"Unexpected error");
}

#pragma mark - Testing Valid Delimiters

// With no parser options, '#' is usable as the field delimiter: the input is
// expected to split into a single record of three fields.
- (void)testAllowedDelimiter_Octothorpe {
    NSArray *expectedRows = @[@[FIELD1, FIELD2, FIELD3]];
    NSString *csvText = FIELD1 OCTOTHORPE FIELD2 OCTOTHORPE FIELD3;
    NSArray *parsedRows = [csvText componentsSeparatedByDelimiter:'#'];

    TEST_ARRAYS(parsedRows, expectedRows);
}

// Using '#' as the delimiter together with comment recognition must throw.
- (void)testDisallowedDelimiter_Octothorpe {
    NSString *csvText = FIELD1 OCTOTHORPE FIELD2 OCTOTHORPE FIELD3;
    XCTAssertThrows([csvText componentsSeparatedByDelimiter:'#' options:CHCSVParserOptionsRecognizesComments], @"failed");
}

// With no parser options, '\' is usable as the field delimiter: the input is
// expected to split into a single record of three fields.
- (void)testAllowedDelimiter_Backslash {
    NSArray *expectedRows = @[@[FIELD1, FIELD2, FIELD3]];
    NSString *csvText = FIELD1 BACKSLASH FIELD2 BACKSLASH FIELD3;
    NSArray *parsedRows = [csvText componentsSeparatedByDelimiter:'\\'];

    TEST_ARRAYS(parsedRows, expectedRows);
}

// Using '\' as the delimiter together with backslash-escape recognition must throw.
- (void)testDisallowedDelimiter_Backslash {
    NSString *csvText = FIELD1 BACKSLASH FIELD2 BACKSLASH FIELD3;
    XCTAssertThrows([csvText componentsSeparatedByDelimiter:'\\' options:CHCSVParserOptionsRecognizesBackslashesAsEscapes], @"failed");
}

// With no parser options, '=' is usable as the field delimiter: the input is
// expected to split into a single record of three fields.
- (void)testAllowedDelimiter_Equal {
    NSArray *expectedRows = @[@[FIELD1, FIELD2, FIELD3]];
    NSString *csvText = FIELD1 EQUAL FIELD2 EQUAL FIELD3;
    NSArray *parsedRows = [csvText componentsSeparatedByDelimiter:'='];

    TEST_ARRAYS(parsedRows, expectedRows);
}

// Using '=' as the delimiter together with leading-equal-sign recognition must throw.
- (void)testDisallowedDelimiter_Equal {
    NSString *csvText = FIELD1 EQUAL FIELD2 EQUAL FIELD3;

    XCTAssertThrows([csvText componentsSeparatedByDelimiter:'=' options:CHCSVParserOptionsRecognizesLeadingEqualSign], @"failed");
}

#pragma mark - Testing Leading Equal

// Without sanitizing, fields written as ="..." are expected to be reported
// verbatim, i.e. still carrying the leading '=' and the surrounding quotes.
- (void)testLeadingEqual {
    NSArray *expectedRows = @[@[FIELD1, EQUAL QUOTED_FIELD2, EQUAL QUOTED_FIELD3]];
    NSString *csvText = FIELD1 COMMA EQUAL QUOTED_FIELD2 COMMA EQUAL QUOTED_FIELD3;

    TEST(csvText, expectedRows, CHCSVParserOptionsRecognizesLeadingEqualSign);
}

// With sanitizing enabled as well, fields written as ="..." are expected to be
// reported as the bare field contents (no leading '=' and no quotes).
- (void)testSanitizedLeadingEqual {
    NSArray *expectedRows = @[@[FIELD1, FIELD2, FIELD3]];
    NSString *csvText = FIELD1 COMMA EQUAL QUOTED_FIELD2 COMMA EQUAL QUOTED_FIELD3;

    TEST(csvText, expectedRows, CHCSVParserOptionsRecognizesLeadingEqualSign | CHCSVParserOptionsSanitizesFields);
}

@end

0 comments on commit 0375a42

Please sign in to comment.