Permalink
Browse files

Added support for parsing data from NSInputStreams. Updated README

  • Loading branch information...
davedelong committed Apr 30, 2011
1 parent 2f93d86 commit 59858216b2b7f3c0daa01d5389be1165f4c0aa48
View
@@ -31,34 +31,38 @@
@private
__weak id<CHCSVParserDelegate> parserDelegate;
- NSFileHandle * csvFileHandle;
- NSString * csvFile;
+ NSInputStream *csvReadStream;
+ BOOL endOfStreamReached;
NSStringEncoding fileEncoding;
+
+ NSString *csvFile;
BOOL hasStarted;
- NSString * delimiter;
+ NSString *delimiter;
unichar delimiterCharacter;
- NSMutableData * currentChunk;
- NSMutableString * currentChunkString;
+ NSMutableData *currentChunk;
+ NSMutableString *currentChunkString;
NSUInteger stringIndex;
- BOOL doneReadingFile;
BOOL balancedQuotes;
BOOL balancedEscapes;
- NSMutableString * currentField;
+ NSMutableString *currentField;
NSUInteger currentLine;
NSUInteger state;
- NSError * error;
+ NSError *error;
}
@property (assign) __weak id<CHCSVParserDelegate> parserDelegate;
@property (readonly) NSError * error;
@property (readonly) NSString * csvFile;
@property (nonatomic, copy) NSString *delimiter;
+- (id) initWithStream:(NSInputStream *)readStream usedEncoding:(NSStringEncoding *)usedEncoding error:(NSError **)anError; //designated initializer
+- (id) initWithStream:(NSInputStream *)readStream encoding:(NSStringEncoding)encoding error:(NSError **)anError;
+
- (id) initWithContentsOfCSVFile:(NSString *)aCSVFile encoding:(NSStringEncoding)encoding error:(NSError **)anError;
- (id) initWithContentsOfCSVFile:(NSString *)aCSVFile usedEncoding:(NSStringEncoding *)usedEncoding error:(NSError **)anError;
View
@@ -55,7 +55,7 @@ - (void) trimString_csv:(NSString *)character {
}
// Replaces the receiver's contents with a copy of itself that has the characters in `set`
// trimmed off (via -stringByTrimmingCharactersInSet:). The receiver must respond to
// -setString:, i.e. this is an in-place edit of a mutable string.
- (void) trimCharactersInSet_csv:(NSCharacterSet *)set {
- NSString * trimmed = [self stringByTrimmingCharactersInSet:set];
+ NSString *trimmed = [self stringByTrimmingCharactersInSet:set];
[self setString:trimmed];
}
@@ -67,10 +67,12 @@ - (void) replaceOccurrencesOfString:(NSString *)find withString_csv:(NSString *)
@interface CHCSVParser ()
-@property (retain) NSMutableData * currentChunk;
+@property (retain) NSMutableData *currentChunk;
- (NSStringEncoding) textEncodingForData:(NSData *)chunkToSniff offset:(NSUInteger *)offset;
+- (void) determineTextEncoding;
+- (void) readNextChunk;
- (NSString *) nextCharacter;
- (void) runParseLoop;
- (void) processComposedCharacter:(NSString *)currentCharacter previousCharacter:(NSString *)previousCharacter previousPreviousCharacter:(NSString *)previousPreviousCharacter;
@@ -87,82 +89,78 @@ - (void) finishCurrentLine;
@implementation CHCSVParser
@synthesize parserDelegate, currentChunk, error, csvFile, delimiter;
-- (id) initWithContentsOfCSVFile:(NSString *)aCSVFile encoding:(NSStringEncoding)encoding error:(NSError **)anError {
+/**
+ * Designated initializer. Retains and opens readStream and prepares the parser state.
+ *
+ * @param readStream   The stream to parse. It is opened here and closed in -dealloc.
+ * @param usedEncoding In/out. If it points at a non-zero encoding, that encoding is used as-is;
+ *                     otherwise the encoding is sniffed from the first chunk of the stream.
+ *                     On return (when non-NULL) it holds the encoding the parser will use.
+ * @param anError      On failure (nil stream, or the stream could not be opened), receives an
+ *                     NSError describing the problem. May be NULL.
+ * @return The initialized parser, or nil if the stream is unusable.
+ */
+- (id) initWithStream:(NSInputStream *)readStream usedEncoding:(NSStringEncoding *)usedEncoding error:(NSError **)anError {
self = [super init];
- if (self) {
- csvFile = [aCSVFile copy];
- csvFileHandle = [[NSFileHandle fileHandleForReadingAtPath:csvFile] retain];
- if (csvFileHandle == nil) {
- if (anError) {
- *anError = [NSError errorWithDomain:@"com.davedelong.csv" code:0 userInfo:[NSDictionary dictionaryWithObject:@"Unable to open file for reading" forKey:NSLocalizedDescriptionKey]];
- }
- [self release];
- return nil;
- }
- fileEncoding = encoding;
+ if (self) {
+ csvReadStream = [readStream retain];
+ [csvReadStream open];
+
+ // -open has no return value; a failure (e.g. a file that does not exist) only shows up
+ // in the stream's status. Report it instead of handing back a parser over a dead stream.
+ if (csvReadStream == nil || [csvReadStream streamStatus] == NSStreamStatusError) {
+ if (anError) {
+ NSError *streamError = [csvReadStream streamError];
+ if (streamError == nil) {
+ streamError = [NSError errorWithDomain:@"com.davedelong.csv" code:0 userInfo:[NSDictionary dictionaryWithObject:@"Unable to open stream for reading" forKey:NSLocalizedDescriptionKey]];
+ }
+ *anError = streamError;
+ }
+ // -dealloc closes and releases csvReadStream
+ [self release];
+ return nil;
+ }
+
+ if (usedEncoding && *usedEncoding > 0) {
+ //if we're supplied an encoding, just use that
+ fileEncoding = *usedEncoding;
+ } else {
+ //otherwise try to guess
+ [self determineTextEncoding];
+ }
+ if (usedEncoding) {
+ *usedEncoding = fileEncoding;
+ }
balancedQuotes = YES;
balancedEscapes = YES;
currentLine = 0;
currentField = [[NSMutableString alloc] init];
- currentChunk = [[NSMutableData alloc] init];
- doneReadingFile = NO;
+ // -determineTextEncoding may already have created currentChunk with the first bytes read
+ if (currentChunk == nil) {
+ currentChunk = [[NSMutableData alloc] init];
+ }
+ endOfStreamReached = NO;
currentChunkString = [[NSMutableString alloc] init];
stringIndex = 0;
[self setDelimiter:@","];
SETSTATE(CHCSVParserStateInsideFile)
- }
- return self;
+
+ }
+ return self;
+}
+
+// Convenience initializer: parses readStream using the caller-supplied encoding by forwarding
+// to -initWithStream:usedEncoding:error: with a pointer to a local copy of that encoding.
+- (id)initWithStream:(NSInputStream *)readStream encoding:(NSStringEncoding)encoding error:(NSError **)anError {
+ NSStringEncoding requestedEncoding = encoding;
+ id parser = [self initWithStream:readStream usedEncoding:&requestedEncoding error:anError];
+ return parser;
+}
+
+// Convenience initializer: parses the file at aCSVFile using the caller-supplied encoding by
+// forwarding to -initWithContentsOfCSVFile:usedEncoding:error: with a local copy of it.
+- (id) initWithContentsOfCSVFile:(NSString *)aCSVFile encoding:(NSStringEncoding)encoding error:(NSError **)anError {
+ NSStringEncoding requestedEncoding = encoding;
+ id parser = [self initWithContentsOfCSVFile:aCSVFile usedEncoding:&requestedEncoding error:anError];
+ return parser;
 }
+/**
+ * Initializes the parser to read the CSV file at aCSVFile through an NSInputStream.
+ *
+ * @param aCSVFile     Path of the file to parse; a copy is kept in csvFile on success.
+ * @param usedEncoding In/out encoding, passed through to -initWithStream:usedEncoding:error:.
+ * @param anError      Receives an error if the file cannot be opened for reading. May be NULL.
+ * @return The initialized parser, or nil if the file is unreadable.
+ */
- (id) initWithContentsOfCSVFile:(NSString *)aCSVFile usedEncoding:(NSStringEncoding *)usedEncoding error:(NSError **)anError {
- self = [self initWithContentsOfCSVFile:aCSVFile encoding:NSUTF8StringEncoding error:anError];
+ // +inputStreamWithFileAtPath: normally hands back a stream object even when the file is
+ // missing or unreadable (the failure only surfaces on -open), so a nil check alone never
+ // fires. Check readability first so callers still get nil plus an error, as they did with
+ // the NSFileHandle-based implementation.
+ NSInputStream *readStream = nil;
+ if (aCSVFile != nil && [[NSFileManager defaultManager] isReadableFileAtPath:aCSVFile]) {
+ readStream = [NSInputStream inputStreamWithFileAtPath:aCSVFile];
+ }
+ if (readStream == nil) {
+ if (anError) {
+ *anError = [NSError errorWithDomain:@"com.davedelong.csv" code:0 userInfo:[NSDictionary dictionaryWithObject:@"Unable to open file for reading" forKey:NSLocalizedDescriptionKey]];
+ }
+ [self release];
+ return nil;
+ }
+
+ self = [self initWithStream:readStream usedEncoding:usedEncoding error:anError];
if (self) {
-
- NSData * chunk = [csvFileHandle readDataOfLength:CHUNK_SIZE];
- NSUInteger seekOffset = 0;
- fileEncoding = [self textEncodingForData:chunk offset:&seekOffset];
- [csvFileHandle seekToFileOffset:seekOffset];
-
- [self setDelimiter:@","];
-
- if (usedEncoding) {
- *usedEncoding = fileEncoding;
- }
+ csvFile = [aCSVFile copy];
}
return self;
}
+// Parses an in-memory string: the string is converted to NSData in the given encoding, wrapped
+// in an NSInputStream, and handed to -initWithStream:encoding:error: with that same encoding.
- (id) initWithCSVString:(NSString *)csvString encoding:(NSStringEncoding)encoding error:(NSError **)anError {
- self = [super init];
- if (self) {
- csvFile = nil;
- csvFileHandle = nil;
- fileEncoding = encoding;
-
- balancedQuotes = YES;
- balancedEscapes = YES;
-
- currentLine = 0;
- currentField = [[NSMutableString alloc] init];
-
- currentChunkString = [csvString mutableCopy];
- doneReadingFile = YES;
- stringIndex = 0;
-
- [self setDelimiter:@","];
-
- SETSTATE(CHCSVParserStateInsideFile)
- }
- return self;
+ NSData *encodedCSV = [csvString dataUsingEncoding:encoding];
+ NSInputStream *dataStream = [NSInputStream inputStreamWithData:encodedCSV];
+ return [self initWithStream:dataStream encoding:encoding error:anError];
}
- (void) dealloc {
- [csvFileHandle release];
+ [csvReadStream close];
+ [csvReadStream release];
[csvFile release];
[currentField release];
[currentChunk release];
@@ -173,6 +171,21 @@ - (void) dealloc {
[super dealloc];
}
+/**
+ * Reads the first chunk (up to CHUNK_SIZE bytes) from csvReadStream into currentChunk and,
+ * if any bytes were read, asks -textEncodingForData:offset: for the encoding. Any leading
+ * bytes that method reports via `offset` are removed from currentChunk so they are not parsed
+ * as CSV content. If nothing could be read, fileEncoding is left untouched.
+ */
+- (void) determineTextEncoding {
+ uint8_t bytes[CHUNK_SIZE];
+ // -read:maxLength: returns a signed NSInteger: -1 on error, 0 at end of stream. Keeping it
+ // signed matters; stored as NSUInteger, -1 becomes a huge length and over-reads `bytes`.
+ NSInteger bytesRead = [csvReadStream read:bytes maxLength:CHUNK_SIZE];
+ if (bytesRead < 0) {
+ // remember the failure (owned reference, same as the error created in -readNextChunk)
+ if (error == nil) {
+ error = [[csvReadStream streamError] retain];
+ }
+ bytesRead = 0;
+ }
+ currentChunk = [[NSMutableData alloc] initWithBytes:bytes length:(NSUInteger)bytesRead];
+
+ if ([currentChunk length] > 0) {
+ NSUInteger offset = 0;
+ fileEncoding = [self textEncodingForData:currentChunk offset:&offset];
+ if (offset > 0) {
+ // strip off the text encoding bytes; length:0 deletes the range, whereas the
+ // two-argument form would try to copy `offset` bytes from NULL
+ [currentChunk replaceBytesInRange:NSMakeRange(0, offset) withBytes:NULL length:0];
+ }
+ }
+}
+
- (NSStringEncoding) textEncodingForData:(NSData *)chunkToSniff offset:(NSUInteger *)offset {
NSUInteger length = [chunkToSniff length];
*offset = 0;
@@ -256,11 +269,13 @@ - (void) setDelimiter:(NSString *)newDelimiter {
#pragma mark Parsing methods
- (void) readNextChunk {
- NSData * nextChunk = nil;
+ NSData *nextChunk = nil;
@try {
- nextChunk = [csvFileHandle readDataOfLength:CHUNK_SIZE];
+ uint8_t bytes[CHUNK_SIZE];
+ NSUInteger bytesRead = [csvReadStream read:bytes maxLength:CHUNK_SIZE];
+ nextChunk = [NSData dataWithBytes:bytes length:bytesRead];
}
- @catch (NSException * e) {
+ @catch (NSException *e) {
error = [[NSError alloc] initWithDomain:@"com.davedelong.csv" code:0 userInfo:[NSDictionary dictionaryWithObjectsAndKeys:
e, NSUnderlyingErrorKey,
[e reason], NSLocalizedDescriptionKey,
@@ -293,21 +308,21 @@ - (void) readNextChunk {
[currentChunk replaceBytesInRange:NSMakeRange(0, readLength) withBytes:NULL length:0];
}
- if ([nextChunk length] < CHUNK_SIZE) {
- doneReadingFile = YES;
+ if ([csvReadStream streamStatus] == NSStreamStatusAtEnd) {
+ endOfStreamReached = YES;
}
}
// Returns the next composed character sequence from currentChunkString and advances
// stringIndex past it. Returns nil when stringIndex is at or beyond the end of the buffered
// string (including when the buffer is empty).
- (NSString *) nextCharacter {
// While the end of the stream has not been flagged, call -readNextChunk as soon as the read
// position reaches the halfway point of the buffered string.
- if (doneReadingFile == NO && stringIndex >= [currentChunkString length]/2) {
+ if (endOfStreamReached == NO && stringIndex >= [currentChunkString length]/2) {
[self readNextChunk];
}
if (stringIndex >= [currentChunkString length]) { return nil; }
if ([currentChunkString length] == 0) { return nil; }
// Take the whole composed sequence at stringIndex rather than a single unichar, so a
// multi-unit character is returned as one string.
NSRange charRange = [currentChunkString rangeOfComposedCharacterSequenceAtIndex:stringIndex];
- NSString * nextChar = [currentChunkString substringWithRange:charRange];
+ NSString *nextChar = [currentChunkString substringWithRange:charRange];
stringIndex = charRange.location + charRange.length;
return nextChar;
}
@@ -327,11 +342,11 @@ - (void) parse {
}
- (void) runParseLoop {
- NSString * currentCharacter = nil;
- NSString * previousCharacter = nil;
- NSString * previousPreviousCharacter = nil;
+ NSString *currentCharacter = nil;
+ NSString *previousCharacter = nil;
+ NSString *previousPreviousCharacter = nil;
- NSAutoreleasePool * pool = [[NSAutoreleasePool alloc] init];
+ NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init];
unsigned char counter = 0;
while (error == nil &&
@@ -425,7 +440,7 @@ - (void) processComposedCharacter:(NSString *)currentCharacter previousCharacter
}
} else {
if (previousUnichar == UNICHAR_QUOTE && previousPreviousUnichar != UNICHAR_BACKSLASH && balancedQuotes == YES && balancedEscapes == YES) {
- NSString * reason = [NSString stringWithFormat:@"Invalid CSV format on line #%lu immediately after \"%@\"", currentLine, currentField];
+ NSString *reason = [NSString stringWithFormat:@"Invalid CSV format on line #%lu immediately after \"%@\"", currentLine, currentField];
error = [[NSError alloc] initWithDomain:@"com.davedelong.csv" code:0 userInfo:[NSDictionary dictionaryWithObject:reason forKey:NSLocalizedDescriptionKey]];
return;
}
@@ -482,7 +497,7 @@ - (void) finishCurrentField {
nextSlash = [currentField rangeOfString:STRING_BACKSLASH options:NSLiteralSearch range:nextSearchRange];
}
- NSString * field = [currentField copy];
+ NSString *field = [currentField copy];
[[self parserDelegate] parser:self didReadField:field];
[field release];
@@ -8,9 +8,7 @@
/* Begin PBXBuildFile section */
5516BCB512578CFC0025F235 /* NSString+CHCSVAdditions.m in Sources */ = {isa = PBXBuildFile; fileRef = 5516BCB412578CFC0025F235 /* NSString+CHCSVAdditions.m */; };
- 5516BCB912578D750025F235 /* CHCSVSupport.m in Sources */ = {isa = PBXBuildFile; fileRef = 5516BCB812578D750025F235 /* CHCSVSupport.m */; };
5516BCBB12578EA90025F235 /* NSString+CHCSVAdditions.m in Sources */ = {isa = PBXBuildFile; fileRef = 5516BCB412578CFC0025F235 /* NSString+CHCSVAdditions.m */; };
- 5516BCBC12578EAD0025F235 /* CHCSVSupport.m in Sources */ = {isa = PBXBuildFile; fileRef = 5516BCB812578D750025F235 /* CHCSVSupport.m */; };
551981D61203715400FBE033 /* CHCSVParser.m in Sources */ = {isa = PBXBuildFile; fileRef = 551981D51203715400FBE033 /* CHCSVParser.m */; };
5538B52D1344F0A1004930DD /* test.tsv in Resources */ = {isa = PBXBuildFile; fileRef = 5538B52C1344F0A1004930DD /* test.tsv */; };
557FCEB61203F938009FCDBA /* CoreServices.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 557FCEB51203F938009FCDBA /* CoreServices.framework */; };
@@ -46,8 +44,6 @@
5516BCB312578CFC0025F235 /* NSString+CHCSVAdditions.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "NSString+CHCSVAdditions.h"; sourceTree = "<group>"; };
5516BCB412578CFC0025F235 /* NSString+CHCSVAdditions.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "NSString+CHCSVAdditions.m"; sourceTree = "<group>"; };
5516BCB612578D480025F235 /* CHCSV.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CHCSV.h; sourceTree = "<group>"; };
- 5516BCB712578D750025F235 /* CHCSVSupport.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CHCSVSupport.h; sourceTree = "<group>"; };
- 5516BCB812578D750025F235 /* CHCSVSupport.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = CHCSVSupport.m; sourceTree = "<group>"; };
551981D41203715400FBE033 /* CHCSVParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CHCSVParser.h; sourceTree = "<group>"; };
551981D51203715400FBE033 /* CHCSVParser.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = CHCSVParser.m; sourceTree = "<group>"; };
551981EE1203800300FBE033 /* Test.csv */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = Test.csv; sourceTree = "<group>"; };
@@ -137,8 +133,6 @@
551981D51203715400FBE033 /* CHCSVParser.m */,
55EFC7B71210608C0070B303 /* CHCSVWriter.h */,
55EFC7B81210608C0070B303 /* CHCSVWriter.m */,
- 5516BCB712578D750025F235 /* CHCSVSupport.h */,
- 5516BCB812578D750025F235 /* CHCSVSupport.m */,
557FD0411204A45D009FCDBA /* NSArray+CHCSVAdditions.h */,
557FD0421204A45D009FCDBA /* NSArray+CHCSVAdditions.m */,
5516BCB312578CFC0025F235 /* NSString+CHCSVAdditions.h */,
@@ -257,7 +251,6 @@
557FD05D1204A731009FCDBA /* UnitTests.m in Sources */,
55EFC7B91210608C0070B303 /* CHCSVWriter.m in Sources */,
5516BCB512578CFC0025F235 /* NSString+CHCSVAdditions.m in Sources */,
- 5516BCB912578D750025F235 /* CHCSVSupport.m in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@@ -269,7 +262,6 @@
551981D61203715400FBE033 /* CHCSVParser.m in Sources */,
557FD0431204A45D009FCDBA /* NSArray+CHCSVAdditions.m in Sources */,
5516BCBB12578EA90025F235 /* NSString+CHCSVAdditions.m in Sources */,
- 5516BCBC12578EAD0025F235 /* CHCSVSupport.m in Sources */,
55BB40E612A367A500A1BB33 /* CHCSVWriter.m in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
Oops, something went wrong.

0 comments on commit 5985821

Please sign in to comment.