Permalink
Browse files

adding URLState/EmailState

  • Loading branch information...
itod committed Apr 1, 2010
1 parent 66e813d commit 37311564f9fab5c23fc953c819db6574a19792b0
@@ -242,6 +242,16 @@
D355C8810FEB36A1006A91A4 /* xml.grammar in Resources */ = {isa = PBXBuildFile; fileRef = D355C8800FEB36A1006A91A4 /* xml.grammar */; };
D355C8840FEB4B94006A91A4 /* proto.grammar in Resources */ = {isa = PBXBuildFile; fileRef = D355C8830FEB4B94006A91A4 /* proto.grammar */; };
D3587EAF0FE83EC900DDD023 /* AppKit.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 0867D6A5FE840307C02AAC07 /* AppKit.framework */; };
+ D35F4A8711643630003811F3 /* PKEmailState.m in Sources */ = {isa = PBXBuildFile; fileRef = D35F4A8511643630003811F3 /* PKEmailState.m */; };
+ D35F4A8811643630003811F3 /* PKURLState.m in Sources */ = {isa = PBXBuildFile; fileRef = D35F4A8611643630003811F3 /* PKURLState.m */; };
+ D35F4A8911643630003811F3 /* PKEmailState.m in Sources */ = {isa = PBXBuildFile; fileRef = D35F4A8511643630003811F3 /* PKEmailState.m */; };
+ D35F4A8A11643630003811F3 /* PKURLState.m in Sources */ = {isa = PBXBuildFile; fileRef = D35F4A8611643630003811F3 /* PKURLState.m */; };
+ D35F4A8D11643662003811F3 /* PKEmailState.h in Headers */ = {isa = PBXBuildFile; fileRef = D35F4A8B11643662003811F3 /* PKEmailState.h */; settings = {ATTRIBUTES = (Public, ); }; };
+ D35F4A8E11643662003811F3 /* PKURLState.h in Headers */ = {isa = PBXBuildFile; fileRef = D35F4A8C11643662003811F3 /* PKURLState.h */; settings = {ATTRIBUTES = (Public, ); }; };
+ D35F4A8F11643662003811F3 /* PKEmailState.h in Headers */ = {isa = PBXBuildFile; fileRef = D35F4A8B11643662003811F3 /* PKEmailState.h */; settings = {ATTRIBUTES = (Public, ); }; };
+ D35F4A9011643662003811F3 /* PKURLState.h in Headers */ = {isa = PBXBuildFile; fileRef = D35F4A8C11643662003811F3 /* PKURLState.h */; settings = {ATTRIBUTES = (Public, ); }; };
+ D35F4AA21164380C003811F3 /* TDURLStateTest.m in Sources */ = {isa = PBXBuildFile; fileRef = D35F4AA11164380C003811F3 /* TDURLStateTest.m */; };
+ D35F4ABA11643979003811F3 /* TDEmailStateTest.m in Sources */ = {isa = PBXBuildFile; fileRef = D35F4AB911643979003811F3 /* TDEmailStateTest.m */; };
D36568A70EEF9FE900226554 /* TDPlistParser.m in Sources */ = {isa = PBXBuildFile; fileRef = D36568A60EEF9FE900226554 /* TDPlistParser.m */; };
D36568AA0EEFA05300226554 /* TDPlistParserTest.m in Sources */ = {isa = PBXBuildFile; fileRef = D36568A90EEFA05300226554 /* TDPlistParserTest.m */; };
D3656DFE0EF2620E00226554 /* TDTokenArraySourceTest.m in Sources */ = {isa = PBXBuildFile; fileRef = D3656DFD0EF2620E00226554 /* TDTokenArraySourceTest.m */; };
@@ -922,6 +932,14 @@
D355C6300FE9EFEA006A91A4 /* TDNSPredicateEvaluatorTest.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = TDNSPredicateEvaluatorTest.m; path = test/TDNSPredicateEvaluatorTest.m; sourceTree = "<group>"; };
D355C8800FEB36A1006A91A4 /* xml.grammar */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = xml.grammar; path = res/xml.grammar; sourceTree = "<group>"; };
D355C8830FEB4B94006A91A4 /* proto.grammar */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = proto.grammar; path = res/proto.grammar; sourceTree = "<group>"; };
+ D35F4A8511643630003811F3 /* PKEmailState.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = PKEmailState.m; path = src/PKEmailState.m; sourceTree = "<group>"; };
+ D35F4A8611643630003811F3 /* PKURLState.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = PKURLState.m; path = src/PKURLState.m; sourceTree = "<group>"; };
+ D35F4A8B11643662003811F3 /* PKEmailState.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PKEmailState.h; path = include/ParseKit/PKEmailState.h; sourceTree = "<group>"; };
+ D35F4A8C11643662003811F3 /* PKURLState.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PKURLState.h; path = include/ParseKit/PKURLState.h; sourceTree = "<group>"; };
+ D35F4AA01164380C003811F3 /* TDURLStateTest.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TDURLStateTest.h; path = test/TDURLStateTest.h; sourceTree = "<group>"; };
+ D35F4AA11164380C003811F3 /* TDURLStateTest.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = TDURLStateTest.m; path = test/TDURLStateTest.m; sourceTree = "<group>"; };
+ D35F4AB811643979003811F3 /* TDEmailStateTest.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TDEmailStateTest.h; path = test/TDEmailStateTest.h; sourceTree = "<group>"; };
+ D35F4AB911643979003811F3 /* TDEmailStateTest.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = TDEmailStateTest.m; path = test/TDEmailStateTest.m; sourceTree = "<group>"; };
D36568A50EEF9FE900226554 /* TDPlistParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TDPlistParser.h; path = test/TDPlistParser.h; sourceTree = "<group>"; };
D36568A60EEF9FE900226554 /* TDPlistParser.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = TDPlistParser.m; path = test/TDPlistParser.m; sourceTree = "<group>"; };
D36568A80EEFA05300226554 /* TDPlistParserTest.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TDPlistParserTest.h; path = test/TDPlistParserTest.h; sourceTree = "<group>"; };
@@ -1524,6 +1542,10 @@
D34BAECE0FF9D56400D7773A /* PKSingleLineCommentState.m */,
D3C221E70FFE8C60004514FE /* PKMultiLineCommentState.h */,
D34BAEC80FF9D56400D7773A /* PKMultiLineCommentState.m */,
+ D35F4A8B11643662003811F3 /* PKEmailState.h */,
+ D35F4A8511643630003811F3 /* PKEmailState.m */,
+ D35F4A8C11643662003811F3 /* PKURLState.h */,
+ D35F4A8611643630003811F3 /* PKURLState.m */,
D3541F890DFB108300429B4F /* symbol */,
);
name = states;
@@ -1561,6 +1583,10 @@
D3DDDAFD0F083C9700A58000 /* TDCommentStateTest.m */,
D3E39C3D0FC5FFD10022DAB9 /* TDDelimitStateTest.h */,
D3E39C3E0FC5FFD10022DAB9 /* TDDelimitStateTest.m */,
+ D35F4AA01164380C003811F3 /* TDURLStateTest.h */,
+ D35F4AA11164380C003811F3 /* TDURLStateTest.m */,
+ D35F4AB811643979003811F3 /* TDEmailStateTest.h */,
+ D35F4AB911643979003811F3 /* TDEmailStateTest.m */,
);
name = "tokenizer states";
sourceTree = "<group>";
@@ -2376,6 +2402,8 @@
D3C2222E0FFE8DEE004514FE /* NSString+ParseKitAdditions.h in Headers */,
D3F0E2480FFE8EB900C9DF74 /* PKQuoteState.h in Headers */,
D3376D5910093A1600E4602E /* PKGrammarParser.h in Headers */,
+ D35F4A8D11643662003811F3 /* PKEmailState.h in Headers */,
+ D35F4A8E11643662003811F3 /* PKURLState.h in Headers */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@@ -2484,6 +2512,8 @@
D3F0E2490FFE8EB900C9DF74 /* PKQuoteState.h in Headers */,
D3F0E3CB0FFEB70100C9DF74 /* PKNumber.h in Headers */,
D37A28681013942A00E936B7 /* PKGrammarParser.h in Headers */,
+ D35F4A8F11643662003811F3 /* PKEmailState.h in Headers */,
+ D35F4A9011643662003811F3 /* PKURLState.h in Headers */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@@ -2853,6 +2883,8 @@
D34BAFDE0FF9E95500D7773A /* PKParserFactory.m in Sources */,
D3126D060FFD9BA700CBF4C4 /* PKNegation.m in Sources */,
D3376D5A10093A1600E4602E /* PKGrammarParser.m in Sources */,
+ D35F4A8711643630003811F3 /* PKEmailState.m in Sources */,
+ D35F4A8811643630003811F3 /* PKURLState.m in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@@ -3075,6 +3107,8 @@
D38E97CC1061CF6500739C39 /* TDTokenizerBlocksTest.m in Sources */,
D38E98D81062C5BA00739C39 /* TDParserBlocksTest.m in Sources */,
D319E42E106D8A31008C63DD /* TDArithmeticAssembler.m in Sources */,
+ D35F4AA21164380C003811F3 /* TDURLStateTest.m in Sources */,
+ D35F4ABA11643979003811F3 /* TDEmailStateTest.m in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@@ -3135,6 +3169,8 @@
D3126DEC0FFDBC1D00CBF4C4 /* PKNegation.m in Sources */,
D3F0E3CC0FFEB70700C9DF74 /* PKNumber.m in Sources */,
D37A28671013942300E936B7 /* PKGrammarParser.m in Sources */,
+ D35F4A8911643630003811F3 /* PKEmailState.m in Sources */,
+ D35F4A8A11643630003811F3 /* PKURLState.m in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@@ -49,7 +49,8 @@
/*!
@property balancesEOFTerminatedStrings
- @brief if true, this state will append a matching end delimiter marker (e.g. <tt>--></tt> or <tt>%></tt>) to strings terminated by EOF. Default is NO.
+ @brief if YES, this state will append a matching end delimiter marker (e.g. <tt>--></tt> or <tt>%></tt>) to strings terminated by EOF.
+ @details Default is NO.
*/
@property (nonatomic) BOOL balancesEOFTerminatedStrings;
@@ -0,0 +1,21 @@
+//
+// PKEmailState.h
+// ParseKit
+//
+// Created by Todd Ditchendorf on 3/31/10.
+// Copyright 2010 Todd Ditchendorf. All rights reserved.
+//
+
+#import <Foundation/Foundation.h>
+#import <ParseKit/PKTokenizerState.h>
+
+/*!
+ @class PKEmailState
+ @brief An email state returns an email address from a reader.
+ @details The state first reads a name, which runs up to an '@' that is preceded by at least one character, and then a host. The host runs until EOF, whitespace, or one of the characters ( ) < > / and must contain a '.'. When the text does not have this shape, the state rewinds the reader and hands the starting character to another tokenizer state, so no email token is produced.
+*/
+@interface PKEmailState : PKTokenizerState {
+ // The character currently being examined; shared by the name and host parsing steps.
+ PKUniChar c;
+}
+
+@end
View
@@ -35,7 +35,9 @@ typedef enum {
PKTokenTypeWhitespace,
PKTokenTypeComment,
PKTokenTypeDelimitedString,
- PKTokenTypeAny
+ PKTokenTypeAny,
+ PKTokenTypeURL,
+ PKTokenTypeEmail
} PKTokenType;
/*!
@@ -55,6 +57,8 @@ typedef enum {
BOOL whitespace;
BOOL comment;
BOOL delimitedString;
+ BOOL URL;
+ BOOL email;
id value;
NSUInteger offset;
@@ -140,6 +144,18 @@ typedef enum {
*/
@property (nonatomic, readonly, getter=isDelimitedString) BOOL delimitedString;
+/*!
+ @property URL
+ @brief True if this token is a URL. getter=isURL
+*/
+@property (nonatomic, readonly, getter=isURL) BOOL URL;
+
+/*!
+ @property email
+ @brief True if this token is an email address. getter=isEmail
+*/
+@property (nonatomic, readonly, getter=isEmail) BOOL email;
+
/*!
@property tokenType
@brief The type of this token.
@@ -24,6 +24,8 @@
@class PKWhitespaceState;
@class PKWordState;
@class PKDelimitState;
+@class PKURLState;
+@class PKEmailState;
@class PKReader;
/*!
@@ -36,8 +38,8 @@
@code
From To State
0 ' ' whitespaceState
- 'a' 'z' wordState
- 'A' 'Z' wordState
+ 'a' 'z' URLState
+ 'A' 'Z' URLState
160 255 wordState
'0' '9' numberState
'-' '-' numberState
@@ -61,6 +63,8 @@
PKWhitespaceState *whitespaceState;
PKWordState *wordState;
PKDelimitState *delimitState;
+ PKURLState *URLState;
+ PKEmailState *emailState;
}
/*!
@@ -153,4 +157,7 @@
@brief The state this tokenizer uses to build delimited strings.
*/
@property (nonatomic, retain) PKDelimitState *delimitState;
+
+/*!
+ @property URLState
+ @brief The state this tokenizer uses to build URL tokens.
+*/
+@property (nonatomic, retain) PKURLState *URLState;
+
+/*!
+ @property emailState
+ @brief The state this tokenizer uses to build email address tokens.
+*/
+@property (nonatomic, retain) PKEmailState *emailState;
@end
@@ -22,7 +22,7 @@
/*!
@class PKTokenizerState
@brief A <tt>PKTokenizerState</tt> returns a token, given a reader, an initial character read from the reader, and a tokenizer that is conducting an overall tokenization of the reader.
- @details The tokenizer will typically have a character state table that decides which state to use, depending on an initial character. If a single character is insufficient, a state such as <tt>PKCommentState</tt> will read a second character, and may delegate to another state, such as <tt>PKSingleLineState</tt>. This prospect of delegation is the reason that the <tt>-nextToken</tt> method has a tokenizer argument.
+ @details The tokenizer will typically have a character state table that decides which state to use, depending on an initial character. If a single character is insufficient, a state such as <tt>PKCommentState</tt> will read a second character, and may delegate to another state, such as <tt>PKSingleLineCommentState</tt>. This prospect of delegation is the reason that the <tt>-nextToken</tt> method has a tokenizer argument.
*/
@interface PKTokenizerState : NSObject {
NSMutableString *stringbuf;
@@ -0,0 +1,23 @@
+//
+// PKURLState.h
+// ParseKit
+//
+// Created by Todd Ditchendorf on 3/26/10.
+// Copyright 2010 Todd Ditchendorf. All rights reserved.
+//
+
+#import <Foundation/Foundation.h>
+#import <ParseKit/PKTokenizerState.h>
+
+/*!
+ @class PKURLState
+ @brief A URL state returns a URL from a reader.
+ @details
+*/
+@interface PKURLState : PKTokenizerState {
+ // The character currently being examined (same role as in PKEmailState, presumably -- confirm in PKURLState.m).
+ PKUniChar c;
+ // Backing storage for the allowsWWWPrefix property.
+ BOOL allowsWWWPrefix;
+}
+
+// NOTE(review): presumably controls whether text starting with "www." and lacking a scheme
+// is still recognized as a URL; the default value is not visible here -- confirm in PKURLState.m.
+@property (nonatomic) BOOL allowsWWWPrefix;
+@end
@@ -62,6 +62,8 @@
#import <ParseKit/PKNumberState.h>
#import <ParseKit/PKQuoteState.h>
#import <ParseKit/PKDelimitState.h>
+#import <ParseKit/PKURLState.h>
+#import <ParseKit/PKEmailState.h>
#import <ParseKit/PKCommentState.h>
#import <ParseKit/PKSingleLineCommentState.h>
#import <ParseKit/PKMultiLineCommentState.h>
View
@@ -0,0 +1,110 @@
+//
+// PKEmailState.m
+// ParseKit
+//
+// Created by Todd Ditchendorf on 3/31/10.
+// Copyright 2010 Todd Ditchendorf. All rights reserved.
+//
+
+#import <ParseKit/PKEmailState.h>
+#import <ParseKit/PKReader.h>
+#import <ParseKit/PKTokenizer.h>
+#import <ParseKit/PKToken.h>
+#import <ParseKit/PKTypes.h>
+
+@interface PKToken ()
+@property (nonatomic, readwrite) NSUInteger offset;
+@end
+
+@interface PKTokenizerState ()
+- (void)resetWithReader:(PKReader *)r;
+- (void)append:(PKUniChar)c;
+- (NSString *)bufferedString;
+- (PKTokenizerState *)nextTokenizerStateFor:(PKUniChar)c tokenizer:(PKTokenizer *)t;
+@end
+
+// Private parsing steps. Each one returns YES when its part of the address was matched,
+// and appends the characters it accepts to the token buffer as it goes.
+@interface PKEmailState ()
+- (BOOL)parseNameFromReader:(PKReader *)r;
+- (BOOL)parseHostFromReader:(PKReader *)r;
+@end
+
+@implementation PKEmailState
+
+- (void)dealloc {
+    [super dealloc];
+}
+
+
+/*!
+    Tries to read an email address (name, '@', host) beginning with cin.
+
+    On success returns a PKTokenTypeEmail token whose string value is the
+    buffered address. On failure rewinds the reader to just after cin and
+    delegates to the state returned by -nextTokenizerStateFor:tokenizer:.
+
+    @param r   the reader to consume characters from; must not be nil
+    @param cin the character already read from r that triggered this state
+    @param t   the tokenizer conducting the overall tokenization
+*/
+- (PKToken *)nextTokenFromReader:(PKReader *)r startingWith:(PKUniChar)cin tokenizer:(PKTokenizer *)t {
+    NSParameterAssert(r);
+    [self resetWithReader:r];
+
+    c = cin;
+    BOOL matched = [self parseNameFromReader:r];
+    if (matched) {
+        matched = [self parseHostFromReader:r];
+    }
+
+    NSString *s = [self bufferedString];
+    NSUInteger len = [s length];
+
+    // Both parsing steps read exactly one character after each append, so a
+    // non-empty buffer means c is a lookahead character that must be pushed
+    // back. With an empty buffer nothing was read here and c is still cin.
+    if (len > 0 && PKEOF != c) {
+        [r unread];
+    }
+
+    if (matched) {
+        PKToken *tok = [PKToken tokenWithTokenType:PKTokenTypeEmail stringValue:s floatValue:0.0];
+        tok.offset = offset;
+        return tok;
+    }
+
+    // Rewind everything except cin, which is handed to the fallback state
+    // directly. Guarded so an empty buffer cannot underflow the unsigned count.
+    if (len > 1) {
+        [r unread:len - 1];
+    }
+    return [[self nextTokenizerStateFor:cin tokenizer:t] nextTokenFromReader:r startingWith:cin tokenizer:t];
+}
+
+
+/*!
+    Reads the name part of the address, up to and including the '@'.
+
+    An '@' only ends the name when at least one character precedes it; a
+    leading '@' is treated as an ordinary name character. On return c holds
+    the character following the last one appended.
+
+    @return YES if a name followed by '@' was read; NO if EOF or whitespace came first
+*/
+- (BOOL)parseNameFromReader:(PKReader *)r {
+    BOOL hasAtLeastOneChar = NO;
+
+    for (;;) {
+        if (PKEOF == c || isspace(c)) {
+            return NO;
+        }
+
+        BOOL isSeparator = ('@' == c && hasAtLeastOneChar);
+
+        hasAtLeastOneChar = YES;
+        [self append:c];
+        // Always advance past the appended character -- including the '@' --
+        // so the host step starts on the first host character instead of
+        // seeing (and re-appending) the '@'.
+        c = [r read];
+
+        if (isSeparator) {
+            return YES;
+        }
+    }
+}
+
+
+/*!
+    Reads the host part of the address, starting with the character in c.
+
+    The host ends at EOF, whitespace, or one of ( ) < > / and is left in c
+    for the caller to push back.
+
+    @return YES if the host is non-empty and contains at least one '.'
+*/
+- (BOOL)parseHostFromReader:(PKReader *)r {
+    BOOL hasAtLeastOneChar = NO;
+    BOOL hasDot = NO;
+
+    // host characters: ^[:space:]()<>/
+    for (;;) {
+        if (PKEOF == c || isspace(c) || '(' == c || ')' == c || '<' == c || '>' == c || '/' == c) {
+            break;
+        }
+
+        if ('.' == c) {
+            hasDot = YES;
+        }
+        hasAtLeastOneChar = YES;
+        [self append:c];
+        c = [r read];
+    }
+
+    return hasAtLeastOneChar && hasDot;
+}
+
+@end
View
@@ -34,7 +34,7 @@ - (id)initWithParent:(PKSymbolNode *)p character:(PKUniChar)c {
self.children = [NSMutableDictionary dictionary];
// this private property is an optimization.
- // cache the NSString for the char to prevent it being constantly recreated in -determinAncestry
+ // cache the NSString for the char to prevent it being constantly recreated in -determineAncestry
self.string = [NSString stringWithFormat:@"%C", character];
[self determineAncestry];
Oops, something went wrong.

0 comments on commit 3731156

Please sign in to comment.