Skip to content

Commit

Permalink
Cleanup of HEParser
Browse files Browse the repository at this point in the history
  • Loading branch information
Alex Gordon committed Dec 17, 2009
1 parent c9e45c6 commit 0b9bf0f
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 27 deletions.
34 changes: 8 additions & 26 deletions HEParser.m
Expand Up @@ -15,6 +15,7 @@
// Class extension declaring class methods of HEParser that are used inside the implementation file.
@interface HEParser ()

// Fetches the text at urlString and chooses a parser by sniffing the contents.
//  - urlString: address to load; "http://" is prepended when it does not start with a scheme ("xxx://").
//  - feed: NOTE(review): how this object is used is not visible in the reviewed portion; confirm against the full method.
//  - allowHTMLIndirection: when YES and the contents is neither RSS nor Atom, the text is searched for a
//    <link ... type="application/(atom|rss)(+xml)"> tag and its href is extracted.
//    NOTE(review): what is done with that href is outside the visible code; confirm.
// Returns an HERSSParser when the contents matches "<rss", an HEAtomParser when it matches "<feed",
// and nil when urlString is empty, the download fails or is empty, or nothing matches.
+ (id)autodetectAndParseURL:(NSString *)urlString feedObject:(NSManagedObject *)feed allowHTMLIndirection:(BOOL)allowHTMLIndirection;
// Applied to the href value captured from a feed <link> tag.
// NOTE(review): presumably returns entStr with XML entities decoded; the implementation is not visible here.
// NOTE(review): the visible caller stores the result in an NSMutableString *; confirm the intended return type.
+ (NSString *)decodingXMLEntitiesInString:(NSString *)entStr;

@end

Expand Down Expand Up @@ -113,28 +114,24 @@ + (id)autodetectAndParseURL:(NSString *)urlString feedObject:(NSManagedObject *)
{
if (![urlString length])
return nil;

NSLog(@"Autodetect and parse URL = %@", urlString);


//Sanitize the URL
if (![urlString isMatchedByRegex:@"^[A-Za-z_\\-]+://"])
{
urlString = [@"http://" stringByAppendingString:urlString];
}
NSLog(@"\t urlString = '%@'", urlString);

//FIXME: Use NSURLConnection throughout to handle redirects

NSURL *url = [NSURL URLWithString:urlString];
NSLog(@"\t url = '%@'", url);
NSLog(@"Parsing URL = '%@'", url);

NSStringEncoding stringEncoding = NSUTF8StringEncoding;
NSError *err = nil;
NSString *string = [NSString stringWithContentsOfURL:url usedEncoding:&stringEncoding error:&err];
NSLog(@"\t [string length] = '%d'", [string length]);
NSLog(@"\t err = '%@'", err);
if (err || ![string length])
{
NSLog(@"\t Error getting URL = '%@'", err);
return nil;
}

Expand All @@ -143,7 +140,6 @@ + (id)autodetectAndParseURL:(NSString *)urlString feedObject:(NSManagedObject *)
//RSS
if ([string isMatchedByRegex:@"<rss\\b"])
{
NSLog(@"Matched rss");
HERSSParser *parser = [[HERSSParser alloc] init];
parser.stringContents = string;
return parser;
Expand All @@ -152,7 +148,6 @@ + (id)autodetectAndParseURL:(NSString *)urlString feedObject:(NSManagedObject *)
//Atom
else if ([string isMatchedByRegex:@"<feed\\b"])
{
NSLog(@"Matched feed");
HEAtomParser *parser = [[HEAtomParser alloc] init];
parser.stringContents = string;
return parser;
Expand All @@ -161,19 +156,13 @@ + (id)autodetectAndParseURL:(NSString *)urlString feedObject:(NSManagedObject *)
//HTML
else if (allowHTMLIndirection)
{
//<\s*link.+?type\s*=\s*("|')application/rss(\+xml)?("|')[^>]*>

//application/(rss/atom)\+xml

//href=("|')([^"']+)("|')
//FIXME: Parsing the HTML with regexes is a bit of a hack, and there are many (albeit unlikely) cases that could trip it up. Replace with something better

//Extract a link tag
NSString *linkTag = [string stringByMatching:@"<\\s*link[^>]+type\\s*=\\s*(\"|')application/(atom|rss)(\\+xml)?(\"|')[^>]*>"];
NSLog(@"\t linkTag = %@", linkTag);

//Extract the href
NSArray *components = [linkTag captureComponentsMatchedByRegex:@"href=(\"|')([^\"']+)(\"|')"];
NSLog(@"\t components = %@", components);
if ([components count] == 4)
{
NSMutableString *href = [self decodingXMLEntitiesInString:[components objectAtIndex:2]];
Expand All @@ -183,10 +172,7 @@ + (id)autodetectAndParseURL:(NSString *)urlString feedObject:(NSManagedObject *)
}
}

NSLog(@"Nothing?");

//NSLog(@"Nothing? %@", string);

NSLog(@"\t Contents appears to be invalid");

return nil;
}
Expand All @@ -213,9 +199,7 @@ - (void)parseIntoContext:(NSManagedObjectContext *)ctx
{
xml = [[NSXMLDocument alloc] initWithContentsOfURL:url options:NSXMLDocumentTidyXML error:&err];
}

//NSLog(@"Parsing = %@", xml);


if (err || !xml)
{
NSLog(@"Error parsing XML document: %@", err);
Expand Down Expand Up @@ -276,9 +260,7 @@ - (BOOL)addSubelementFrom:(NSXMLElement *)xmlElement name:(NSString *)subelement
NSString *string = [self subElementStringFrom:xmlElement name:subelementName];
if (string)
[mo setValue:string forKey:key];

//NSLog(@"Set key/value pair: %@, %@", key, string);


return YES;
}

Expand Down
1 change: 0 additions & 1 deletion HERefresher.m
Expand Up @@ -38,7 +38,6 @@ - (id)init
- (void)scheduleTimer
{
NSInteger refreshIntervalInt = [[NSUserDefaults standardUserDefaults] integerForKey:@"HERefreshInterval"];
NSLog(@"refreshIntervalInt = %d", refreshIntervalInt);
NSTimeInterval refreshInterval = 0.0;
if (refreshIntervalInt >= 30) //30 seconds is the minimum
refreshInterval = (NSTimeInterval)refreshIntervalInt;
Expand Down

0 comments on commit 0b9bf0f

Please sign in to comment.