Skip to content

Commit

Permalink
Merge pull request #36 from siuying/HTML
Browse files Browse the repository at this point in the history
Add HTML parser supports
  • Loading branch information
ZaBlanc committed Jan 15, 2014
2 parents 2112698 + a2aaad7 commit 76b59ec
Show file tree
Hide file tree
Showing 4 changed files with 129 additions and 0 deletions.
4 changes: 4 additions & 0 deletions RaptureXML.xcodeproj/project.pbxproj
Expand Up @@ -45,6 +45,7 @@
02F3A3FF1526D22600E8C822 /* players.xml in Resources */ = {isa = PBXBuildFile; fileRef = 0DEB8F2D14681BD800024989 /* players.xml */; }; 02F3A3FF1526D22600E8C822 /* players.xml in Resources */ = {isa = PBXBuildFile; fileRef = 0DEB8F2D14681BD800024989 /* players.xml */; };
0DEB8EB51467EC9B00024989 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 0252B2C5142ADFC60018B75D /* Foundation.framework */; }; 0DEB8EB51467EC9B00024989 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 0252B2C5142ADFC60018B75D /* Foundation.framework */; };
0DEB8F2C14681A9400024989 /* libz.dylib in Frameworks */ = {isa = PBXBuildFile; fileRef = 0252B305142AE3FF0018B75D /* libz.dylib */; }; 0DEB8F2C14681A9400024989 /* libz.dylib in Frameworks */ = {isa = PBXBuildFile; fileRef = 0252B305142AE3FF0018B75D /* libz.dylib */; };
5415BE4216FC638100AFC566 /* HTMLTests.m in Sources */ = {isa = PBXBuildFile; fileRef = 5415BE4116FC638100AFC566 /* HTMLTests.m */; };
6BD1BDA91558B91400F1D055 /* RXMLElement.h in Copy Headers */ = {isa = PBXBuildFile; fileRef = 027DAC2E14FBF443001BA563 /* RXMLElement.h */; }; 6BD1BDA91558B91400F1D055 /* RXMLElement.h in Copy Headers */ = {isa = PBXBuildFile; fileRef = 027DAC2E14FBF443001BA563 /* RXMLElement.h */; };
/* End PBXBuildFile section */ /* End PBXBuildFile section */


Expand Down Expand Up @@ -88,6 +89,7 @@
0DEB8F2B14681A0800024989 /* Tests-Prefix.pch */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "Tests-Prefix.pch"; path = "Tests/Tests-Prefix.pch"; sourceTree = SOURCE_ROOT; }; 0DEB8F2B14681A0800024989 /* Tests-Prefix.pch */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "Tests-Prefix.pch"; path = "Tests/Tests-Prefix.pch"; sourceTree = SOURCE_ROOT; };
0DEB8F2D14681BD800024989 /* players.xml */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xml; name = players.xml; path = Tests/players.xml; sourceTree = SOURCE_ROOT; }; 0DEB8F2D14681BD800024989 /* players.xml */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xml; name = players.xml; path = Tests/players.xml; sourceTree = SOURCE_ROOT; };
1413670716D9BEC700501ABB /* CopyTests.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = CopyTests.m; path = Tests/CopyTests.m; sourceTree = SOURCE_ROOT; }; 1413670716D9BEC700501ABB /* CopyTests.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = CopyTests.m; path = Tests/CopyTests.m; sourceTree = SOURCE_ROOT; };
5415BE4116FC638100AFC566 /* HTMLTests.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = HTMLTests.m; path = Tests/HTMLTests.m; sourceTree = SOURCE_ROOT; };
6BD1BD951558B7A800F1D055 /* RaptureXML-StaticLib-Prefix.pch */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "RaptureXML-StaticLib-Prefix.pch"; sourceTree = "<group>"; }; 6BD1BD951558B7A800F1D055 /* RaptureXML-StaticLib-Prefix.pch */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "RaptureXML-StaticLib-Prefix.pch"; sourceTree = "<group>"; };
/* End PBXFileReference section */ /* End PBXFileReference section */


Expand Down Expand Up @@ -168,6 +170,7 @@
02F3A4041526D7BC00E8C822 /* EncodingTests.m */, 02F3A4041526D7BC00E8C822 /* EncodingTests.m */,
027B6BCF153C652E00A4EDF2 /* XPathTests.m */, 027B6BCF153C652E00A4EDF2 /* XPathTests.m */,
02ADE6A116A0E33A008643D5 /* AttributeTests.m */, 02ADE6A116A0E33A008643D5 /* AttributeTests.m */,
5415BE4116FC638100AFC566 /* HTMLTests.m */,
); );
name = Tests; name = Tests;
path = RaptureXMLTests; path = RaptureXMLTests;
Expand Down Expand Up @@ -353,6 +356,7 @@
02ADE6A916A0E491008643D5 /* EncodingTests.m in Sources */, 02ADE6A916A0E491008643D5 /* EncodingTests.m in Sources */,
02ADE6AA16A0E491008643D5 /* XPathTests.m in Sources */, 02ADE6AA16A0E491008643D5 /* XPathTests.m in Sources */,
02565F9916E6320700A882F9 /* CopyTests.m in Sources */, 02565F9916E6320700A882F9 /* CopyTests.m in Sources */,
5415BE4216FC638100AFC566 /* HTMLTests.m in Sources */,
); );
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
}; };
Expand Down
13 changes: 13 additions & 0 deletions RaptureXML/RXMLElement.h
Expand Up @@ -31,6 +31,7 @@
#import <Foundation/Foundation.h> #import <Foundation/Foundation.h>
#import <libxml2/libxml/xmlreader.h> #import <libxml2/libxml/xmlreader.h>
#import <libxml2/libxml/xmlmemory.h> #import <libxml2/libxml/xmlmemory.h>
#import <libxml2/libxml/HTMLparser.h>
#import <libxml/xpath.h> #import <libxml/xpath.h>
#import <libxml/xpathInternals.h> #import <libxml/xpathInternals.h>


Expand All @@ -55,6 +56,12 @@
- (id)initFromXMLData:(NSData *)data; - (id)initFromXMLData:(NSData *)data;
- (id)initFromXMLDoc:(RXMLDocHolder *)doc node:(xmlNodePtr)node; - (id)initFromXMLDoc:(RXMLDocHolder *)doc node:(xmlNodePtr)node;


// HTML initializers. Every variant below ends up in -initFromHTMLData:, which
// parses the bytes with libxml2's HTML parser instead of the XML parser.

// Encodes xmlString (HTML markup, despite the parameter name) with the given
// encoding and parses the resulting bytes.
- (id)initFromHTMLString:(NSString *)xmlString encoding:(NSStringEncoding)encoding;
// Reads the file named filename, resolved by appending it to the bundle path of
// the bundle that contains the receiver's class.
- (id)initFromHTMLFile:(NSString *)filename;
// Reads the bundle resource with the given name and extension, looked up in the
// bundle that contains the receiver's class.
- (id)initFromHTMLFile:(NSString *)filename fileExtension:(NSString*)extension;
// Reads the file at fullPath.
- (id)initFromHTMLFilePath:(NSString *)fullPath;
// Parses the HTML bytes in data and positions the element on the document root.
- (id)initFromHTMLData:(NSData *)data;

+ (id)elementFromXMLString:(NSString *)xmlString encoding:(NSStringEncoding)encoding; + (id)elementFromXMLString:(NSString *)xmlString encoding:(NSStringEncoding)encoding;
+ (id)elementFromXMLFile:(NSString *)filename; + (id)elementFromXMLFile:(NSString *)filename;
+ (id)elementFromXMLFilename:(NSString *)filename fileExtension:(NSString *)extension; + (id)elementFromXMLFilename:(NSString *)filename fileExtension:(NSString *)extension;
Expand All @@ -63,6 +70,12 @@
+ (id)elementFromXMLData:(NSData *)data; + (id)elementFromXMLData:(NSData *)data;
+ (id)elementFromXMLDoc:(RXMLDocHolder *)doc node:(xmlNodePtr)node; + (id)elementFromXMLDoc:(RXMLDocHolder *)doc node:(xmlNodePtr)node;


// HTML factory methods. Each one allocates an RXMLElement and forwards its
// arguments to the -initFromHTML... initializer with the matching suffix.

// Counterpart of -initFromHTMLString:encoding:.
+ (id)elementFromHTMLString:(NSString *)xmlString encoding:(NSStringEncoding)encoding;
// Counterpart of -initFromHTMLFile:.
+ (id)elementFromHTMLFile:(NSString *)filename;
// Counterpart of -initFromHTMLFile:fileExtension:.
+ (id)elementFromHTMLFile:(NSString *)filename fileExtension:(NSString*)extension;
// Counterpart of -initFromHTMLFilePath:.
+ (id)elementFromHTMLFilePath:(NSString *)fullPath;
// Counterpart of -initFromHTMLData:.
+ (id)elementFromHTMLData:(NSData *)data;

- (NSString *)attribute:(NSString *)attributeName; - (NSString *)attribute:(NSString *)attributeName;
- (NSString *)attribute:(NSString *)attributeName inNamespace:(NSString *)ns; - (NSString *)attribute:(NSString *)attributeName inNamespace:(NSString *)ns;


Expand Down
56 changes: 56 additions & 0 deletions RaptureXML/RXMLElement.m
Expand Up @@ -102,6 +102,42 @@ - (id)initFromXMLDoc:(RXMLDocHolder *)doc node:(xmlNodePtr)node {
return self; return self;
} }


// Encodes the HTML markup in xmlString with the requested encoding and hands
// the resulting bytes to -initFromHTMLData:.
- (id)initFromHTMLString:(NSString *)xmlString encoding:(NSStringEncoding)encoding {
    NSData *encodedMarkup = [xmlString dataUsingEncoding:encoding];
    return [self initFromHTMLData:encodedMarkup];
}

// Loads the file named filename, resolved by appending it to the bundle path of
// the bundle containing the receiver's class, and parses its contents as HTML.
- (id)initFromHTMLFile:(NSString *)filename {
    NSBundle *owningBundle = [NSBundle bundleForClass:[self class]];
    NSString *bundleRoot = [owningBundle bundlePath];
    NSString *fullPath = [bundleRoot stringByAppendingPathComponent:filename];
    NSData *fileContents = [NSData dataWithContentsOfFile:fullPath];
    return [self initFromHTMLData:fileContents];
}

// Looks up the resource filename.extension in the bundle containing the
// receiver's class and parses its contents as HTML.
- (id)initFromHTMLFile:(NSString *)filename fileExtension:(NSString*)extension {
    NSBundle *owningBundle = [NSBundle bundleForClass:[self class]];
    NSString *resourcePath = [owningBundle pathForResource:filename ofType:extension];
    NSData *fileContents = [NSData dataWithContentsOfFile:resourcePath];
    return [self initFromHTMLData:fileContents];
}

// Reads the file at fullPath and parses its contents as HTML.
- (id)initFromHTMLFilePath:(NSString *)fullPath {
    NSData *fileContents = [NSData dataWithContentsOfFile:fullPath];
    return [self initFromHTMLData:fileContents];
}

// Parses the HTML bytes in data with libxml2's HTML parser (warnings and errors
// suppressed) and positions the element on the document's root node.
//
// data: raw HTML bytes. nil or empty data yields a NULL document.
//
// Returns the initialized element. When parsing produces no root node the
// document holder is cleared, so xmlDoc ends up nil.
- (id)initFromHTMLData:(NSData *)data {
    if ((self = [super init])) {
        xmlDocPtr doc = NULL;

        // htmlReadMemory takes the buffer size as an int. Only parse when the
        // NSData length survives the round trip through int; otherwise a blind
        // cast would truncate (or go negative) and libxml2 would see a partial
        // buffer.
        NSUInteger length = [data length];
        int size = (int)length;
        if (size >= 0 && (NSUInteger)size == length) {
            // The encoding argument is a C string pointer, so pass NULL (let
            // libxml2 detect the encoding) rather than the Objective-C nil.
            doc = htmlReadMemory([data bytes], size, "", NULL, HTML_PARSE_NOWARNING | HTML_PARSE_NOERROR);
        }

        // The holder is created even for a NULL document, as before.
        // NOTE(review): presumably the holder owns/frees the xmlDocPtr -- confirm in RXMLDocHolder.
        self.xmlDoc = [[RXMLDocHolder alloc] initWithDocPtr:doc];

        if ([self isValid]) {
            node_ = xmlDocGetRootElement(doc);

            // No root element: drop the holder so no document is kept.
            if (!node_) {
                self.xmlDoc = nil;
            }
        }
    }
    return self;
}


// Copy the RaptureXML element // Copy the RaptureXML element
// (calling copy will call this method automatically with the default zone) // (calling copy will call this method automatically with the default zone)
-(id)copyWithZone:(NSZone *)zone{ -(id)copyWithZone:(NSZone *)zone{
Expand Down Expand Up @@ -143,6 +179,26 @@ - (NSString *)description {
return [self text]; return [self text];
} }


// Factory counterpart of -initFromHTMLString:encoding:.
+ (id)elementFromHTMLString:(NSString *)xmlString encoding:(NSStringEncoding)encoding {
    RXMLElement *element = [RXMLElement alloc];
    return [element initFromHTMLString:xmlString encoding:encoding];
}

// Factory counterpart of -initFromHTMLFile:.
+ (id)elementFromHTMLFile:(NSString *)filename {
    RXMLElement *element = [RXMLElement alloc];
    return [element initFromHTMLFile:filename];
}

// Factory counterpart of -initFromHTMLFile:fileExtension:.
+ (id)elementFromHTMLFile:(NSString *)filename fileExtension:(NSString*)extension {
    RXMLElement *element = [RXMLElement alloc];
    return [element initFromHTMLFile:filename fileExtension:extension];
}

// Factory counterpart of -initFromHTMLFilePath:.
+ (id)elementFromHTMLFilePath:(NSString *)fullPath {
    RXMLElement *element = [RXMLElement alloc];
    return [element initFromHTMLFilePath:fullPath];
}

// Factory counterpart of -initFromHTMLData:.
+ (id)elementFromHTMLData:(NSData *)data {
    RXMLElement *element = [RXMLElement alloc];
    return [element initFromHTMLData:data];
}

#pragma mark - #pragma mark -


- (NSString *)tag { - (NSString *)tag {
Expand Down
56 changes: 56 additions & 0 deletions Tests/HTMLTests.m
@@ -0,0 +1,56 @@
//
// HTMLTests.m
// RaptureXML
//
// Created by Francis Chong on 22/3/13.
// Copyright (c) 2013 Rapture In Venice. All rights reserved.
//

#import "RXMLElement.h"

// Tests for the HTML parsing entry points of RXMLElement.
@interface HTMLTests : SenTestCase {
    // Minimal XHTML 1.1 document shared by the tests; rebuilt in -setUp.
    NSString *simpleHTML_;
}
@end

@implementation HTMLTests

// Builds the XHTML fixture before each test.
- (void)setUp {
    [super setUp];

    // The continuation lines deliberately start at column 0: any leading
    // whitespace after a backslash-newline would become part of the string.
    simpleHTML_ = @"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\
<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\"\
\"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">\
<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" >\
<head>\
<title>Minimal XHTML 1.1 Document</title>\
</head>\
<body>\
<p>This is a minimal <a href=\"http://www.w3.org/TR/xhtml11\">XHTML 1.1</a> document.</p>\
</body>\
</html>";
}

// A well-formed XHTML document exposes the root element's attributes and is
// queryable through XPath.
- (void)testBasicXHTML {
    RXMLElement *html = [RXMLElement elementFromHTMLString:simpleHTML_ encoding:NSUTF8StringEncoding];
    NSArray *atts = [html attributeNames];
    // STAssertEquals requires both operands to have the same type; a bare 2U is
    // unsigned int, which does not match NSUInteger on 64-bit targets.
    STAssertEquals([atts count], (NSUInteger)2, nil);

    NSArray *children = [html childrenWithRootXPath:@"//html/body/p"];
    STAssertTrue([children count] > 0, nil);

    // A failed assertion above does not stop the test, so avoid indexing into
    // an empty array (which would raise) and let the assertion below fail.
    RXMLElement *child = ([children count] > 0) ? [children objectAtIndex:0] : nil;
    STAssertEqualObjects([child text], @"This is a minimal XHTML 1.1 document.", nil);
}

// Character entities are decoded in the element text.
- (void)testHtmlEntity {
    RXMLElement *html = [RXMLElement elementFromHTMLString:@"<p>Don&apos;t say &quot;lazy&quot;</p>" encoding:NSUTF8StringEncoding];
    STAssertEqualObjects([html text], @"Don't say \"lazy\"", nil);
}

// Mis-nested tags are repaired and the fragment is wrapped in <html><body>.
- (void)testFixBrokenHtml {
    RXMLElement *html = [RXMLElement elementFromHTMLString:@"<p><b>Test</p> Broken HTML</b>" encoding:NSUTF8StringEncoding];
    STAssertEqualObjects([html text], @"Test Broken HTML", nil);
    STAssertEqualObjects([html xml], @"<html><body><p><b>Test</b></p> Broken HTML</body></html>", nil);
}

@end

0 comments on commit 76b59ec

Please sign in to comment.