Skip to content

Commit

Permalink
ignore malformed html, handle funny encodings
Browse files (browse the repository at this point in the history)
  • Loading branch information
Kyle Maxwell committed Jan 4, 2009
1 parent 08ca013 commit 87d5ae6
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 5 deletions.
8 changes: 4 additions & 4 deletions dexter.c
Expand Up @@ -28,15 +28,15 @@ int yywrap(void){
xmlDocPtr dex_parse_file(dexPtr dex, char* file, boolean html) {
if(html) {
htmlParserCtxtPtr htmlCtxt = htmlNewParserCtxt();
- htmlDocPtr html = htmlCtxtReadFile(htmlCtxt, file, "UTF-8", 3);
+ htmlDocPtr html = htmlCtxtReadFile(htmlCtxt, file, NULL, HTML_PARSE_RECOVER | HTML_PARSE_NOERROR |HTML_PARSE_NOWARNING);
if(html == NULL) {
asprintf(&dex->error, "Couldn't parse file: %s\n", file);
return NULL;
}
return dex_parse_doc(dex, html);
} else {
xmlParserCtxtPtr ctxt = xmlNewParserCtxt();
- xmlDocPtr xml = xmlCtxtReadFile(ctxt, file, "UTF-8", 3);
+ xmlDocPtr xml = xmlCtxtReadFile(ctxt, file, NULL, HTML_PARSE_RECOVER | HTML_PARSE_NOERROR |HTML_PARSE_NOWARNING);
if(xml == NULL) {
asprintf(&dex->error, "Couldn't parse file: %s\n", file);
return NULL;
Expand All @@ -48,15 +48,15 @@ xmlDocPtr dex_parse_file(dexPtr dex, char* file, boolean html) {
xmlDocPtr dex_parse_string(dexPtr dex, char* string, size_t size, boolean html) {
if(html) {
htmlParserCtxtPtr htmlCtxt = htmlNewParserCtxt();
- htmlDocPtr html = htmlCtxtReadMemory(htmlCtxt, string, size, "http://kylemaxwell.com/dexter/memory", "UTF-8", 3);
+ htmlDocPtr html = htmlCtxtReadMemory(htmlCtxt, string, size, "http://kylemaxwell.com/dexter/memory", NULL, HTML_PARSE_RECOVER | HTML_PARSE_NOERROR |HTML_PARSE_NOWARNING);
if(html == NULL) {
asprintf(&dex->error, "Couldn't parse string\n");
return NULL;
}
return dex_parse_doc(dex, html);
} else {
xmlParserCtxtPtr ctxt = xmlNewParserCtxt();
- xmlDocPtr xml = xmlCtxtReadMemory(ctxt, string, size, "http://kylemaxwell.com/dexter/memory", "UTF-8", 3);
+ xmlDocPtr xml = xmlCtxtReadMemory(ctxt, string, size, "http://kylemaxwell.com/dexter/memory", NULL, HTML_PARSE_RECOVER | HTML_PARSE_NOERROR |HTML_PARSE_NOWARNING);
if(xml == NULL) {
asprintf(&dex->error, "Couldn't parse string\n");
return NULL;
Expand Down
2 changes: 1 addition & 1 deletion functions.c
Expand Up @@ -287,7 +287,7 @@ xsltLoadHtmlDocument(xsltTransformContextPtr ctxt, const xmlChar *URI) {
ret = ret->next;
}

- doc = htmlReadFile(URI, NULL, ctxt->parserOptions | HTML_PARSE_RECOVER);
+ doc = htmlReadFile(URI, NULL, ctxt->parserOptions | HTML_PARSE_RECOVER | HTML_PARSE_NOERROR |HTML_PARSE_NOWARNING);

if (doc == NULL)
return(NULL);
Expand Down

0 comments on commit 87d5ae6

Please sign in to comment.