forked from commonmark/cmark
-
Notifications
You must be signed in to change notification settings - Fork 169
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
When we encounter a tag that causes an HTML 5 parser's content model flag [1] to be changed to RCDATA, CDATA or RAWTEXT [2] [3], we escape the tag by replacing its opening "<" with "&lt;". This causes the tag to appear verbatim in the page it's placed on. We do this to prevent users breaking the page content, where the parser would not interpret further tags inserted by cmark as HTML until a matching close tag was hit. (Such a closing tag could exist if a user entered it themselves, but it'd cause all cmark-generated markup in between to be rendered raw, and is unlikely to be desirable behaviour.) [1] https://www.w3.org/TR/2009/WD-html5-20090423/syntax.html#tokenization [2] https://www.w3.org/TR/2009/WD-html5-20090212/serializing-html-fragments.html#parsing-html-fragments [3] https://github.com/google/gumbo-parser/blob/aa91b27b02c0c80c482e24348a457ed7c3c088e0/src/parser.c#L4023-L4053
- Loading branch information
Yuki Izumi
authored and
Yuki Izumi
committed
Jun 27, 2017
1 parent
e001c1e
commit b5ccb88
Showing
8 changed files
with
125 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
#include "tagfilter.h" | ||
#include <parser.h> | ||
|
||
/*
 * Lowercase names of the HTML tags this extension filters.
 * The list is terminated by a NULL sentinel so it can be walked without a
 * separate element count.
 */
static const char *blacklist[] = {
    "title",
    "textarea",
    "style",
    "xmp",
    "iframe",
    "noembed",
    "noframes",
    "script",
    "plaintext",
    NULL,
};
|
||
/*
 * Reports whether the raw HTML tag held in tag_data[0 .. tag_size) is an
 * opening ("<name ...>"), closing ("</name>") or self-closing ("<name/>")
 * tag whose name is `tagname`.
 *
 * tag_data: bytes of the tag, starting at its '<'; need not be
 *           NUL-terminated.
 * tag_size: number of readable bytes in tag_data.
 * tagname:  NUL-terminated tag name to look for; it must be spelled in
 *           lowercase ASCII.
 *
 * The name is compared ASCII case-insensitively, because HTML tag names are
 * case-insensitive: "<SCRIPT>" and "<Script>" must match "script" just as
 * "<script>" does.
 *
 * Returns 1 on a match and 0 otherwise.
 */
static int is_tag(const unsigned char *tag_data, size_t tag_size,
                  const char *tagname) {
  size_t i;

  /* The shortest possible tag, "<a>", is three bytes long. */
  if (tag_size < 3 || tag_data[0] != '<')
    return 0;

  i = 1;

  /* Skip the slash of a closing tag so "</name>" is matched as well. */
  if (tag_data[i] == '/') {
    i++;
  }

  for (; i < tag_size; ++i, ++tagname) {
    unsigned char c;

    if (*tagname == 0)
      break;

    /* Fold ASCII uppercase to lowercase by hand: this is independent of the
     * current locale and needs no additional header. */
    c = tag_data[i];
    if (c >= 'A' && c <= 'Z')
      c = (unsigned char)(c - 'A' + 'a');

    if (c != *tagname)
      return 0;
  }

  /* Input ended before a delimiter could follow the name. */
  if (i == tag_size)
    return 0;

  /* The name must end here, otherwise it is only a prefix of a longer name
   * (for example "<titles>" is not "title"). */
  if (cmark_isspace(tag_data[i]) || tag_data[i] == '>')
    return 1;

  /* Self-closing form: "<name/>". */
  if (tag_data[i] == '/' && tag_size >= i + 2 && tag_data[i + 1] == '>')
    return 1;

  return 0;
}
|
||
/*
 * HTML filter callback for the "tagfilter" extension.
 *
 * Checks the raw tag in tag[0 .. tag_len) against every name in `blacklist`.
 * Returns 0 as soon as one of them matches and 1 when none does.
 * `ext` is not used.
 */
static int filter(cmark_syntax_extension *ext, const unsigned char *tag,
                  size_t tag_len) {
  size_t idx = 0;

  /* The table ends with a NULL sentinel. */
  while (blacklist[idx]) {
    if (is_tag(tag, tag_len, blacklist[idx]))
      return 0;
    idx++;
  }

  return 1;
}
|
||
cmark_syntax_extension *create_tagfilter_extension(void) { | ||
cmark_syntax_extension *ext = cmark_syntax_extension_new("tagfilter"); | ||
cmark_syntax_extension_set_html_filter_func(ext, filter); | ||
return ext; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
#ifndef TAGFILTER_H | ||
#define TAGFILTER_H | ||
|
||
#include "core-extensions.h" | ||
|
||
cmark_syntax_extension *create_tagfilter_extension(void); | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters