Skip to content

Commit

Permalink
Add a test case to verify there is no wrong escaping/unescaping happening during tokenization. (#37175)
Browse files Browse the repository at this point in the history

Tests that token.data contains unescaped data.

PiperOrigin-RevId: 414922207

Co-authored-by: Amaltas Bohra <amaltas@google.com>
  • Loading branch information
banaag and amaltas committed Dec 10, 2021
1 parent 80b9d6b commit 91f1a82
Showing 1 changed file with 18 additions and 11 deletions.
29 changes: 18 additions & 11 deletions validator/cpp/htmlparser/tokenizer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@

TEST(TokenizerTest, BasicTokenizationOfADocument) {
std::string html =
"<html><head><title>hello</title></head><body><div>Hello</div>"
"<textarea id=\"my-text\" class=\"my-style\"></textarea>"
"<html><head><title>hello</title></head><body><div>"
"Hello&amp;World Number &num; 123</div>"
"<textarea id=\"my-text\" class=\"my-style\">Foo &amp; Bar "
"Number &num; 123</textarea>"
"<img src=\"foo.png\" /></body></html>";

htmlparser::Tokenizer t(html);
Expand All @@ -22,7 +24,7 @@ TEST(TokenizerTest, BasicTokenizationOfADocument) {
tokens.push_back(token);
}

EXPECT_EQ(tokens.size(), 15) << "Total 15 tokens generated by tokenizer";
EXPECT_EQ(tokens.size(), 16) << "Total 15 tokens generated by tokenizer";

// First three start tags. <html><head><title>.
EXPECT_EQ(tokens[0].token_type, htmlparser::TokenType::START_TAG_TOKEN)
Expand Down Expand Up @@ -51,7 +53,8 @@ TEST(TokenizerTest, BasicTokenizationOfADocument) {
// Text "Hello"
EXPECT_EQ(tokens[8].token_type , htmlparser::TokenType::TEXT_TOKEN)
<< "Hello text token 8";
EXPECT_EQ(tokens[8].data , "Hello") << "Hello string inside <div> 8";
EXPECT_EQ(tokens[8].data , "Hello&World Number # 123")
<< "Hello string inside <div> 8";

// End div.
EXPECT_EQ(tokens[9].token_type , htmlparser::TokenType::END_TAG_TOKEN)
Expand All @@ -74,25 +77,29 @@ TEST(TokenizerTest, BasicTokenizationOfADocument) {
EXPECT_EQ(tokens[10].attributes[1].value , "my-style")
<< "textarea second attribute class value is my-style";

// Text content in text area.
EXPECT_EQ(tokens[11].token_type, htmlparser::TokenType::TEXT_TOKEN);
EXPECT_EQ(tokens[11].data, "Foo & Bar Number # 123");

// End textarea.
EXPECT_EQ(tokens[11].token_type , htmlparser::TokenType::END_TAG_TOKEN)
EXPECT_EQ(tokens[12].token_type , htmlparser::TokenType::END_TAG_TOKEN)
<< "<textarea> end tag 11";

// img tag. self closing.
EXPECT_EQ(tokens[12].token_type ,
EXPECT_EQ(tokens[13].token_type ,
htmlparser::TokenType::SELF_CLOSING_TAG_TOKEN)
<< "<img> self closing tag 12";
EXPECT_EQ(tokens[12].attributes.size() , 1)
EXPECT_EQ(tokens[13].attributes.size() , 1)
<< "img only one attribute";
EXPECT_EQ(tokens[12].attributes[0].key , "src")
EXPECT_EQ(tokens[13].attributes[0].key , "src")
<< "img first attribute is src";
EXPECT_EQ(tokens[12].attributes[0].value , "foo.png")
EXPECT_EQ(tokens[13].attributes[0].value , "foo.png")
<< "img first attribute src value is foo.png";

// Close body, html.
EXPECT_EQ(tokens[13].token_type , htmlparser::TokenType::END_TAG_TOKEN)
<< "body close tag 13";
EXPECT_EQ(tokens[14].token_type , htmlparser::TokenType::END_TAG_TOKEN)
<< "body close tag 13";
EXPECT_EQ(tokens[15].token_type , htmlparser::TokenType::END_TAG_TOKEN)
<< "html close tag 14";

EXPECT_EQ(t.LinesProcessed(), 1);
Expand Down

0 comments on commit 91f1a82

Please sign in to comment.