Skip to content

Commit

Permalink
Sync for validator cpp engine and cpp htmlparser (#35403)
Browse files: browse the repository at this point in the history
  • Loading branch information
Greg Grothaus committed Jul 26, 2021
1 parent e7326ab commit 98bfa97
Show file tree
Hide file tree
Showing 15 changed files with 116 additions and 137 deletions.
38 changes: 20 additions & 18 deletions validator/cpp/htmlparser/BUILD
Expand Up @@ -51,11 +51,11 @@ cc_library(
hdrs = [
"atomutil.h",
],
copts = ["-std=c++17"],
deps = [
":atom",
":hash",
],
copts = ["-std=c++17"],
)

cc_test(
Expand Down Expand Up @@ -90,11 +90,11 @@ cc_library(
hdrs = [
"doctype.h",
],
copts = ["-std=c++17"],
deps = [
":node",
":strings",
],
copts = ["-std=c++17"],
)

cc_test(
Expand Down Expand Up @@ -138,11 +138,11 @@ cc_library(
hdrs = [
"fileutil.h",
],
copts = ["-std=c++17"],
deps = [
":defer",
":strings",
],
copts = ["-std=c++17"],
)

cc_test(
Expand All @@ -165,12 +165,12 @@ cc_library(
hdrs = [
"foreign.h",
],
copts = ["-std=c++17"],
deps = [
":comparators",
":node",
":strings",
],
copts = ["-std=c++17"],
)

# Various hashing utility functions.
Expand Down Expand Up @@ -199,11 +199,11 @@ cc_library(
hdrs = [
"token.h",
],
copts = ["-std=c++17"],
deps = [
":atom",
":strings",
],
copts = ["-std=c++17"],
)

# Defines node and node stack.
Expand All @@ -217,6 +217,7 @@ cc_library(
"elements.h",
"node.h",
],
copts = ["-std=c++17"],
deps = [
":atom",
":atomutil",
Expand All @@ -225,7 +226,6 @@ cc_library(
":token",
"@com_github_glog//:glog",
],
copts = ["-std=c++17"],
)

cc_test(
Expand All @@ -248,10 +248,10 @@ cc_library(
hdrs = [
"entity.h",
],
copts = ["-std=c++17"],
deps = [
":comparators",
],
copts = ["-std=c++17"],
)

cc_test(
Expand All @@ -273,24 +273,24 @@ cc_library(
hdrs = [
"document.h",
],
copts = ["-std=c++17"],
deps = [
":allocator",
":node",
":token",
"@com_google_absl//absl/flags:flag",
],
copts = ["-std=c++17"],
)

cc_library(
name = "casetable",
hdrs = [
"casetable.h",
],
copts = ["-std=c++17"],
deps = [
":comparators",
],
copts = ["-std=c++17"],
)

cc_test(
Expand All @@ -314,6 +314,7 @@ cc_library(
hdrs = [
"parser.h",
],
copts = ["-std=c++17"],
deps = [
":atom",
":atomutil",
Expand All @@ -326,11 +327,10 @@ cc_library(
":node",
":strings",
":tokenizer",
"@com_github_glog//:glog",
"@com_google_absl//absl/base",
"@com_google_absl//absl/flags:flag",
"@com_github_glog//:glog",
],
copts = ["-std=c++17"],
)

cc_test(
Expand Down Expand Up @@ -359,12 +359,12 @@ cc_library(
hdrs = [
"renderer.h",
],
copts = ["-std=c++17"],
deps = [
":atomutil",
":node",
":strings",
],
copts = ["-std=c++17"],
)

cc_test(
Expand Down Expand Up @@ -396,15 +396,15 @@ cc_library(
hdrs = [
"strings.h",
],
copts = ["-std=c++17"],
deps = [
":casetable",
":htmlentities",
":logging",
":whitespacetable",
"@com_google_absl//absl/base",
"@com_github_glog//:glog",
"@com_google_absl//absl/base",
],
copts = ["-std=c++17"],
)

cc_test(
Expand All @@ -427,6 +427,7 @@ cc_library(
hdrs = [
"tokenizer.h",
],
copts = ["-std=c++17"],
deps = [
":atom",
":atomutil",
Expand All @@ -435,7 +436,6 @@ cc_library(
":token",
"@com_google_absl//absl/flags:flag",
],
copts = ["-std=c++17"],
)

cc_test(
Expand All @@ -459,10 +459,10 @@ cc_library(
hdrs = [
"url.h",
],
copts = ["-std=c++17"],
deps = [
":strings",
],
copts = ["-std=c++17"],
)

cc_test(
Expand Down Expand Up @@ -491,8 +491,10 @@ filegroup(

filegroup(
name = "html5lib_test_files",
srcs = glob(["testdata/tree-construction/*.dat",
"testdata/tree-construction/scripted/*.dat"]),
srcs = glob([
"testdata/tree-construction/*.dat",
"testdata/tree-construction/scripted/*.dat",
]),
)

cc_test(
Expand Down
1 change: 0 additions & 1 deletion validator/cpp/htmlparser/bin/statetablegen.cc
Expand Up @@ -24,7 +24,6 @@

#include <iostream>

#include "glog/logging.h"
#include "absl/flags/flag.h"
#include "absl/flags/parse.h"
#include "grammar/tablebuilder.h"
Expand Down
4 changes: 2 additions & 2 deletions validator/cpp/htmlparser/css/BUILD
Expand Up @@ -6,7 +6,7 @@
#
# bazel build --repo_env=CC=clang --cxxopt='-std=c++17' <build_target>

load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test", "cc_proto_library")
load("@rules_cc//cc:defs.bzl", "cc_library", "cc_proto_library", "cc_test")
load("@rules_proto//proto:defs.bzl", "proto_library")

package(default_visibility = ["//visibility:public"])
Expand Down Expand Up @@ -96,9 +96,9 @@ cc_library(
hdrs = ["amp4ads-parse-css.h"],
deps = [
":parse-css",
"@com_github_re2//:re2",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/strings",
"@com_github_re2//:re2",
],
)

Expand Down
3 changes: 1 addition & 2 deletions validator/cpp/htmlparser/css/amp4ads-parse-css_test.cc
Expand Up @@ -16,8 +16,7 @@

#include "css/amp4ads-parse-css.h"

#include "glog/logging.h"
#include "gmock/gmock.h"
#include <gmock/gmock.h>
#include "gtest/gtest.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_join.h"
Expand Down
41 changes: 17 additions & 24 deletions validator/cpp/htmlparser/css/parse-css-urls.cc
Expand Up @@ -18,13 +18,13 @@

#include <memory>

#include "glog/logging.h"
#include "absl/algorithm/container.h"
#include "absl/memory/memory.h"
#include "absl/strings/ascii.h"
#include "absl/strings/numbers.h"
#include "absl/strings/str_join.h"
#include "absl/types/variant.h"
#include "logging.h"
#include "strings.h"

using absl::AsciiStrToLower;
Expand Down Expand Up @@ -157,9 +157,8 @@ class Tokenizer {
}

char32_t Next(int num = 1) {
CHECK_GE(num, 0);
CHECK_LE(num, 3)
<< "Spec Error; no more than three codepoints of lookahead.";
CHECK(num >= 0, "Spec Error; negative lookahead.");
CHECK(num <= 3, "Spec Error; no more than three codepoints of lookahead.");
return Codepoint(pos_ + num);
}

Expand All @@ -181,9 +180,7 @@ class Tokenizer {
return true;
}

bool EofNext(int num = 1) {
return pos_ + num >= str_.size();
}
bool EofNext(int num = 1) { return pos_ + num >= str_.size(); }

bool Eof() { return eof_; }

Expand Down Expand Up @@ -453,8 +450,8 @@ class Tokenizer {
}
if (Whitespace(Next())) Consume();
uint32_t value;
if (!absl::numbers_internal::safe_strtou32_base(
digits, &value, /*base=*/16)) {
if (!absl::numbers_internal::safe_strtou32_base(digits, &value,
/*base=*/16)) {
value = 0xfffd;
}
if (value > kMaximumallowedcodepoint) value = 0xfffd;
Expand Down Expand Up @@ -597,8 +594,7 @@ int ConsumeAComponentValue(const vector<unique_ptr<Token>>& tokens,

TokenType::Code TypeOrEof(const vector<unique_ptr<Token>>& tokens,
const int pos) {
if (pos < tokens.size())
return tokens[pos]->Type();
if (pos < tokens.size()) return tokens[pos]->Type();
return tokens[tokens.size() - 1]->Type();
}

Expand Down Expand Up @@ -634,7 +630,7 @@ int ConsumeABlock(const vector<unique_ptr<Token>>& tokens,
int ConsumeAComponentValue(const vector<unique_ptr<Token>>& tokens,
const int start_pos) {
TokenType::Code type = TypeOrEof(tokens, start_pos);
CHECK_NE(type, TokenType::EOF_TOKEN);
CHECK(type != TokenType::EOF_TOKEN, "");
if (type == TokenType::OPEN_CURLY || type == TokenType::OPEN_SQUARE ||
type == TokenType::OPEN_PAREN || type == TokenType::FUNCTION_TOKEN) {
return ConsumeABlock(tokens, start_pos);
Expand All @@ -647,11 +643,11 @@ int ConsumeAComponentValue(const vector<unique_ptr<Token>>& tokens,
// starts at position |start_pos|.
int ConsumeAFontFace(const vector<unique_ptr<Token>>& tokens,
const int start_pos) {
CHECK_EQ(TypeOrEof(tokens, start_pos), TokenType::AT_KEYWORD)
<< TokenType::Code_Name(TypeOrEof(tokens, start_pos));
CHECK_EQ(static_cast<const AtKeywordToken&>(*tokens[start_pos]).StringValue(),
"font-face")
<< static_cast<const AtKeywordToken&>(*tokens[start_pos]).StringValue();
CHECK(TypeOrEof(tokens, start_pos) == TokenType::AT_KEYWORD,
TokenType::Code_Name(TypeOrEof(tokens, start_pos)));
CHECK(static_cast<const AtKeywordToken&>(*tokens[start_pos]).StringValue() ==
"font-face",
static_cast<const AtKeywordToken&>(*tokens[start_pos]).StringValue());
int cur_pos = start_pos;
while (true) {
cur_pos++;
Expand All @@ -669,7 +665,7 @@ int ConsumeAFontFace(const vector<unique_ptr<Token>>& tokens,
int ConsumeAUrlFunction(const vector<unique_ptr<Token>>& tokens,
const int start_pos, std::string* url) {
TokenType::Code type = TypeOrEof(tokens, start_pos);
CHECK_EQ(type, TokenType::FUNCTION_TOKEN) << TokenType::Code_Name(type);
CHECK(type == TokenType::FUNCTION_TOKEN, TokenType::Code_Name(type));
int cur_pos = start_pos;
*url = "";
while (true) {
Expand All @@ -694,8 +690,7 @@ int ConsumeAUrlFunction(const vector<unique_ptr<Token>>& tokens,
return ConsumeABlock(tokens, start_pos);
}

bool SegmentCss(const std::string& utf8_css,
vector<CssSegment>* segments) {
bool SegmentCss(const std::string& utf8_css, vector<CssSegment>* segments) {
// This changes the input string into an array of UTF8 Codepoints. Each
// codepoint can match one or more bytes in the input string.
vector<char32_t> css = htmlparser::Strings::Utf8ToCodepoints(utf8_css);
Expand All @@ -706,8 +701,7 @@ bool SegmentCss(const std::string& utf8_css,
const vector<unique_ptr<Token>> tokens = url::Tokenize(css, &errors);

// Documents with CSS errors are invalid AMP, so we can just exit early.
if (!errors.empty())
return false;
if (!errors.empty()) return false;

// This code includes limited CSS parsing. The reason it works is that we
// can assume that the input is valid CSS. It might not be, but if it isn't
Expand Down Expand Up @@ -757,8 +751,7 @@ bool SegmentCss(const std::string& utf8_css,
segments->emplace_back(segment);
}

// Safe: |tokens| ends w/ EOF_TOKEN.
CHECK_LT(cur_pos + 1, tokens.size());
CHECK(cur_pos + 1 < tokens.size(), "tokens missing EOF_TOKEN");
// Set our next range start to the start of the next token.
css_chars_emitted_until = tokens[cur_pos + 1]->pos();
}
Expand Down
2 changes: 0 additions & 2 deletions validator/cpp/htmlparser/css/parse-css-urls.h
Expand Up @@ -34,8 +34,6 @@
#include "absl/memory/memory.h"
#include "absl/types/variant.h"
#include "css/parse-css-urls.pb.h"
#include "validator.pb.h"


namespace htmlparser::css::url {
// Implements 3.3. Preprocessing the input stream.
Expand Down
4 changes: 2 additions & 2 deletions validator/cpp/htmlparser/css/parse-css-urls_test.cc
Expand Up @@ -17,8 +17,8 @@
#include "css/parse-css-urls.h"

#include <ostream>
#include "glog/logging.h"
#include "gmock/gmock.h"

#include <gmock/gmock.h>
#include "gtest/gtest.h"
#include "absl/strings/str_cat.h"
#include "strings.h"
Expand Down

0 comments on commit 98bfa97

Please sign in to comment.