Skip to content

Commit

Permalink
No description. (#32833)
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 359079534

Co-authored-by: Googler <noreply@google.com>
  • Loading branch information
banaag and Googler committed Feb 23, 2021
1 parent 073390f commit 93b5760
Show file tree
Hide file tree
Showing 9 changed files with 346 additions and 516 deletions.
Expand Up @@ -15,14 +15,22 @@
//

// To regenerate states.h file, run:
// blaze clean (necessary to take into account txt file changes)
// bazel build htmlparser/bin:jsongrammargen
// bazel-bin/htmlparser/bin/jsongrammargen
//
// bazel clean
// bazel build htmlparser/bin:statetablegen
// bazel-bin/htmlparser/bin/statetablegen --input_grammar=url
// or
// bazel-bin/htmlparser/bin/statetablegen --input_grammar=json

#include <iostream>

#include "glog/logging.h"
#include "absl/flags/flag.h"
#include "absl/flags/parse.h"
#include "grammar/tablebuilder.h"

ABSL_FLAG(std::string, input_grammar, "", "Options: url, json");

constexpr std::string_view kLicenseHeader = R"LICENSETXT(//
// Copyright 2020 The AMP HTML Authors. All Rights Reserved.
//
Expand All @@ -41,12 +49,29 @@ constexpr std::string_view kLicenseHeader = R"LICENSETXT(//


int main(int argc, char** argv) {
absl::ParseCommandLine(argc, argv);
std::string grammar = absl::GetFlag(FLAGS_input_grammar);

std::string input_grammar;
std::string cpp_namespace;
std::string ifdef_guard;
std::string output_file;
if (grammar == "json") {
input_grammar = "data/jsongrammar.txt";
cpp_namespace = "htmlparser::json";
ifdef_guard = "HTMLPARSER__JSON_STATES_H_";
output_file = "json/states.h";
} else {
std::cerr << "Invalid -input_grammar value: " << grammar;
return -1;
}

htmlparser::grammar::TableBuilder builder(
"data/jsongrammar.txt",
{.output_file_path = "json/states.h",
input_grammar,
{.output_file_path = output_file,
.license_header = kLicenseHeader.data(),
.ifdef_guard = "HTMLPARSER__JSON_STATES_H_",
.cpp_namespace = "htmlparser::json"});
.ifdef_guard = ifdef_guard,
.cpp_namespace = cpp_namespace});

if (!builder.ParseRulesAndGenerateTable()) {
std::cerr << "Table generation failed.\n";
Expand Down
4 changes: 2 additions & 2 deletions validator/cpp/htmlparser/css/amp4ads-parse-css.cc
Expand Up @@ -68,13 +68,13 @@ class Amp4AdsVisitor : public RuleVisitor {

void VisitQualifiedRule(const QualifiedRule& qualified_rule) override {
for (const unique_ptr<Declaration>& decl : qualified_rule.declarations()) {
string_view name = StripVendorPrefix(decl->name());
auto name = StripVendorPrefix(decl->name());

// The name of the property may identify a transition. The only
// properties that may be transitioned are opacity and transform.
if (name == "transition") {
std::string transitioned_property = FirstIdent(decl->value());
string_view transitioned_property_stripped =
auto transitioned_property_stripped =
StripVendorPrefix(transitioned_property);

if (transitioned_property_stripped != "opacity" &&
Expand Down
50 changes: 26 additions & 24 deletions validator/cpp/htmlparser/css/parse-css.cc
Expand Up @@ -25,7 +25,6 @@
#include "absl/strings/numbers.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_join.h"
#include "absl/strings/strip.h"
#include "absl/types/variant.h"
#include "css/parse-css.pb.h"
#include "strings.h"
Expand All @@ -39,6 +38,32 @@ using std::unique_ptr;
using std::vector;

namespace htmlparser::css {

namespace internal {
// Strips a single leading vendor prefix ("-o-", "-moz-", "-ms-" or
// "-webkit-") from |prefixed_string|, if present, and returns the remainder.
// E.g., "-moz-keyframes" -> "keyframes". Input without one of these prefixes
// is returned unchanged.
//
// The returned view refers to the same characters as the argument; it does
// not own them and must not outlive the caller's buffer.
std::string_view StripVendorPrefix(absl::string_view prefixed_string) {
  // Checking for '-' is an optimization.
  if (!prefixed_string.empty() && prefixed_string[0] == '-') {
    // ConsumePrefix returns true if anything is consumed. This slightly
    // strange syntax will cause us to exit early if we find a match.
    if (absl::ConsumePrefix(&prefixed_string, "-o-")) {
    } else if (absl::ConsumePrefix(&prefixed_string, "-moz-")) {
    } else if (absl::ConsumePrefix(&prefixed_string, "-ms-")) {
    } else if (absl::ConsumePrefix(&prefixed_string, "-webkit-")) {
    }
  }
  // Build the result from pointer *and* length. Returning data() alone would
  // re-measure the string with strlen(), which ignores the view's size and
  // reads past the end when the view is not NUL-terminated.
  return std::string_view(prefixed_string.data(), prefixed_string.size());
}

// Strips the prefix 'min-' or 'max-' from the start of a media feature
// identifier, if present. E.g., "min-width" -> "width". At most one prefix is
// removed.
//
// The returned view refers to the same characters as the argument; it does
// not own them and must not outlive the caller's buffer.
std::string_view StripMinMaxPrefix(absl::string_view prefixed_string) {
  // We could just consume 'min-' and then 'max-', but then we'd allow
  // 'min-max-' but not 'max-min-' which is both wrong and weird.
  if (!absl::ConsumePrefix(&prefixed_string, "min-")) {
    absl::ConsumePrefix(&prefixed_string, "max-");
  }
  // Build the result from pointer *and* length. Returning data() alone would
  // re-measure the string with strlen(), which ignores the view's size and
  // reads past the end when the view is not NUL-terminated.
  return std::string_view(prefixed_string.data(), prefixed_string.size());
}
} // namespace internal

namespace {
// Sets |dest| to be a JSON array of the ->ToJson() results of the
// elements contained within |container|.
Expand Down Expand Up @@ -980,29 +1005,6 @@ unique_ptr<Token> TokenStream::ReleaseCurrentOrCreateEof() {
return CreateEOFTokenAt(*eof_);
}

// Removes one leading vendor prefix ("-o-", "-moz-", "-ms-" or "-webkit-")
// from |prefixed_string| when there is one, and returns what is left. Input
// that carries none of these prefixes comes back untouched.
absl::string_view StripVendorPrefix(absl::string_view prefixed_string) {
  // Every vendor prefix begins with '-', so anything else can be returned
  // straight away without trying the individual prefixes.
  if (prefixed_string.empty() || prefixed_string.front() != '-') {
    return prefixed_string;
  }

  // Try the known prefixes in order and stop at the first one that is
  // actually removed, so that at most one prefix is stripped.
  static constexpr absl::string_view kVendorPrefixes[] = {
      "-o-", "-moz-", "-ms-", "-webkit-"};
  for (absl::string_view vendor_prefix : kVendorPrefixes) {
    if (absl::ConsumePrefix(&prefixed_string, vendor_prefix)) {
      break;
    }
  }
  return prefixed_string;
}

// Removes a leading 'min-' or 'max-' from |prefixed_string| when present and
// returns what is left, e.g. "min-width" -> "width".
absl::string_view StripMinMaxPrefix(absl::string_view prefixed_string) {
  // Only one of the two prefixes is ever removed. Stripping 'min-' and then
  // 'max-' unconditionally would accept 'min-max-' while rejecting
  // 'max-min-', which is neither correct nor consistent.
  if (absl::ConsumePrefix(&prefixed_string, "min-")) {
    return prefixed_string;
  }
  absl::ConsumePrefix(&prefixed_string, "max-");
  return prefixed_string;
}

//
// Parsing
//
Expand Down
38 changes: 34 additions & 4 deletions validator/cpp/htmlparser/css/parse-css.h
Expand Up @@ -44,11 +44,13 @@
#include <vector>

#include "absl/memory/memory.h"
#include "absl/strings/strip.h"
#include "css/parse-css.pb.h"
#include "json/types.h"
#include "validator.pb.h"

namespace htmlparser::css {

// Implements 3.3. Preprocessing the input stream.
// http://www.w3.org/TR/css-syntax-3/#input-preprocessing
void Preprocess(std::vector<char32_t>* codepoints);
Expand Down Expand Up @@ -454,13 +456,41 @@ class ErrorToken : public Token {
const std::vector<std::string> params_;
};

// Strips the prefix 'min-' or 'max-' from the start of a media feature
// identifier, if present. E.g., "min-width" -> "width".
std::string_view StripMinMaxPrefix(std::string_view prefixed_string);
namespace internal {

std::string_view StripVendorPrefix(absl::string_view prefixed_string);
std::string_view StripMinMaxPrefix(absl::string_view prefixed_string);

// Macro to generate method that may accept any string parameter of type
// absl::string_view, std::string_view, std::string, char* and const char*
#define TEMPLATED_METHOD_FOR_STRING_TYPES(PUBLIC_METHOD, INTERNAL_IMPL)\
/* absl::string_view specialization */\
template<class T, \
typename std::enable_if<std::is_same<T, absl::string_view>::value, \
bool>::type = true>\
std::string_view PUBLIC_METHOD(T prefixed_string) {\
return INTERNAL_IMPL(prefixed_string);\
}\
\
/* std::string, char* specialization */\
template<class T, typename std::enable_if<\
std::is_same<T, std::string>::value || \
std::is_same<char const*, typename std::decay<T>::type>::value || \
std::is_same<char*, typename std::decay<T>::type>::value, \
bool>::type = true>\
std::string_view PUBLIC_METHOD(const T& str) {\
absl::string_view prefixed_string(str);\
return INTERNAL_IMPL(prefixed_string);\
}

} // namespace internal

// Strips vendor prefixes from identifiers, e.g. property names or names
// of at rules. E.g., "-moz-keyframes" -> "keyframes".
std::string_view StripVendorPrefix(std::string_view prefixed_string);
TEMPLATED_METHOD_FOR_STRING_TYPES(StripVendorPrefix,
internal::StripVendorPrefix);
TEMPLATED_METHOD_FOR_STRING_TYPES(StripMinMaxPrefix,
internal::StripMinMaxPrefix);

class RuleVisitor;

Expand Down
18 changes: 18 additions & 0 deletions validator/cpp/htmlparser/css/parse-css_test.cc
Expand Up @@ -46,6 +46,7 @@ namespace htmlparser::css {
namespace {

TEST(ParseCssTest, StripVendorPrefix) {
// char*
EXPECT_EQ("foo", StripVendorPrefix("-moz-foo"));
EXPECT_EQ("foo", StripVendorPrefix("-ms-foo"));
EXPECT_EQ("foo", StripVendorPrefix("-o-foo"));
Expand All @@ -56,6 +57,23 @@ TEST(ParseCssTest, StripVendorPrefix) {
EXPECT_EQ("foo-foo", StripVendorPrefix("foo-foo"));
EXPECT_EQ("-d-foo-foo", StripVendorPrefix("-d-foo-foo"));
EXPECT_EQ("-foo", StripVendorPrefix("-foo"));

// std::string.
std::string param("-moz-foo");
EXPECT_EQ("foo", StripVendorPrefix(param));

// std::string_view.
std::string_view param_std_view("-moz-foo");
EXPECT_EQ("foo", StripVendorPrefix(param_std_view));

// absl::string_view
absl::string_view param_absl_view("-moz-foo");
EXPECT_EQ("foo", StripVendorPrefix(param_absl_view));

// Any other type, won't compile.
// std::vector<std::string> vec;
// EXPECT_EQ("foo", StripVendorPrefix(vec));
// EXPECT_EQ("foo", StripVendorPrefix(100));
}

TEST(ParseCssTest, Tokenize_GeneratesTokensForSimpleExample) {
Expand Down
39 changes: 6 additions & 33 deletions validator/cpp/htmlparser/grammar/tablebuilder.cc
Expand Up @@ -28,7 +28,6 @@ namespace htmlparser::grammar {

constexpr std::string_view BOLD_RED_BEGIN = "\033[1;31m";
constexpr std::string_view BOLD_RED_END = "\033[0m";
std::string PrintChar(char c);

TableBuilder::TableBuilder(std::string_view grammar_file_path,
OutputFileOptions options)
Expand Down Expand Up @@ -63,6 +62,8 @@ bool TableBuilder::ParseRulesAndGenerateTable() {
transition_states.begin(), transition_states.end(),
std::inserter(unused_states, unused_states.begin()));

unused_states.erase("PUSH");
unused_states.erase("POP");
if (!unused_states.empty()) {
std::cerr << BOLD_RED_BEGIN << "Following states defined but not used: \n";
for (auto& us : unused_states) {
Expand All @@ -78,6 +79,8 @@ bool TableBuilder::ParseRulesAndGenerateTable() {
declared_states.begin(), declared_states.end(),
std::inserter(undefined_states, undefined_states.begin()));

undefined_states.erase("PUSH");
undefined_states.erase("POP");
if (!undefined_states.empty()) {
std::cerr << BOLD_RED_BEGIN << "Following states not defined: \n";
for (auto& us : undefined_states) {
Expand All @@ -96,7 +99,6 @@ bool TableBuilder::ParseRulesAndGenerateTable() {

declared_states.erase("PUSH");
declared_states.erase("POP");

if (declared_states.size() > 256) {
std::cerr << "Maximum 256 states supported. " << declared_states.size()
<< " declared." << std::endl;
Expand Down Expand Up @@ -229,11 +231,7 @@ inline static bool HasPopBit(uint32_t code);
fd << "constexpr std::array<int, 127> kTokenIndexes {\n ";
for (int i = 0; i < tokenindexes.size(); i++) {
fd << tokenindexes[i];
if (tokenindexes[i] < charset_.size()) {
fd << " /* " << PrintChar(*(std::next(charset_.begin(), tokenindexes[i])))
<< " */";
}
fd << (i > 0 && ((i + 1) % 6 == 0) ? ",\n " : ", ");
fd << (i > 0 && ((i + 1) % 8 == 0) ? ",\n " : ", ");
}
fd << "};\n\n";

Expand All @@ -248,9 +246,8 @@ inline static bool HasPopBit(uint32_t code);
fd << " {";
for (int i = 0; i < v.size(); i++) {
fd << "0x" << std::hex << v[i];
fd << " /* " << PrintChar(*(std::next(charset_.begin(), i))) << " */";
if (i < v.size() - 1) {
fd << (i > 0 && ((i + 1) % 4 == 0) ? ",\n " : ", ");
fd << (i > 0 && ((i + 1) % 8 == 0) ? ",\n " : ", ");
}
}

Expand Down Expand Up @@ -586,28 +583,4 @@ std::optional<uint32_t> TableBuilder::ComputeStateBits(
return result;
}

// Returns a short printable label for |c|, for use in comments emitted into
// generated tables:
//   - "CR", "TAB", "FF", "BKSPC", "LF" for the matching control characters,
//   - "\u" (backslash followed by 'u') for the byte 0x80,
//   - ".*" for any other byte value above 126,
//   - otherwise a one-character string holding |c| itself.
std::string PrintChar(char c) {
  // Classify on the unsigned byte value. Where plain char is signed, bytes
  // >= 0x80 are negative, so comparing the char directly would never match
  // 0x80 or exceed 126 and raw high bytes would leak into the output.
  const unsigned char byte = static_cast<unsigned char>(c);
  switch (byte) {
    case '\r':
      return "CR";
    case '\t':
      return "TAB";
    case '\f':
      return "FF";
    case '\b':
      return "BKSPC";
    case '\n':
      return "LF";
    case 0x80:
      return "\\u";
    default:
      break;
  }
  if (byte > 126) {
    return ".*";
  }
  return std::string(1, c);
}

} // namespace htmlparser::grammar
1 change: 1 addition & 0 deletions validator/cpp/htmlparser/htmldataset_test.cc
Expand Up @@ -25,6 +25,7 @@
#include <vector>

#include "gtest/gtest.h"
#include "absl/flags/flag.h"
#include "atomutil.h"
#include "defer.h"
#include "fileutil.h"
Expand Down

0 comments on commit 93b5760

Please sign in to comment.