From f1f49d9c0ea464ee2656a7f08205b720d4601013 Mon Sep 17 00:00:00 2001 From: Yue Zhang Date: Wed, 23 Mar 2022 00:01:45 +0000 Subject: [PATCH] [CHU] Feed renderer heuristics from component to ChromeCart [2] Bug: 1300332 Change-Id: I7eee52d99146a7c0ea275c5852c9a9fa65ec1a25 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/3533547 Reviewed-by: Wei-Yin Chen Commit-Queue: Yue Zhang Cr-Commit-Position: refs/heads/main@{#984103} --- chrome/renderer/cart/commerce_hint_agent.cc | 278 ++++++++++-------- .../cart/commerce_hint_agent_unittest.cc | 111 +++++-- .../commerce/core/commerce_heuristics_data.cc | 39 +++ .../commerce/core/commerce_heuristics_data.h | 23 ++ .../core/commerce_heuristics_data_unittest.cc | 78 ++++- 5 files changed, 384 insertions(+), 145 deletions(-) diff --git a/chrome/renderer/cart/commerce_hint_agent.cc b/chrome/renderer/cart/commerce_hint_agent.cc index 0569c7ed7dae1a..ddb1b92c3e8af1 100644 --- a/chrome/renderer/cart/commerce_hint_agent.cc +++ b/chrome/renderer/cart/commerce_hint_agent.cc @@ -332,65 +332,173 @@ const re2::RE2& GetAddToCartPattern() { return *instance; } +const std::map& GetCartPatternMapping() { + static base::NoDestructor> pattern_map([] { + const base::Value json( + base::JSONReader::Read( + kCartPatternMapping.Get().empty() + ? ui::ResourceBundle::GetSharedInstance().GetRawDataResource( + IDR_CART_DOMAIN_CART_URL_REGEX_JSON) + : kCartPatternMapping.Get()) + .value()); + DCHECK(json.is_dict()); + std::map map; + for (auto item : json.DictItems()) { + map.insert({std::move(item.first), std::move(item.second.GetString())}); + } + return map; + }()); + return *pattern_map; +} + +const std::map& GetCheckoutPatternMapping() { + static base::NoDestructor> pattern_map([] { + const base::Value json( + base::JSONReader::Read( + kCheckoutPatternMapping.Get().empty() + ? ui::ResourceBundle::GetSharedInstance().GetRawDataResource( + IDR_CHECKOUT_URL_REGEX_DOMAIN_MAPPING_JSON) + : kCheckoutPatternMapping.Get()) + .value()); + DCHECK(json.is_dict()); + std::map map; + for (const auto item : json.DictItems()) { + map.insert({std::move(item.first), std::move(item.second.GetString())}); + } + return map; + }()); + return *pattern_map; +} + +const std::map& GetPurchaseURLPatternMapping() { + static base::NoDestructor> pattern_map([] { + const base::Value json( + base::JSONReader::Read( + kPurchaseURLPatternMapping.Get().empty() + ? ui::ResourceBundle::GetSharedInstance().GetRawDataResource( + IDR_PURCHASE_URL_REGEX_DOMAIN_MAPPING_JSON) + : kPurchaseURLPatternMapping.Get()) + .value()); + DCHECK(json.is_dict()); + std::map map; + for (const auto item : json.DictItems()) { + map.insert({std::move(item.first), std::move(item.second.GetString())}); + } + return map; + }()); + return *pattern_map; +} + +const std::map& GetPurchaseButtonPatternMapping() { + static base::NoDestructor> pattern_map([] { + const base::Value json( + base::JSONReader::Read(kPurchaseButtonPatternMapping.Get()).value()); + DCHECK(json.is_dict()); + std::map map; + for (const auto item : json.DictItems()) { + map.insert({std::move(item.first), std::move(item.second.GetString())}); + } + return map; + }()); + return *pattern_map; +} + // TODO(crbug.com/1189786): Using per-site pattern and full URL matching could // be unnecessary. Improve this later by using general pattern if possible and // more flexible matching. -const re2::RE2& GetVisitCartPattern(const GURL& url) { - static base::NoDestructor> - heuristic_string_map([] { - const base::StringPiece json_resource( - ui::ResourceBundle::GetSharedInstance().GetRawDataResource( - IDR_CART_DOMAIN_CART_URL_REGEX_JSON)); - const std::string& finch_param = kCartPatternMapping.Get(); - const base::Value json(base::JSONReader::Read(finch_param.empty() - ? json_resource - : finch_param) - .value()); - DCHECK(json.is_dict()); - std::map map; - for (auto item : json.DictItems()) { - map.insert( - {std::move(item.first), std::move(item.second.GetString())}); - } - return map; - }()); +const re2::RE2* GetVisitCartPattern(const GURL& url) { + std::string domain = eTLDPlusOne(url); + auto* pattern_from_component = + commerce_heuristics::CommerceHeuristicsData::GetInstance() + .GetCartPageURLPatternForDomain(domain); + if (pattern_from_component && + kCartPatternMapping.Get() == kCartPatternMapping.default_value) { + return pattern_from_component; + } + const std::map& cart_string_map = + GetCartPatternMapping(); static base::NoDestructor>> - heuristic_regex_map; + cart_regex_map; static re2::RE2::Options options; options.set_case_sensitive(false); - const std::string& domain = eTLDPlusOne(url); - if (heuristic_string_map->find(domain) == heuristic_string_map->end()) { + if (cart_string_map.find(domain) == cart_string_map.end()) { auto* pattern_from_component = commerce_heuristics::CommerceHeuristicsData::GetInstance() .GetCartPageURLPattern(); if (pattern_from_component && kCartPattern.Get() == kCartPattern.default_value) { - return *pattern_from_component; + return pattern_from_component; } static base::NoDestructor instance(kCartPattern.Get(), options); - return *instance; + return instance.get(); } - if (heuristic_regex_map->find(domain) == heuristic_regex_map->end()) { - heuristic_regex_map->insert( - {domain, std::make_unique(heuristic_string_map->at(domain), - options)}); + if (cart_regex_map->find(domain) == cart_regex_map->end()) { + cart_regex_map->insert({domain, std::make_unique( + cart_string_map.at(domain), options)}); } - return *heuristic_regex_map->at(domain); + return cart_regex_map->at(domain).get(); } // TODO(crbug/1164236): cover more shopping sites. -const re2::RE2& GetVisitCheckoutPattern() { +const re2::RE2* GetVisitCheckoutPattern(const GURL& url) { + std::string domain = eTLDPlusOne(url); auto* pattern_from_component = commerce_heuristics::CommerceHeuristicsData::GetInstance() - .GetCheckoutPageURLPattern(); + .GetCheckoutPageURLPatternForDomain(domain); if (pattern_from_component && - kCheckoutPattern.Get() == kCheckoutPattern.default_value) { - return *pattern_from_component; + kCheckoutPatternMapping.Get() == kCheckoutPatternMapping.default_value) { + return pattern_from_component; } - re2::RE2::Options options; + const std::map& checkout_string_map = + GetCheckoutPatternMapping(); + static base::NoDestructor>> + checkout_regex_map; + static re2::RE2::Options options; options.set_case_sensitive(false); - static base::NoDestructor instance(kCheckoutPattern.Get(), options); - return *instance; + if (checkout_string_map.find(domain) == checkout_string_map.end()) { + auto* pattern_from_component = + commerce_heuristics::CommerceHeuristicsData::GetInstance() + .GetCheckoutPageURLPattern(); + if (pattern_from_component && + kCheckoutPattern.Get() == kCheckoutPattern.default_value) { + return pattern_from_component; + } + static base::NoDestructor instance(kCheckoutPattern.Get(), + options); + return instance.get(); + } + if (checkout_regex_map->find(domain) == checkout_regex_map->end()) { + checkout_regex_map->insert( + {domain, + std::make_unique(checkout_string_map.at(domain), options)}); + } + return checkout_regex_map->at(domain).get(); +} + +const re2::RE2* GetVisitPurchasePattern(const GURL& url) { + std::string domain = eTLDPlusOne(url); + auto* pattern_from_component = + commerce_heuristics::CommerceHeuristicsData::GetInstance() + .GetPurchasePageURLPatternForDomain(domain); + if (pattern_from_component && kPurchaseURLPatternMapping.Get() == + kPurchaseURLPatternMapping.default_value) { + return pattern_from_component; + } + const std::map& purchase_string_map = + GetPurchaseURLPatternMapping(); + if (purchase_string_map.find(domain) == purchase_string_map.end()) { + return nullptr; + } + static base::NoDestructor>> + purchase_regex_map; + static re2::RE2::Options options; + options.set_case_sensitive(false); + if (purchase_regex_map->find(domain) == purchase_regex_map->end()) { + purchase_regex_map->insert( + {domain, + std::make_unique(purchase_string_map.at(domain), options)}); + } + return purchase_regex_map->at(domain).get(); } const re2::RE2& GetSkipPattern() { @@ -465,58 +573,6 @@ const std::map& GetSkipAddToCartMapping() { return *skip_map; } -const std::map& GetCheckoutPatternMapping() { - static base::NoDestructor> pattern_map([] { - const base::Value json( - base::JSONReader::Read( - kCheckoutPatternMapping.Get().empty() - ? ui::ResourceBundle::GetSharedInstance().GetRawDataResource( - IDR_CHECKOUT_URL_REGEX_DOMAIN_MAPPING_JSON) - : kCheckoutPatternMapping.Get()) - .value()); - DCHECK(json.is_dict()); - std::map map; - for (const auto item : json.DictItems()) { - map.insert({std::move(item.first), std::move(item.second.GetString())}); - } - return map; - }()); - return *pattern_map; -} - -const std::map& GetPurchaseURLPatternMapping() { - static base::NoDestructor> pattern_map([] { - const base::Value json( - base::JSONReader::Read( - kPurchaseURLPatternMapping.Get().empty() - ? ui::ResourceBundle::GetSharedInstance().GetRawDataResource( - IDR_PURCHASE_URL_REGEX_DOMAIN_MAPPING_JSON) - : kPurchaseURLPatternMapping.Get()) - .value()); - DCHECK(json.is_dict()); - std::map map; - for (const auto item : json.DictItems()) { - map.insert({std::move(item.first), std::move(item.second.GetString())}); - } - return map; - }()); - return *pattern_map; -} - -const std::map& GetPurchaseButtonPatternMapping() { - static base::NoDestructor> pattern_map([] { - const base::Value json( - base::JSONReader::Read(kPurchaseButtonPatternMapping.Get()).value()); - DCHECK(json.is_dict()); - std::map map; - for (const auto item : json.DictItems()) { - map.insert({std::move(item.first), std::move(item.second.GetString())}); - } - return map; - }()); - return *pattern_map; -} - bool DetectAddToCart(content::RenderFrame* render_frame, const blink::WebURLRequest& request) { blink::WebLocalFrame* frame = render_frame->GetWebFrame(); @@ -686,48 +742,24 @@ bool CommerceHintAgent::IsAddToCart(base::StringPiece str, } bool CommerceHintAgent::IsVisitCart(const GURL& url) { - return PartialMatch(CanonicalURL(url).substr(0, kLengthLimit), - GetVisitCartPattern(url)); + auto* pattern = GetVisitCartPattern(url); + if (!pattern) + return false; + return PartialMatch(CanonicalURL(url).substr(0, kLengthLimit), *pattern); } bool CommerceHintAgent::IsVisitCheckout(const GURL& url) { - const std::map& checkout_string_map = - GetCheckoutPatternMapping(); - static base::NoDestructor>> - checkout_regex_map; - std::string domain = eTLDPlusOne(url); - std::string url_string = CanonicalURL(url).substr(0, kLengthLimit); - if (checkout_string_map.find(domain) == checkout_string_map.end()) { - return PartialMatch(url_string, GetVisitCheckoutPattern()); - } - static re2::RE2::Options options; - options.set_case_sensitive(false); - if (checkout_regex_map->find(domain) == checkout_regex_map->end()) { - checkout_regex_map->insert( - {domain, - std::make_unique(checkout_string_map.at(domain), options)}); - } - return PartialMatch(url_string, *checkout_regex_map->at(domain)); + auto* pattern = GetVisitCheckoutPattern(url); + if (!pattern) + return false; + return PartialMatch(CanonicalURL(url).substr(0, kLengthLimit), *pattern); } bool CommerceHintAgent::IsPurchase(const GURL& url) { - const std::map& purchase_string_map = - GetPurchaseURLPatternMapping(); - static base::NoDestructor>> - purchase_regex_map; - std::string domain = eTLDPlusOne(url); - std::string url_string = CanonicalURL(url).substr(0, kLengthLimit); - if (purchase_string_map.find(domain) == purchase_string_map.end()) { + auto* pattern = GetVisitPurchasePattern(url); + if (!pattern) return false; - } - static re2::RE2::Options options; - options.set_case_sensitive(false); - if (purchase_regex_map->find(domain) == purchase_regex_map->end()) { - purchase_regex_map->insert( - {domain, - std::make_unique(purchase_string_map.at(domain), options)}); - } - return PartialMatch(url_string, *purchase_regex_map->at(domain)); + return PartialMatch(CanonicalURL(url).substr(0, kLengthLimit), *pattern); } bool CommerceHintAgent::IsPurchase(const GURL& url, diff --git a/chrome/renderer/cart/commerce_hint_agent_unittest.cc b/chrome/renderer/cart/commerce_hint_agent_unittest.cc index 3ed0b984c913c8..a15a64b2239d90 100644 --- a/chrome/renderer/cart/commerce_hint_agent_unittest.cc +++ b/chrome/renderer/cart/commerce_hint_agent_unittest.cc @@ -751,12 +751,6 @@ const char kSkipPattern[] = "(^|\\W)(?i)(skipped|萬國碼)(\\W|$)"; std::map kSkipParams = { {"product-skip-pattern", kSkipPattern}}; -const char kGlobalHeuristicsJSONData[] = R"###( - { - "sensitive_product_regex": "(^|\\W)(?i)(skipped|萬國碼)(\\W|$)" - } - )###"; - std::map kSkipAddToCartRequests = { {"https://www.electronicexpress.com", "https://www.electronicexpress.com"}, {"https://www.electronicexpress.com", "https://www.google.com"}, @@ -838,7 +832,7 @@ TEST_F(CommerceHintAgentUnitTest, IsVisitCart) { EXPECT_FALSE(CommerceHintAgent::IsVisitCart(GURL(str))) << str; } - // Heuristics from component. + // General heuristics from component. const std::string& component_pattern = R"###( { "cart_page_url_regex": "bar" @@ -846,13 +840,37 @@ TEST_F(CommerceHintAgentUnitTest, IsVisitCart) { )###"; EXPECT_TRUE(commerce_heuristics::CommerceHeuristicsData::GetInstance() .PopulateDataFromComponent("{}", component_pattern, "", "")); - EXPECT_TRUE(CommerceHintAgent::IsVisitCart(GURL("https://wwww.bar.com"))); + EXPECT_TRUE(CommerceHintAgent::IsVisitCart(GURL("https://wwww.foo.com/bar"))); - // Feature param has a higher priority. + // Per-domain heuristics from component which has a higher priority than + // general heuristics. + EXPECT_TRUE(commerce_heuristics::CommerceHeuristicsData::GetInstance() + .PopulateDataFromComponent(R"###( + { + "foo.com": { + "cart_url_regex" : "foo.com/([^/]+/)?trac" + } + } + )###", + "{}", "", "")); + EXPECT_TRUE( + CommerceHintAgent::IsVisitCart(GURL("https://wwww.foo.com/test/trac"))); + EXPECT_FALSE( + CommerceHintAgent::IsVisitCart(GURL("https://wwww.foo.com/bar"))); + + // Feature param has a higher priority than component. base::test::ScopedFeatureList feature_list; feature_list.InitAndEnableFeatureWithParameters( - ntp_features::kNtpChromeCartModule, {{"cart-pattern", "foo"}}); - EXPECT_FALSE(CommerceHintAgent::IsVisitCart(GURL("https://wwww.bar.com"))); + ntp_features::kNtpChromeCartModule, + {{"cart-pattern", "baz"}, {"cart-pattern-mapping", R"###( + { + "foo.com": "foo.com/cart" + } + )###"}}); + EXPECT_FALSE( + CommerceHintAgent::IsVisitCart(GURL("https://wwww.foo.com/bar"))); + EXPECT_FALSE( + CommerceHintAgent::IsVisitCart(GURL("https://wwww.foo.com/test/trac"))); } TEST_F(CommerceHintAgentUnitTest, IsVisitCheckout) { @@ -864,7 +882,7 @@ TEST_F(CommerceHintAgentUnitTest, IsVisitCheckout) { EXPECT_FALSE(CommerceHintAgent::IsVisitCheckout(GURL(str))) << str; } - // Heuristics from component. + // General heuristics from component. const std::string& component_pattern = R"###( { "checkout_page_url_regex": "bar" @@ -872,23 +890,74 @@ TEST_F(CommerceHintAgentUnitTest, IsVisitCheckout) { )###"; EXPECT_TRUE(commerce_heuristics::CommerceHeuristicsData::GetInstance() .PopulateDataFromComponent("{}", component_pattern, "", "")); - EXPECT_TRUE(CommerceHintAgent::IsVisitCheckout(GURL("https://wwww.bar.com"))); + EXPECT_TRUE( + CommerceHintAgent::IsVisitCheckout(GURL("https://wwww.foo.com/bar"))); - // Feature param has a higher priority. + // Per-domain heuristics from component which has a higher priority than + // general heuristics. + EXPECT_TRUE(commerce_heuristics::CommerceHeuristicsData::GetInstance() + .PopulateDataFromComponent(R"###( + { + "foo.com": { + "checkout_url_regex" : "foo.com/([^/]+/)?tuokcehc" + } + } + )###", + "{}", "", "")); + EXPECT_TRUE(CommerceHintAgent::IsVisitCheckout( + GURL("https://wwww.foo.com/test/tuokcehc"))); + EXPECT_FALSE( + CommerceHintAgent::IsVisitCheckout(GURL("https://wwww.foo.com/bar"))); + + // Feature param has a higher priority than component. base::test::ScopedFeatureList feature_list; feature_list.InitAndEnableFeatureWithParameters( - ntp_features::kNtpChromeCartModule, {{"checkout-pattern", "foo"}}); + ntp_features::kNtpChromeCartModule, + {{"checkout-pattern", "foo"}, {"checkout-pattern-mapping", R"###( + { + "foo.com": "foo.com/checkout" + } + )###"}}); EXPECT_FALSE( - CommerceHintAgent::IsVisitCheckout(GURL("https://wwww.bar.com"))); + CommerceHintAgent::IsVisitCheckout(GURL("https://wwww.foo.com/bar"))); + EXPECT_FALSE(CommerceHintAgent::IsVisitCheckout( + GURL("https://wwww.foo.com/test/tuokcehc"))); } TEST_F(CommerceHintAgentUnitTest, IsPurchaseByURL) { + // Heuristics from feature param default value. for (auto* str : kPurchaseURL) { EXPECT_TRUE(CommerceHintAgent::IsPurchase(GURL(str))) << str; } for (auto* str : kNotPurchaseURL) { EXPECT_FALSE(CommerceHintAgent::IsPurchase(GURL(str))) << str; } + + // Per-domain heuristics from component has a higher priority than default + // value. + EXPECT_TRUE(commerce_heuristics::CommerceHeuristicsData::GetInstance() + .PopulateDataFromComponent(R"###( + { + "foo.com": { + "purchase_url_regex" : "foo.com/([^/]+/)?esahcrup" + } + } + )###", + "{}", "", "")); + EXPECT_TRUE(CommerceHintAgent::IsPurchase( + GURL("https://wwww.foo.com/test/esahcrup"))); + + // Feature param has a higher priority than component. + base::test::ScopedFeatureList feature_list; + feature_list.InitAndEnableFeatureWithParameters( + ntp_features::kNtpChromeCartModule, + {{"purchase-url-pattern-mapping", R"###( + { + "foo.com": "foo.com/purchase" + } + )###"}}); + EXPECT_FALSE(CommerceHintAgent::IsPurchase( + GURL("https://wwww.foo.com/test/esahcrup"))); } TEST_F(CommerceHintAgentUnitTest, IsPurchaseByForm) { @@ -934,9 +1003,13 @@ TEST_F(CommerceHintAgentUnitTest, ShouldSkipFromFeatureParam) { } TEST_F(CommerceHintAgentUnitTest, ShouldSkipFromComponent) { - EXPECT_TRUE( - commerce_heuristics::CommerceHeuristicsData::GetInstance() - .PopulateDataFromComponent("{}", kGlobalHeuristicsJSONData, "", "")); + const std::string& component_pattern = R"###( + { + "sensitive_product_regex": "(^|\\W)(?i)(skipped|萬國碼)(\\W|$)" + } + )###"; + EXPECT_TRUE(commerce_heuristics::CommerceHeuristicsData::GetInstance() + .PopulateDataFromComponent("{}", component_pattern, "", "")); for (auto* str : kSkipText) { EXPECT_TRUE(CommerceHintAgent::ShouldSkip(str)) << str; diff --git a/components/commerce/core/commerce_heuristics_data.cc b/components/commerce/core/commerce_heuristics_data.cc index 15aba3dd6c6d6e..9b1f11b6987f94 100644 --- a/components/commerce/core/commerce_heuristics_data.cc +++ b/components/commerce/core/commerce_heuristics_data.cc @@ -13,6 +13,9 @@ namespace { // CommerceHintHeuristics types. constexpr char kMerchantNameType[] = "merchant_name"; constexpr char kMerchantCartURLType[] = "cart_url"; +constexpr char kMerchantCartURLRegexType[] = "cart_url_regex"; +constexpr char kMerchantCheckoutURLRegexType[] = "checkout_url_regex"; +constexpr char kMerchantPurchaseURLRegexType[] = "purchase_url_regex"; // CommerceGlobalHeuristics types. constexpr char kSkipProductPatternType[] = "sensitive_product_regex"; @@ -65,6 +68,9 @@ bool CommerceHeuristicsData::PopulateDataFromComponent( ConstructGlobalRegex(kPurchaseButtonTextPatternType); add_to_cart_request_pattern_ = ConstructGlobalRegex(kAddToCartRequestPatternType); + domain_cart_url_pattern_mapping_.clear(); + domain_checkout_url_pattern_mapping_.clear(); + domain_purchase_url_pattern_mapping_.clear(); return true; } @@ -108,6 +114,24 @@ const re2::RE2* CommerceHeuristicsData::GetAddToCartRequestPattern() { return add_to_cart_request_pattern_.get(); } +const re2::RE2* CommerceHeuristicsData::GetCartPageURLPatternForDomain( + const std::string& domain) { + return GetCommerceHintHeuristicsRegex(domain_cart_url_pattern_mapping_, + kMerchantCartURLRegexType, domain); +} + +const re2::RE2* CommerceHeuristicsData::GetCheckoutPageURLPatternForDomain( + const std::string& domain) { + return GetCommerceHintHeuristicsRegex(domain_checkout_url_pattern_mapping_, + kMerchantCheckoutURLRegexType, domain); +} + +const re2::RE2* CommerceHeuristicsData::GetPurchasePageURLPatternForDomain( + const std::string& domain) { + return GetCommerceHintHeuristicsRegex(domain_purchase_url_pattern_mapping_, + kMerchantPurchaseURLRegexType, domain); +} + absl::optional CommerceHeuristicsData::GetCommerceHintHeuristics( const std::string& type, const std::string& domain) { @@ -131,6 +155,21 @@ absl::optional CommerceHeuristicsData::GetCommerceGlobalHeuristics( return absl::optional(*global_heuristics_.FindString(type)); } +const re2::RE2* CommerceHeuristicsData::GetCommerceHintHeuristicsRegex( + std::map>& map, + const std::string type, + const std::string domain) { + if (map.find(domain) != map.end()) + return map.at(domain).get(); + absl::optional pattern = GetCommerceHintHeuristics(type, domain); + if (!pattern.has_value()) + return nullptr; + re2::RE2::Options options; + options.set_case_sensitive(false); + map.emplace(domain, std::make_unique(*pattern, options)); + return map.at(domain).get(); +} + std::unique_ptr CommerceHeuristicsData::ConstructGlobalRegex( const std::string& type) { if (!GetCommerceGlobalHeuristics(type).has_value()) { diff --git a/components/commerce/core/commerce_heuristics_data.h b/components/commerce/core/commerce_heuristics_data.h index 1c3f75196df65b..b3e0dd8b5d6bc9 100644 --- a/components/commerce/core/commerce_heuristics_data.h +++ b/components/commerce/core/commerce_heuristics_data.h @@ -55,6 +55,18 @@ class CommerceHeuristicsData { // request. const re2::RE2* GetAddToCartRequestPattern(); + // Try to get the pattern regex to decide if a URL is cart page URL in + // `domain`. + const re2::RE2* GetCartPageURLPatternForDomain(const std::string& domain); + + // Try to get the pattern regex to decide if a URL is checkout page URL in + // `domain`. + const re2::RE2* GetCheckoutPageURLPatternForDomain(const std::string& domain); + + // Try to get the pattern regex to decide if a URL is purchase page URL in + // `domain`. + const re2::RE2* GetPurchasePageURLPatternForDomain(const std::string& domain); + private: friend class CommerceHeuristicsDataTest; @@ -65,6 +77,11 @@ class CommerceHeuristicsData { absl::optional GetCommerceGlobalHeuristics( const std::string& type); + const re2::RE2* GetCommerceHintHeuristicsRegex( + std::map>& map, + const std::string type, + const std::string domain); + std::unique_ptr ConstructGlobalRegex(const std::string& type); base::Value::Dict hint_heuristics_; @@ -76,6 +93,12 @@ class CommerceHeuristicsData { std::unique_ptr checkout_url_pattern_; std::unique_ptr purchase_button_pattern_; std::unique_ptr add_to_cart_request_pattern_; + std::map> + domain_cart_url_pattern_mapping_; + std::map> + domain_checkout_url_pattern_mapping_; + std::map> + domain_purchase_url_pattern_mapping_; }; } // namespace commerce_heuristics diff --git a/components/commerce/core/commerce_heuristics_data_unittest.cc b/components/commerce/core/commerce_heuristics_data_unittest.cc index 7d90958abf35f6..52a4335325d24c 100644 --- a/components/commerce/core/commerce_heuristics_data_unittest.cc +++ b/components/commerce/core/commerce_heuristics_data_unittest.cc @@ -12,12 +12,16 @@ const char kHintHeuristicsJSONData[] = R"###( { "foo.com": { "merchant_name": "Foo", - "cart_url": "foo.com/cart" + "cart_url": "foo.com/cart", + "cart_url_regex" : "foo.com/([^/]+/)?cart" }, "bar.com": { - "merchant_name": "Bar" + "merchant_name": "Bar", + "checkout_url_regex" : "bar.com/([^/]+/)?checkout" }, - "baz.com": {} + "baz.com": { + "purchase_url_regex" : "baz.com/([^/]+/)?purchase" + } } )###"; const char kGlobalHeuristicsJSONData[] = R"###( @@ -62,8 +66,16 @@ TEST_F(CommerceHeuristicsDataTest, TestPopulateHintHeuristics_Success) { "Foo"); ASSERT_EQ(*hint_heuristics->FindDict("foo.com")->FindString("cart_url"), "foo.com/cart"); + ASSERT_EQ(*hint_heuristics->FindDict("foo.com")->FindString("cart_url_regex"), + "foo.com/([^/]+/)?cart"); ASSERT_EQ(*hint_heuristics->FindDict("bar.com")->FindString("merchant_name"), "Bar"); + ASSERT_EQ( + *hint_heuristics->FindDict("bar.com")->FindString("checkout_url_regex"), + "bar.com/([^/]+/)?checkout"); + ASSERT_EQ( + *hint_heuristics->FindDict("baz.com")->FindString("purchase_url_regex"), + "baz.com/([^/]+/)?purchase"); auto* global_heuristics = GetGlobalHeuristics(); ASSERT_EQ(global_heuristics->size(), 7u); ASSERT_TRUE(global_heuristics->contains("sensitive_product_regex")); @@ -196,4 +208,64 @@ TEST_F(CommerceHeuristicsDataTest, TestGetAddToCartRequestPattern) { ASSERT_EQ(data.GetAddToCartRequestPattern()->pattern(), "add_to_cart"); } + +TEST_F(CommerceHeuristicsDataTest, TestGetCartPageURLPatternForDomain) { + auto& data = commerce_heuristics::CommerceHeuristicsData::GetInstance(); + + ASSERT_TRUE(data.PopulateDataFromComponent( + kHintHeuristicsJSONData, kGlobalHeuristicsJSONData, "", "")); + + ASSERT_EQ(data.GetCartPageURLPatternForDomain("foo.com")->pattern(), + "foo.com/([^/]+/)?cart"); +} + +TEST_F(CommerceHeuristicsDataTest, TestGetCheckoutPageURLPatternForDomain) { + auto& data = commerce_heuristics::CommerceHeuristicsData::GetInstance(); + + ASSERT_TRUE(data.PopulateDataFromComponent( + kHintHeuristicsJSONData, kGlobalHeuristicsJSONData, "", "")); + + ASSERT_EQ(data.GetCheckoutPageURLPatternForDomain("bar.com")->pattern(), + "bar.com/([^/]+/)?checkout"); +} + +TEST_F(CommerceHeuristicsDataTest, TestGetPurchasePageURLPatternForDomain) { + auto& data = commerce_heuristics::CommerceHeuristicsData::GetInstance(); + + ASSERT_TRUE(data.PopulateDataFromComponent( + kHintHeuristicsJSONData, kGlobalHeuristicsJSONData, "", "")); + + ASSERT_EQ(data.GetPurchasePageURLPatternForDomain("baz.com")->pattern(), + "baz.com/([^/]+/)?purchase"); +} + +TEST_F(CommerceHeuristicsDataTest, TestRepopulateHintData) { + auto& data = commerce_heuristics::CommerceHeuristicsData::GetInstance(); + + ASSERT_TRUE(data.PopulateDataFromComponent( + kHintHeuristicsJSONData, kGlobalHeuristicsJSONData, "", "")); + + ASSERT_EQ(data.GetCartPageURLPatternForDomain("foo.com")->pattern(), + "foo.com/([^/]+/)?cart"); + ASSERT_EQ(data.GetCheckoutPageURLPatternForDomain("bar.com")->pattern(), + "bar.com/([^/]+/)?checkout"); + ASSERT_EQ(data.GetPurchasePageURLPatternForDomain("baz.com")->pattern(), + "baz.com/([^/]+/)?purchase"); + + ASSERT_TRUE(data.PopulateDataFromComponent( + R"###( + { + "qux.com": { + "purchase_url_regex" : "qux.com/([^/]+/)?purchase" + } + } + )###", + kGlobalHeuristicsJSONData, "", "")); + + ASSERT_FALSE(data.GetCartPageURLPatternForDomain("foo.com")); + ASSERT_FALSE(data.GetCheckoutPageURLPatternForDomain("bar.com")); + ASSERT_FALSE(data.GetPurchasePageURLPatternForDomain("baz.com")); + ASSERT_EQ(data.GetPurchasePageURLPatternForDomain("qux.com")->pattern(), + "qux.com/([^/]+/)?purchase"); +} } // namespace commerce_heuristics