Skip to content

Commit

Permalink
Add parsing for between street lines
Browse files Browse the repository at this point in the history
This CL adds type parsing for ADDRESS_HOME_BETWEEN_STREETS_1 and
ADDRESS_HOME_BETWEEN_STREETS_2. It is implemented behind the feature
flag kAutofillEnableSupportForBetweenStreets.

The field is classified as ADDRESS_HOME_BETWEEN_STREETS_2 only if
there exists a field with type ADDRESS_HOME_BETWEEN_STREETS_1.

With this CL, the regex for spanish locale for
ADDRESS_HOME_BETWEEN_STREETS is changed to search for "calles" instead
of "calle". As a side effect, single address between street fields with
name/label as "calle" would be labeled as ADDRESS_HOME_BETWEEN_STREETS_1
now instead of ADDRESS_HOME_BETWEEN_STREETS.

Bug: 1493377
Change-Id: I09413720cc7487574fd9f05da0531ec40adef9a4
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/4958347
Code-Coverage: findit-for-me@appspot.gserviceaccount.com <findit-for-me@appspot.gserviceaccount.com>
Reviewed-by: Dominic Battre <battre@chromium.org>
Commit-Queue: Dominic Battre <battre@chromium.org>
Reviewed-by: Norge Vizcay <vizcay@google.com>
Cr-Commit-Position: refs/heads/main@{#1217023}
  • Loading branch information
Vidhan authored and Chromium LUCI CQ committed Oct 30, 2023
1 parent 8c486fd commit 04a91b9
Show file tree
Hide file tree
Showing 9 changed files with 203 additions and 40 deletions.
2 changes: 1 addition & 1 deletion DEPS
Original file line number Diff line number Diff line change
Expand Up @@ -4076,7 +4076,7 @@ deps = {

'src/components/test/data/autofill/heuristics-json/internal': {
'url': Var('chrome_git') + '/chrome/test/autofill/structured_forms.git' + '@' +
'8d16b4669051c8c4937258019c05ad10bf8eb350',
'2a35e0b28ccdd4298cc12ba2ef477f9e1320c711',
'condition': 'checkout_chromium_autofill_test_dependencies',
},

Expand Down
152 changes: 118 additions & 34 deletions components/autofill/core/browser/form_parsing/address_field.cc
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,9 @@ std::unique_ptr<FormField> AddressField::Parse(
address_field->street_name_ || address_field->house_number_ ||
address_field->country_ || address_field->apartment_number_ ||
address_field->dependent_locality_ || address_field->landmark_ ||
address_field->between_streets_ || address_field->admin_level2_ ||
address_field->between_streets_ ||
address_field->between_streets_line_1_ ||
address_field->between_streets_line_2_ || address_field->admin_level2_ ||
address_field->between_streets_or_landmark_ ||
address_field->overflow_and_landmark_ || address_field->overflow_ ||
address_field->street_location_) {
Expand Down Expand Up @@ -297,6 +299,10 @@ void AddressField::AddClassifications(
field_candidates);
AddClassification(between_streets_, ADDRESS_HOME_BETWEEN_STREETS,
kBaseAddressParserScore, field_candidates);
AddClassification(between_streets_line_1_, ADDRESS_HOME_BETWEEN_STREETS_1,
kBaseAddressParserScore, field_candidates);
AddClassification(between_streets_line_2_, ADDRESS_HOME_BETWEEN_STREETS_2,
kBaseAddressParserScore, field_candidates);
AddClassification(admin_level2_, ADDRESS_HOME_ADMIN_LEVEL2,
kBaseAddressParserScore, field_candidates);
AddClassification(between_streets_or_landmark_,
Expand Down Expand Up @@ -373,13 +379,19 @@ bool AddressField::ParseAddressFieldSequence(
pattern_source);
base::span<const MatchPatternRef> between_streets_patterns =
GetMatchPatterns("BETWEEN_STREETS", page_language, pattern_source);
base::span<const MatchPatternRef> between_streets_line_1_patterns =
GetMatchPatterns("BETWEEN_STREETS_LINE_1", page_language, pattern_source);
base::span<const MatchPatternRef> between_streets_line_2_patterns =
GetMatchPatterns("BETWEEN_STREETS_LINE_2", page_language, pattern_source);

AutofillField* old_street_location = street_location_;
AutofillField* old_street_name = street_name_;
AutofillField* old_overflow = overflow_;
AutofillField* old_between_streets_or_landmark = between_streets_or_landmark_;
AutofillField* old_overflow_and_landmark = overflow_and_landmark_;
AutofillField* old_between_streets = between_streets_;
AutofillField* old_between_streets_line_1 = between_streets_line_1_;
AutofillField* old_between_streets_line_2 = between_streets_line_2_;
AutofillField* old_house_number = house_number_;
AutofillField* old_zip = zip_;
AutofillField* old_zip4 = zip4_;
Expand Down Expand Up @@ -411,7 +423,8 @@ bool AddressField::ParseAddressFieldSequence(
continue;
}

if (!(between_streets_or_landmark_ || between_streets_) &&
if (!(between_streets_or_landmark_ || between_streets_ ||
between_streets_line_1_ || between_streets_line_2_) &&
base::FeatureList::IsEnabled(
features::kAutofillEnableSupportForBetweenStreetsOrLandmark) &&
ParseFieldSpecifics(scanner, kBetweenStreetsOrLandmarkRe,
Expand Down Expand Up @@ -466,13 +479,30 @@ bool AddressField::ParseAddressFieldSequence(
}

if (base::FeatureList::IsEnabled(
features::kAutofillEnableSupportForBetweenStreets) &&
!between_streets_ &&
ParseFieldSpecifics(scanner, kBetweenStreetsRe,
kBetweenStreetsMatchType, between_streets_patterns,
&between_streets_,
{log_manager_, "kBetweenStreetsRe"})) {
continue;
features::kAutofillEnableSupportForBetweenStreets)) {
if (!between_streets_ && !between_streets_line_1_ &&
ParseFieldSpecifics(scanner, kBetweenStreetsRe,
kBetweenStreetsMatchType,
between_streets_patterns, &between_streets_,
{log_manager_, "kBetweenStreetsRe"})) {
continue;
}

if (!between_streets_line_1_ &&
ParseFieldSpecifics(
scanner, kBetweenStreetsLine1Re, kBetweenStreetsMatchType,
between_streets_line_1_patterns, &between_streets_line_1_,
{log_manager_, "kBetweenStreetsLine1Re"})) {
continue;
}

if (between_streets_line_1_ && !between_streets_line_2_ &&
ParseFieldSpecifics(
scanner, kBetweenStreetsLine2Re, kBetweenStreetsMatchType,
between_streets_line_2_patterns, &between_streets_line_2_,
{log_manager_, "kBetweenStreetsLine2Re"})) {
continue;
}
}

break;
Expand All @@ -485,20 +515,7 @@ bool AddressField::ParseAddressFieldSequence(
return false;
}

// Record success if the house number and at least one of the other
// fields were found because that indicates a structured address form.
if (house_number_ &&
(street_name_ || zip_ || overflow_ || overflow_and_landmark_ ||
between_streets_or_landmark_ || apartment_number_ || between_streets_)) {
// Keep this in sync with the corresponding if-statement in
// AddressField::ParseAddress to prevent repetitive work.
return true;
}
if (street_location_ &&
(apartment_number_ || overflow_ || overflow_and_landmark_ ||
between_streets_or_landmark_ || between_streets_)) {
// Keep this in sync with the corresponding if-statement in
// AddressField::ParseAddress to prevent repetitive work.
if (PossiblyAStructuredAddressForm()) {
return true;
}

Expand All @@ -510,6 +527,8 @@ bool AddressField::ParseAddressFieldSequence(
between_streets_or_landmark_ = old_between_streets_or_landmark;
overflow_and_landmark_ = old_overflow_and_landmark;
between_streets_ = old_between_streets;
between_streets_line_1_ = old_between_streets_line_1;
between_streets_line_2_ = old_between_streets_line_2;
zip_ = old_zip;
zip4_ = old_zip4;
apartment_number_ = old_apartment_number;
Expand All @@ -527,16 +546,10 @@ bool AddressField::ParseAddress(AutofillScanner* scanner,
// evidence that the current form is a structured form. If structured form
// fields are missing, they will be discovered later via
// AddressField::ParseAddressField.
if (house_number_ &&
(street_name_ || zip_ || overflow_ || overflow_and_landmark_ ||
between_streets_or_landmark_ || apartment_number_ || between_streets_)) {
return false;
}
if (street_location_ &&
(apartment_number_ || overflow_ || overflow_and_landmark_ ||
between_streets_or_landmark_ || between_streets_)) {
if (PossiblyAStructuredAddressForm()) {
return false;
}

// Do not inline these calls: After passing an address field sequence, there
// might be an additional address line 2 to parse afterwards.
bool has_field_sequence = ParseAddressFieldSequence(
Expand Down Expand Up @@ -803,6 +816,12 @@ bool AddressField::ParseAddressField(AutofillScanner* scanner,
if (between_streets_result == RESULT_MATCH_NAME_LABEL) {
return true;
}
ParseNameLabelResult between_street_lines12_result =
ParseNameAndLabelForBetweenStreetsLines12(scanner, page_language,
pattern_source);
if (between_street_lines12_result == RESULT_MATCH_NAME_LABEL) {
return true;
}
ParseNameLabelResult admin_level2_result =
ParseNameAndLabelForAdminLevel2(scanner, page_language, pattern_source);
if (admin_level2_result == RESULT_MATCH_NAME_LABEL) {
Expand All @@ -817,8 +836,9 @@ bool AddressField::ParseAddressField(AutofillScanner* scanner,
for (const auto result :
{dependent_locality_result, city_result, state_result, country_result,
zip_result, landmark_result, between_streets_result,
admin_level2_result, between_streets_or_landmark_result,
overflow_and_landmark_result, overflow_result}) {
between_street_lines12_result, admin_level2_result,
between_streets_or_landmark_result, overflow_and_landmark_result,
overflow_result}) {
if (result != RESULT_MATCH_NONE)
++num_of_matches;
}
Expand Down Expand Up @@ -848,6 +868,14 @@ bool AddressField::ParseAddressField(AutofillScanner* scanner,
if (between_streets_result != RESULT_MATCH_NONE) {
return SetFieldAndAdvanceCursor(scanner, &between_streets_);
}
if (between_street_lines12_result != RESULT_MATCH_NONE &&
!between_streets_line_1_) {
return SetFieldAndAdvanceCursor(scanner, &between_streets_line_1_);
}
if (between_street_lines12_result != RESULT_MATCH_NONE &&
!between_streets_line_2_) {
return SetFieldAndAdvanceCursor(scanner, &between_streets_line_2_);
}
if (admin_level2_result != RESULT_MATCH_NONE) {
return SetFieldAndAdvanceCursor(scanner, &admin_level2_);
}
Expand Down Expand Up @@ -909,6 +937,12 @@ bool AddressField::ParseAddressField(AutofillScanner* scanner,
if (between_streets_result == result) {
return SetFieldAndAdvanceCursor(scanner, &between_streets_);
}
if (between_street_lines12_result == result && !between_streets_line_1_) {
return SetFieldAndAdvanceCursor(scanner, &between_streets_line_1_);
}
if (between_street_lines12_result == result && !between_streets_line_2_) {
return SetFieldAndAdvanceCursor(scanner, &between_streets_line_2_);
}
if (admin_level2_result == result) {
return SetFieldAndAdvanceCursor(scanner, &admin_level2_);
}
Expand Down Expand Up @@ -1046,6 +1080,7 @@ AddressField::ParseNameAndLabelForBetweenStreetsOrLandmark(
const LanguageCode& page_language,
PatternSource pattern_source) {
if (between_streets_or_landmark_ || landmark_ || between_streets_ ||
between_streets_line_1_ || between_streets_line_2_ ||
!base::FeatureList::IsEnabled(
features::kAutofillEnableSupportForBetweenStreetsOrLandmark)) {
return RESULT_MATCH_NONE;
Expand Down Expand Up @@ -1124,7 +1159,7 @@ AddressField::ParseNameAndLabelForBetweenStreets(
const LanguageCode& page_language,
PatternSource pattern_source) {
// TODO(crbug.com/1441904) Remove feature check when launched.
if (between_streets_ ||
if (between_streets_ || between_streets_line_1_ ||
!base::FeatureList::IsEnabled(
features::kAutofillEnableSupportForBetweenStreets)) {
return RESULT_MATCH_NONE;
Expand All @@ -1138,6 +1173,39 @@ AddressField::ParseNameAndLabelForBetweenStreets(
{log_manager_, "kBetweenStreetsRe"});
}

AddressField::ParseNameLabelResult
AddressField::ParseNameAndLabelForBetweenStreetsLines12(
AutofillScanner* scanner,
const LanguageCode& page_language,
PatternSource pattern_source) {
// TODO(crbug.com/1441904) Remove feature check when launched.
if (between_streets_line_2_ ||
!base::FeatureList::IsEnabled(
features::kAutofillEnableSupportForBetweenStreets)) {
return RESULT_MATCH_NONE;
}

if (!between_streets_line_1_) {
base::span<const MatchPatternRef> between_streets_patterns_line_1 =
GetMatchPatterns("BETWEEN_STREETS_LINE_1", page_language,
pattern_source);
return ParseNameAndLabelSeparately(
scanner, kBetweenStreetsLine1Re, kBetweenStreetsMatchType,
between_streets_patterns_line_1, &between_streets_line_1_,
{log_manager_, "kBetweenStreetsLine1Re"});
} else if (!between_streets_line_2_) {
base::span<const MatchPatternRef> between_streets_patterns_line_2 =
GetMatchPatterns("BETWEEN_STREETS_LINE_2", page_language,
pattern_source);
return ParseNameAndLabelSeparately(
scanner, kBetweenStreetsLine2Re, kBetweenStreetsMatchType,
between_streets_patterns_line_2, &between_streets_line_2_,
{log_manager_, "kBetweenStreetsLine2Re"});
}

return RESULT_MATCH_NONE;
}

AddressField::ParseNameLabelResult
AddressField::ParseNameAndLabelForAdminLevel2(AutofillScanner* scanner,
const LanguageCode& page_language,
Expand All @@ -1155,4 +1223,20 @@ AddressField::ParseNameAndLabelForAdminLevel2(AutofillScanner* scanner,
&admin_level2_, {log_manager_, "kAdminLevel2Re"});
}

bool AddressField::PossiblyAStructuredAddressForm() const {
// Record success if the house number and at least one of the other
// fields were found because that indicates a structured address form.
if (house_number_ &&
(street_name_ || zip_ || overflow_ || overflow_and_landmark_ ||
between_streets_or_landmark_ || apartment_number_ || between_streets_ ||
between_streets_line_1_ || between_streets_line_2_)) {
return true;
}

return street_location_ &&
(apartment_number_ || overflow_ || overflow_and_landmark_ ||
between_streets_or_landmark_ || between_streets_ ||
between_streets_line_1_ || between_streets_line_2_);
}

} // namespace autofill
14 changes: 14 additions & 0 deletions components/autofill/core/browser/form_parsing/address_field.h
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,14 @@ class AddressField : public FormField {
const LanguageCode& page_language,
PatternSource pattern_source);

// Run matches on the name and label for a field and sets
// `between_streets_line_1_` and `between_streets_line_2_` respectively if a
// match is found.
ParseNameLabelResult ParseNameAndLabelForBetweenStreetsLines12(
AutofillScanner* scanner,
const LanguageCode& page_language,
PatternSource pattern_source);

ParseNameLabelResult ParseNameAndLabelForAdminLevel2(
AutofillScanner* scanner,
const LanguageCode& page_language,
Expand All @@ -174,6 +182,10 @@ class AddressField : public FormField {
const LanguageCode& page_language,
PatternSource pattern_source);

// Return true if the form being parsed shows an indication of being a
// structured address form.
bool PossiblyAStructuredAddressForm() const;

raw_ptr<LogManager> log_manager_;

raw_ptr<AutofillField> company_ = nullptr;
Expand All @@ -194,6 +206,8 @@ class AddressField : public FormField {
raw_ptr<AutofillField> country_ = nullptr;
raw_ptr<AutofillField> landmark_ = nullptr;
raw_ptr<AutofillField> between_streets_ = nullptr;
raw_ptr<AutofillField> between_streets_line_1_ = nullptr;
raw_ptr<AutofillField> between_streets_line_2_ = nullptr;
raw_ptr<AutofillField> admin_level2_ = nullptr;
raw_ptr<AutofillField> between_streets_or_landmark_ = nullptr;
raw_ptr<AutofillField> overflow_and_landmark_ = nullptr;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,11 +165,38 @@ TEST_P(AddressFieldTest, ParseBetweenStreets) {
enabled.InitAndEnableFeature(
features::kAutofillEnableSupportForBetweenStreets);

AddTextFormFieldData("entre-calle", "Entre calle",
AddTextFormFieldData("entre-calles", "Entre calles",
ADDRESS_HOME_BETWEEN_STREETS);
ClassifyAndVerify();
}

// Tests that multiple between streets field are correctly classified.
TEST_P(AddressFieldTest, ParseBetweenStreetsLines) {
// TODO(crbug.com/1441904): Remove once launched.
base::test::ScopedFeatureList scoped_feature_list{
features::kAutofillEnableSupportForBetweenStreets};

std::vector<std::pair<std::pair<std::string, std::string>,
std::pair<std::string, std::string>>>
// "Name", "Label" for ADDRESS_HOME_BETWEEN_STREETS_1
// "Name", "Label" for ADDRESS_HOME_BETWEEN_STREETS_2
instances = {{{"entre-calle1", "Entre calle 1"},
{"entre-calle2", "Entre calle 2"}},
{{"entre-calle1", ""}, {"entre-calle2", ""}},
{{"entre-calle", ""}, {"entre-calle", ""}},
{{"entre-calle", ""}, {"y-calle", ""}},
{{"", "Entre calle 1"}, {"", "Entre calle 2"}}};

for (const auto& [first_field, second_field] : instances) {
ClearFieldsAndExpectations();
AddTextFormFieldData(first_field.first, first_field.second,
ADDRESS_HOME_BETWEEN_STREETS_1);
AddTextFormFieldData(second_field.first, second_field.second,
ADDRESS_HOME_BETWEEN_STREETS_2);
ClassifyAndVerify();
}
}

// Tests that address level 2 field is correctly classified.
TEST_P(AddressFieldTest, ParseAdminLevel2) {
// TODO(crbug.com/1441904): Remove once launched.
Expand Down Expand Up @@ -267,7 +294,7 @@ TEST_P(AddressFieldTest,
AddTextFormFieldData("country", "Country", ADDRESS_HOME_COUNTRY);
AddTextFormFieldData("zip", "Zip", ADDRESS_HOME_ZIP);
AddTextFormFieldData("landmark", "Landmark", ADDRESS_HOME_LANDMARK);
AddTextFormFieldData("entre-calle", "Entre calle",
AddTextFormFieldData("entre-calles", "Entre calles",
ADDRESS_HOME_BETWEEN_STREETS);
AddTextFormFieldData("municipio", "Municipio", ADDRESS_HOME_ADMIN_LEVEL2);
AddTextFormFieldData("complemento", "Complemento", ADDRESS_HOME_OVERFLOW);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,4 +121,11 @@ FieldRendererId FormFieldTestBase::MakeFieldRendererId() {
return FieldRendererId(++id_counter_);
}

void FormFieldTestBase::ClearFieldsAndExpectations() {
field_ = nullptr;
list_.clear();
expected_classifications_.clear();
field_candidates_map_.clear();
}

} // namespace autofill
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,9 @@ class FormFieldTestBase {
void ClassifyAndVerify(ParseResult parse_result = ParseResult::PARSED,
const LanguageCode& page_language = LanguageCode(""));

// Removes all the fields and resets the expectations.
void ClearFieldsAndExpectations();

// Test the parsed verifications against the expectations.
void TestClassificationExpectations();

Expand Down

0 comments on commit 04a91b9

Please sign in to comment.