107 changes: 70 additions & 37 deletions url/url_canon_unittest.cc
Expand Up @@ -608,21 +608,36 @@ TEST(URLCanonTest, Host) {
}

TEST(URLCanonTest, IPv4) {
// clang-format off
IPAddressCase cases[] = {
// Empty is not an IP address.
// Empty is not an IP address.
{"", L"", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
{".", L".", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
// Regular IP addresses in different bases.
// Regular IP addresses in different bases.
{"192.168.0.1", L"192.168.0.1", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 4, "C0A80001"},
{"0300.0250.00.01", L"0300.0250.00.01", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 4, "C0A80001"},
{"0xC0.0Xa8.0x0.0x1", L"0xC0.0Xa8.0x0.0x1", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 4, "C0A80001"},
// Non-IP addresses due to invalid characters.
// Non-IP addresses due to invalid characters.
{"192.168.9.com", L"192.168.9.com", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
// Invalid characters for the base should be rejected.
{"19a.168.0.1", L"19a.168.0.1", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
{"0308.0250.00.01", L"0308.0250.00.01", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
{"0xCG.0xA8.0x0.0x1", L"0xCG.0xA8.0x0.0x1", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
// If there are not enough components, the last one should fill them out.
// Hostnames with a numeric final component but other components that don't
// parse as numbers should be considered broken.
{"19a.168.0.1", L"19a.168.0.1", "", Component(), CanonHostInfo::BROKEN, -1, ""},
{"19a.168.0.1.", L"19a.168.0.1.", "", Component(), CanonHostInfo::BROKEN, -1, ""},
{"0308.0250.00.01", L"0308.0250.00.01", "", Component(), CanonHostInfo::BROKEN, -1, ""},
{"0308.0250.00.01.", L"0308.0250.00.01.", "", Component(), CanonHostInfo::BROKEN, -1, ""},
{"0xCG.0xA8.0x0.0x1", L"0xCG.0xA8.0x0.0x1", "", Component(), CanonHostInfo::BROKEN, -1, ""},
{"0xCG.0xA8.0x0.0x1.", L"0xCG.0xA8.0x0.0x1.", "", Component(), CanonHostInfo::BROKEN, -1, ""},
// Hostnames with a non-numeric terminal component should be considered valid, but not IPv4 hostnames.
{"19.168.0.1a", L"19.168.0.1a", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
{"0xC.0xA8.0x0.0x1G", L"0xC.0xA8.0x0.0x1G", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
// Hostnames that would be considered broken IPv4 hostnames should be considered valid non-IPv4 hostnames if they end with two dots instead of 0 or 1.
{"19a.168.0.1..", L"19a.168.0.1..", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
{"0308.0250.00.01..", L"0308.0250.00.01..", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
{"0xCG.0xA8.0x0.0x1..", L"0xCG.0xA8.0x0.0x1..", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
// Hosts with components that aren't considered valid IPv4 numbers but are entirely numeric should be considered invalid.
{"1.2.3.08", L"1.2.3.08", "", Component(), CanonHostInfo::BROKEN, -1, ""},
{"1.2.3.08.", L"1.2.3.08.", "", Component(), CanonHostInfo::BROKEN, -1, ""},
// If there are not enough components, the last one should fill them out.
{"192", L"192", "0.0.0.192", Component(0, 9), CanonHostInfo::IPV4, 1, "000000C0"},
{"0xC0a80001", L"0xC0a80001", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 1, "C0A80001"},
{"030052000001", L"030052000001", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 1, "C0A80001"},
Expand All @@ -631,15 +646,16 @@ TEST(URLCanonTest, IPv4) {
{"192.0x00A80001", L"192.0x000A80001", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 2, "C0A80001"},
{"0xc0.052000001", L"0xc0.052000001", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 2, "C0A80001"},
{"192.168.1", L"192.168.1", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 3, "C0A80001"},
// Too many components means not an IP address.
{"192.168.0.0.1", L"192.168.0.0.1", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
// We allow a single trailing dot.
// Hostnames with too many components but a numeric final component are invalid.
{"192.168.0.0.1", L"192.168.0.0.1", "", Component(), CanonHostInfo::BROKEN, -1, ""},
// We allow a single trailing dot.
{"192.168.0.1.", L"192.168.0.1.", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 4, "C0A80001"},
{"192.168.0.1. hello", L"192.168.0.1. hello", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
{"192.168.0.1..", L"192.168.0.1..", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
// Two dots in a row means not an IP address.
{"192.168..1", L"192.168..1", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
// Any numerical overflow should be marked as BROKEN.
// Hosts with two dots in a row with a final numeric component are considered invalid.
{"192.168..1", L"192.168..1", "", Component(), CanonHostInfo::BROKEN, -1, ""},
{"192.168..1.", L"192.168..1.", "", Component(), CanonHostInfo::BROKEN, -1, ""},
// Any numerical overflow should be marked as BROKEN.
{"0x100.0", L"0x100.0", "", Component(), CanonHostInfo::BROKEN, -1, ""},
{"0x100.0.0", L"0x100.0.0", "", Component(), CanonHostInfo::BROKEN, -1, ""},
{"0x100.0.0.0", L"0x100.0.0.0", "", Component(), CanonHostInfo::BROKEN, -1, ""},
Expand All @@ -649,7 +665,7 @@ TEST(URLCanonTest, IPv4) {
{"0.0.0x10000", L"0.0.0x10000", "", Component(), CanonHostInfo::BROKEN, -1, ""},
{"0.0x1000000", L"0.0x1000000", "", Component(), CanonHostInfo::BROKEN, -1, ""},
{"0x100000000", L"0x100000000", "", Component(), CanonHostInfo::BROKEN, -1, ""},
// Repeat the previous tests, minus 1, to verify boundaries.
// Repeat the previous tests, minus 1, to verify boundaries.
{"0xFF.0", L"0xFF.0", "255.0.0.0", Component(0, 9), CanonHostInfo::IPV4, 2, "FF000000"},
{"0xFF.0.0", L"0xFF.0.0", "255.0.0.0", Component(0, 9), CanonHostInfo::IPV4, 3, "FF000000"},
{"0xFF.0.0.0", L"0xFF.0.0.0", "255.0.0.0", Component(0, 9), CanonHostInfo::IPV4, 4, "FF000000"},
Expand All @@ -659,67 +675,84 @@ TEST(URLCanonTest, IPv4) {
{"0.0.0xFFFF", L"0.0.0xFFFF", "0.0.255.255", Component(0, 11), CanonHostInfo::IPV4, 3, "0000FFFF"},
{"0.0xFFFFFF", L"0.0xFFFFFF", "0.255.255.255", Component(0, 13), CanonHostInfo::IPV4, 2, "00FFFFFF"},
{"0xFFFFFFFF", L"0xFFFFFFFF", "255.255.255.255", Component(0, 15), CanonHostInfo::IPV4, 1, "FFFFFFFF"},
// Old truncation tests. They're all "BROKEN" now.
// Old truncation tests. They're all "BROKEN" now.
{"276.256.0xf1a2.077777", L"276.256.0xf1a2.077777", "", Component(), CanonHostInfo::BROKEN, -1, ""},
{"192.168.0.257", L"192.168.0.257", "", Component(), CanonHostInfo::BROKEN, -1, ""},
{"192.168.0xa20001", L"192.168.0xa20001", "", Component(), CanonHostInfo::BROKEN, -1, ""},
{"192.015052000001", L"192.015052000001", "", Component(), CanonHostInfo::BROKEN, -1, ""},
{"0X12C0a80001", L"0X12C0a80001", "", Component(), CanonHostInfo::BROKEN, -1, ""},
{"276.1.2", L"276.1.2", "", Component(), CanonHostInfo::BROKEN, -1, ""},
// Spaces should be rejected.
// Too many components should be rejected, in valid ranges or not.
{"255.255.255.255.255", L"255.255.255.255.255", "", Component(), CanonHostInfo::BROKEN, -1, ""},
{"256.256.256.256.256", L"256.256.256.256.256", "", Component(), CanonHostInfo::BROKEN, -1, ""},
// Spaces should be rejected.
{"192.168.0.1 hello", L"192.168.0.1 hello", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
// Very large numbers.
// Very large numbers.
{"0000000000000300.0x00000000000000fF.00000000000000001", L"0000000000000300.0x00000000000000fF.00000000000000001", "192.255.0.1", Component(0, 11), CanonHostInfo::IPV4, 3, "C0FF0001"},
{"0000000000000300.0xffffffffFFFFFFFF.3022415481470977", L"0000000000000300.0xffffffffFFFFFFFF.3022415481470977", "", Component(0, 11), CanonHostInfo::BROKEN, -1, ""},
// A number has no length limit, but long numbers can still overflow.
// A number has no length limit, but long numbers can still overflow.
{"00000000000000000001", L"00000000000000000001", "0.0.0.1", Component(0, 7), CanonHostInfo::IPV4, 1, "00000001"},
{"0000000000000000100000000000000001", L"0000000000000000100000000000000001", "", Component(), CanonHostInfo::BROKEN, -1, ""},
// If a long component is non-numeric, it's a hostname, *not* a broken IP.
// If a long component is non-numeric, it's a hostname, *not* a broken IP.
{"0.0.0.000000000000000000z", L"0.0.0.000000000000000000z", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
{"0.0.0.100000000000000000z", L"0.0.0.100000000000000000z", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
// Truncation of all zeros should still result in 0.
// Truncation of all zeros should still result in 0.
{"0.00.0x.0x0", L"0.00.0x.0x0", "0.0.0.0", Component(0, 7), CanonHostInfo::IPV4, 4, "00000000"},
// Non-ASCII characters in final component should return NEUTRAL.
{"1.2.3.\xF0\x9F\x92\xA9", L"1.2.3.\xD83D\xDCA9", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
{"1.2.3.4\xF0\x9F\x92\xA9", L"1.2.3.4\xD83D\xDCA9", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
{"1.2.3.0x\xF0\x9F\x92\xA9", L"1.2.3.0x\xD83D\xDCA9", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
{"1.2.3.0\xF0\x9F\x92\xA9", L"1.2.3.0\xD83D\xDCA9", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
// Non-ASCII characters in other components should result in broken IPs when final component is numeric.
{"1.2.\xF0\x9F\x92\xA9.4", L"1.2.\xD83D\xDCA9.4", "", Component(), CanonHostInfo::BROKEN, -1, ""},
{"1.2.3\xF0\x9F\x92\xA9.4", L"1.2.3\xD83D\xDCA9.4", "", Component(), CanonHostInfo::BROKEN, -1, ""},
{"1.2.0x\xF0\x9F\x92\xA9.4", L"1.2.0x\xD83D\xDCA9.4", "", Component(), CanonHostInfo::BROKEN, -1, ""},
{"1.2.0\xF0\x9F\x92\xA9.4", L"1.2.0\xD83D\xDCA9.4", "", Component(), CanonHostInfo::BROKEN, -1, ""},
{"\xF0\x9F\x92\xA9.2.3.4", L"\xD83D\xDCA9.2.3.4", "", Component(), CanonHostInfo::BROKEN, -1, ""},
};
// clang-format on

for (const auto& test_case : cases) {
SCOPED_TRACE(test_case.input8);

for (size_t i = 0; i < base::size(cases); i++) {
// 8-bit version.
Component component(0, static_cast<int>(strlen(cases[i].input8)));
Component component(0, static_cast<int>(strlen(test_case.input8)));

std::string out_str1;
StdStringCanonOutput output1(&out_str1);
CanonHostInfo host_info;
CanonicalizeIPAddress(cases[i].input8, component, &output1, &host_info);
CanonicalizeIPAddress(test_case.input8, component, &output1, &host_info);
output1.Complete();

EXPECT_EQ(cases[i].expected_family, host_info.family);
EXPECT_EQ(std::string(cases[i].expected_address_hex),
EXPECT_EQ(test_case.expected_family, host_info.family);
EXPECT_EQ(std::string(test_case.expected_address_hex),
BytesToHexString(host_info.address, host_info.AddressLength()));
if (host_info.family == CanonHostInfo::IPV4) {
EXPECT_STREQ(cases[i].expected, out_str1.c_str());
EXPECT_EQ(cases[i].expected_component.begin, host_info.out_host.begin);
EXPECT_EQ(cases[i].expected_component.len, host_info.out_host.len);
EXPECT_EQ(cases[i].expected_num_ipv4_components,
EXPECT_STREQ(test_case.expected, out_str1.c_str());
EXPECT_EQ(test_case.expected_component.begin, host_info.out_host.begin);
EXPECT_EQ(test_case.expected_component.len, host_info.out_host.len);
EXPECT_EQ(test_case.expected_num_ipv4_components,
host_info.num_ipv4_components);
}

// 16-bit version.
std::u16string input16(
test_utils::TruncateWStringToUTF16(cases[i].input16));
test_utils::TruncateWStringToUTF16(test_case.input16));
component = Component(0, static_cast<int>(input16.length()));

std::string out_str2;
StdStringCanonOutput output2(&out_str2);
CanonicalizeIPAddress(input16.c_str(), component, &output2, &host_info);
output2.Complete();

EXPECT_EQ(cases[i].expected_family, host_info.family);
EXPECT_EQ(std::string(cases[i].expected_address_hex),
EXPECT_EQ(test_case.expected_family, host_info.family);
EXPECT_EQ(std::string(test_case.expected_address_hex),
BytesToHexString(host_info.address, host_info.AddressLength()));
if (host_info.family == CanonHostInfo::IPV4) {
EXPECT_STREQ(cases[i].expected, out_str2.c_str());
EXPECT_EQ(cases[i].expected_component.begin, host_info.out_host.begin);
EXPECT_EQ(cases[i].expected_component.len, host_info.out_host.len);
EXPECT_EQ(cases[i].expected_num_ipv4_components,
EXPECT_STREQ(test_case.expected, out_str2.c_str());
EXPECT_EQ(test_case.expected_component.begin, host_info.out_host.begin);
EXPECT_EQ(test_case.expected_component.len, host_info.out_host.len);
EXPECT_EQ(test_case.expected_num_ipv4_components,
host_info.num_ipv4_components);
}
}
Expand Down