Skip to content

Commit

Permalink
Cherry-pick 85e478a. rdar://problem/100623939
Browse files Browse the repository at this point in the history
    Punycode Arabic diacritics when not preceded by an Arabic code point in a URL host
    https://bugs.webkit.org/show_bug.cgi?id=247461
    rdar://100426863

    Reviewed by Said Abou-Hallawa.

    * Source/WTF/wtf/URLHelpers.cpp:
    (WTF::URLHelpers::isArabicDiacritic):
    (WTF::URLHelpers::isArabicCodePoint):
    (WTF::URLHelpers::isLookalikeCharacter):
    * Tools/TestWebKitAPI/Tests/WTF/cocoa/URLExtras.mm:
    (TestWebKitAPI::TEST):

    Canonical link: https://commits.webkit.org/256332@main

Canonical link: https://commits.webkit.org/245886.840@safari-7613.4.1.0-branch
  • Loading branch information
Alex Christensen authored and alancoon committed Nov 9, 2022
1 parent 199ebb7 commit 44e845a
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 2 deletions.
19 changes: 17 additions & 2 deletions Source/WTF/wtf/URLHelpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,20 @@ bool isLookalikeSequence(const std::optional<UChar32>& previousCodePoint, UChar3
|| isLookalikePair(*previousCodePoint, codePoint);
}

// Arabic specialization: a code point in the Arabic diacritic range (U+064B through U+065F)
// is reported as a lookalike unless the code point immediately before it belongs to the
// Arabic Unicode block. Code points outside that range are never flagged here.
template <>
bool isLookalikeSequence<USCRIPT_ARABIC>(const std::optional<UChar32>& previousCodePoint, UChar32 codePoint)
{
    // Only diacritics are of interest; bail out before touching the previous code point.
    if (codePoint < 0x064B || codePoint > 0x065F)
        return false;

    // A diacritic with no previous code point is always flagged.
    if (!previousCodePoint)
        return true;

    // Otherwise the diacritic is acceptable only when it follows an Arabic-block code point.
    return ublock_getCode(*previousCodePoint) != UBLOCK_ARABIC;
}

static bool isLookalikeCharacter(const std::optional<UChar32>& previousCodePoint, UChar32 codePoint)
{
// This function treats the following as unsafe, lookalike characters:
Expand All @@ -167,7 +181,7 @@ static bool isLookalikeCharacter(const std::optional<UChar32>& previousCodePoint
|| u_hasBinaryProperty(codePoint, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)
|| ublock_getCode(codePoint) == UBLOCK_IPA_EXTENSIONS)
return true;

switch (codePoint) {
case 0x00BC: /* VULGAR FRACTION ONE QUARTER */
case 0x00BD: /* VULGAR FRACTION ONE HALF */
Expand Down Expand Up @@ -299,7 +313,8 @@ static bool isLookalikeCharacter(const std::optional<UChar32>& previousCodePoint
default:
return isLookalikeSequence<USCRIPT_ARMENIAN>(previousCodePoint, codePoint)
|| isLookalikeSequence<USCRIPT_TAMIL>(previousCodePoint, codePoint)
|| isLookalikeSequence<USCRIPT_CANADIAN_ABORIGINAL>(previousCodePoint, codePoint);
|| isLookalikeSequence<USCRIPT_CANADIAN_ABORIGINAL>(previousCodePoint, codePoint)
|| isLookalikeSequence<USCRIPT_ARABIC>(previousCodePoint, codePoint);
}
}

Expand Down
5 changes: 5 additions & 0 deletions Tools/TestWebKitAPI/Tests/WTF/cocoa/URLExtras.mm
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,8 @@
"xn--o-10e", // 'o' U+0BE6
"xn--a-53i", // U+15AF 'a'
"xn--a-63i", // 'a' U+15AF
"xn--3hb112n", // U+065B
"xn--a-ypc062v", // 'a' U+065B
};
for (const String& host : punycodedSpoofHosts) {
auto url = makeString("http://", host, "/").utf8();
Expand Down Expand Up @@ -160,6 +162,9 @@

// Canadian aboriginal
EXPECT_STREQ("https://\u15AF\u1401abc/", userVisibleString(literalURL("https://\u15AF\u1401abc/")));

// Arabic
EXPECT_STREQ("https://\u0620\u065Babc/", userVisibleString(literalURL("https://\u0620\u065Babc/")));
}

TEST(WTF_URLExtras, URLExtras_DivisionSign)
Expand Down

0 comments on commit 44e845a

Please sign in to comment.