Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
data: URL base64 handling different from atob()
https://bugs.webkit.org/show_bug.cgi?id=175568
rdar://107982669

Reviewed by Alex Christensen.

We really shouldn't have different modes of decoding data: URLs.

https://bugs.webkit.org/show_bug.cgi?id=211671 fixed a set of web-platform-tests by introducing a new fetch-specific code path. It was not investigated whether the existing code path ought to be changed as well, apart from determining that there was no test coverage for the difference.

This commit adds test coverage and aligns WebKit with other browsers and the Fetch standard by removing the legacy code path altogether.

* LayoutTests/imported/w3c/web-platform-tests/fetch/data-urls/navigate.window-expected.txt: Added.
* LayoutTests/imported/w3c/web-platform-tests/fetch/data-urls/navigate.window.html: Added.
* LayoutTests/imported/w3c/web-platform-tests/fetch/data-urls/navigate.window.js: Added.
(forEach):
(async input):
* LayoutTests/imported/w3c/web-platform-tests/fetch/data-urls/w3c-import.log:

These changes are being upstreamed via web-platform-tests/wpt#39542.

* Source/WebCore/loader/ResourceLoader.cpp:
(WebCore::ResourceLoader::loadDataURL):
* Source/WebCore/platform/graphics/avfoundation/objc/WebCoreAVFResourceLoader.mm:
(WebCore::DataURLResourceMediaLoader::DataURLResourceMediaLoader):
* Source/WebCore/platform/network/DataURLDecoder.cpp:
(WebCore::DataURLDecoder::decodeSynchronously):
(WebCore::DataURLDecoder::decode):
(WebCore::DataURLDecoder::decodeBase64): Deleted.
* Source/WebCore/platform/network/DataURLDecoder.h:
* Source/WebKit/NetworkProcess/soup/NetworkDataTaskSoup.cpp:
(WebKit::NetworkDataTaskSoup::resume):
* Source/WebKit/WebProcess/Network/WebLoaderStrategy.cpp:
(WebKit::WebLoaderStrategy::loadDataURLSynchronously):

Canonical link: https://commits.webkit.org/262976@main
  • Loading branch information
annevk authored and Ahmad Saleem committed Apr 14, 2023
1 parent b9f0c87 commit 2518c51
Show file tree
Hide file tree
Showing 10 changed files with 99 additions and 38 deletions.
@@ -0,0 +1,9 @@

PASS Nothing fancy
PASS base64
PASS base64 with code points that differ from base64url
PASS ASCII whitespace in the input is removed
PASS base64 with incorrect padding
PASS base64url is not supported
PASS Vertical tab in the input leads to an error

@@ -0,0 +1 @@
<!-- This file is required for WebKit test infrastructure to run the templated test -->
@@ -0,0 +1,75 @@
// META: timeout=long
//
// Test some edge cases around navigation to data: URLs to ensure they use the same code path

[
{
input: "data:text/html,<script>parent.postMessage(1, '*')</script>",
result: 1,
name: "Nothing fancy",
},
{
input: "data:text/html;base64,PHNjcmlwdD5wYXJlbnQucG9zdE1lc3NhZ2UoMiwgJyonKTwvc2NyaXB0Pg==",
result: 2,
name: "base64",
},
{
input: "data:text/html;base64,PHNjcmlwdD5wYXJlbnQucG9zdE1lc3NhZ2UoNCwgJyonKTwvc2NyaXB0Pr+/",
result: 4,
name: "base64 with code points that differ from base64url"
},
{
input: "data:text/html;base64,PHNjcml%09%20%20%0A%0C%0DwdD5wYXJlbnQucG9zdE1lc3NhZ2UoNiwgJyonKTwvc2NyaXB0Pg==",
result: 6,
name: "ASCII whitespace in the input is removed"
}
].forEach(({ input, result, name }) => {
// Use promise_test so they go sequentially
promise_test(async t => {
const event = await new Promise((resolve, reject) => {
self.addEventListener("message", t.step_func(resolve), { once: true });
const frame = document.body.appendChild(document.createElement("iframe"));
t.add_cleanup(() => frame.remove());

// The assumption is that postMessage() is quicker
t.step_timeout(reject, 500);
frame.src = input;
});
assert_equals(event.data, result);
}, name);
});

// Failure cases
[
{
input: "data:text/html;base64,PHNjcmlwdD5wYXJlbnQucG9zdE1lc3NhZ2UoMywgJyonKTwvc2NyaXB0Pg=",
name: "base64 with incorrect padding",
},
{
input: "data:text/html;base64,PHNjcmlwdD5wYXJlbnQucG9zdE1lc3NhZ2UoNSwgJyonKTwvc2NyaXB0Pr-_",
name: "base64url is not supported"
},
{
input: "data:text/html;base64,%0BPHNjcmlwdD5wYXJlbnQucG9zdE1lc3NhZ2UoNywgJyonKTwvc2NyaXB0Pg==",
name: "Vertical tab in the input leads to an error"
}
].forEach(({ input, name }) => {
// Continue to use promise_test so they go sequentially
promise_test(async t => {
const event = await new Promise((resolve, reject) => {
self.addEventListener("message", t.step_func(reject), { once: true });
const frame = document.body.appendChild(document.createElement("iframe"));
t.add_cleanup(() => frame.remove());

// The assumption is that postMessage() is quicker
t.step_timeout(resolve, 500);
frame.src = input;
});
}, name);
});

// I found some of the interesting code point cases above through brute force:
//
// for (i = 0; i < 256; i++) {
// w(btoa("<script>parent.postMessage(5, '*')<\/script>" + String.fromCodePoint(i) + String.fromCodePoint(i)));
// }
Expand Up @@ -16,4 +16,5 @@ None
List of files:
/LayoutTests/imported/w3c/web-platform-tests/fetch/data-urls/README.md
/LayoutTests/imported/w3c/web-platform-tests/fetch/data-urls/base64.any.js
/LayoutTests/imported/w3c/web-platform-tests/fetch/data-urls/navigate.window.js
/LayoutTests/imported/w3c/web-platform-tests/fetch/data-urls/processing.any.js
5 changes: 1 addition & 4 deletions Source/WebCore/loader/ResourceLoader.cpp
Expand Up @@ -294,10 +294,7 @@ void ResourceLoader::loadDataURL()
if (auto page = m_frame->page())
scheduleContext.scheduledPairs = *page->scheduledRunLoopPairs();
#endif
auto mode = DataURLDecoder::Mode::Legacy;
if (m_request.requester() == ResourceRequestRequester::Fetch)
mode = DataURLDecoder::Mode::ForgivingBase64;
DataURLDecoder::decode(url, scheduleContext, mode, [this, protectedThis = Ref { *this }, url](auto decodeResult) mutable {
DataURLDecoder::decode(url, scheduleContext, [this, protectedThis = Ref { *this }, url](auto decodeResult) mutable {
if (this->reachedTerminalState())
return;
if (!decodeResult) {
Expand Down
Expand Up @@ -233,7 +233,7 @@ void dataSent(PlatformMediaResource&, unsigned long long, unsigned long long) fi
{
RELEASE_ASSERT(request.url().protocolIsData());

if (auto result = DataURLDecoder::decode(request.url(), DataURLDecoder::Mode::ForgivingBase64)) {
if (auto result = DataURLDecoder::decode(request.url())) {
m_response = ResourceResponse::dataURLResponse(request.url(), *result);
m_buffer = SharedBuffer::create(WTFMove(result->data));
}
Expand Down
31 changes: 7 additions & 24 deletions Source/WebCore/platform/network/DataURLDecoder.cpp
Expand Up @@ -147,37 +147,20 @@ static std::unique_ptr<DecodeTask> createDecodeTask(const URL& url, const Schedu
);
}

static std::optional<Vector<uint8_t>> decodeBase64(const DecodeTask& task, Mode mode)
{
switch (mode) {
case Mode::ForgivingBase64:
return base64Decode(PAL::decodeURLEscapeSequences(task.encodedData), { Base64DecodeOptions::ValidatePadding, Base64DecodeOptions::IgnoreSpacesAndNewLines, Base64DecodeOptions::DiscardVerticalTab });

case Mode::Legacy:
// First try base64url.
if (auto decodedData = base64URLDecode(task.encodedData))
return decodedData;
// Didn't work, try unescaping and decoding as base64.
return base64Decode(PAL::decodeURLEscapeSequences(task.encodedData), { Base64DecodeOptions::IgnoreSpacesAndNewLines, Base64DecodeOptions::DiscardVerticalTab });
}

RELEASE_ASSERT_NOT_REACHED();
}

static Vector<uint8_t> decodeEscaped(const DecodeTask& task)
{
PAL::TextEncoding encodingFromCharset(task.result.charset);
auto& encoding = encodingFromCharset.isValid() ? encodingFromCharset : PAL::UTF8Encoding();
return PAL::decodeURLEscapeSequencesAsData(task.encodedData, encoding);
}

static std::optional<Result> decodeSynchronously(DecodeTask& task, Mode mode)
static std::optional<Result> decodeSynchronously(DecodeTask& task)
{
if (!task.process())
return std::nullopt;

if (task.isBase64) {
auto decodedData = decodeBase64(task, mode);
auto decodedData = base64Decode(PAL::decodeURLEscapeSequences(task.encodedData), { Base64DecodeOptions::ValidatePadding, Base64DecodeOptions::IgnoreSpacesAndNewLines, Base64DecodeOptions::DiscardVerticalTab });
if (!decodedData)
return std::nullopt;
task.result.data = WTFMove(*decodedData);
Expand All @@ -188,12 +171,12 @@ static std::optional<Result> decodeSynchronously(DecodeTask& task, Mode mode)
return WTFMove(task.result);
}

void decode(const URL& url, const ScheduleContext& scheduleContext, Mode mode, DecodeCompletionHandler&& completionHandler)
void decode(const URL& url, const ScheduleContext& scheduleContext, DecodeCompletionHandler&& completionHandler)
{
ASSERT(url.protocolIsData());

decodeQueue().dispatch([decodeTask = createDecodeTask(url, scheduleContext, WTFMove(completionHandler)), mode]() mutable {
auto result = decodeSynchronously(*decodeTask, mode);
decodeQueue().dispatch([decodeTask = createDecodeTask(url, scheduleContext, WTFMove(completionHandler))]() mutable {
auto result = decodeSynchronously(*decodeTask);

#if USE(COCOA_EVENT_LOOP)
auto scheduledPairs = decodeTask->scheduleContext.scheduledPairs;
Expand All @@ -211,12 +194,12 @@ void decode(const URL& url, const ScheduleContext& scheduleContext, Mode mode, D
});
}

std::optional<Result> decode(const URL& url, Mode mode)
std::optional<Result> decode(const URL& url)
{
ASSERT(url.protocolIsData());

auto task = createDecodeTask(url, { }, nullptr);
return decodeSynchronously(*task, mode);
return decodeSynchronously(*task);
}

} // namespace DataURLDecoder
Expand Down
5 changes: 2 additions & 3 deletions Source/WebCore/platform/network/DataURLDecoder.h
Expand Up @@ -51,9 +51,8 @@ struct ScheduleContext {
#endif
};

enum class Mode { Legacy, ForgivingBase64 };
void decode(const URL&, const ScheduleContext&, Mode, DecodeCompletionHandler&&);
WEBCORE_EXPORT std::optional<Result> decode(const URL&, Mode);
void decode(const URL&, const ScheduleContext&, DecodeCompletionHandler&&);
WEBCORE_EXPORT std::optional<Result> decode(const URL&);

}

Expand Down
2 changes: 1 addition & 1 deletion Source/WebKit/NetworkProcess/soup/NetworkDataTaskSoup.cpp
Expand Up @@ -340,7 +340,7 @@ void NetworkDataTaskSoup::resume()
if (m_currentRequest.url().protocolIsData() && !m_cancellable) {
m_networkLoadMetrics.fetchStart = MonotonicTime::now();
m_cancellable = adoptGRef(g_cancellable_new());
DataURLDecoder::decode(m_currentRequest.url(), { }, DataURLDecoder::Mode::Legacy, [this, protectedThis = WTFMove(protectedThis)](auto decodeResult) mutable {
DataURLDecoder::decode(m_currentRequest.url(), { }, [this, protectedThis = WTFMove(protectedThis)](auto decodeResult) mutable {
if (m_state == State::Canceling || m_state == State::Completed || !m_client) {
clearRequest();
return;
Expand Down
6 changes: 1 addition & 5 deletions Source/WebKit/WebProcess/Network/WebLoaderStrategy.cpp
Expand Up @@ -651,12 +651,8 @@ static bool shouldClearReferrerOnHTTPSToHTTPRedirect(LocalFrame* frame)

WebLoaderStrategy::SyncLoadResult WebLoaderStrategy::loadDataURLSynchronously(const ResourceRequest& request)
{
auto mode = DataURLDecoder::Mode::Legacy;
if (request.requester() == ResourceRequestRequester::Fetch)
mode = DataURLDecoder::Mode::ForgivingBase64;

SyncLoadResult result;
auto decodeResult = DataURLDecoder::decode(request.url(), mode);
auto decodeResult = DataURLDecoder::decode(request.url());
if (!decodeResult) {
WEBLOADERSTRATEGY_RELEASE_LOG_BASIC("loadDataURLSynchronously: decoding of data failed");
result.error = internalError(request.url());
Expand Down

0 comments on commit 2518c51

Please sign in to comment.