Skip to content

Commit

Permalink
[UnifiedPDF] Prepare for thread-safe access to loaded PDF data
Browse files Browse the repository at this point in the history
https://bugs.webkit.org/show_bug.cgi?id=271019
rdar://124644460

Reviewed by Tim Horton.

In a future patch I will be attempting to read loaded PDF data off the main thread, rather than
using the semaphore-driven hops to the main thread.

In preparation for this, make accessing the various data members related to loaded PDF data
thread-safe. In particular, add a Lock to protect `m_data` and `m_streamedBytes`, and move
the `RangeSet<Range>>` (which represents ranges of loaded data beyond m_streamedBytes) into
PDFPluginBase to be protected by the same lock.

Now we need the "check data availability and return data pointer" blocks to be thread-safe,
so remove `haveDataForRange()`, and make callers just try to call `dataPtrForRange()` which
internally takes the lock for both the checks and the data access.

There are code paths that want to avoid checking m_validRanges (the "complete unconditionally" code path);
for them, add a CheckValidRanges enum to `dataPtrForRange()`.

* Source/WebKit/WebProcess/Plugins/PDF/PDFIncrementalLoader.h:
* Source/WebKit/WebProcess/Plugins/PDF/PDFIncrementalLoader.mm:
(WebKit::ByteRangeRequest::completeUnconditionally):
(WebKit::PDFIncrementalLoader::dataPtrForRange const):
(WebKit::PDFIncrementalLoader::incrementalPDFStreamDidReceiveData):
(WebKit::PDFIncrementalLoader::requestCompleteIfPossible):
(WebKit::PDFIncrementalLoader::requestDidCompleteWithAccumulatedData):
(WebKit::PDFIncrementalLoader::ensureDataBufferLength): Deleted.
* Source/WebKit/WebProcess/Plugins/PDF/PDFPluginBase.h:
(WebKit::PDFPluginBase::WTF_GUARDED_BY_LOCK):
(WebKit::PDFPluginBase::streamedBytes const): Deleted.
* Source/WebKit/WebProcess/Plugins/PDF/PDFPluginBase.mm:
(WebKit::PDFPluginBase::teardown):
(WebKit::PDFPluginBase::originalData const):
(WebKit::PDFPluginBase::streamedBytes const):
(WebKit::PDFPluginBase::haveStreamedDataForRange const):
(WebKit::PDFPluginBase::copyDataAtPosition const):
(WebKit::PDFPluginBase::dataPtrForRange const):
(WebKit::PDFPluginBase::insertRangeRequestData):
(WebKit::PDFPluginBase::streamDidReceiveData):
(WebKit::PDFPluginBase::streamDidFail):
(WebKit::PDFPluginBase::addArchiveResource):
(WebKit::PDFPluginBase::incrementalLoaderLog):
(WebKit::PDFPluginBase::haveDataForRange const): Deleted.

Canonical link: https://commits.webkit.org/276171@main
  • Loading branch information
smfr committed Mar 15, 2024
1 parent 8acea81 commit 51537ed
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 78 deletions.
5 changes: 3 additions & 2 deletions Source/WebKit/WebProcess/Plugins/PDF/PDFIncrementalLoader.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ enum class ByteRangeRequestIdentifierType { };
using ByteRangeRequestIdentifier = ObjectIdentifier<ByteRangeRequestIdentifierType>;
using DataRequestCompletionHandler = Function<void(const uint8_t*, size_t count)>;

enum class CheckValidRanges : bool;

class PDFIncrementalLoader : public ThreadSafeRefCountedAndCanMakeThreadSafeWeakPtr<PDFIncrementalLoader> {
WTF_MAKE_FAST_ALLOCATED;
WTF_MAKE_NONCOPYABLE(PDFIncrementalLoader);
Expand Down Expand Up @@ -86,10 +88,9 @@ class PDFIncrementalLoader : public ThreadSafeRefCountedAndCanMakeThreadSafeWeak

bool documentFinishedLoading() const;

void ensureDataBufferLength(uint64_t);
void appendAccumulatedDataToDataBuffer(ByteRangeRequest&);

const uint8_t* dataPtrForRange(uint64_t position, size_t count) const;
const uint8_t* dataPtrForRange(uint64_t position, size_t count, CheckValidRanges) const;
uint64_t availableDataSize() const;

void getResourceBytesAtPosition(size_t count, off_t position, DataRequestCompletionHandler&&);
Expand Down
54 changes: 6 additions & 48 deletions Source/WebKit/WebProcess/Plugins/PDF/PDFIncrementalLoader.mm
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@

auto availableRequestBytes = std::min<uint64_t>(m_count, availableBytes - m_position);

const uint8_t* dataPtr = loader.dataPtrForRange(m_position, availableRequestBytes);
const uint8_t* dataPtr = loader.dataPtrForRange(m_position, availableRequestBytes, CheckValidRanges::No);
if (!dataPtr)
return;

Expand Down Expand Up @@ -267,7 +267,6 @@

HashMap<ByteRangeRequestIdentifier, ByteRangeRequest> outstandingByteRangeRequests;
HashMap<RefPtr<WebCore::NetscapePlugInStreamLoader>, ByteRangeRequestIdentifier> streamLoaderMap;
RangeSet<WTF::Range<uint64_t>> completedRanges;
};

#pragma mark -
Expand Down Expand Up @@ -329,17 +328,6 @@
return plugin->documentFinishedLoading();
}

void PDFIncrementalLoader::ensureDataBufferLength(uint64_t targetLength)
{
ASSERT(isMainRunLoop());

RefPtr plugin = m_plugin.get();
if (!plugin)
return;

plugin->ensureDataBufferLength(targetLength);
}

void PDFIncrementalLoader::appendAccumulatedDataToDataBuffer(ByteRangeRequest& request)
{
ASSERT(isMainRunLoop());
Expand All @@ -360,16 +348,13 @@
return plugin->streamedBytes();
}

const uint8_t* PDFIncrementalLoader::dataPtrForRange(uint64_t position, size_t count) const
const uint8_t* PDFIncrementalLoader::dataPtrForRange(uint64_t position, size_t count, CheckValidRanges checkValidRanges) const
{
RefPtr plugin = m_plugin.get();
if (!plugin)
return nullptr;

if (!plugin->haveDataForRange(position, count))
return nullptr;

return plugin->dataPtrForRange(position, count);
return plugin->dataPtrForRange(position, count, checkValidRanges);
}

void PDFIncrementalLoader::incrementalPDFStreamDidFinishLoading()
Expand Down Expand Up @@ -397,10 +382,6 @@
if (!plugin)
return;

// Keep our ranges-lookup-table compact by continuously updating its first range
// as the entire document streams in from the network.
m_requestData->completedRanges.add({ 0, plugin->streamedBytes() - 1 });

HashSet<ByteRangeRequestIdentifier> handledRequests;
for (auto& request : m_requestData->outstandingByteRangeRequests.values()) {
if (request.completeIfPossible(*this))
Expand Down Expand Up @@ -549,27 +530,10 @@
if (!plugin)
return false;

auto haveReceivedDataForRange = [&](const ByteRangeRequest& request) {
if (!plugin->haveDataForRange(request.position(), request.count()))
return false;

if (plugin->haveStreamedDataForRange(request.position(), request.count()))
return true;

if (m_requestData->completedRanges.contains({ request.position(), request.position() + request.count() - 1 })) {
#if !LOG_DISABLED
incrementalLoaderLog(makeString("Completing request %llu with a previously completed range", request.identifier()));
#endif
return true;
}

return false;
};

if (!haveReceivedDataForRange(request))
const uint8_t* dataPtr = plugin->dataPtrForRange(request.position(), request.count(), CheckValidRanges::Yes);
if (!dataPtr)
return false;

const uint8_t* dataPtr = plugin->dataPtrForRange(request.position(), request.count());
request.completeWithBytes(dataPtr, request.count(), *this);
return true;
}
Expand All @@ -595,13 +559,7 @@
incrementalLoaderLog(makeString("Completing range request ", request.identifier(), " (", request.count(), " bytes at ", request.position(), ") with ", request.accumulatedData().size(), " bytes from the network"));
#endif

// Fold this data into the main data buffer so that if something in its range is requested again (which happens quite often)
// we do not need to hit the network layer again.
ensureDataBufferLength(request.position() + request.accumulatedData().size());
if (request.accumulatedData().size()) {
appendAccumulatedDataToDataBuffer(request);
m_requestData->completedRanges.add({ request.position(), request.position() + request.accumulatedData().size() - 1 });
}
appendAccumulatedDataToDataBuffer(request);

if (auto* streamLoader = request.streamLoader())
forgetStreamLoader(*streamLoader);
Expand Down
21 changes: 14 additions & 7 deletions Source/WebKit/WebProcess/Plugins/PDF/PDFPluginBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@
#include <WebCore/ScrollableArea.h>
#include <WebCore/TextIndicator.h>
#include <wtf/Identified.h>
#include <wtf/Lock.h>
#include <wtf/Range.h>
#include <wtf/RangeSet.h>
#include <wtf/RetainPtr.h>
#include <wtf/ThreadSafeRefCounted.h>
#include <wtf/TypeTraits.h>
Expand Down Expand Up @@ -81,6 +84,8 @@ struct WebHitTestResultData;
enum class ByteRangeRequestIdentifierType;
using ByteRangeRequestIdentifier = ObjectIdentifier<ByteRangeRequestIdentifierType>;

enum class CheckValidRanges : bool { No, Yes };

class PDFPluginBase : public ThreadSafeRefCountedAndCanMakeThreadSafeWeakPtr<PDFPluginBase>, public WebCore::ScrollableArea, public PDFScriptEvaluator::Client, public Identified<PDFPluginIdentifier> {
WTF_MAKE_FAST_ALLOCATED;
WTF_MAKE_NONCOPYABLE(PDFPluginBase);
Expand Down Expand Up @@ -244,21 +249,21 @@ class PDFPluginBase : public ThreadSafeRefCountedAndCanMakeThreadSafeWeakPtr<PDF
void writeItemsToPasteboard(NSString *pasteboardName, NSArray *items, NSArray *types) const;
#endif

uint64_t streamedBytes() const;

private:
bool documentFinishedLoading() const { return m_documentFinishedLoading; }
uint64_t streamedBytes() const { return m_streamedBytes; }
void ensureDataBufferLength(uint64_t);
void ensureDataBufferLength(uint64_t) WTF_REQUIRES_LOCK(m_streamedDataLock);

bool haveStreamedDataForRange(uint64_t offset, size_t count) const;
bool haveStreamedDataForRange(uint64_t offset, size_t count) const WTF_REQUIRES_LOCK(m_streamedDataLock);
// This just checks whether the CFData is large enough; it doesn't know if we filled this range with data.
bool haveDataForRange(uint64_t offset, size_t count) const;

void insertRangeRequestData(uint64_t offset, const Vector<uint8_t>&);

// Returns the number of bytes copied.
size_t copyDataAtPosition(void* buffer, uint64_t sourcePosition, size_t count) const;
// FIXME: It would be nice to avoid having both the "copy into a buffer" and "return a pointer" ways of getting data.
const uint8_t* dataPtrForRange(uint64_t sourcePosition, size_t count) const;
const uint8_t* dataPtrForRange(uint64_t sourcePosition, size_t count, CheckValidRanges) const;

protected:
explicit PDFPluginBase(WebCore::HTMLPlugInElement&);
Expand Down Expand Up @@ -340,8 +345,10 @@ class PDFPluginBase : public ThreadSafeRefCountedAndCanMakeThreadSafeWeakPtr<PDF
// m_data grows as we receive data in the primary request (PDFPluginBase::streamDidReceiveData())
// but also as byte range requests are received via m_incrementalLoader, so it may have "holes"
// before the main resource is fully loaded.
RetainPtr<CFMutableDataRef> m_data;
uint64_t m_streamedBytes { 0 };
mutable Lock m_streamedDataLock;
RetainPtr<CFMutableDataRef> m_data WTF_GUARDED_BY_LOCK(m_streamedDataLock);
uint64_t m_streamedBytes WTF_GUARDED_BY_LOCK(m_streamedDataLock) { 0 };
RangeSet<WTF::Range<uint64_t>> m_validRanges WTF_GUARDED_BY_LOCK(m_streamedDataLock);

RetainPtr<PDFDocument> m_pdfDocument;
#if PLATFORM(MAC)
Expand Down
88 changes: 67 additions & 21 deletions Source/WebKit/WebProcess/Plugins/PDF/PDFPluginBase.mm
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,10 @@

void PDFPluginBase::teardown()
{
m_data = nil;
{
Locker locker { m_streamedDataLock };
m_data = nil;
}

#if HAVE(INCREMENTAL_PDF_APIS)
if (m_incrementalLoader) {
Expand Down Expand Up @@ -200,6 +203,7 @@

NSData *PDFPluginBase::originalData() const
{
Locker locker { m_streamedDataLock };
return (__bridge NSData *)m_data.get();
}

Expand All @@ -214,21 +218,18 @@
CFDataIncreaseLength(m_data.get(), targetLength - currentLength);
}

bool PDFPluginBase::haveStreamedDataForRange(uint64_t offset, size_t count) const
uint64_t PDFPluginBase::streamedBytes() const
{
if (!m_data)
return false;

return streamedBytes() >= offset + count;
Locker locker { m_streamedDataLock };
return m_streamedBytes;
}

bool PDFPluginBase::haveDataForRange(uint64_t offset, size_t count) const
bool PDFPluginBase::haveStreamedDataForRange(uint64_t offset, size_t count) const
{
if (!m_data)
return false;

uint64_t dataLength = CFDataGetLength(m_data.get());
return offset + count <= dataLength;
return m_streamedBytes >= offset + count;
}

size_t PDFPluginBase::copyDataAtPosition(void* buffer, uint64_t sourcePosition, size_t count) const
Expand All @@ -240,29 +241,55 @@
LOG_WITH_STREAM(IncrementalPDF, stream << "PDFIncrementalLoader::copyDataAtPosition " << sourcePosition << " on main thread - document has not finished loading");
}

Locker locker { m_streamedDataLock };

if (!haveStreamedDataForRange(sourcePosition, count))
return 0;

memcpy(buffer, CFDataGetBytePtr(m_data.get()) + sourcePosition, count);
return count;
}

const uint8_t* PDFPluginBase::dataPtrForRange(uint64_t sourcePosition, size_t count) const
const uint8_t* PDFPluginBase::dataPtrForRange(uint64_t sourcePosition, size_t count, CheckValidRanges checkValidRanges) const
{
ASSERT(isMainRunLoop());
Locker locker { m_streamedDataLock };

auto haveValidData = [&](CheckValidRanges checkValidRanges) WTF_REQUIRES_LOCK(m_streamedDataLock) {
if (!m_data)
return false;

if (haveStreamedDataForRange(sourcePosition, count))
return true;

uint64_t dataLength = CFDataGetLength(m_data.get());
if (sourcePosition + count > dataLength)
return false;

if (checkValidRanges == CheckValidRanges::No)
return true;

return m_validRanges.contains({ sourcePosition, sourcePosition + count - 1 });
};

if (!haveDataForRange(sourcePosition, count))
if (!haveValidData(checkValidRanges))
return nullptr;

return CFDataGetBytePtr(m_data.get()) + sourcePosition;
}

void PDFPluginBase::insertRangeRequestData(uint64_t offset, const Vector<uint8_t>& requestData)
{
if (!requestData.size())
return;

Locker locker { m_streamedDataLock };

auto requiredLength = offset + requestData.size();
ensureDataBufferLength(requiredLength);

memcpy(CFDataGetMutableBytePtr(m_data.get()) + offset, requestData.data(), requestData.size());

m_validRanges.add({ offset, offset + requestData.size() - 1 });
}

void PDFPluginBase::streamDidReceiveResponse(const ResourceResponse& response)
Expand All @@ -276,14 +303,28 @@

void PDFPluginBase::streamDidReceiveData(const SharedBuffer& buffer)
{
if (!m_data)
m_data = adoptCF(CFDataCreateMutable(0, 0));
#if !LOG_DISABLED
uint64_t streamedBytes;
#endif
{
Locker locker { m_streamedDataLock };

ensureDataBufferLength(m_streamedBytes + buffer.size());
memcpy(CFDataGetMutableBytePtr(m_data.get()) + m_streamedBytes, buffer.data(), buffer.size());
m_streamedBytes += buffer.size();
if (!m_data)
m_data = adoptCF(CFDataCreateMutable(0, 0));

LOG_WITH_STREAM(IncrementalPDF, stream << "PDFPluginBase::streamDidReceiveData() - received " << buffer.size() << " bytes, total streamed bytes " << m_streamedBytes);
ensureDataBufferLength(m_streamedBytes + buffer.size());
memcpy(CFDataGetMutableBytePtr(m_data.get()) + m_streamedBytes, buffer.data(), buffer.size());
m_streamedBytes += buffer.size();

// Keep our ranges-lookup-table compact by continuously updating its first range
// as the entire document streams in from the network.
m_validRanges.add({ 0, m_streamedBytes - 1 });
#if !LOG_DISABLED
streamedBytes = m_streamedBytes;
#endif
}

LOG_WITH_STREAM(IncrementalPDF, stream << "PDFPluginBase::streamDidReceiveData() - received " << buffer.size() << " bytes, total streamed bytes " << streamedBytes);

#if HAVE(INCREMENTAL_PDF_APIS)
if (m_incrementalLoader)
Expand Down Expand Up @@ -323,7 +364,10 @@

void PDFPluginBase::streamDidFail()
{
m_data = nullptr;
{
Locker locker { m_streamedDataLock };
m_data = nil;
}
#if HAVE(INCREMENTAL_PDF_APIS)
if (m_incrementalLoader)
m_incrementalLoader->incrementalPDFStreamDidFail();
Expand Down Expand Up @@ -440,7 +484,8 @@
auto response = adoptNS([[NSHTTPURLResponse alloc] initWithURL:m_view->mainResourceURL() statusCode:200 HTTPVersion:(NSString*)kCFHTTPVersion1_1 headerFields:headers]);
ResourceResponse synthesizedResponse(response.get());

auto resource = ArchiveResource::create(SharedBuffer::create(m_data.get()), m_view->mainResourceURL(), "application/pdf"_s, String(), String(), synthesizedResponse);
RetainPtr data = originalData();
auto resource = ArchiveResource::create(SharedBuffer::create(data.get()), m_view->mainResourceURL(), "application/pdf"_s, String(), String(), synthesizedResponse);
m_view->frame()->document()->loader()->addArchiveResource(resource.releaseNonNull());
}

Expand Down Expand Up @@ -1081,8 +1126,9 @@ static void verboseLog(PDFIncrementalLoader* incrementalLoader, uint64_t streame
return;
}

auto streamedBytes = this->streamedBytes();
LOG_WITH_STREAM(IncrementalPDF, stream << message);
verboseLog(m_incrementalLoader.get(), m_streamedBytes, m_documentFinishedLoading);
verboseLog(m_incrementalLoader.get(), streamedBytes, m_documentFinishedLoading);
LOG_WITH_STREAM(IncrementalPDFVerbose, stream << message);
#else
UNUSED_PARAM(message);
Expand Down

0 comments on commit 51537ed

Please sign in to comment.