Skip to content

Commit

Permalink
Save iframe resources to disk
Browse files Browse the repository at this point in the history
https://bugs.webkit.org/show_bug.cgi?id=262567
rdar://116420990

Reviewed by Ryosuke Niwa.

This patch adds support for saving iframe resources to disk. To achieve that:
1. each iframe main resource is saved as a single file, where file name is decided based on frame ID.
2. iframe main resources and subresources are stored in the [main_resources_file_name]_files directory, alongside the
other subresources of the main frame.
3. subresource urls in iframe main resources are changed to relative file paths.
4. iframe urls in main frame resource are changed to relative file paths.

Test: WebArchive.SaveResourcesSubframe

* Source/WebCore/editing/MarkupAccumulator.cpp:
(WebCore::MarkupAccumulator::appendStartTag):
(WebCore::MarkupAccumulator::frameForAttributeReplacement):
(WebCore::MarkupAccumulator::replaceAttributeIfNecessary): replace srcdoc attribute with src attribute whose value is
relative file path.
(WebCore::MarkupAccumulator::appendURLAttributeIfNecessary): if neither a src nor a srcdoc attribute exists, create a new
src attribute with its url set to the relative file path.
* Source/WebCore/editing/MarkupAccumulator.h:
* Source/WebCore/loader/archive/cf/LegacyWebArchive.cpp:
(WebCore::LegacyWebArchive::create):
* Source/WebCore/loader/archive/cf/LegacyWebArchive.h:
* Tools/TestWebKitAPI/Tests/WebKitCocoa/CreateWebArchive.mm:

Canonical link: https://commits.webkit.org/268858@main
  • Loading branch information
szewai committed Oct 4, 2023
1 parent 1e25aac commit 53dd791
Show file tree
Hide file tree
Showing 5 changed files with 177 additions and 12 deletions.
55 changes: 53 additions & 2 deletions Source/WebCore/editing/MarkupAccumulator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,13 @@
#include "Comment.h"
#include "CommonAtomStrings.h"
#include "DocumentFragment.h"
#include "DocumentLoader.h"
#include "DocumentType.h"
#include "Editor.h"
#include "ElementInlines.h"
#include "FrameLoader.h"
#include "HTMLElement.h"
#include "HTMLIFrameElement.h"
#include "HTMLNames.h"
#include "HTMLTemplateElement.h"
#include "NodeName.h"
Expand Down Expand Up @@ -447,11 +450,19 @@ void MarkupAccumulator::appendStartTag(StringBuilder& result, const Element& ele
{
appendOpenTag(result, element, namespaces);

bool hasURLAttribute = false;
if (element.hasAttributes()) {
for (const Attribute& attribute : element.attributesIterator())
appendAttribute(result, element, attribute, namespaces);
for (const Attribute& attribute : element.attributesIterator()) {
if (!hasURLAttribute && (element.isURLAttribute(attribute) || element.isHTMLContentAttribute(attribute)))
hasURLAttribute = true;
auto updatedAttribute = replaceAttributeIfNecessary(element, attribute);
appendAttribute(result, element, updatedAttribute, namespaces);
}
}

if (!hasURLAttribute)
appendURLAttributeIfNecessary(result, element, namespaces);

// Give an opportunity to subclasses to add their own attributes.
appendCustomAttributes(result, element, namespaces);

Expand Down Expand Up @@ -541,6 +552,46 @@ QualifiedName MarkupAccumulator::xmlAttributeSerialization(const Attribute& attr
return prefixedName;
}

// Returns the local content frame of |element| when its attributes are
// candidates for URL replacement, or nullptr otherwise. Replacement only
// applies when serializing as HTML (not as an XML fragment), when at least one
// replacement URL string has been supplied, and when |element| is an <iframe>
// whose content frame is a LocalFrame.
LocalFrame* MarkupAccumulator::frameForAttributeReplacement(const Element& element)
{
    bool replacementIsPossible = !inXMLFragmentSerialization() && !m_replacementURLStrings.isEmpty();
    if (!replacementIsPossible)
        return nullptr;

    if (!is<HTMLIFrameElement>(element))
        return nullptr;

    // contentFrame() may be null or a non-local frame; dynamicDowncast yields
    // nullptr in both cases.
    auto* contentFrame = downcast<HTMLIFrameElement>(element).contentFrame();
    return dynamicDowncast<LocalFrame>(contentFrame);
}

// Returns the attribute that should be serialized in place of |attribute|.
//
// For an HTML content attribute (per the commit description, srcdoc) on an
// <iframe> whose content frame was loaded from about:srcdoc, this returns a
// src attribute whose value is the replacement string registered under the
// frame's ID. In every other case |attribute| is returned unchanged.
Attribute MarkupAccumulator::replaceAttributeIfNecessary(const Element& element, const Attribute& attribute)
{
    if (!element.isHTMLContentAttribute(attribute))
        return attribute;

    auto* frame = frameForAttributeReplacement(element);
    if (!frame)
        return attribute;

    // A frame is not guaranteed to have a document loader; treat a missing
    // loader as "nothing to replace" instead of dereferencing null.
    auto documentLoader = frame->loader().documentLoader();
    if (!documentLoader || !documentLoader->response().url().isAboutSrcDoc())
        return attribute;

    // Replacement strings for frames are keyed by the stringified frame ID.
    auto replacementURLString = m_replacementURLStrings.get(frame->frameID().toString());
    if (replacementURLString.isNull())
        return attribute;

    return { srcAttr, AtomString { replacementURLString } };
}

// Appends a synthesized src attribute to |result| for an <iframe> for which a
// replacement string is registered under its content frame's ID. Does nothing
// when the element is not eligible for replacement or no such string exists.
void MarkupAccumulator::appendURLAttributeIfNecessary(StringBuilder& result, const Element& element, Namespaces* namespaces)
{
    auto* contentFrame = frameForAttributeReplacement(element);
    if (!contentFrame)
        return;

    auto frameKey = contentFrame->frameID().toString();
    auto replacement = m_replacementURLStrings.get(frameKey);
    if (replacement.isNull())
        return;

    Attribute sourceAttribute { srcAttr, AtomString { replacement } };
    appendAttribute(result, element, sourceAttribute, namespaces);
}

void MarkupAccumulator::appendAttribute(StringBuilder& result, const Element& element, const Attribute& attribute, Namespaces* namespaces)
{
bool isSerializingHTML = !inXMLFragmentSerialization();
Expand Down
4 changes: 4 additions & 0 deletions Source/WebCore/editing/MarkupAccumulator.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ namespace WebCore {
class Attribute;
class DocumentType;
class Element;
class LocalFrame;
class Node;
class Range;

Expand Down Expand Up @@ -106,6 +107,9 @@ class MarkupAccumulator {
bool inXMLFragmentSerialization() const { return m_serializationSyntax == SerializationSyntax::XML; }
void generateUniquePrefix(QualifiedName&, const Namespaces&);
QualifiedName xmlAttributeSerialization(const Attribute&, Namespaces*);
// Returns the LocalFrame hosted by the given element when it is an <iframe> eligible for
// attribute replacement (HTML serialization with replacement URL strings present), else nullptr.
LocalFrame* frameForAttributeReplacement(const Element&);
// Returns the attribute to serialize: either the given attribute unchanged, or a src attribute
// carrying the replacement string registered for the element's about:srcdoc content frame.
Attribute replaceAttributeIfNecessary(const Element&, const Attribute&);
// Appends a src attribute carrying the replacement string registered for the element's
// content frame, if there is one.
void appendURLAttributeIfNecessary(StringBuilder&, const Element&, Namespaces*);

StringBuilder m_markup;
const ResolveURLs m_resolveURLs;
Expand Down
28 changes: 19 additions & 9 deletions Source/WebCore/loader/archive/cf/LegacyWebArchive.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -518,7 +518,7 @@ static void addSubresourcesForAttachmentElementsIfNecessary(LocalFrame& frame, c

#endif

RefPtr<LegacyWebArchive> LegacyWebArchive::create(const String& markupString, LocalFrame& frame, Vector<Ref<Node>>&& nodes, Function<bool(LocalFrame&)>&& frameFilter, const String& mainResourceFileName)
RefPtr<LegacyWebArchive> LegacyWebArchive::create(const String& markupString, LocalFrame& frame, Vector<Ref<Node>>&& nodes, Function<bool(LocalFrame&)>&& frameFilter, const String& mainFrameFileName)
{
auto& response = frame.loader().documentLoader()->response();
URL responseURL = response.url();
Expand All @@ -535,17 +535,25 @@ RefPtr<LegacyWebArchive> LegacyWebArchive::create(const String& markupString, Lo
Vector<Ref<LegacyWebArchive>> subframeArchives;
Vector<Ref<ArchiveResource>> subresources;
HashMap<String, String> uniqueSubresources;
String subresourcesDirectoryName = mainResourceFileName.isNull() ? String { } : makeString(mainResourceFileName, "_files");
String subresourcesDirectoryName = mainFrameFileName.isNull() ? String { } : makeString(mainFrameFileName, "_files");

for (auto& node : nodes) {
RefPtr<LocalFrame> childFrame;
if ((is<HTMLFrameElementBase>(node) || is<HTMLObjectElement>(node))
&& (childFrame = dynamicDowncast<LocalFrame>(downcast<HTMLFrameOwnerElement>(node.get()).contentFrame()))) {
if (frameFilter && !frameFilter(*childFrame))
continue;
if (auto subframeArchive = create(*childFrame->document(), WTFMove(frameFilter)))
if (auto subframeArchive = create(*childFrame->document(), WTFMove(frameFilter), mainFrameFileName)) {
auto subframeMainResource = subframeArchive->mainResource();
auto subframeMainResourceURL = subframeMainResource ? subframeMainResource->url() : URL { };
if (!subframeMainResourceURL.isNull()) {
if (subframeMainResourceURL.isAboutSrcDoc() || subframeMainResourceURL.isAboutBlank())
uniqueSubresources.add(childFrame->frameID().toString(), subframeMainResource->fileName());
else
uniqueSubresources.add(subframeMainResourceURL.string(), subframeMainResource->fileName());
}
subframeArchives.append(subframeArchive.releaseNonNull());
else
} else
LOG_ERROR("Unabled to archive subframe %s", childFrame->tree().uniqueName().string().utf8().data());

} else {
Expand Down Expand Up @@ -575,8 +583,10 @@ RefPtr<LegacyWebArchive> LegacyWebArchive::create(const String& markupString, Lo
}

if (!subresourcesDirectoryName.isNull()) {
addResult.iterator->value = makeString(subresourcesDirectoryName, "/", subresourceURL.lastPathComponent());
resource->setFileName(addResult.iterator->value);
String subresourceFileName = subresourceURL.lastPathComponent().toString();
String subresourceFilePath = makeString(subresourcesDirectoryName, "/", subresourceFileName);
resource->setFileName(subresourceFilePath);
addResult.iterator->value = frame.isMainFrame() ? subresourceFilePath : subresourceFileName;
}

subresources.append(resource.releaseNonNull());
Expand All @@ -598,14 +608,14 @@ RefPtr<LegacyWebArchive> LegacyWebArchive::create(const String& markupString, Lo
}
}

if (!mainResourceFileName.isNull()) {
if (!mainFrameFileName.isNull()) {
auto* document = frame.document();
if (!document)
return nullptr;
auto fileNameWithExtension = mainResourceFileName;
auto fileNameWithExtension = frame.isMainFrame() ? mainFrameFileName : makeString(subresourcesDirectoryName, "/frame_", frame.frameID().toString());
auto extension = MIMETypeRegistry::preferredExtensionForMIMEType(mainResource->mimeType());
if (!fileNameWithExtension.endsWith(extension))
fileNameWithExtension = makeString(mainResourceFileName, ".", extension);
fileNameWithExtension = makeString(fileNameWithExtension, ".", extension);
uniqueSubresources.add(responseURL.string(), fileNameWithExtension);
String updatedMarkupString = serializeFragment(*document, SerializedNodes::SubtreeIncludingNode, nullptr, ResolveURLs::No, nullptr, std::nullopt, WTFMove(uniqueSubresources));
mainResource = ArchiveResource::create(utf8Buffer(updatedMarkupString), responseURL, response.mimeType(), "UTF-8"_s, frame.tree().uniqueName(), ResourceResponse(), fileNameWithExtension);
Expand Down
2 changes: 1 addition & 1 deletion Source/WebCore/loader/archive/cf/LegacyWebArchive.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class LegacyWebArchive final : public Archive {
WEBCORE_EXPORT static RefPtr<LegacyWebArchive> create(FragmentedSharedBuffer&);
WEBCORE_EXPORT static RefPtr<LegacyWebArchive> create(const URL&, FragmentedSharedBuffer&);
WEBCORE_EXPORT static Ref<LegacyWebArchive> create(Ref<ArchiveResource>&& mainResource, Vector<Ref<ArchiveResource>>&& subresources, Vector<Ref<LegacyWebArchive>>&& subframeArchives);
WEBCORE_EXPORT static RefPtr<LegacyWebArchive> create(Node&, Function<bool(LocalFrame&)>&& frameFilter = { }, const String& mainResourceFileName = { });
WEBCORE_EXPORT static RefPtr<LegacyWebArchive> create(Node&, Function<bool(LocalFrame&)>&& frameFilter = { }, const String& mainFrameFileName = { });
WEBCORE_EXPORT static RefPtr<LegacyWebArchive> create(LocalFrame&);
WEBCORE_EXPORT static RefPtr<LegacyWebArchive> createFromSelection(LocalFrame*);
WEBCORE_EXPORT static RefPtr<LegacyWebArchive> create(const SimpleRange&);
Expand Down
100 changes: 100 additions & 0 deletions Tools/TestWebKitAPI/Tests/WebKitCocoa/CreateWebArchive.mm
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,106 @@
Util::run(&saved);
}

// Main document served for the SaveResourcesSubframe test. It hosts three iframes: one without
// src/srcdoc (its body is filled in by script once all frames have loaded), one loaded from
// iframe.html, and one using srcdoc. After the third load event the page posts "done" to the
// testHandler message handler.
static const char* htmlDataBytes = R"TESTRESOURCE(
<script>
count = 0;
function onFrameLoaded() {
if (++count == 3) {
frame = document.getElementById("iframe1_id");
if (frame && !frame.contentDocument.body.innerHTML)
frame.contentDocument.body.innerHTML = "<p>iframe1</p><img src='image.png'>";
window.webkit.messageHandlers.testHandler.postMessage("done");
}
}
</script>
<iframe onload="onFrameLoaded();" id="iframe1_id"></iframe>
<iframe onload="onFrameLoaded();" src="iframe.html"></iframe>
<iframe onload="onFrameLoaded();" srcdoc="<p>iframe3</p><img src='image.png'>"></iframe>
)TESTRESOURCE";
// Document served for iframe.html, i.e. the content of the second iframe above.
static const char* iframeHTMLDataBytes = R"TESTRESOURCE(<p>iframe2</p><img src='image.png'>)TESTRESOURCE";

// Loads a page containing three iframes (no src, src, and srcdoc), saves its resources to a
// temporary directory, and verifies that:
//   - the main resource is written as host.html;
//   - host_files/ contains one file per iframe plus the shared image;
//   - the saved main resource refers to each iframe file by its relative path;
//   - each saved iframe file holds the expected iframe content.
TEST(WebArchive, SaveResourcesSubframe)
{
    RetainPtr<NSURL> directoryURL = [NSURL fileURLWithPath:[NSTemporaryDirectory() stringByAppendingPathComponent:@"SaveResourcesTest"] isDirectory:YES];
    NSFileManager *fileManager = [NSFileManager defaultManager];
    // Start from a clean slate; a missing directory is not an error here.
    [fileManager removeItemAtURL:directoryURL.get() error:nil];

    auto configuration = adoptNS([[WKWebViewConfiguration alloc] init]);
    auto schemeHandler = adoptNS([[TestURLSchemeHandler alloc] init]);
    [configuration setURLSchemeHandler:schemeHandler.get() forURLScheme:@"webarchivetest"];
    NSData *htmlData = [NSData dataWithBytes:htmlDataBytes length:strlen(htmlDataBytes)];
    NSData *iframeHTMLData = [NSData dataWithBytes:iframeHTMLDataBytes length:strlen(iframeHTMLDataBytes)];
    NSData *imageData = [NSData dataWithContentsOfURL:[[NSBundle mainBundle] URLForResource:@"400x400-green" withExtension:@"png" subdirectory:@"TestWebKitAPI.resources"]];
    [schemeHandler setStartURLSchemeTaskHandler:^(WKWebView *, id<WKURLSchemeTask> task) {
        NSData *data = nil;
        NSString *mimeType = nil;
        if ([task.request.URL.absoluteString isEqualToString:@"webarchivetest://host/main.html"]) {
            mimeType = @"text/html";
            data = htmlData;
        } else if ([task.request.URL.absoluteString isEqualToString:@"webarchivetest://host/iframe.html"]) {
            mimeType = @"text/html";
            data = iframeHTMLData;
        } else if ([task.request.URL.absoluteString isEqualToString:@"webarchivetest://host/image.png"]) {
            mimeType = @"image/png";
            data = imageData;
        } else
            FAIL();

        auto response = adoptNS([[NSURLResponse alloc] initWithURL:task.request.URL MIMEType:mimeType expectedContentLength:data.length textEncodingName:nil]);
        [task didReceiveResponse:response.get()];
        [task didReceiveData:data];
        [task didFinish];
    }];

    auto webView = adoptNS([[TestWKWebView alloc] initWithFrame:NSMakeRect(0, 0, 800, 600) configuration:configuration.get()]);
    static bool messageReceived = false;
    [webView performAfterReceivingMessage:@"done" action:[&] {
        messageReceived = true;
    }];
    [webView loadRequest:[NSURLRequest requestWithURL:[NSURL URLWithString:@"webarchivetest://host/main.html"]]];
    Util::run(&messageReceived);

    static bool saved = false;
    [webView _saveResources:directoryURL.get() suggestedFileName:@"host" completionHandler:^(NSError *error) {
        EXPECT_NULL(error);
        NSString *mainResourcePath = [directoryURL URLByAppendingPathComponent:@"host.html"].path;
        EXPECT_TRUE([fileManager fileExistsAtPath:mainResourcePath]);
        auto savedMainResourceString = adoptNS([[NSString alloc] initWithData:[NSData dataWithContentsOfFile:mainResourcePath] encoding:NSUTF8StringEncoding]);

        NSString *resourceDirectoryName = @"host_files";
        NSString *resourceDirectoryPath = [directoryURL URLByAppendingPathComponent:resourceDirectoryName].path;
        NSArray *resourceFileNames = [fileManager contentsOfDirectoryAtPath:resourceDirectoryPath error:nil];
        EXPECT_EQ(4llu, resourceFileNames.count);
        unsigned frameFileCount = 0;
        unsigned imageFileCount = 0;
        // Each saved iframe file must contain exactly one of these markers; every marker found
        // is removed so the set is empty once all three iframes have been accounted for.
        NSMutableSet *frameResourceContentsToFind = [NSMutableSet set];
        [frameResourceContentsToFind addObjectsFromArray:[NSArray arrayWithObjects:@"iframe1", @"iframe2", @"iframe3", nil]];
        for (NSString *fileName in resourceFileNames) {
            if ([fileName containsString:@"frame_"]) {
                // Ensure iframe urls in the main resource are replaced with the relative file path.
                NSString *replacementPath = [resourceDirectoryName stringByAppendingPathComponent:fileName];
                EXPECT_TRUE([savedMainResourceString.get() containsString:replacementPath]);

                NSString *resourceFilePath = [resourceDirectoryPath stringByAppendingPathComponent:fileName];
                auto savedSubframeResourceString = adoptNS([[NSString alloc] initWithData:[NSData dataWithContentsOfFile:resourceFilePath] encoding:NSUTF8StringEncoding]);
                // Every iframe document in this test references the shared image by file name.
                EXPECT_TRUE([savedSubframeResourceString.get() containsString:@"image.png"]);

                NSRange range = [savedSubframeResourceString.get() rangeOfString:@"iframe"];
                EXPECT_NE(NSNotFound, (long)range.location);
                // Only take the substring when the marker plus its trailing digit is in bounds;
                // substringWithRange: throws on an out-of-range request.
                if (range.location != NSNotFound && NSMaxRange(range) < [savedSubframeResourceString.get() length]) {
                    NSString *iframeContent = [savedSubframeResourceString.get() substringWithRange:NSMakeRange(range.location, range.length + 1)];
                    [frameResourceContentsToFind removeObject:iframeContent];
                }
                ++frameFileCount;
            }
            if ([fileName isEqualToString:@"image.png"])
                ++imageFileCount;
        }
        EXPECT_EQ(3u, frameFileCount);
        EXPECT_EQ(1u, imageFileCount);
        EXPECT_EQ(0u, frameResourceContentsToFind.count);
        saved = true;
    }];
    Util::run(&saved);
}

} // namespace TestWebKitAPI

#endif // PLATFORM(MAC) || PLATFORM(IOS_FAMILY)

0 comments on commit 53dd791

Please sign in to comment.