Skip to content

Commit

Permalink
Ensure file name is valid when saving web page resources to disk
Browse files Browse the repository at this point in the history
https://bugs.webkit.org/show_bug.cgi?id=262642
rdar://116419303

Reviewed by Ryosuke Niwa.

We currently use the last path component of the URL as the file name, which can lead to file system errors because a
valid URL component is not necessarily a valid file name. To fix that, we now generate a valid file name from the last
path component by:
1. encoding characters that are not safe to use in a file name (e.g. ':').
2. ensuring the length is at most 255 bytes.
3. appending a sequence number when the name is already taken by another subresource file.
4. falling back to a default file name when the last path component is empty.

Test: WebArchive.SaveResourcesValidFileName

* Source/WebCore/loader/archive/cf/LegacyWebArchive.cpp:
(WebCore::generateValidFileName):
(WebCore::LegacyWebArchive::create):
* Tools/TestWebKitAPI/Tests/WebKitCocoa/CreateWebArchive.mm:

Canonical link: https://commits.webkit.org/268970@main
  • Loading branch information
szewai committed Oct 6, 2023
1 parent d7c9c0b commit 22dc25b
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 1 deletion.
24 changes: 23 additions & 1 deletion Source/WebCore/loader/archive/cf/LegacyWebArchive.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@

namespace WebCore {

// Upper bound, in bytes, that generateValidFileName() enforces on the file names it produces.
static constexpr unsigned maxFileNameSizeInBytes = 255;
// Name generateValidFileName() falls back to when the URL's last path component is empty.
static constexpr char defaultFileName[] = "file";
static const CFStringRef LegacyWebArchiveMainResourceKey = CFSTR("WebMainResource");
static const CFStringRef LegacyWebArchiveSubresourcesKey = CFSTR("WebSubresources");
static const CFStringRef LegacyWebArchiveSubframeArchivesKey = CFSTR("WebSubframeArchives");
Expand All @@ -74,6 +76,24 @@ static const CFStringRef LegacyWebArchiveResourceTextEncodingNameKey = CFSTR("We
static const CFStringRef LegacyWebArchiveResourceResponseKey = CFSTR("WebResourceResponse");
static const CFStringRef LegacyWebArchiveResourceResponseVersionKey = CFSTR("WebResourceResponseVersion");

// Returns the longest tail of |fileName| whose UTF-8 encoding fits in maxFileNameSizeInBytes.
// Characters are removed from the front so that a trailing extension or sequence number is
// preserved. The size is measured on the UTF-8 encoding rather than on the in-memory
// representation, and a surrogate pair is always removed as a unit.
static String truncateToMaxFileNameSize(const String& fileName)
{
    size_t utf8Size = fileName.utf8().length();
    if (utf8Size <= maxFileNameSizeInBytes)
        return fileName;

    size_t bytesToDrop = utf8Size - maxFileNameSizeInBytes;
    size_t droppedBytes = 0;
    unsigned length = fileName.length();
    unsigned start = 0;
    while (start < length && droppedBytes < bytesToDrop) {
        UChar codeUnit = fileName[start];
        if (U16_IS_LEAD(codeUnit) && start + 1 < length && U16_IS_TRAIL(fileName[start + 1])) {
            // A valid surrogate pair encodes one supplementary code point: 4 bytes in UTF-8.
            droppedBytes += 4;
            start += 2;
            continue;
        }
        // BMP code units take 1, 2 or 3 bytes in UTF-8 depending on their value.
        droppedBytes += codeUnit < 0x80 ? 1 : (codeUnit < 0x800 ? 2 : 3);
        ++start;
    }
    return fileName.substring(start);
}

// Builds a file name for the resource at |url| that is not in |existingFileNames|.
//
// The name is derived from the URL's last path component (or defaultFileName when that
// component is empty), passed through FileSystem::encodeForFileName(), and limited to
// maxFileNameSizeInBytes. When the name is already taken, "(N)" is appended to the encoded
// base name with N = 1, 2, 3, ... until an unused name is found.
//
// The caller is responsible for adding the returned name to its set of used names.
static String generateValidFileName(const URL& url, const HashSet<String>& existingFileNames)
{
    auto extractedFileName = url.lastPathComponent().toString();
    auto baseName = FileSystem::encodeForFileName(extractedFileName.isEmpty() ? String::fromLatin1(defaultFileName) : extractedFileName);

    auto fileName = truncateToMaxFileNameSize(baseName);
    // Always build the candidate from the base name so sequence numbers do not pile up
    // as "(1)(2)(3)" after repeated collisions.
    for (unsigned count = 1; existingFileNames.contains(fileName); ++count)
        fileName = truncateToMaxFileNameSize(makeString(baseName, '(', count, ')'));

    return fileName;
}

RetainPtr<CFDictionaryRef> LegacyWebArchive::createPropertyListRepresentation(ArchiveResource* resource, MainResourceStatus isMainResource)
{
if (!resource) {
Expand Down Expand Up @@ -535,6 +555,7 @@ RefPtr<LegacyWebArchive> LegacyWebArchive::create(const String& markupString, Lo
Vector<Ref<LegacyWebArchive>> subframeArchives;
Vector<Ref<ArchiveResource>> subresources;
HashMap<String, String> uniqueSubresources;
HashSet<String> uniqueFileNames;
String subresourcesDirectoryName = mainFrameFileName.isNull() ? String { } : makeString(mainFrameFileName, "_files");

for (auto& node : nodes) {
Expand Down Expand Up @@ -583,7 +604,8 @@ RefPtr<LegacyWebArchive> LegacyWebArchive::create(const String& markupString, Lo
}

if (!subresourcesDirectoryName.isNull()) {
String subresourceFileName = subresourceURL.lastPathComponent().toString();
String subresourceFileName = generateValidFileName(subresourceURL, uniqueFileNames);
uniqueFileNames.add(subresourceFileName);
String subresourceFilePath = makeString(subresourcesDirectoryName, "/", subresourceFileName);
resource->setFileName(subresourceFilePath);
addResult.iterator->value = frame.isMainFrame() ? subresourceFilePath : subresourceFileName;
Expand Down
58 changes: 58 additions & 0 deletions Tools/TestWebKitAPI/Tests/WebKitCocoa/CreateWebArchive.mm
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,64 @@ function onFramesLoaded() {
Util::run(&saved);
}

// Saves a page whose subresource URLs end in awkward last path components (over-long,
// duplicated, containing ':', empty, or ending in "." / ".." segments) and checks the
// names of the files written to the "host_files" directory.
TEST(WebArchive, SaveResourcesValidFileName)
{
    // Start from a clean output directory.
    NSFileManager *defaultFileManager = [NSFileManager defaultManager];
    NSString *directoryPath = [NSTemporaryDirectory() stringByAppendingPathComponent:@"SaveResourcesTest"];
    RetainPtr<NSURL> directoryURL = [NSURL fileURLWithPath:directoryPath isDirectory:YES];
    [defaultFileManager removeItemAtURL:directoryURL.get() error:nil];

    auto schemeHandler = adoptNS([[TestURLSchemeHandler alloc] init]);
    auto configuration = adoptNS([[WKWebViewConfiguration alloc] init]);
    [configuration setURLSchemeHandler:schemeHandler.get() forURLScheme:@"webarchivetest"];

    // A 256-character name, one character over the limit the test expects to be enforced.
    NSString *longFileName = [@"" stringByPaddingToLength:256 withString:@"x" startingAtIndex:0];
    NSArray<NSString *> *imageSources = @[
        longFileName,
        @"a/image.png",
        @"b/image.png",
        @"image.png(1)",
        @"webarchivetest://host/file:image.png",
        @"image1/",
        @"image2///",
        @"image3.png/./",
        @"image4/content/../",
    ];

    // The page posts 'done' once every image has fired its load event.
    NSMutableString *markup = [NSMutableString string];
    [markup appendFormat:@"<script>count = 0; function onImageLoad() { if (++count == %d) window.webkit.messageHandlers.testHandler.postMessage('done'); }</script>", (int)imageSources.count];
    for (NSString *source in imageSources)
        [markup appendFormat:@"<img src='%@' onload='onImageLoad()'>", source];
    NSData *htmlData = [markup dataUsingEncoding:NSUTF8StringEncoding];
    NSData *imageData = [NSData dataWithContentsOfURL:[[NSBundle mainBundle] URLForResource:@"400x400-green" withExtension:@"png" subdirectory:@"TestWebKitAPI.resources"]];

    // Serve the generated HTML for the main resource and the same PNG for every other request.
    [schemeHandler setStartURLSchemeTaskHandler:^(WKWebView *, id<WKURLSchemeTask> task) {
        BOOL isMainResource = [task.request.URL.absoluteString isEqualToString:@"webarchivetest://host/main.html"];
        NSString *mimeType = isMainResource ? @"text/html" : @"image/png";
        NSData *data = isMainResource ? htmlData : imageData;
        auto response = adoptNS([[NSURLResponse alloc] initWithURL:task.request.URL MIMEType:mimeType expectedContentLength:data.length textEncodingName:nil]);
        [task didReceiveResponse:response.get()];
        [task didReceiveData:data];
        [task didFinish];
    }];

    auto webView = adoptNS([[TestWKWebView alloc] initWithFrame:NSMakeRect(0, 0, 800, 600) configuration:configuration.get()]);
    static bool didReceiveDoneMessage = false;
    [webView performAfterReceivingMessage:@"done" action:[&] {
        didReceiveDoneMessage = true;
    }];
    NSURL *mainURL = [NSURL URLWithString:@"webarchivetest://host/main.html"];
    [webView loadRequest:[NSURLRequest requestWithURL:mainURL]];
    Util::run(&didReceiveDoneMessage);

    // One expected file per image source, compared as a set because directory order is irrelevant.
    NSSet *expectedFileNames = [NSSet setWithArray:@[
        [longFileName substringFromIndex:1],
        @"image.png",
        @"image.png(1)",
        @"image.png(1)(1)",
        @"file%3Aimage.png",
        @"image1",
        @"file",
        @"image3.png",
        @"image4",
    ]];
    static bool didSaveResources = false;
    [webView _saveResources:directoryURL.get() suggestedFileName:@"host" completionHandler:^(NSError *error) {
        EXPECT_NULL(error);
        NSString *resourcesDirectoryPath = [directoryURL.get() URLByAppendingPathComponent:@"host_files"].path;
        NSArray *resourceFileNames = [defaultFileManager contentsOfDirectoryAtPath:resourcesDirectoryPath error:nil];
        NSSet *savedFileNames = [NSSet setWithArray:resourceFileNames];
        EXPECT_TRUE([savedFileNames isEqualToSet:expectedFileNames]);
        didSaveResources = true;
    }];
    Util::run(&didSaveResources);
}

} // namespace TestWebKitAPI

#endif // PLATFORM(MAC) || PLATFORM(IOS_FAMILY)

0 comments on commit 22dc25b

Please sign in to comment.