Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import com.dotmarketing.exception.DoesNotExistException;
import com.dotmarketing.portlets.workflows.business.WorkflowAPI;
import com.dotmarketing.util.Config;
import com.dotmarketing.util.FileUtil;
import com.dotmarketing.util.Logger;
import com.dotmarketing.util.PageMode;
import com.dotmarketing.util.UtilMethods;
Expand Down Expand Up @@ -57,6 +58,7 @@
import java.io.OutputStream;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.text.Normalizer;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -222,8 +224,17 @@ private void saveMultipleBinary(final FormDataMultiPart body, final HttpServletR
}

/**
 * Builds the file name to use for an uploaded part from its Content-Disposition metadata.
 *
 * <p>The raw name is re-interpreted from ISO-8859-1 bytes to UTF-8, stripped of any
 * U+FFFD replacement characters produced by that decoding, normalized to NFC and finally
 * handed to {@link FileUtil#sanitizeFileName(String)}.
 *
 * @param meta Content-Disposition of the multipart body part; its file name is read as-is
 * @return the value returned by {@code FileUtil.sanitizeFileName} for the recovered name
 */
private static @NotNull String sanitizeFileName(ContentDisposition meta) {
    // Jersey decodes multipart Content-Disposition filenames as ISO-8859-1.
    // Re-interpret those bytes as UTF-8 to recover the original filename,
    // then normalize to NFC for consistent Unicode representation.
    // ASSUMPTION: modern browsers (HTML5 / RFC 6266) send UTF-8 bytes in
    // Content-Disposition filenames. This round-trip silently drops high bytes
    // from genuine ISO-8859-1 filenames sent by legacy or non-browser clients.
    final String raw = meta.getFileName();
    // Byte sequences that are not valid UTF-8 decode to U+FFFD; remove them rather
    // than letting the replacement character leak into the stored file name.
    final String utf8Name = new String(raw.getBytes(StandardCharsets.ISO_8859_1), StandardCharsets.UTF_8)
            .replace("\uFFFD", "");
    final String nfcName = Normalizer.normalize(utf8Name, Normalizer.Form.NFC);
    return FileUtil.sanitizeFileName(nfcName);
}

private void printResponseEntityViewResult(final OutputStream outputStream,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
import java.io.RandomAccessFile;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.text.Normalizer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
Expand Down Expand Up @@ -172,6 +173,27 @@ public void test_temp_resource_upload() throws IOException {
assertTrue(dotTempFileOpt.get().length() > 0);
}

/**
 * Uploads a single temp file whose name contains non-ASCII characters and checks
 * that the stored file keeps them, in composed (NFC) form.
 */
@Test
public void test_temp_resource_upload_preserves_unicode_filename() throws IOException {
    resetTempResourceConfig();
    Config.setProperty(TempFileAPI.TEMP_RESOURCE_ALLOW_ANONYMOUS, true);

    // Name expected on the stored file: the composed (NFC) spelling.
    final String composedName = "Test_document_``$$#ääöüÄÖÜ.txt";

    // Jersey decodes multipart Content-Disposition filenames as ISO-8859-1, so mimic
    // what a macOS browser would deliver: the decomposed (NFD) name encoded as UTF-8,
    // with those bytes then read back as ISO-8859-1. Starting from NFD makes sure the
    // NFC normalization step in sanitizeFileName actually has work to do.
    final byte[] decomposedUtf8 = Normalizer
            .normalize(composedName, Normalizer.Form.NFD)
            .getBytes(StandardCharsets.UTF_8);
    final String nameAsSeenByJersey = new String(decomposedUtf8, StandardCharsets.ISO_8859_1);

    final HttpServletRequest request = mockRequest();
    final DotTempFile uploaded = saveTempFile_usingTempResource(nameAsSeenByJersey, request);

    assertEquals("Unicode characters must be preserved in the uploaded filename",
            composedName, uploaded.file.getName());
}

@Test
public void test_temp_resource_multifile_upload() throws IOException {
resetTempResourceConfig();
Expand Down
Loading