From 408ee01272c5e5b8c7c986f34c9c4446d505c1f5 Mon Sep 17 00:00:00 2001 From: Romain Grecourt Date: Wed, 16 Mar 2022 19:24:55 -0700 Subject: [PATCH] Update MimeParser to decode body part headers with UTF-8 instead of ISO_8859_1 (#3969) (#3972) Update ContentDisposition.parse to remove ASCII restriction for quoted tokens. --- .../media/multipart/ContentDisposition.java | 5 ++-- .../helidon/media/multipart/MimeParser.java | 2 +- .../multipart/ContentDispositionTest.java | 14 ++++++++++- .../media/multipart/MimeParserTest.java | 24 ++++++++++++++++--- .../multipart/MultiPartDecoderTckTest.java | 3 +-- .../multipart/MultiPartEncoderTckTest.java | 3 +-- 6 files changed, 39 insertions(+), 12 deletions(-) diff --git a/media/multipart/src/main/java/io/helidon/media/multipart/ContentDisposition.java b/media/multipart/src/main/java/io/helidon/media/multipart/ContentDisposition.java index 55d4d4003ff..65ae52138be 100644 --- a/media/multipart/src/main/java/io/helidon/media/multipart/ContentDisposition.java +++ b/media/multipart/src/main/java/io/helidon/media/multipart/ContentDisposition.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 Oracle and/or its affiliates. + * Copyright (c) 2020, 2022 Oracle and/or its affiliates. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -57,8 +57,7 @@ public final class ContentDisposition { private static final CharMatcher LINEAR_WHITE_SPACE = CharMatcher.anyOf(" \t\r\n"); - private static final CharMatcher QUOTED_TEXT_MATCHER = CharMatcher.ascii() - .and(CharMatcher.noneOf("\"\\\r")); + private static final CharMatcher QUOTED_TEXT_MATCHER = CharMatcher.noneOf("\"\\\r"); private static final String NAME_PARAMETER = "name"; private static final String FILENAME_PARAMETER = "filename"; diff --git a/media/multipart/src/main/java/io/helidon/media/multipart/MimeParser.java b/media/multipart/src/main/java/io/helidon/media/multipart/MimeParser.java index 8b2517de108..0bf3a4cc5f7 100644 --- a/media/multipart/src/main/java/io/helidon/media/multipart/MimeParser.java +++ b/media/multipart/src/main/java/io/helidon/media/multipart/MimeParser.java @@ -250,7 +250,7 @@ private ParsingException(Throwable cause) { } private static final Logger LOGGER = Logger.getLogger(MimeParser.class.getName()); - private static final Charset HEADER_ENCODING = StandardCharsets.ISO_8859_1; + private static final Charset HEADER_ENCODING = StandardCharsets.UTF_8; /** * All states. diff --git a/media/multipart/src/test/java/io/helidon/media/multipart/ContentDispositionTest.java b/media/multipart/src/test/java/io/helidon/media/multipart/ContentDispositionTest.java index 5094d21dae4..19865dff055 100644 --- a/media/multipart/src/test/java/io/helidon/media/multipart/ContentDispositionTest.java +++ b/media/multipart/src/test/java/io/helidon/media/multipart/ContentDispositionTest.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 Oracle and/or its affiliates. + * Copyright (c) 2020, 2022 Oracle and/or its affiliates. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -246,4 +246,16 @@ public void testDateQuotes() {
                 .build();
         assertThat(cd.toString(), is(equalTo(template)));
     }
+
+    @Test
+    public void testNonAsciiFilename() {
+        ContentDisposition cd = ContentDisposition.parse("form-data; name=\"file[]\"; filename=\"\u60A8\u597D.txt\"");
+        assertThat(cd.type(), is(equalTo("form-data")));
+        assertThat(cd.name().isPresent(), is(equalTo(true)));
+        assertThat(cd.name().get(), is(equalTo("file[]")));
+        assertThat(cd.filename().isPresent(), is(equalTo(true)));
+        assertThat(cd.filename().get(), is(equalTo("\u60A8\u597D.txt")));
+        assertThat(cd.parameters(), is(notNullValue()));
+        assertThat(cd.parameters().size(), is(equalTo(2)));
+    }
 }
diff --git a/media/multipart/src/test/java/io/helidon/media/multipart/MimeParserTest.java b/media/multipart/src/test/java/io/helidon/media/multipart/MimeParserTest.java
index 64ccb4eb7f6..7a8c6dca28f 100644
--- a/media/multipart/src/test/java/io/helidon/media/multipart/MimeParserTest.java
+++ b/media/multipart/src/test/java/io/helidon/media/multipart/MimeParserTest.java
@@ -638,6 +638,24 @@ public void testHeaderValueWithLeadingWhiteSpace() {
         assertThat(new String(part1.content), is(equalTo("part1")));
     }
 
+    @Test
+    public void testHeaderUTF8() {
+        String boundary = "boundary";
+        final byte[] chunk1 = ("--" + boundary + "\n"
+                + "Content-Disposition: form-data; name=\"file[]\"; filename=\"\u60A8\u597D.txt\"\n"
+                + "\n"
+                + "part1\n"
+                + "--" + boundary + "--").getBytes(java.nio.charset.StandardCharsets.UTF_8); // not the platform default
+        List<MimePart> parts = parse(boundary, chunk1).parts;
+        assertThat(parts.size(), is(equalTo(1)));
+        MimePart part1 = parts.get(0);
+        assertThat(part1.headers.size(), is(equalTo(1)));
+        assertThat(part1.headers.get("Content-Disposition"),
+                hasItems("form-data; name=\"file[]\"; filename=\"\u60A8\u597D.txt\""));
+        assertThat(part1.content, is(notNullValue()));
+        assertThat(new String(part1.content), is(equalTo("part1")));
+    }
+
     @Test
     public void testHeaderValueWithWhiteSpacesOnly() {
         String boundary = "boundary";
@@ -764,9 +782,9 @@ static final
class ParserResult { final MimeParser.ParserEvent lastEvent; ParserResult(List parts, - Map> partHeaders, - byte[] partContent, - MimeParser.ParserEvent lastEvent) { + Map> partHeaders, + byte[] partContent, + MimeParser.ParserEvent lastEvent) { this.parts = parts; this.partHeaders = partHeaders; this.partContent = partContent; diff --git a/media/multipart/src/test/java/io/helidon/media/multipart/MultiPartDecoderTckTest.java b/media/multipart/src/test/java/io/helidon/media/multipart/MultiPartDecoderTckTest.java index 536f7569d6f..89d4e50ee62 100644 --- a/media/multipart/src/test/java/io/helidon/media/multipart/MultiPartDecoderTckTest.java +++ b/media/multipart/src/test/java/io/helidon/media/multipart/MultiPartDecoderTckTest.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022 Oracle and/or its affiliates. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,7 +12,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - * */ package io.helidon.media.multipart; diff --git a/media/multipart/src/test/java/io/helidon/media/multipart/MultiPartEncoderTckTest.java b/media/multipart/src/test/java/io/helidon/media/multipart/MultiPartEncoderTckTest.java index 40fae6a6363..2f86466c64b 100644 --- a/media/multipart/src/test/java/io/helidon/media/multipart/MultiPartEncoderTckTest.java +++ b/media/multipart/src/test/java/io/helidon/media/multipart/MultiPartEncoderTckTest.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022 Oracle and/or its affiliates. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -12,7 +12,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - * */ package io.helidon.media.multipart;