From 3f3da1d347a963b5336418879b4b15ef37bffa07 Mon Sep 17 00:00:00 2001 From: Tomas Langer Date: Tue, 8 Aug 2023 00:51:18 +0200 Subject: [PATCH] Encode all characters that should be encoded in URI, including % itself. (#7314) * Encode all characters that should be encoded in URI, including % itself. * Fix test to really validate that data was not encoded over the network. --- common/uri/README.md | 12 +++--- .../io/helidon/common/uri/UriEncoding.java | 33 ++++++--------- .../io/helidon/common/uri/UriPathTest.java | 40 +++++++++++++++++++ .../integration/webclient/UriPartTest.java | 22 +++++----- 4 files changed, 71 insertions(+), 36 deletions(-) create mode 100644 common/uri/src/test/java/io/helidon/common/uri/UriPathTest.java diff --git a/common/uri/README.md b/common/uri/README.md index 34613c1d3ff..cacb1aa5eda 100644 --- a/common/uri/README.md +++ b/common/uri/README.md @@ -4,7 +4,7 @@ URI abstraction as required for routing and request handling. # URI Encoding URI has a limited set of allowed characters, with some characters having special meaning -(such as `#`, `/`, `;`), also the URI is expected to be ASCII string. +(such as `#`, `/`, `;`, `?`), also the URI is expected to be ASCII string. Special characters (and UTF-8) can be used through encoding (using the `%##` format). For example space character is encoded into `%20`. @@ -12,24 +12,26 @@ For example space character is encoded into `%20`. We support encoding of paths, with access to raw (encoded) and decoded values. Usage in components: -- Path +- Path (`UriPath`) - raw path - encoded path including path parameters - raw path no params - encoded path without path parameters - decoded path - decoded path without path parameters - decoded path parameter by name - path segments (may have path parameters associated) - ordered sequence of path parts (each segment is a section between `/`) -- Query +- Query (`UriQuery`) - raw query - full encoded query - raw parameter by name - decoded parameter by name -- Fragment +- Fragment (`UriFragment`) - raw fragment - decoded fragment Method names - if a method returns raw (encoded) value, it will always be prefixed with the word `raw`. All other -methods are returning decoded values. +methods are returning decoded values. + +There must be factory methods to create an instance from both encoded and decoded value. # URI Components ## Scheme diff --git a/common/uri/src/main/java/io/helidon/common/uri/UriEncoding.java b/common/uri/src/main/java/io/helidon/common/uri/UriEncoding.java index d8a0623c4ed..654faac2a7e 100644 --- a/common/uri/src/main/java/io/helidon/common/uri/UriEncoding.java +++ b/common/uri/src/main/java/io/helidon/common/uri/UriEncoding.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2022 Oracle and/or its affiliates. + * Copyright (c) 2000, 2023 Oracle and/or its affiliates. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,6 +22,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.Objects; /** * Support for encoding and decoding of URI in HTTP. @@ -79,34 +80,24 @@ public static String encodeUri(String uriSegment) { * must be percent-encoded * @return the encoded string */ - public static String encode(final String s, final Type t) { - final boolean[] table = ENCODING_TABLES[t.ordinal()]; + public static String encode(String s, Type t) { + Objects.requireNonNull(s, "String to encode must not be null"); + Objects.requireNonNull(t, "Type of encoded component must not be null"); + boolean[] table = ENCODING_TABLES[t.ordinal()]; - StringBuilder sb = null; + StringBuilder sb = new StringBuilder(); for (int offset = 0, codePoint; offset < s.length(); offset += Character.charCount(codePoint)) { codePoint = s.codePointAt(offset); if (codePoint < 0x80 && table[codePoint]) { - if (sb != null) { - sb.append((char) codePoint); - } + sb.append((char) codePoint); } else { - if (codePoint == '%' - && offset + 2 < s.length() - && isHexCharacter(s.charAt(offset + 1)) - && isHexCharacter(s.charAt(offset + 2))) { - if (sb != null) { - sb.append('%').append(s.charAt(offset + 1)).append(s.charAt(offset + 2)); - } - offset += 2; + // we need to encode percent to %25, as otherwise we are ignoring the decoded string + if (codePoint == '%') { + sb.append("%25"); continue; } - if (sb == null) { - sb = new StringBuilder(); - sb.append(s, 0, offset); - } - if (codePoint < 0x80) { if (codePoint == ' ' && t == Type.QUERY_PARAM) { sb.append('+'); @@ -119,7 +110,7 @@ && isHexCharacter(s.charAt(offset + 2))) { } } - return (sb == null) ? s : sb.toString(); + return sb.toString(); } private static String encode(String uriSegment) { diff --git a/common/uri/src/test/java/io/helidon/common/uri/UriPathTest.java b/common/uri/src/test/java/io/helidon/common/uri/UriPathTest.java new file mode 100644 index 00000000000..d230b964f1a --- /dev/null +++ b/common/uri/src/test/java/io/helidon/common/uri/UriPathTest.java @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2023 Oracle and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.helidon.common.uri; + +import org.junit.jupiter.api.Test; + +import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.MatcherAssert.assertThat; + +class UriPathTest { + @Test + void testEncodePercent() { + // the path contains string %20, not a space! + UriPath path = UriPath.createFromDecoded("/my%20path"); + assertThat(path.path(), is("/my%20path")); + assertThat(path.rawPath(), is("/my%2520path")); + } + + @Test + void testEncodeSpace() { + // the path contains string %20, not a space! + UriPath path = UriPath.createFromDecoded("/my path"); + assertThat(path.path(), is("/my path")); + assertThat(path.rawPath(), is("/my%20path")); + } +} diff --git a/tests/integration/webclient/src/test/java/io/helidon/tests/integration/webclient/UriPartTest.java b/tests/integration/webclient/src/test/java/io/helidon/tests/integration/webclient/UriPartTest.java index 5e761665124..ee92c8939ad 100644 --- a/tests/integration/webclient/src/test/java/io/helidon/tests/integration/webclient/UriPartTest.java +++ b/tests/integration/webclient/src/test/java/io/helidon/tests/integration/webclient/UriPartTest.java @@ -17,12 +17,12 @@ package io.helidon.tests.integration.webclient; import io.helidon.common.http.Http; +import io.helidon.nima.webclient.api.ClientResponseTyped; import io.helidon.nima.webclient.api.HttpClientResponse; import io.helidon.nima.webclient.api.WebClient; import io.helidon.nima.webclient.http1.Http1Client; import io.helidon.nima.webserver.WebServer; -import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import static org.hamcrest.CoreMatchers.is; @@ -124,7 +124,10 @@ void testQueryNotDecoded() { @Test void testQueryNotDoubleEncoded() { Http1Client webClient = createNewClient((chain, request) -> { - assertThat(request.uri().query().rawValue(), is("first%26second%26=val%26ue%26")); + // this is the value we have provided, it must be given back + assertThat(request.uri().query().value(), is("first&second%26=val&ue%26")); + // what goes over the network is driven by the skipUriEncoding parameter, and must not be encoded + assertThat(request.uri().pathWithQueryAndFragment(), is("/greet?first&second%26=val&ue%26")); return chain.proceed(request); }); String response = webClient.get() @@ -135,20 +138,19 @@ void testQueryNotDoubleEncoded() { } @Test - @Disabled void testPathNotDecoded() { Http1Client webClient = createNewClient((chain, request) -> { - assertThat(request.uri().path().rawPath(), is("/greet/path%26")); + assertThat(request.uri().path().path(), is("/greet/path%26")); return chain.proceed(request); }); - // as the %26 is a valid encoding of path, when we encode it, it does not change - // as a result, the path is stored as this in encoded state, and the decoded state is different - // to fix this, we would need to have support for skip URI encoding all the way to UriPath, so we store - // the same value for decoded/encoded and we never re-encode it when skipped - webClient.get() + + ClientResponseTyped response = webClient.get() .skipUriEncoding(true) .path("path%26") - .requestEntity(String.class); + .request(String.class); + + // the path is not valid + assertThat(response.status(), is(Http.Status.NOT_FOUND_404)); } }