Skip to content

Commit

Permalink
Encode all characters that should be encoded in URI, including % itse…
Browse files Browse the repository at this point in the history
…lf. (helidon-io#7314)

* Encode all characters that should be encoded in URI, including % itself.
* Fix test to really validate that data was not encoded over the network.
  • Loading branch information
tomas-langer authored and dalexandrov committed Aug 9, 2023
1 parent cd2098f commit 3f3da1d
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 36 deletions.
12 changes: 7 additions & 5 deletions common/uri/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,32 +4,34 @@ URI abstraction as required for routing and request handling.

# URI Encoding
URI has a limited set of allowed characters, with some characters having special meaning
(such as `#`, `/`, `;`), also the URI is expected to be ASCII string.
(such as `#`, `/`, `;`, `?`), also the URI is expected to be ASCII string.

Special characters (and UTF-8) can be used through encoding (using the `%##` format).
For example space character is encoded into `%20`.

We support encoding of paths, with access to raw (encoded) and decoded values.

Usage in components:
- Path
- Path (`UriPath`)
- raw path - encoded path including path parameters
- raw path no params - encoded path without path parameters
- decoded path - decoded path without path parameters
- decoded path parameter by name
- path segments (may have path parameters associated) - ordered sequence
of path parts (each segment is a section between `/`)
- Query
- Query (`UriQuery`)
- raw query - full encoded query
- raw parameter by name
- decoded parameter by name
- Fragment
- Fragment (`UriFragment`)
- raw fragment
- decoded fragment

Method names - if a method returns raw (encoded) value,
it will always be prefixed with the word `raw`. All other
methods are returning decoded values.
methods are returning decoded values.

There must be factory methods to create an instance from both encoded and decoded value.

# URI Components
## Scheme
Expand Down
33 changes: 12 additions & 21 deletions common/uri/src/main/java/io/helidon/common/uri/UriEncoding.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2000, 2022 Oracle and/or its affiliates.
* Copyright (c) 2000, 2023 Oracle and/or its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -22,6 +22,7 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;

/**
* Support for encoding and decoding of URI in HTTP.
Expand Down Expand Up @@ -79,34 +80,24 @@ public static String encodeUri(String uriSegment) {
* must be percent-encoded
* @return the encoded string
*/
public static String encode(final String s, final Type t) {
final boolean[] table = ENCODING_TABLES[t.ordinal()];
public static String encode(String s, Type t) {
Objects.requireNonNull(s, "String to encode must not be null");
Objects.requireNonNull(t, "Type of encoded component must not be null");
boolean[] table = ENCODING_TABLES[t.ordinal()];

StringBuilder sb = null;
StringBuilder sb = new StringBuilder();
for (int offset = 0, codePoint; offset < s.length(); offset += Character.charCount(codePoint)) {
codePoint = s.codePointAt(offset);

if (codePoint < 0x80 && table[codePoint]) {
if (sb != null) {
sb.append((char) codePoint);
}
sb.append((char) codePoint);
} else {
if (codePoint == '%'
&& offset + 2 < s.length()
&& isHexCharacter(s.charAt(offset + 1))
&& isHexCharacter(s.charAt(offset + 2))) {
if (sb != null) {
sb.append('%').append(s.charAt(offset + 1)).append(s.charAt(offset + 2));
}
offset += 2;
// we need to encode percent to %25, as otherwise we are ignoring the decoded string
if (codePoint == '%') {
sb.append("%25");
continue;
}

if (sb == null) {
sb = new StringBuilder();
sb.append(s, 0, offset);
}

if (codePoint < 0x80) {
if (codePoint == ' ' && t == Type.QUERY_PARAM) {
sb.append('+');
Expand All @@ -119,7 +110,7 @@ && isHexCharacter(s.charAt(offset + 2))) {
}
}

return (sb == null) ? s : sb.toString();
return sb.toString();
}

private static String encode(String uriSegment) {
Expand Down
40 changes: 40 additions & 0 deletions common/uri/src/test/java/io/helidon/common/uri/UriPathTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
* Copyright (c) 2023 Oracle and/or its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.helidon.common.uri;

import org.junit.jupiter.api.Test;

import static org.hamcrest.CoreMatchers.is;
import static org.hamcrest.MatcherAssert.assertThat;

class UriPathTest {
@Test
void testEncodePercent() {
// the path contains string %20, not a space!
UriPath path = UriPath.createFromDecoded("/my%20path");
assertThat(path.path(), is("/my%20path"));
assertThat(path.rawPath(), is("/my%2520path"));
}

@Test
void testEncodeSpace() {
// the path contains string %20, not a space!
UriPath path = UriPath.createFromDecoded("/my path");
assertThat(path.path(), is("/my path"));
assertThat(path.rawPath(), is("/my%20path"));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@
package io.helidon.tests.integration.webclient;

import io.helidon.common.http.Http;
import io.helidon.nima.webclient.api.ClientResponseTyped;
import io.helidon.nima.webclient.api.HttpClientResponse;
import io.helidon.nima.webclient.api.WebClient;
import io.helidon.nima.webclient.http1.Http1Client;
import io.helidon.nima.webserver.WebServer;

import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;

import static org.hamcrest.CoreMatchers.is;
Expand Down Expand Up @@ -124,7 +124,10 @@ void testQueryNotDecoded() {
@Test
void testQueryNotDoubleEncoded() {
Http1Client webClient = createNewClient((chain, request) -> {
assertThat(request.uri().query().rawValue(), is("first%26second%26=val%26ue%26"));
// this is the value we have provided, it must be given back
assertThat(request.uri().query().value(), is("first&second%26=val&ue%26"));
// what goes over the network is driven by the skipUriEncoding parameter, and must not be encoded
assertThat(request.uri().pathWithQueryAndFragment(), is("/greet?first&second%26=val&ue%26"));
return chain.proceed(request);
});
String response = webClient.get()
Expand All @@ -135,20 +138,19 @@ void testQueryNotDoubleEncoded() {
}

@Test
@Disabled
void testPathNotDecoded() {
Http1Client webClient = createNewClient((chain, request) -> {
assertThat(request.uri().path().rawPath(), is("/greet/path%26"));
assertThat(request.uri().path().path(), is("/greet/path%26"));
return chain.proceed(request);
});
// as the %26 is a valid encoding of path, when we encode it, it does not change
// as a result, the path is stored as this in encoded state, and the decoded state is different
// to fix this, we would need to have support for skip URI encoding all the way to UriPath, so we store
// the same value for decoded/encoded and we never re-encode it when skipped
webClient.get()

ClientResponseTyped<String> response = webClient.get()
.skipUriEncoding(true)
.path("path%26")
.requestEntity(String.class);
.request(String.class);

// the path is not valid
assertThat(response.status(), is(Http.Status.NOT_FOUND_404));
}

}

0 comments on commit 3f3da1d

Please sign in to comment.