Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix #10397 CharsetStringBuilder end vs length #10399

Merged
merged 1 commit into from
Aug 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
import java.util.Objects;

/**
* <p>Build a string from a sequence of bytes.</p>
* <p>Build a string from a sequence of bytes and/or characters.</p>
* <p>Implementations of this interface are optimized for processing a mix of calls to already decoded
* character based appends (e.g. {@link #append(char)}) and calls to undecoded byte methods (e.g. {@link #append(byte)}).
* This is particularly useful for decoding % encoded strings that are mostly already decoded but may contain
Expand All @@ -36,29 +36,51 @@
*/
public interface CharsetStringBuilder
{
/**
 * Append a single encoded byte to the string being built.
 *
 * @param b An encoded byte to append
 */
void append(byte b);

/**
 * Append a single, already decoded character to the string being built.
 *
 * @param c A decoded character to append
 */
void append(char c);

/**
 * Append the entire content of an array of encoded bytes.
 * Delegates to {@link #append(byte[], int, int)} with the full range of the array.
 *
 * @param bytes Array of encoded bytes to append
 */
default void append(byte[] bytes)
{
    final int count = bytes.length;
    append(bytes, 0, count);
}

/**
 * Append a sub-range of an array of encoded bytes, one byte at a time,
 * by calling {@link #append(byte)} for each byte in the range.
 *
 * @param b Array of encoded bytes
 * @param offset offset into the array of the first byte to append
 * @param length the number of bytes to append from the array.
 */
default void append(byte[] b, int offset, int length)
{
    // Exclusive upper bound of the range: length is a count, not an end index.
    final int limit = offset + length;
    int index = offset;
    while (index < limit)
    {
        append(b[index]);
        index++;
    }
}

/**
 * Append a sub-range of a sequence of decoded characters, one character at a time,
 * by calling {@link #append(char)} for each character in the range.
 *
 * @param chars sequence of decoded characters
 * @param offset offset into the sequence of the first character to append
 * @param length the number of characters to append from the sequence.
 */
default void append(CharSequence chars, int offset, int length)
{
    // Exclusive upper bound of the range: length is a count, not an end index.
    final int limit = offset + length;
    int index = offset;
    while (index < limit)
    {
        append(chars.charAt(index));
        index++;
    }
}

/**
* @param buf Buffer of encoded bytes to append. The bytes are consumed from the buffer.
*/
default void append(ByteBuffer buf)
{
int end = buf.position() + buf.remaining();
Expand All @@ -75,6 +97,10 @@ default void append(ByteBuffer buf)

/**
 * Reset this builder.
 * NOTE(review): presumably discards any content appended so far so the instance can be reused;
 * the exact contract is not visible here - confirm against the implementations.
 */
void reset();

/**
* @param charset The charset
* @return A {@link CharsetStringBuilder} suitable for the charset.
*/
static CharsetStringBuilder forCharset(Charset charset)
{
Objects.requireNonNull(charset);
Expand Down Expand Up @@ -106,7 +132,7 @@ public void append(char c)
/**
 * Append a sub-range of a sequence of decoded characters directly to the underlying builder.
 *
 * @param chars sequence of decoded characters
 * @param offset offset into the sequence of the first character to append
 * @param length the number of characters to append from the sequence.
 */
@Override
public void append(CharSequence chars, int offset, int length)
{
    // The delegate's three-argument append takes (start, end) rather than (offset, length),
    // as java.lang.StringBuilder does, so the count must be converted to an exclusive end index.
    // The earlier call that passed length as the end index is removed: it appended the wrong
    // range and, left next to the corrected call, appended the data twice.
    _builder.append(chars, offset, offset + length);
}

@Override
Expand Down Expand Up @@ -145,7 +171,7 @@ public void append(char c)
/**
 * Append a sub-range of a sequence of decoded characters directly to the underlying builder.
 *
 * @param chars sequence of decoded characters
 * @param offset offset into the sequence of the first character to append
 * @param length the number of characters to append from the sequence.
 */
@Override
public void append(CharSequence chars, int offset, int length)
{
    // The delegate's three-argument append takes (start, end) rather than (offset, length),
    // as java.lang.StringBuilder does, so the count must be converted to an exclusive end index.
    // The earlier call that passed length as the end index is removed: it appended the wrong
    // range and, left next to the corrected call, appended the data twice.
    _builder.append(chars, offset, offset + length);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
//
// ========================================================================
// Copyright (c) 1995 Mort Bay Consulting Pty Ltd and others.
//
// This program and the accompanying materials are made available under the
// terms of the Eclipse Public License v. 2.0 which is available at
// https://www.eclipse.org/legal/epl-2.0, or the Apache License, Version 2.0
// which is available at https://www.apache.org/licenses/LICENSE-2.0.
//
// SPDX-License-Identifier: EPL-2.0 OR Apache-2.0
// ========================================================================
//

package org.eclipse.jetty.util;

import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.stream.Stream;

import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;

// @checkstyle-disable-check : AvoidEscapedUnicodeCharactersCheck
/**
 * Tests for the {@link CharsetStringBuilder} instances returned by
 * {@link CharsetStringBuilder#forCharset(Charset)}.
 */
public class CharsetStringBuilderTest
{
    /**
     * @return pairs of (sample text, charset used to encode it) consumed by
     * {@link #testBuilder(String, Charset)}.
     */
    public static Stream<Arguments> tests()
    {
        return Stream.of(
            Arguments.of("Hello World \uC2B5@\uC39F\uC3A4\uC3BC\uC3A0\uC3A1-UTF-16 Æ\tÿ!!!", StandardCharsets.UTF_16),
            Arguments.of("Hello World \uC2B5@\uC39F\uC3A4\uC3BC\uC3A0\uC3A1-UTF-8 Æ\tÿ!!!", StandardCharsets.UTF_8),
            Arguments.of("Now is the time for all good men to test US_ASCII \r\n\t!", StandardCharsets.US_ASCII),
            Arguments.of("How Now Brown Cow. Test iso 8859 Æ\tÿ!", StandardCharsets.ISO_8859_1)
        );
    }

    /**
     * Encodes the sample text with the charset, feeds the bytes to a single builder in three
     * different ways and expects the original text back each time. The same builder instance
     * is used for all three rounds, so the test relies on it starting empty again after
     * each {@code build()}.
     *
     * @param test the text to round trip
     * @param charset the charset to encode and decode with
     */
    @ParameterizedTest
    @MethodSource("tests")
    public void testBuilder(String test, Charset charset) throws Exception
    {
        CharsetStringBuilder builder = CharsetStringBuilder.forCharset(charset);
        byte[] encoded = test.getBytes(charset);

        // Round 1: the whole array in one call.
        builder.append(encoded);
        assertThat(builder.build(), equalTo(test));

        // Round 2: one byte at a time.
        for (int i = 0; i < encoded.length; i++)
        {
            builder.append(encoded[i]);
        }
        assertThat(builder.build(), equalTo(test));

        // Round 3: first byte on its own, then the rest as an (offset, length) range.
        int rest = encoded.length - 1;
        builder.append(encoded[0]);
        builder.append(encoded, 1, rest);
        assertThat(builder.build(), equalTo(test));
    }

    /**
     * @return the charsets exercised by {@link #testBasicApi(Charset)}.
     */
    public static Stream<Charset> charsets()
    {
        return Stream.of(
            StandardCharsets.UTF_8,
            StandardCharsets.ISO_8859_1,
            StandardCharsets.US_ASCII,
            StandardCharsets.UTF_16
        );
    }

    /**
     * Builds the string {@code "123456789ABC"} two characters (or fewer) at a time, using
     * each of the append variants in turn, including the (offset, length) forms.
     *
     * @param charset the charset to encode and decode with
     */
    @ParameterizedTest
    @MethodSource("charsets")
    public void testBasicApi(Charset charset) throws Exception
    {
        CharsetStringBuilder builder = CharsetStringBuilder.forCharset(charset);

        // "1": encoded bytes appended individually.
        for (ByteBuffer single = charset.encode("1"); single.hasRemaining(); )
        {
            builder.append(single.get());
        }

        // "2": an already decoded character.
        builder.append('2');

        // "34": a whole buffer of encoded bytes.
        builder.append(charset.encode("34"));

        // Measure, in encoded bytes, where the two characters after the "abc" prefix start
        // and how long they are, so the ranges below are right for whichever charset is used.
        int offset = charset.encode("abc").remaining();
        int length = charset.encode("abc56").remaining() - offset;

        // "56": an (offset, length) range from the middle of a larger byte array.
        ByteBuffer source = charset.encode("abc56xyz");
        byte[] bytes = new byte[1028];
        source.get(bytes, 0, source.remaining());
        builder.append(bytes, offset, length);

        // "78": a buffer whose position and limit select the middle of the encoded text.
        ByteBuffer window = charset.encode("abc78xyz");
        window.position(offset);
        window.limit(offset + length);
        builder.append(window);

        // "9A" and "BC": character sequence ranges, the second with a non-zero offset.
        builder.append("9A", 0, 2);
        builder.append("xyzBCpqy", 3, 2);

        assertThat(builder.build(), is("123456789ABC"));
    }
}