Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix reading UTF-8 strings (from OPC UA nodes) #832

Merged
merged 3 commits into from
Mar 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -473,26 +473,9 @@ public BigDecimal readBigDecimal(String logicalName, int bitLength, WithReaderAr
public String readString(String logicalName, int bitLength, WithReaderArgs... readerArgs) throws ParseException {
String encoding = extractEncoding(readerArgs).orElse("UTF-8");
encoding = encoding.replaceAll("[^a-zA-Z0-9]", "");
switch (encoding.toUpperCase()) {
case "ASCII": {
byte[] strBytes = new byte[bitLength / 8];
int realLength = 0;
boolean finishedReading = false;
for (int i = 0; (i < (bitLength / 8)) && hasMore(8); i++) {
try {
byte b = readByte(logicalName);
if (!disable0Termination() && (b == 0x00)) {
finishedReading = true;
} else if (!finishedReading) {
strBytes[i] = b;
realLength++;
}
} catch (Exception e) {
throw new PlcRuntimeException(e);
}
}
return new String(strBytes, StandardCharsets.US_ASCII).substring(0, realLength);
}
encoding = encoding.toUpperCase();
switch (encoding) {
case "ASCII":
case "UTF8": {
byte[] strBytes = new byte[bitLength / 8];
int realLength = 0;
Expand All @@ -510,7 +493,15 @@ public String readString(String logicalName, int bitLength, WithReaderArgs... re
throw new PlcRuntimeException(e);
}
}
return new String(strBytes, StandardCharsets.UTF_8).substring(0, realLength);
Charset charset;
switch (encoding) {
case "UTF8":
charset = StandardCharsets.UTF_8;
break;
default:
charset = StandardCharsets.US_ASCII;
}
return new String(strBytes, 0, realLength, charset);
}
case "UTF16":
case "UTF16LE":
Expand All @@ -527,7 +518,7 @@ public String readString(String logicalName, int bitLength, WithReaderArgs... re
} else if (!finishedReading) {
strBytes[(i * 2)] = b1;
strBytes[(i * 2) + 1] = b2;
realLength++;
realLength += 2;
}
} catch (Exception e) {
throw new PlcRuntimeException(e);
Expand All @@ -544,7 +535,7 @@ public String readString(String logicalName, int bitLength, WithReaderArgs... re
default:
charset = StandardCharsets.UTF_16;
}
return new String(strBytes, charset).substring(0, realLength);
return new String(strBytes, 0, realLength, charset);
}
default:
throw new ParseException("Unsupported encoding: " + encoding);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,17 @@ void readString() throws ParseException {

assertEquals(value, answer);
}

/**
 * Verifies that a UTF-8 encoded string containing a multi-byte character
 * ("ä" occupies two bytes in UTF-8) is read back unchanged. The bit length
 * handed to the reader is derived from the encoded byte count, not from the
 * number of characters in the text.
 */
@Test
void readStringUtf8() throws ParseException {
    final String expected = "molybdän";
    final byte[] utf8Bytes = expected.getBytes(StandardCharsets.UTF_8);
    // The reader is told how many bits to consume, so size it from the encoded bytes.
    final int bitLength = utf8Bytes.length * 8;
    final ReadBuffer readBuffer = new ReadBufferByteBased(utf8Bytes);

    final String actual = readBuffer.readString(
        "",
        bitLength,
        WithOption.WithEncoding(StandardCharsets.UTF_8.name()));

    assertEquals(expected, actual);
}
}