apache · sruehl · Mar 20, 2023 · Mar 7, 2023 · Mar 7, 2023 · Mar 7, 2023
diff --git a/plc4j/spi/src/main/java/org/apache/plc4x/java/spi/generation/ReadBufferByteBased.java b/plc4j/spi/src/main/java/org/apache/plc4x/java/spi/generation/ReadBufferByteBased.java
@@ -473,26 +473,9 @@ public BigDecimal readBigDecimal(String logicalName, int bitLength, WithReaderAr
     public String readString(String logicalName, int bitLength, WithReaderArgs... readerArgs) throws ParseException {
         String encoding = extractEncoding(readerArgs).orElse("UTF-8");
         encoding = encoding.replaceAll("[^a-zA-Z0-9]", "");
-        switch (encoding.toUpperCase()) {
-            case "ASCII": {
-                byte[] strBytes = new byte[bitLength / 8];
-                int realLength = 0;
-                boolean finishedReading = false;
-                for (int i = 0; (i < (bitLength / 8)) && hasMore(8); i++) {
-                    try {
-                        byte b = readByte(logicalName);
-                        if (!disable0Termination() && (b == 0x00)) {
-                            finishedReading = true;
-                        } else if (!finishedReading) {
-                            strBytes[i] = b;
-                            realLength++;
-                        }
-                    } catch (Exception e) {
-                        throw new PlcRuntimeException(e);
-                    }
-                }
-                return new String(strBytes, StandardCharsets.US_ASCII).substring(0, realLength);
-            }
+        encoding = encoding.toUpperCase();
+        switch (encoding) {
+            case "ASCII":
             case "UTF8": {
                 byte[] strBytes = new byte[bitLength / 8];
                 int realLength = 0;
@@ -510,7 +493,15 @@ public String readString(String logicalName, int bitLength, WithReaderArgs... re
                         throw new PlcRuntimeException(e);
                     }
                 }
-                return new String(strBytes, StandardCharsets.UTF_8).substring(0, realLength);
+                Charset charset;
+                switch (encoding) {
+                    case "UTF8":
+                        charset = StandardCharsets.UTF_8;
+                        break;
+                    default:
+                        charset = StandardCharsets.US_ASCII;
+                }
+                return new String(strBytes, 0, realLength, charset);
             }
             case "UTF16":
             case "UTF16LE":
@@ -527,7 +518,7 @@ public String readString(String logicalName, int bitLength, WithReaderArgs... re
                         } else if (!finishedReading) {
                             strBytes[(i * 2)] = b1;
                             strBytes[(i * 2) + 1] = b2;
-                            realLength++;
+                            realLength += 2;
                         }
                     } catch (Exception e) {
                         throw new PlcRuntimeException(e);
@@ -544,7 +535,7 @@ public String readString(String logicalName, int bitLength, WithReaderArgs... re
                     default:
                         charset = StandardCharsets.UTF_16;
                 }
-                return new String(strBytes, charset).substring(0, realLength);
+                return new String(strBytes, 0, realLength, charset);
             }
             default:
                 throw new ParseException("Unsupported encoding: " + encoding);

diff --git a/plc4j/spi/src/test/java/org/apache/plc4x/java/spi/generation/ReadBufferTest.java b/plc4j/spi/src/test/java/org/apache/plc4x/java/spi/generation/ReadBufferTest.java
@@ -38,4 +38,17 @@ void readString() throws ParseException {
 
         assertEquals(value, answer);
     }
+
+    /**
+     * Checks that {@code readString} round-trips a UTF-8 string containing a multi-byte character; the bit length passed in is derived from the encoded byte count, not the character count.
+     */
+    @Test
+    void readStringUtf8() throws ParseException {
+        String value = "molybdän"; // "ä" takes more than one byte in UTF-8
+        final var serialized = value.getBytes(StandardCharsets.UTF_8);
+        final ReadBuffer buffer = new ReadBufferByteBased(serialized);
+        String answer = buffer.readString("", serialized.length * 8, WithOption.WithEncoding(StandardCharsets.UTF_8.name()));
+
+        assertEquals(value, answer);
+    }
 }