diff --git a/components/Encoding/composer.json b/components/Encoding/composer.json
index 187b7559..72058a5c 100644
--- a/components/Encoding/composer.json
+++ b/components/Encoding/composer.json
@@ -20,8 +20,7 @@
"files": [
"utf8.php",
"compat-utf8.php",
- "utf8-encoder.php",
- "utf8-decoder.php"
+ "utf8-encoder.php"
],
"exclude-from-classmap": [
"/Tests/"
diff --git a/components/Encoding/utf8-decoder.php b/components/Encoding/utf8-decoder.php
deleted file mode 100644
index ddcc4acc..00000000
--- a/components/Encoding/utf8-decoder.php
+++ /dev/null
@@ -1,154 +0,0 @@
-
- * - `UTF8_DECODER_ACCEPT`: Decoder is ready for a new code point.
- * - `UTF8_DECODER_REJECT`: An error has occurred.
- * Any other positive value: Decoder is waiting for additional bytes.
- * @param int|null $codepoint Optional. If provided, will accumulate the decoded code point as
- * each byte is processed. If not provided or unable to decode, will
- * not be set, or will be set to invalid and unusable data.
- *
- * @return int Next decoder state after processing the current byte.
- */
-function utf8_decoder_apply_byte( string $byte, int $state, int &$codepoint = 0 ): int {
- /**
- * State classification and transition table for UTF-8 validation.
- *
- * > The first part of the table maps bytes to character classes that
- * > to reduce the size of the transition table and create bitmasks.
- * >
- * > The second part is a transition table that maps a combination
- * > of a state of the automaton and a character class to a state.
- *
- * @see http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
- */
- static $state_table = (
- "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" .
- "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" .
- "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" .
- "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" .
- "\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x09\x09\x09\x09\x09\x09\x09\x09\x09\x09\x09\x09\x09\x09\x09\x09" .
- "\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" .
- "\x08\x08\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02" .
- "\x10\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x04\x03\x03" .
- "\x11\x06\x06\x06\x05\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" .
- "\x00\x01\x02\x03\x05\x08\x07\x01\x01\x01\x04\x06\x01\x01\x01\x01" .
- "\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x00\x01\x01\x01\x01\x01\x00\x01\x00\x01\x01\x01\x01\x01\x01" .
- "\x01\x02\x01\x01\x01\x01\x01\x02\x01\x02\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x02\x01\x01\x01\x01\x01\x01\x01\x01" .
- "\x01\x02\x01\x01\x01\x01\x01\x01\x01\x02\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x03\x01\x03\x01\x01\x01\x01\x01\x01" .
- "\x01\x03\x01\x01\x01\x01\x01\x03\x01\x03\x01\x01\x01\x01\x01\x01\x01\x03\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01"
- );
-
- $byte = ord( $byte );
- $type = ord( $state_table[ $byte ] );
- $codepoint = ( UTF8_DECODER_ACCEPT === $state )
- ? ( ( 0xFF >> $type ) & $byte )
- : ( ( $byte & 0x3F ) | ( $codepoint << 6 ) );
-
- return ord( $state_table[ 256 + ( $state * 16 ) + $type ] );
-}
diff --git a/components/Encoding/utf8.php b/components/Encoding/utf8.php
index 0c74f7c1..2c266703 100644
--- a/components/Encoding/utf8.php
+++ b/components/Encoding/utf8.php
@@ -197,3 +197,31 @@ function wp_has_noncharacters( string $text ): bool {
return _wp_has_noncharacters_fallback( $text );
}
endif;
+
+/**
+ * Decode a UTF-8 byte sequence into a code point using the payload bits of each byte.
+ *
+ * @param string $character Bytes of one UTF-8 encoded character; lead and continuation markers are masked off, not validated.
+ *
+ * @return int Decoded value, or 0 when the input is not 1 to 4 bytes long.
+ */
+function utf8_ord( string $character ): int {
+ // unpack( 'C*' ) yields the unsigned byte values in an array indexed from 1.
+ $bytes = unpack( 'C*', $character );
+
+ // Remains 0 when none of the length branches below applies.
+ $codepoint = 0;
+
+ // Branch on byte count alone; a 1-byte input is returned unchanged, even when it is 0x80 or above.
+ if ( 1 === count( $bytes ) ) {
+ $codepoint = $bytes[1];
+ } elseif ( 2 === count( $bytes ) ) {
+ $codepoint = ( ( $bytes[1] & 0x1F ) << 6 ) | ( $bytes[2] & 0x3F );
+ } elseif ( 3 === count( $bytes ) ) {
+ $codepoint = ( ( $bytes[1] & 0x0F ) << 12 ) | ( ( $bytes[2] & 0x3F ) << 6 ) | ( $bytes[3] & 0x3F );
+ } elseif ( 4 === count( $bytes ) ) {
+ $codepoint = ( ( $bytes[1] & 0x07 ) << 18 ) | ( ( $bytes[2] & 0x3F ) << 12 ) | ( ( $bytes[3] & 0x3F ) << 6 ) | ( $bytes[4] & 0x3F );
+ }
+
+ return $codepoint;
+}
diff --git a/composer-ci-matrix-tests.json b/composer-ci-matrix-tests.json
index 948e04a0..de4747f7 100644
--- a/composer-ci-matrix-tests.json
+++ b/composer-ci-matrix-tests.json
@@ -51,7 +51,6 @@
"components/Encoding/utf8.php",
"components/Encoding/compat-utf8.php",
"components/Encoding/utf8-encoder.php",
- "components/Encoding/utf8-decoder.php",
"components/Filesystem/functions.php",
"components/Zip/functions.php",
"components/Polyfill/wordpress.php",
diff --git a/composer.json b/composer.json
index 1423cfa3..6d52bb4b 100644
--- a/composer.json
+++ b/composer.json
@@ -63,7 +63,6 @@
"components/Encoding/utf8.php",
"components/Encoding/compat-utf8.php",
"components/Encoding/utf8-encoder.php",
- "components/Encoding/utf8-decoder.php",
"components/Filesystem/functions.php",
"components/Zip/functions.php",
"components/Polyfill/wordpress.php",