Skip to content

Commit

Permalink
feat(node): buffer isUtf8/isAscii (#23928)
Browse files Browse the repository at this point in the history
Fixes: #23657

Implements `isUtf8` and `isAscii` as ops.
  • Loading branch information
devsnek committed May 21, 2024
1 parent db82e8b commit 8f2d171
Show file tree
Hide file tree
Showing 11 changed files with 219 additions and 2 deletions.
2 changes: 2 additions & 0 deletions ext/node/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,8 @@ deno_core::extension!(deno_node,
deps = [ deno_io, deno_fs ],
parameters = [P: NodePermissions],
ops = [
ops::buffer::op_is_ascii,
ops::buffer::op_is_utf8,
ops::crypto::op_node_create_decipheriv,
ops::crypto::op_node_cipheriv_encrypt,
ops::crypto::op_node_cipheriv_final,
Expand Down
13 changes: 13 additions & 0 deletions ext/node/ops/buffer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.

use deno_core::op2;

/// Reports whether every byte in `buf` is an ASCII byte.
///
/// Returns `true` for an empty buffer, since there is no byte that could
/// fall outside the ASCII range.
#[op2(fast)]
pub fn op_is_ascii(#[buffer] buf: &[u8]) -> bool {
  buf.iter().all(|byte| byte.is_ascii())
}

/// Reports whether `buf` holds a well-formed UTF-8 byte sequence.
///
/// Validation is delegated to `std::str::from_utf8`; only success or failure
/// is reported, the decoded string slice itself is discarded.
#[op2(fast)]
pub fn op_is_utf8(#[buffer] buf: &[u8]) -> bool {
  matches!(std::str::from_utf8(buf), Ok(_))
}
1 change: 1 addition & 0 deletions ext/node/ops/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.

pub mod buffer;
pub mod crypto;
pub mod fs;
pub mod http;
Expand Down
2 changes: 2 additions & 0 deletions ext/node/polyfills/buffer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ export {
Buffer,
constants,
default,
isAscii,
isUtf8,
kMaxLength,
kStringMaxLength,
SlowBuffer,
Expand Down
49 changes: 49 additions & 0 deletions ext/node/polyfills/internal/buffer.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
// deno-lint-ignore-file prefer-primordials

import { core } from "ext:core/mod.js";
import { op_is_ascii, op_is_utf8 } from "ext:core/ops";

import { TextDecoder, TextEncoder } from "ext:deno_web/08_text_encoding.js";
import { codes } from "ext:deno_node/internal/error_codes.ts";
Expand All @@ -26,10 +27,12 @@ import {
import {
isAnyArrayBuffer,
isArrayBufferView,
isTypedArray,
} from "ext:deno_node/internal/util/types.ts";
import { normalizeEncoding } from "ext:deno_node/internal/util.mjs";
import { validateBuffer } from "ext:deno_node/internal/validators.mjs";
import { isUint8Array } from "ext:deno_node/internal/util/types.ts";
import { ERR_INVALID_STATE } from "ext:deno_node/internal/errors.ts";
import {
forgivingBase64Encode,
forgivingBase64UrlEncode,
Expand Down Expand Up @@ -2536,12 +2539,58 @@ export function writeU_Int24LE(buf, value, offset, min, max) {
return offset;
}

/**
 * Checks whether the bytes of `input` are valid UTF-8.
 *
 * @param input A TypedArray (including Buffer) or an ArrayBuffer-like object.
 * @returns The boolean produced by `op_is_utf8` for the input's bytes.
 * @throws {ERR_INVALID_ARG_TYPE} If `input` is neither a TypedArray nor an
 *   ArrayBuffer-like object.
 * @throws {ERR_INVALID_STATE} If the underlying buffer has been detached.
 */
export function isUtf8(input) {
  const isView = isTypedArray(input);

  // Reject anything that is not binary data before touching its properties.
  if (!isView && !isAnyArrayBuffer(input)) {
    throw new codes.ERR_INVALID_ARG_TYPE("input", [
      "ArrayBuffer",
      "Buffer",
      "TypedArray",
    ], input);
  }

  // For a view the detached flag lives on its backing buffer.
  const backingStore = isView ? input.buffer : input;
  if (backingStore.detached) {
    throw new ERR_INVALID_STATE("Cannot validate on a detached buffer");
  }

  // Views are handed to the op as-is; raw buffers are wrapped in a Uint8Array.
  const bytes = isView ? input : new Uint8Array(input);
  return op_is_utf8(bytes);
}

/**
 * Checks whether the bytes of `input` are all ASCII.
 *
 * @param input A TypedArray (including Buffer) or an ArrayBuffer-like object.
 * @returns The boolean produced by `op_is_ascii` for the input's bytes.
 * @throws {ERR_INVALID_ARG_TYPE} If `input` is neither a TypedArray nor an
 *   ArrayBuffer-like object.
 * @throws {ERR_INVALID_STATE} If the underlying buffer has been detached.
 */
export function isAscii(input) {
  const isView = isTypedArray(input);

  // Reject anything that is not binary data before touching its properties.
  if (!isView && !isAnyArrayBuffer(input)) {
    throw new codes.ERR_INVALID_ARG_TYPE("input", [
      "ArrayBuffer",
      "Buffer",
      "TypedArray",
    ], input);
  }

  // For a view the detached flag lives on its backing buffer.
  const backingStore = isView ? input.buffer : input;
  if (backingStore.detached) {
    throw new ERR_INVALID_STATE("Cannot validate on a detached buffer");
  }

  // Views are handed to the op as-is; raw buffers are wrapped in a Uint8Array.
  const bytes = isView ? input : new Uint8Array(input);
  return op_is_ascii(bytes);
}

export default {
atob,
btoa,
Blob,
Buffer,
constants,
isAscii,
isUtf8,
kMaxLength,
kStringMaxLength,
SlowBuffer,
Expand Down
7 changes: 7 additions & 0 deletions ext/node/polyfills/internal/errors.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2564,6 +2564,12 @@ export class ERR_HTTP_SOCKET_ASSIGNED extends NodeError {
}
}

/**
 * Node-style error carrying the code `ERR_INVALID_STATE`.
 *
 * The caller-supplied text is prefixed with "Invalid state: " to form the
 * final error message.
 */
export class ERR_INVALID_STATE extends NodeError {
  constructor(message: string) {
    const fullMessage = `Invalid state: ${message}`;
    super("ERR_INVALID_STATE", fullMessage);
  }
}

interface UvExceptionContext {
syscall: string;
path?: string;
Expand Down Expand Up @@ -2824,6 +2830,7 @@ export default {
ERR_INVALID_RETURN_PROPERTY,
ERR_INVALID_RETURN_PROPERTY_VALUE,
ERR_INVALID_RETURN_VALUE,
ERR_INVALID_STATE,
ERR_INVALID_SYNC_FORK_INPUT,
ERR_INVALID_THIS,
ERR_INVALID_TUPLE,
Expand Down
2 changes: 2 additions & 0 deletions tests/node_compat/config.jsonc
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,9 @@
"test-buffer-includes.js",
"test-buffer-indexof.js",
"test-buffer-inheritance.js",
"test-buffer-isascii.js",
"test-buffer-isencoding.js",
"test-buffer-isutf8.js",
"test-buffer-iterator.js",
"test-buffer-new.js",
"test-buffer-no-negative-allocation.js",
Expand Down
2 changes: 0 additions & 2 deletions tests/node_compat/runner/TODO.md
Original file line number Diff line number Diff line change
Expand Up @@ -227,8 +227,6 @@ NOTE: This file should not be manually edited. Please edit `tests/node_compat/co
- [parallel/test-buffer-constructor-outside-node-modules.js](https://github.com/nodejs/node/tree/v18.12.1/test/parallel/test-buffer-constructor-outside-node-modules.js)
- [parallel/test-buffer-fill.js](https://github.com/nodejs/node/tree/v18.12.1/test/parallel/test-buffer-fill.js)
- [parallel/test-buffer-inspect.js](https://github.com/nodejs/node/tree/v18.12.1/test/parallel/test-buffer-inspect.js)
- [parallel/test-buffer-isascii.js](https://github.com/nodejs/node/tree/v18.12.1/test/parallel/test-buffer-isascii.js)
- [parallel/test-buffer-isutf8.js](https://github.com/nodejs/node/tree/v18.12.1/test/parallel/test-buffer-isutf8.js)
- [parallel/test-buffer-pending-deprecation.js](https://github.com/nodejs/node/tree/v18.12.1/test/parallel/test-buffer-pending-deprecation.js)
- [parallel/test-buffer-pool-untransferable.js](https://github.com/nodejs/node/tree/v18.12.1/test/parallel/test-buffer-pool-untransferable.js)
- [parallel/test-buffer-prototype-inspect.js](https://github.com/nodejs/node/tree/v18.12.1/test/parallel/test-buffer-prototype-inspect.js)
Expand Down
49 changes: 49 additions & 0 deletions tests/node_compat/test/parallel/test-buffer-isascii.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// deno-fmt-ignore-file
// deno-lint-ignore-file

// Copyright Joyent and Node contributors. All rights reserved. MIT license.
// Taken from Node 18.12.1
// This file is automatically generated by `tests/node_compat/runner/setup.ts`. Do not modify this file manually.

'use strict';

// Exercises `buffer.isAscii`: accepted inputs, rejected argument types, and
// detached ArrayBuffers.

require('../common');
const assert = require('assert');
const { isAscii, Buffer } = require('buffer');
const { TextEncoder } = require('util');

const encoder = new TextEncoder();

// Plain ASCII text passes, a non-ASCII character fails, and an empty buffer
// is reported as ASCII.
assert.strictEqual(isAscii(encoder.encode('hello')), true);
assert.strictEqual(isAscii(encoder.encode('ğ')), false);
assert.strictEqual(isAscii(Buffer.from([])), true);

// Each of these non-binary values must be rejected with ERR_INVALID_ARG_TYPE.
[
undefined,
'', 'hello',
false, true,
0, 1,
0n, 1n,
Symbol(),
() => {},
{}, [], null,
].forEach((input) => {
assert.throws(
() => { isAscii(input); },
{
code: 'ERR_INVALID_ARG_TYPE',
},
);
});

{
// Test with detached array buffers
const arrayBuffer = new ArrayBuffer(1024);
// Listing the buffer in `transfer` detaches the original `arrayBuffer`.
structuredClone(arrayBuffer, { transfer: [arrayBuffer] });
assert.throws(
() => { isAscii(arrayBuffer); },
{
code: 'ERR_INVALID_STATE'
}
);
}
93 changes: 93 additions & 0 deletions tests/node_compat/test/parallel/test-buffer-isutf8.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
// deno-fmt-ignore-file
// deno-lint-ignore-file

// Copyright Joyent and Node contributors. All rights reserved. MIT license.
// Taken from Node 18.12.1
// This file is automatically generated by `tests/node_compat/runner/setup.ts`. Do not modify this file manually.

'use strict';

// Exercises `buffer.isUtf8`: valid inputs, malformed byte sequences, rejected
// argument types, and detached ArrayBuffers.

require('../common');
const assert = require('assert');
const { isUtf8, Buffer } = require('buffer');
const { TextEncoder } = require('util');

const encoder = new TextEncoder();

// Encoder output (ASCII and multi-byte) and an empty buffer are all valid UTF-8.
assert.strictEqual(isUtf8(encoder.encode('hello')), true);
assert.strictEqual(isUtf8(encoder.encode('ğ')), true);
assert.strictEqual(isUtf8(Buffer.from([])), true);

// Every byte sequence below is malformed and must make isUtf8 return false.
// Taken from test/fixtures/wpt/encoding/textdecoder-fatal.any.js
[
[0xFF], // 'invalid code'
[0xC0], // 'ends early'
[0xE0], // 'ends early 2'
[0xC0, 0x00], // 'invalid trail'
[0xC0, 0xC0], // 'invalid trail 2'
[0xE0, 0x00], // 'invalid trail 3'
[0xE0, 0xC0], // 'invalid trail 4'
[0xE0, 0x80, 0x00], // 'invalid trail 5'
[0xE0, 0x80, 0xC0], // 'invalid trail 6'
[0xFC, 0x80, 0x80, 0x80, 0x80, 0x80], // '> 0x10FFFF'
[0xFE, 0x80, 0x80, 0x80, 0x80, 0x80], // 'obsolete lead byte'

// Overlong encodings
[0xC0, 0x80], // 'overlong U+0000 - 2 bytes'
[0xE0, 0x80, 0x80], // 'overlong U+0000 - 3 bytes'
[0xF0, 0x80, 0x80, 0x80], // 'overlong U+0000 - 4 bytes'
[0xF8, 0x80, 0x80, 0x80, 0x80], // 'overlong U+0000 - 5 bytes'
[0xFC, 0x80, 0x80, 0x80, 0x80, 0x80], // 'overlong U+0000 - 6 bytes'

[0xC1, 0xBF], // 'overlong U+007F - 2 bytes'
[0xE0, 0x81, 0xBF], // 'overlong U+007F - 3 bytes'
[0xF0, 0x80, 0x81, 0xBF], // 'overlong U+007F - 4 bytes'
[0xF8, 0x80, 0x80, 0x81, 0xBF], // 'overlong U+007F - 5 bytes'
[0xFC, 0x80, 0x80, 0x80, 0x81, 0xBF], // 'overlong U+007F - 6 bytes'

[0xE0, 0x9F, 0xBF], // 'overlong U+07FF - 3 bytes'
[0xF0, 0x80, 0x9F, 0xBF], // 'overlong U+07FF - 4 bytes'
[0xF8, 0x80, 0x80, 0x9F, 0xBF], // 'overlong U+07FF - 5 bytes'
[0xFC, 0x80, 0x80, 0x80, 0x9F, 0xBF], // 'overlong U+07FF - 6 bytes'

[0xF0, 0x8F, 0xBF, 0xBF], // 'overlong U+FFFF - 4 bytes'
[0xF8, 0x80, 0x8F, 0xBF, 0xBF], // 'overlong U+FFFF - 5 bytes'
[0xFC, 0x80, 0x80, 0x8F, 0xBF, 0xBF], // 'overlong U+FFFF - 6 bytes'

[0xF8, 0x84, 0x8F, 0xBF, 0xBF], // 'overlong U+10FFFF - 5 bytes'
[0xFC, 0x80, 0x84, 0x8F, 0xBF, 0xBF], // 'overlong U+10FFFF - 6 bytes'

// UTF-16 surrogates encoded as code points in UTF-8
[0xED, 0xA0, 0x80], // 'lead surrogate'
[0xED, 0xB0, 0x80], // 'trail surrogate'
[0xED, 0xA0, 0x80, 0xED, 0xB0, 0x80], // 'surrogate pair'
].forEach((input) => {
assert.strictEqual(isUtf8(Buffer.from(input)), false);
});

// Each of these non-binary values must be rejected with ERR_INVALID_ARG_TYPE.
[
null,
undefined,
'hello',
true,
false,
].forEach((input) => {
assert.throws(
() => { isUtf8(input); },
{
code: 'ERR_INVALID_ARG_TYPE',
},
);
});

{
// Test with detached array buffers
const arrayBuffer = new ArrayBuffer(1024);
// Listing the buffer in `transfer` detaches the original `arrayBuffer`.
structuredClone(arrayBuffer, { transfer: [arrayBuffer] });
assert.throws(
() => { isUtf8(arrayBuffer); },
{
code: 'ERR_INVALID_STATE'
}
);
}
1 change: 1 addition & 0 deletions tools/copyright_checker.js
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ export async function checkCopyright() {
// show all the errors at the same time to prevent overlap with
// other running scripts that may be outputting
console.error(errors.join("\n"));
console.error(`Expected copyright:\n\`\`\`\n${COPYRIGHT_LINE}\n\`\`\``);
throw new Error(`Copyright checker had ${errors.length} errors.`);
}
}
Expand Down

0 comments on commit 8f2d171

Please sign in to comment.