Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
a35ea1e
WIP: add binaryview and uft8view support
GeorgeLeePatterson Oct 29, 2025
5c5640a
feat: Add support for BinaryView and Utf8View types
GeorgeLeePatterson Oct 30, 2025
675b2f2
Add Apache license headers to fix RAT check
GeorgeLeePatterson Nov 1, 2025
73bda86
Fix Jest dynamic import errors by removing moduleResolution: NodeNext…
GeorgeLeePatterson Nov 1, 2025
456f85d
chore: Trigger CI validation on fork
GeorgeLeePatterson Nov 1, 2025
dfe9d56
fix: Add new files to RAT exclusion list
GeorgeLeePatterson Nov 1, 2025
21a778f
Revert "fix: Add new files to RAT exclusion list"
GeorgeLeePatterson Nov 1, 2025
e9d180b
fix: Correct license header format in update_flatbuffers.sh
GeorgeLeePatterson Nov 1, 2025
8d5bf77
fix: Export BinaryView and Utf8View types
GeorgeLeePatterson Nov 1, 2025
41f2d3e
fix: Export BinaryView and Utf8View in Arrow.dom.ts
GeorgeLeePatterson Nov 1, 2025
7cfb4dc
Address code review feedback
GeorgeLeePatterson Nov 2, 2025
2b3396e
Add BinaryView/Utf8View builders with comprehensive tests
GeorgeLeePatterson Oct 31, 2025
a28f69f
fix: Use toHaveLength() for jest length assertions
GeorgeLeePatterson Nov 1, 2025
5b312d5
Add BinaryViewBuilder and Utf8ViewBuilder exports
GeorgeLeePatterson Nov 1, 2025
5344b8f
Simplify byteLength calculation in view builders
GeorgeLeePatterson Nov 2, 2025
0576c00
ci: Enable BinaryView integration tests in Archery
GeorgeLeePatterson Nov 4, 2025
9502316
fix: Add Apache license header to patch file
GeorgeLeePatterson Nov 4, 2025
38bbee6
fix: Add BinaryView and Utf8View support to JSON type parser
GeorgeLeePatterson Nov 4, 2025
f174463
fix: Add readVariadicBuffers method to JSONVectorLoader
GeorgeLeePatterson Nov 4, 2025
86b58d8
feat: Add JSON format support for BinaryView/Utf8View variadic buffers
GeorgeLeePatterson Nov 4, 2025
f3817f5
feat: Add JSONVectorLoader support for BinaryView/Utf8View VIEWS buffer
GeorgeLeePatterson Nov 4, 2025
c664a79
feat: Add JSONVectorAssembler support for BinaryView/Utf8View (JSON w…
GeorgeLeePatterson Nov 4, 2025
00bb3c9
Merge remote-tracking branch 'origin/feat/binary-utf8-view-builders' …
GeorgeLeePatterson Nov 4, 2025
fe417a6
fix: Complete BinaryView/Utf8View JSON format support
GeorgeLeePatterson Nov 4, 2025
4c399d0
refactor: Extract hexStringToBytes helper and improve documentation
GeorgeLeePatterson Nov 4, 2025
e5290aa
feat: Add ListView and LargeListView type support
GeorgeLeePatterson Nov 1, 2025
02144ff
Add ListView and LargeListView read support
GeorgeLeePatterson Nov 1, 2025
77131b4
Add ListView and LargeListView exports
GeorgeLeePatterson Nov 1, 2025
233f233
Add ListView and LargeListView type enum entries
GeorgeLeePatterson Nov 2, 2025
819c2bd
Add ListView and LargeListView builders
GeorgeLeePatterson Nov 1, 2025
cf67aae
fix: Use toHaveLength() for jest length assertions
GeorgeLeePatterson Nov 1, 2025
61d3169
Add ListViewBuilder and LargeListViewBuilder exports to Arrow.dom.ts
GeorgeLeePatterson Nov 1, 2025
de1e8a7
fix: Replace BigInt literals with BigInt() constructor for ES5 compat…
GeorgeLeePatterson Nov 1, 2025
ef91586
feat: Add LargeList type support
GeorgeLeePatterson Nov 2, 2025
3212b9a
feat: Export LargeList and LargeListBuilder from main module
GeorgeLeePatterson Nov 2, 2025
031e370
feat: Add RunEndEncoded (Type 22) support
GeorgeLeePatterson Nov 2, 2025
8ccb655
feat: Add RunEndEncoded and LargeList to Arrow.dom.ts exports
GeorgeLeePatterson Nov 2, 2025
d1a4b63
feat: Add RunEndEncodedIterator with O(1) amortized sequential access
GeorgeLeePatterson Nov 2, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions .github/patches/enable-binaryview-integration-tests.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

diff --git a/dev/archery/archery/integration/datagen.py b/dev/archery/archery/integration/datagen.py
index 83913dc379..7ace28e1be 100644
--- a/dev/archery/archery/integration/datagen.py
+++ b/dev/archery/archery/integration/datagen.py
@@ -2003,7 +2003,6 @@ def get_generated_json_files(tempdir=None):
.skip_tester('Rust'),

generate_binary_view_case()
- .skip_tester('JS')
# TODO(https://github.com/apache/arrow-nanoarrow/issues/618)
.skip_tester('nanoarrow')
.skip_tester('Rust'),
3 changes: 3 additions & 0 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,9 @@ jobs:
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
with:
python-version: 3
- name: Patch Archery to enable BinaryView tests
run: |
patch -p1 < js/.github/patches/enable-binaryview-integration-tests.patch
- name: Setup Archery
run: pip install -e dev/archery[docker]
- name: Execute Docker Build
Expand Down
77 changes: 77 additions & 0 deletions scripts/update_flatbuffers.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Regenerate the FlatBuffers helper files used by arrow-js.
#
# Requirements:
#   * a working `flatc` on PATH
#   * the Arrow FlatBuffers schemas (*.fbs). Their directory is read from the
#     FORMAT_DIR environment variable when it is set; otherwise it defaults to
#     the format/ directory of a sibling apache/arrow checkout (../arrow).

set -euo pipefail

PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
FORMAT_DIR="${FORMAT_DIR:-${PROJECT_ROOT}/../arrow/format}"

if [[ ! -d "${FORMAT_DIR}" ]]; then
  echo "error: expected FlatBuffers schemas in ${FORMAT_DIR}" >&2
  exit 1
fi

if ! command -v flatc >/dev/null 2>&1; then
  echo "error: flatc not found on PATH" >&2
  exit 1
fi

# Scratch directory inside the project. Deliberately NOT named TMPDIR: that
# variable is conventionally exported and read by other tools, so reassigning
# it would leak into every child process started below.
WORK_DIR="$(mktemp -d "${PROJECT_ROOT}/.flatc.XXXXXX")"
cleanup() {
  rm -rf "${WORK_DIR}"
}
trap cleanup EXIT

schemas=(File Schema Message Tensor SparseTensor)

# Write namespace-stripped copies of the schemas into the scratch directory.
# sed streams from the source file to the copy instead of editing in place:
# `sed -i ''` is the BSD/macOS spelling and fails under GNU sed, which treats
# the empty string as an input file name.
schema_files=()
for schema in "${schemas[@]}"; do
  sed \
    -e 's/namespace org.apache.arrow.flatbuf;//g' \
    -e 's/org\.apache\.arrow\.flatbuf\.//g' \
    "${FORMAT_DIR}/${schema}.fbs" > "${WORK_DIR}/${schema}.fbs"
  schema_files+=("${WORK_DIR}/${schema}.fbs")
done

flatc --ts --ts-flat-files --ts-omit-entrypoint \
  -o "${WORK_DIR}" \
  "${schema_files[@]}"

# Drop the schema copies so only flatc output remains in the scratch directory.
rm -f "${schema_files[@]}"

# Generated files that are copied into src/fb; anything else flatc emitted is
# discarded together with the scratch directory.
generated_files=(
  binary-view.ts
  list-view.ts
  large-list-view.ts
  message.ts
  record-batch.ts
  schema.ts
  type.ts
  utf8-view.ts
)

for file in "${generated_files[@]}"; do
  if [[ ! -f "${WORK_DIR}/${file}" ]]; then
    echo "error: expected generated file ${file} not found" >&2
    exit 1
  fi
  install -m 0644 "${WORK_DIR}/${file}" "${PROJECT_ROOT}/src/fb/${file}"
done
13 changes: 7 additions & 6 deletions src/Arrow.dom.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,16 @@ export {
Bool,
Int, Int8, Int16, Int32, Int64, Uint8, Uint16, Uint32, Uint64,
Float, Float16, Float32, Float64,
Utf8, LargeUtf8,
Binary, LargeBinary,
Utf8, LargeUtf8, Utf8View,
Binary, LargeBinary, BinaryView,
FixedSizeBinary,
Date_, DateDay, DateMillisecond,
Timestamp, TimestampSecond, TimestampMillisecond, TimestampMicrosecond, TimestampNanosecond,
Time, TimeSecond, TimeMillisecond, TimeMicrosecond, TimeNanosecond,
Decimal,
List,
List, LargeList, ListView, LargeListView,
Struct, StructRow,
RunEndEncoded,
Union, DenseUnion, SparseUnion,
Dictionary,
Interval, IntervalDayTime, IntervalYearMonth, IntervalMonthDayNano,
Expand All @@ -81,7 +82,7 @@ export {
} from './Arrow.js';

export {
BinaryBuilder, LargeBinaryBuilder,
BinaryBuilder, BinaryViewBuilder, LargeBinaryBuilder,
BoolBuilder,
DateBuilder, DateDayBuilder, DateMillisecondBuilder,
DecimalBuilder,
Expand All @@ -92,12 +93,12 @@ export {
IntervalBuilder, IntervalDayTimeBuilder, IntervalYearMonthBuilder, IntervalMonthDayNanoBuilder,
DurationBuilder, DurationSecondBuilder, DurationMillisecondBuilder, DurationMicrosecondBuilder, DurationNanosecondBuilder,
IntBuilder, Int8Builder, Int16Builder, Int32Builder, Int64Builder, Uint8Builder, Uint16Builder, Uint32Builder, Uint64Builder,
ListBuilder,
ListBuilder, ListViewBuilder, LargeListViewBuilder,
MapBuilder,
NullBuilder,
StructBuilder,
TimestampBuilder, TimestampSecondBuilder, TimestampMillisecondBuilder, TimestampMicrosecondBuilder, TimestampNanosecondBuilder,
TimeBuilder, TimeSecondBuilder, TimeMillisecondBuilder, TimeMicrosecondBuilder, TimeNanosecondBuilder,
UnionBuilder, DenseUnionBuilder, SparseUnionBuilder,
Utf8Builder, LargeUtf8Builder
Utf8Builder, Utf8ViewBuilder, LargeUtf8Builder
} from './Arrow.js';
13 changes: 9 additions & 4 deletions src/Arrow.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,21 +37,22 @@ export {
Bool,
Int, Int8, Int16, Int32, Int64, Uint8, Uint16, Uint32, Uint64,
Float, Float16, Float32, Float64,
Utf8, LargeUtf8,
Binary, LargeBinary,
Utf8, LargeUtf8, Utf8View,
Binary, LargeBinary, BinaryView,
FixedSizeBinary,
Date_, DateDay, DateMillisecond,
Timestamp, TimestampSecond, TimestampMillisecond, TimestampMicrosecond, TimestampNanosecond,
Time, TimeSecond, TimeMillisecond, TimeMicrosecond, TimeNanosecond,
Decimal,
List,
List, LargeList, ListView, LargeListView,
Struct,
Union, DenseUnion, SparseUnion,
Dictionary,
Interval, IntervalDayTime, IntervalYearMonth, IntervalMonthDayNano,
Duration, DurationSecond, DurationMillisecond, DurationMicrosecond, DurationNanosecond,
FixedSizeList,
Map_
Map_,
RunEndEncoded
} from './type.js';

export { Table, makeTable, tableFromArrays } from './table.js';
Expand Down Expand Up @@ -79,10 +80,14 @@ export { TimestampBuilder, TimestampSecondBuilder, TimestampMillisecondBuilder,
export { IntervalBuilder, IntervalDayTimeBuilder, IntervalYearMonthBuilder, IntervalMonthDayNanoBuilder } from './builder/interval.js';
export { DurationBuilder, DurationSecondBuilder, DurationMillisecondBuilder, DurationMicrosecondBuilder, DurationNanosecondBuilder } from './builder/duration.js';
export { Utf8Builder } from './builder/utf8.js';
export { Utf8ViewBuilder } from './builder/utf8view.js';
export { LargeUtf8Builder } from './builder/largeutf8.js';
export { BinaryBuilder } from './builder/binary.js';
export { BinaryViewBuilder } from './builder/binaryview.js';
export { LargeBinaryBuilder } from './builder/largebinary.js';
export { ListBuilder } from './builder/list.js';
export { LargeListBuilder } from './builder/largelist.js';
export { ListViewBuilder, LargeListViewBuilder } from './builder/listview.js';
export { FixedSizeListBuilder } from './builder/fixedsizelist.js';
export { MapBuilder } from './builder/map.js';
export { StructBuilder } from './builder/struct.js';
Expand Down
4 changes: 2 additions & 2 deletions src/builder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import {
DataType, strideForType,
Float, Int, Decimal, FixedSizeBinary,
Date_, Time, Timestamp, Interval, Duration,
Utf8, LargeUtf8, Binary, LargeBinary, List, Map_,
Utf8, LargeUtf8, Binary, LargeBinary, List, LargeList, Map_,
} from './type.js';
import { createIsValidFunction } from './builder/valid.js';
import { BufferBuilder, BitmapBufferBuilder, DataBufferBuilder, OffsetsBufferBuilder } from './builder/buffer.js';
Expand Down Expand Up @@ -357,7 +357,7 @@ export abstract class FixedWidthBuilder<T extends Int | Float | FixedSizeBinary
}

/** @ignore */
export abstract class VariableWidthBuilder<T extends Binary | LargeBinary | Utf8 | LargeUtf8 | List | Map_, TNull = any> extends Builder<T, TNull> {
export abstract class VariableWidthBuilder<T extends Binary | LargeBinary | Utf8 | LargeUtf8 | List | LargeList | Map_, TNull = any> extends Builder<T, TNull> {
protected _pendingLength = 0;
protected _offsets: OffsetsBufferBuilder<T>;
protected _pending: Map<number, any> | undefined;
Expand Down
169 changes: 169 additions & 0 deletions src/builder/binaryview.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

import { BinaryView } from '../type.js';
import { Builder, BuilderOptions } from '../builder.js';
import { BufferBuilder } from './buffer.js';
import { toUint8Array } from '../util/buffer.js';
import { makeData } from '../data.js';

/**
 * Grows `views` when necessary so that the view struct for `index` fits, and
 * returns the byte offset at which that struct starts.
 * @ignore
 */
function ensureViewSlot(views: BufferBuilder<Uint8Array>, index: number): number {
    const shortfall = (index + 1) * BinaryView.ELEMENT_WIDTH - views.length;
    if (shortfall > 0) {
        views.reserve(shortfall);
    }
    return index * BinaryView.ELEMENT_WIDTH;
}

/**
 * Builder for `BinaryView` columns.
 *
 * Each element is written as a fixed-width view struct of
 * `BinaryView.ELEMENT_WIDTH` bytes in the views buffer. Values of at most
 * `BinaryView.INLINE_CAPACITY` bytes are stored inside the struct itself;
 * longer values are appended to a variadic data buffer and the struct records
 * a prefix of the value plus the buffer index and offset where it was written.
 * @ignore
 */
export class BinaryViewBuilder<TNull = any> extends Builder<BinaryView, TNull> {
    /** Backing storage for the view structs, one per element. */
    protected _views: BufferBuilder<Uint8Array>;
    /** Variadic data buffers that have already been sealed. */
    protected _variadicBuffers: Uint8Array[] = [];
    /** Variadic data buffer currently receiving out-of-line values, if any. */
    protected _currentBuffer: BufferBuilder<Uint8Array> | null = null;
    /** Index the current buffer will occupy in `_variadicBuffers` once sealed. */
    protected _currentBufferIndex = 0;
    /** Number of bytes already written into the current buffer. */
    protected _currentBufferOffset = 0;
    /** Byte threshold (32 * 1024 * 1024) beyond which a new variadic buffer is started. */
    protected readonly _bufferSize = 32 * 1024 * 1024;

    constructor(opts: BuilderOptions<BinaryView, TNull>) {
        super(opts);
        this._views = new BufferBuilder(Uint8Array);
    }

    /** Combined byte size of the views, the validity bitmap and all variadic data. */
    public get byteLength(): number {
        let total = 0;
        if (this._views) {
            total += this._views.byteLength;
        }
        if (this._nulls) {
            total += this._nulls.byteLength;
        }
        for (const sealed of this._variadicBuffers) {
            total += sealed.byteLength;
        }
        if (this._currentBuffer) {
            total += this._currentBuffer.byteLength;
        }
        return total;
    }

    /**
     * Writes the view struct for `value` at `index`, copying the bytes either
     * into the struct (short values) or into a variadic buffer (long values).
     */
    public setValue(index: number, value: Uint8Array) {
        const bytes = toUint8Array(value);
        const byteCount = bytes.length;

        const slotStart = ensureViewSlot(this._views, index);
        const viewBytes = this._views.buffer;
        const slot = new DataView(viewBytes.buffer, viewBytes.byteOffset + slotStart, BinaryView.ELEMENT_WIDTH);
        const inlineStart = slotStart + BinaryView.INLINE_OFFSET;

        // Every view starts with the value length, stored little-endian.
        slot.setInt32(BinaryView.LENGTH_OFFSET, byteCount, true);

        if (byteCount <= BinaryView.INLINE_CAPACITY) {
            // Short value: the payload lives in the struct; pad the unused tail with zeros.
            viewBytes.set(bytes, inlineStart);
            viewBytes.fill(0, inlineStart + byteCount, inlineStart + BinaryView.INLINE_CAPACITY);
            return this;
        }

        // Long value: the struct keeps the first four payload bytes as a prefix.
        viewBytes.set(bytes.subarray(0, 4), inlineStart);

        // Seal the active data buffer and open a new one when there is none yet
        // or when this value would push it past the size threshold.
        let target = this._currentBuffer;
        if (!target || this._currentBufferOffset + byteCount > this._bufferSize) {
            if (target) {
                this._variadicBuffers.push(target.buffer.slice(0, this._currentBufferOffset));
            }
            target = new BufferBuilder(Uint8Array);
            this._currentBuffer = target;
            this._currentBufferIndex = this._variadicBuffers.length;
            this._currentBufferOffset = 0;
        }

        // Append the payload, then record where it landed.
        const writeAt = this._currentBufferOffset;
        target.reserve(byteCount).buffer.set(bytes, writeAt);

        slot.setInt32(BinaryView.BUFFER_INDEX_OFFSET, this._currentBufferIndex, true);
        slot.setInt32(BinaryView.BUFFER_OFFSET_OFFSET, writeAt, true);

        this._currentBufferOffset = writeAt + byteCount;
        return this;
    }

    /**
     * Records validity for `index`. For a null slot the whole view struct is
     * zeroed, which leaves it as a zero-length view.
     */
    public setValid(index: number, isValid: boolean) {
        if (super.setValid(index, isValid)) {
            return true;
        }
        const slotStart = ensureViewSlot(this._views, index);
        this._views.buffer.fill(0, slotStart, slotStart + BinaryView.ELEMENT_WIDTH);
        return false;
    }

    /** Discards all buffered views and variadic data, then clears the base builder state. */
    public clear() {
        this._currentBuffer = null;
        this._currentBufferOffset = 0;
        this._currentBufferIndex = 0;
        this._variadicBuffers = [];
        this._views.clear();
        return super.clear();
    }

    /**
     * Seals the pending variadic buffer, packages everything written so far
     * into a `Data` instance and resets the builder for the next batch.
     */
    public flush() {
        const { type, length, nullCount } = this;

        // A data buffer that received at least one byte becomes the last variadic buffer.
        const pending = this._currentBuffer;
        if (pending && this._currentBufferOffset > 0) {
            this._variadicBuffers.push(pending.buffer.slice(0, this._currentBufferOffset));
        }

        const views = this._views.flush(length * BinaryView.ELEMENT_WIDTH);
        const nullBitmap = nullCount > 0 ? this._nulls.flush(length) : undefined;
        const variadicBuffers = [...this._variadicBuffers];

        // clear() resets the variadic bookkeeping as well as the base builder.
        this.clear();

        return makeData({
            type,
            length,
            nullCount,
            nullBitmap,
            views,
            variadicBuffers
        });
    }

    /** Marks the builder as finished. */
    public finish() {
        this.finished = true;
        return this;
    }
}
Loading