diff --git a/ci/travis_env_common.sh b/ci/travis_env_common.sh index 568070e0433ae..42a3bbc4b5ed7 100755 --- a/ci/travis_env_common.sh +++ b/ci/travis_env_common.sh @@ -17,6 +17,8 @@ # specific language governing permissions and limitations # under the License. +# hide nodejs experimental-feature warnings +export NODE_NO_WARNINGS=1 export MINICONDA=$HOME/miniconda export PATH="$MINICONDA/bin:$PATH" export CONDA_PKGS_DIRS=$HOME/.conda_packages diff --git a/integration/integration_test.py b/integration/integration_test.py index 301017cac3d40..173fe549092b7 100644 --- a/integration/integration_test.py +++ b/integration/integration_test.py @@ -1092,15 +1092,19 @@ def file_to_stream(self, file_path, stream_path): os.system(cmd) class JSTester(Tester): - PRODUCER = False + PRODUCER = True CONSUMER = True - INTEGRATION_EXE = os.path.join(ARROW_HOME, 'js/bin/integration.js') + EXE_PATH = os.path.join(ARROW_HOME, 'js/bin') + VALIDATE = os.path.join(EXE_PATH, 'integration.js') + JSON_TO_ARROW = os.path.join(EXE_PATH, 'json-to-arrow.js') + STREAM_TO_FILE = os.path.join(EXE_PATH, 'stream-to-file.js') + FILE_TO_STREAM = os.path.join(EXE_PATH, 'file-to-stream.js') name = 'JS' - def _run(self, arrow_path=None, json_path=None, command='VALIDATE'): - cmd = [self.INTEGRATION_EXE] + def _run(self, exe_cmd, arrow_path=None, json_path=None, command='VALIDATE'): + cmd = [exe_cmd] if arrow_path is not None: cmd.extend(['-a', arrow_path]) @@ -1108,7 +1112,7 @@ def _run(self, arrow_path=None, json_path=None, command='VALIDATE'): if json_path is not None: cmd.extend(['-j', json_path]) - cmd.extend(['--mode', command]) + cmd.extend(['--mode', command, '-t', 'es5', '-m', 'umd']) if self.debug: print(' '.join(cmd)) @@ -1116,11 +1120,24 @@ def _run(self, arrow_path=None, json_path=None, command='VALIDATE'): run_cmd(cmd) def validate(self, json_path, arrow_path): - return self._run(arrow_path, json_path, 'VALIDATE') + return self._run(self.VALIDATE, arrow_path, json_path, 'VALIDATE') + + def 
json_to_file(self, json_path, arrow_path): + cmd = ['node', self.JSON_TO_ARROW, '-a', arrow_path, '-j', json_path] + cmd = ' '.join(cmd) + if self.debug: + print(cmd) + os.system(cmd) def stream_to_file(self, stream_path, file_path): - # Just copy stream to file, we can read the stream directly - cmd = ['cp', stream_path, file_path] + cmd = ['cat', stream_path, '|', 'node', self.STREAM_TO_FILE, '>', file_path] + cmd = ' '.join(cmd) + if self.debug: + print(cmd) + os.system(cmd) + + def file_to_stream(self, file_path, stream_path): + cmd = ['cat', file_path, '|', 'node', self.FILE_TO_STREAM, '>', stream_path] cmd = ' '.join(cmd) if self.debug: print(cmd) diff --git a/js/DEVELOP.md b/js/DEVELOP.md index 1dd999a9efbb6..17eb8e1d00f4d 100644 --- a/js/DEVELOP.md +++ b/js/DEVELOP.md @@ -64,13 +64,11 @@ This argument configuration also applies to `clean` and `test` scripts. * `npm run deploy` -Uses [learna](https://github.com/lerna/lerna) to publish each build target to npm with [conventional](https://conventionalcommits.org/) [changelogs](https://github.com/conventional-changelog/conventional-changelog/tree/master/packages/conventional-changelog-cli). +Uses [lerna](https://github.com/lerna/lerna) to publish each build target to npm with [conventional](https://conventionalcommits.org/) [changelogs](https://github.com/conventional-changelog/conventional-changelog/tree/master/packages/conventional-changelog-cli). # Updating the Arrow format flatbuffers generated code -Once generated, the flatbuffers format code needs to be adjusted for our TS and JS build environments. - -## TypeScript +Once generated, the flatbuffers format code needs to be adjusted for our build scripts. 1. Generate the flatbuffers TypeScript source from the Arrow project root directory: ```sh @@ -101,193 +99,3 @@ Once generated, the flatbuffers format code needs to be adjusted for our TS and ``` 1. Add `/* tslint:disable:class-name */` to the top of `Schema.ts` 1. 
Execute `npm run lint` to fix all the linting errors - -## JavaScript (for Google Closure Compiler builds) - -1. Generate the flatbuffers JS source from the Arrow project root directory - ```sh - cd $ARROW_HOME - - flatc --js --no-js-exports -o ./js/src/format ./format/*.fbs - - cd ./js/src/format - - # Delete Tensor_generated.js (skip this when we support Tensors) - rm Tensor_generated.js - - # append an ES6 export to Schema_generated.js - echo "$(cat Schema_generated.js) - export { org }; - " > Schema_generated.js - - # import Schema's "org" namespace and - # append an ES6 export to File_generated.js - echo "import { org } from './Schema'; - $(cat File_generated.js) - export { org }; - " > File_generated.js - - # import Schema's "org" namespace and - # append an ES6 export to Message_generated.js - echo "import { org } from './Schema'; - $(cat Message_generated.js) - export { org }; - " > Message_generated.js - ``` -1. Fixup the generated JS enums with the reverse value-to-key mappings to match TypeScript - `Message_generated.js` - ```js - // Replace this - org.apache.arrow.flatbuf.MessageHeader = { - NONE: 0, - Schema: 1, - DictionaryBatch: 2, - RecordBatch: 3, - Tensor: 4 - }; - // With this - org.apache.arrow.flatbuf.MessageHeader = { - NONE: 0, 0: 'NONE', - Schema: 1, 1: 'Schema', - DictionaryBatch: 2, 2: 'DictionaryBatch', - RecordBatch: 3, 3: 'RecordBatch', - Tensor: 4, 4: 'Tensor' - }; - ``` - `Schema_generated.js` - ```js - /** - * @enum - */ - org.apache.arrow.flatbuf.MetadataVersion = { - /** - * 0.1.0 - */ - V1: 0, 0: 'V1', - - /** - * 0.2.0 - */ - V2: 1, 1: 'V2', - - /** - * 0.3.0 -> 0.7.1 - */ - V3: 2, 2: 'V3', - - /** - * >= 0.8.0 - */ - V4: 3, 3: 'V4' - }; - - /** - * @enum - */ - org.apache.arrow.flatbuf.UnionMode = { - Sparse: 0, 0: 'Sparse', - Dense: 1, 1: 'Dense', - }; - - /** - * @enum - */ - org.apache.arrow.flatbuf.Precision = { - HALF: 0, 0: 'HALF', - SINGLE: 1, 1: 'SINGLE', - DOUBLE: 2, 2: 'DOUBLE', - }; - - /** - * @enum - */ - 
org.apache.arrow.flatbuf.DateUnit = { - DAY: 0, 0: 'DAY', - MILLISECOND: 1, 1: 'MILLISECOND', - }; - - /** - * @enum - */ - org.apache.arrow.flatbuf.TimeUnit = { - SECOND: 0, 0: 'SECOND', - MILLISECOND: 1, 1: 'MILLISECOND', - MICROSECOND: 2, 2: 'MICROSECOND', - NANOSECOND: 3, 3: 'NANOSECOND', - }; - - /** - * @enum - */ - org.apache.arrow.flatbuf.IntervalUnit = { - YEAR_MONTH: 0, 0: 'YEAR_MONTH', - DAY_TIME: 1, 1: 'DAY_TIME', - }; - - /** - * ---------------------------------------------------------------------- - * Top-level Type value, enabling extensible type-specific metadata. We can - * add new logical types to Type without breaking backwards compatibility - * - * @enum - */ - org.apache.arrow.flatbuf.Type = { - NONE: 0, 0: 'NONE', - Null: 1, 1: 'Null', - Int: 2, 2: 'Int', - FloatingPoint: 3, 3: 'FloatingPoint', - Binary: 4, 4: 'Binary', - Utf8: 5, 5: 'Utf8', - Bool: 6, 6: 'Bool', - Decimal: 7, 7: 'Decimal', - Date: 8, 8: 'Date', - Time: 9, 9: 'Time', - Timestamp: 10, 10: 'Timestamp', - Interval: 11, 11: 'Interval', - List: 12, 12: 'List', - Struct_: 13, 13: 'Struct_', - Union: 14, 14: 'Union', - FixedSizeBinary: 15, 15: 'FixedSizeBinary', - FixedSizeList: 16, 16: 'FixedSizeList', - Map: 17, 17: 'Map' - }; - - /** - * ---------------------------------------------------------------------- - * The possible types of a vector - * - * @enum - */ - org.apache.arrow.flatbuf.VectorType = { - /** - * used in List type, Dense Union and variable length primitive types (String, Binary) - */ - OFFSET: 0, 0: 'OFFSET', - - /** - * actual data, either wixed width primitive types in slots or variable width delimited by an OFFSET vector - */ - DATA: 1, 1: 'DATA', - - /** - * Bit vector indicating if each value is null - */ - VALIDITY: 2, 2: 'VALIDITY', - - /** - * Type vector used in Union type - */ - TYPE: 3, 3: 'TYPE' - }; - - /** - * ---------------------------------------------------------------------- - * Endianness of the platform producing the data - * - * @enum - */ - 
org.apache.arrow.flatbuf.Endianness = { - Little: 0, 0: 'Little', - Big: 1, 1: 'Big', - }; - ``` diff --git a/js/bin/file-to-stream.js b/js/bin/file-to-stream.js new file mode 100755 index 0000000000000..fa4e5d17bbd3a --- /dev/null +++ b/js/bin/file-to-stream.js @@ -0,0 +1,37 @@ +#! /usr/bin/env node + +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +const fs = require('fs'); +const path = require('path'); + +const encoding = 'binary'; +const ext = process.env.ARROW_JS_DEBUG === 'src' ? '.ts' : ''; +const { util: { PipeIterator } } = require(`../index${ext}`); +const { Table, serializeStream, fromReadableStream } = require(`../index${ext}`); + +(async () => { + // Todo (ptaylor): implement `serializeStreamAsync` that accepts an + // AsyncIterable, rather than aggregating into a Table first + const in_ = process.argv.length < 3 + ? process.stdin : fs.createReadStream(path.resolve(process.argv[2])); + const out = process.argv.length < 4 + ? 
process.stdout : fs.createWriteStream(path.resolve(process.argv[3])); + new PipeIterator(serializeStream(await Table.fromAsync(fromReadableStream(in_))), encoding).pipe(out); + +})().catch((e) => { console.error(e); process.exit(1); }); diff --git a/js/bin/integration.js b/js/bin/integration.js index 73162b6a8c3a3..6c064deac258d 100755 --- a/js/bin/integration.js +++ b/js/bin/integration.js @@ -17,61 +17,15 @@ // specific language governing permissions and limitations // under the License. -var fs = require('fs'); -var glob = require('glob'); -var path = require('path'); -var gulp = require.resolve(path.join(`..`, `node_modules/gulp/bin/gulp.js`)); -var child_process = require(`child_process`); -var optionList = [ - { - type: String, - name: 'mode', - description: 'The integration test to run' - }, - { - type: String, - name: 'arrow', alias: 'a', - multiple: true, defaultValue: [], - description: 'The Arrow file[s] to read/write' - }, - { - type: String, - name: 'json', alias: 'j', - multiple: true, defaultValue: [], - description: 'The JSON file[s] to read/write' - } -]; - -var argv = require(`command-line-args`)(optionList, { partial: true }); - -function print_usage() { - console.log(require('command-line-usage')([ - { - header: 'integration', - content: 'Script for running Arrow integration tests' - }, - { - header: 'Synopsis', - content: [ - '$ integration.js -j file.json -a file.arrow --mode validate' - ] - }, - { - header: 'Options', - optionList: [ - ...optionList, - { - name: 'help', - description: 'Print this usage guide.' 
- } - ] - }, - ])); - process.exit(1); -} +const fs = require('fs'); +const glob = require('glob'); +const path = require('path'); +const child_process = require(`child_process`); +const argv = require(`command-line-args`)(cliOpts(), { partial: true }); +const gulpPath = require.resolve(path.join(`..`, `node_modules/gulp/bin/gulp.js`)); -let jsonPaths = argv.json; -let arrowPaths = argv.arrow; +let jsonPaths = [...(argv.json || [])]; +let arrowPaths = [...(argv.arrow || [])]; if (!argv.mode) { return print_usage(); @@ -89,12 +43,13 @@ if (mode === 'VALIDATE' && !jsonPaths.length) { if (fs.existsSync(arrowPath)) { jsonPaths.push(jsonPath); arrowPaths.push(arrowPath); - console.log('-j', jsonPath, '-a', arrowPath, '\\'); } } } return [jsonPaths, arrowPaths]; }, [[], []]); + console.log(`jsonPaths: [\n\t${jsonPaths.join('\n\t')}\n]`); + console.log(`arrowPaths: [\n\t${arrowPaths.join('\n\t')}\n]`); } } else if (!jsonPaths.length) { return print_usage(); @@ -107,24 +62,61 @@ switch (mode) { args.push('-j', p, '-a', arrowPaths[i]); }); process.exitCode = child_process.spawnSync( - gulp, args, + gulpPath, args, { cwd: path.resolve(__dirname, '..'), stdio: ['ignore', 'inherit', 'inherit'] } ).status || process.exitCode || 0; - // for (let i = -1, n = jsonPaths.length; ++i < n;) { - // const jsonPath = jsonPaths[i]; - // const arrowPath = arrowPaths[i]; - // child_process.spawnSync( - // gulp, args.concat(['-j', jsonPath, '-a', arrowPath]), - // { - // cwd: path.resolve(__dirname, '..'), - // stdio: ['ignore', 'inherit', 'inherit'] - // } - // ); - // } break; default: print_usage(); } + +function cliOpts() { + return [ + { + type: String, + name: 'mode', + description: 'The integration test to run' + }, + { + type: String, + name: 'arrow', alias: 'a', + multiple: true, defaultValue: [], + description: 'The Arrow file[s] to read/write' + }, + { + type: String, + name: 'json', alias: 'j', + multiple: true, defaultValue: [], + description: 'The JSON file[s] to read/write' + 
} + ]; +} + +function print_usage() { + console.log(require('command-line-usage')([ + { + header: 'integration', + content: 'Script for running Arrow integration tests' + }, + { + header: 'Synopsis', + content: [ + '$ integration.js -j file.json -a file.arrow --mode validate' + ] + }, + { + header: 'Options', + optionList: [ + ...cliOpts(), + { + name: 'help', + description: 'Print this usage guide.' + } + ] + }, + ])); + process.exit(1); +} diff --git a/js/bin/json-to-arrow.js b/js/bin/json-to-arrow.js new file mode 100755 index 0000000000000..f28b4145ffaed --- /dev/null +++ b/js/bin/json-to-arrow.js @@ -0,0 +1,99 @@ +#! /usr/bin/env node + +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +const fs = require('fs'); +const glob = require('glob'); +const path = require('path'); +const { promisify } = require('util'); +const { parse } = require('json-bignum'); +const argv = require(`command-line-args`)(cliOpts(), { partial: true }); + +const ext = process.env.ARROW_JS_DEBUG === 'src' ? 
'.ts' : ''; +const { Table } = require(`../index${ext}`); + +const encoding = 'binary'; +const stream = argv.format === 'stream'; +const jsonPaths = [...(argv.json || [])]; +const arrowPaths = [...(argv.arrow || [])]; + +if (!jsonPaths.length || !arrowPaths.length || (jsonPaths.length !== arrowPaths.length)) { + return print_usage(); +} + +const readFile = callResolved(promisify(fs.readFile)); +const writeFile = callResolved(promisify(fs.writeFile)); + +(async () => await Promise.all(jsonPaths.map(async (jPath, i) => { + const aPath = arrowPaths[i]; + const arrowTable = Table.from(parse('' + (await readFile(jPath)))); + await writeFile(aPath, arrowTable.serialize(encoding, stream), encoding); +})))().catch((e) => { console.error(e); process.exit(1); }); + +function callResolved(fn) { + return async (path_, ...xs) => await fn(path.resolve(path_), ...xs); +} + +function cliOpts() { + return [ + { + type: String, + name: 'format', alias: 'f', + multiple: false, defaultValue: 'file', + description: 'The Arrow format to write, either "file" or "stream"' + }, + { + type: String, + name: 'arrow', alias: 'a', + multiple: true, defaultValue: [], + description: 'The Arrow file[s] to write' + }, + { + type: String, + name: 'json', alias: 'j', + multiple: true, defaultValue: [], + description: 'The JSON file[s] to read' + } + ]; +} + +function print_usage() { + console.log(require('command-line-usage')([ + { + header: 'json-to-arrow', + content: 'Script for converting an JSON Arrow file to a binary Arrow file' + }, + { + header: 'Synopsis', + content: [ + '$ json-to-arrow.js -j in.json -a out.arrow -f stream' + ] + }, + { + header: 'Options', + optionList: [ + ...cliOpts(), + { + name: 'help', + description: 'Print this usage guide.' 
+ } + ] + }, + ])); + process.exit(1); +} diff --git a/js/bin/print-buffer-alignment.js b/js/bin/print-buffer-alignment.js new file mode 100755 index 0000000000000..a4cd9bb2351e7 --- /dev/null +++ b/js/bin/print-buffer-alignment.js @@ -0,0 +1,50 @@ +#! /usr/bin/env node + +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +const fs = require('fs'); +const path = require('path'); + +const ext = process.env.ARROW_JS_DEBUG === 'src' ? '.ts' : ''; +const base = process.env.ARROW_JS_DEBUG === 'src' ? '../src' : '../targets/apache-arrow'; +const { Message } = require(`${base}/ipc/metadata${ext}`); +const { readBuffersAsync } = require(`${base}/ipc/reader/binary${ext}`); +const { Table, VectorVisitor, fromReadableStream } = require(`../index${ext}`); + +(async () => { + const in_ = process.argv.length < 3 + ? 
process.stdin : fs.createReadStream(path.resolve(process.argv[2])); + + let recordBatchIndex = 0; + let dictionaryBatchIndex = 0; + + for await (let { message, loader } of readBuffersAsync(fromReadableStream(in_))) { + + if (Message.isRecordBatch(message)) { + console.log(`record batch ${++recordBatchIndex}, offset ${loader.messageOffset}`); + } else if (Message.isDictionaryBatch(message)) { + message = message.data; + console.log(`dictionary batch ${++dictionaryBatchIndex}, offset ${loader.messageOffset}`); + } else { continue; } + + message.buffers.forEach(({offset, length}, i) => { + console.log(`\tbuffer ${i+1}: { offset: ${offset}, length: ${length} }`); + }); + } + +})().catch((e) => { console.error(e); process.exit(1); }); diff --git a/js/bin/stream-to-file.js b/js/bin/stream-to-file.js new file mode 100755 index 0000000000000..f33646ac61a41 --- /dev/null +++ b/js/bin/stream-to-file.js @@ -0,0 +1,37 @@ +#! /usr/bin/env node + +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +const fs = require('fs'); +const path = require('path'); + +const encoding = 'binary'; +const ext = process.env.ARROW_JS_DEBUG === 'src' ? 
'.ts' : ''; +const { util: { PipeIterator } } = require(`../index${ext}`); +const { Table, serializeFile, fromReadableStream } = require(`../index${ext}`); + +(async () => { + // Todo (ptaylor): implement `serializeFileAsync` that accepts an + // AsyncIterable, rather than aggregating into a Table first + const in_ = process.argv.length < 3 + ? process.stdin : fs.createReadStream(path.resolve(process.argv[2])); + const out = process.argv.length < 4 + ? process.stdout : fs.createWriteStream(path.resolve(process.argv[3])); + new PipeIterator(serializeFile(await Table.fromAsync(fromReadableStream(in_))), encoding).pipe(out); + +})().catch((e) => { console.error(e); process.exit(1); }); diff --git a/js/gulp/argv.js b/js/gulp/argv.js index 8a83820c1fe59..7dceb0f74c587 100644 --- a/js/gulp/argv.js +++ b/js/gulp/argv.js @@ -35,10 +35,14 @@ const argv = require(`command-line-args`)([ const { targets, modules } = argv; -argv.target && !targets.length && targets.push(argv.target); -argv.module && !modules.length && modules.push(argv.module); -(argv.all || !targets.length) && targets.push(`all`); -(argv.all || !modules.length) && modules.push(`all`); +if (argv.target === `src`) { + argv.target && !targets.length && targets.push(argv.target); +} else { + argv.target && !targets.length && targets.push(argv.target); + argv.module && !modules.length && modules.push(argv.module); + (argv.all || !targets.length) && targets.push(`all`); + (argv.all || !modules.length) && modules.push(`all`); +} if (argv.coverage && (!argv.json_files || !argv.json_files.length)) { diff --git a/js/gulp/arrow-task.js b/js/gulp/arrow-task.js index eb83a6da31028..95fc1eed0f84e 100644 --- a/js/gulp/arrow-task.js +++ b/js/gulp/arrow-task.js @@ -20,10 +20,13 @@ const { targetDir, observableFromStreams } = require('./util'); +const del = require('del'); const gulp = require('gulp'); const path = require('path'); +const { promisify } = require('util'); const gulpRename = require(`gulp-rename`); const { 
memoizeTask } = require('./memoize-task'); +const exec = promisify(require('child_process').exec); const { Observable, ReplaySubject } = require('rxjs'); const arrowTask = ((cache) => memoizeTask(cache, function copyMain(target, format) { @@ -48,8 +51,11 @@ const arrowTask = ((cache) => memoizeTask(cache, function copyMain(target, forma ).publish(new ReplaySubject()).refCount(); }))({}); -const arrowTSTask = ((cache) => memoizeTask(cache, function copyTS(target, format) { - return observableFromStreams(gulp.src(`src/**/*.ts`), gulp.dest(targetDir(target, format))); +const arrowTSTask = ((cache) => memoizeTask(cache, async function copyTS(target, format) { + const out = targetDir(target, format); + await exec(`mkdirp ${out}`); + await exec(`shx cp -r src/* ${out}`); + await del(`${out}/**/*.js`); }))({}); diff --git a/js/gulp/build-task.js b/js/gulp/build-task.js index 01152e662fcec..5eabb825c231c 100644 --- a/js/gulp/build-task.js +++ b/js/gulp/build-task.js @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +const { Observable } = require('rxjs'); const { npmPkgName } = require('./util'); const { memoizeTask } = require('./memoize-task'); @@ -24,7 +25,8 @@ const typescriptTask = require('./typescript-task'); const { arrowTask, arrowTSTask } = require('./arrow-task'); const buildTask = ((cache) => memoizeTask(cache, function build(target, format, ...args) { - return target === npmPkgName ? arrowTask(target, format, ...args)() + return target === `src` ? Observable.empty() + : target === npmPkgName ? arrowTask(target, format, ...args)() : target === `ts` ? arrowTSTask(target, format, ...args)() : format === `umd` ? target === `es5` ? 
closureTask(target, format, ...args)() : uglifyTask(target, format, ...args)() diff --git a/js/gulp/closure-task.js b/js/gulp/closure-task.js index 8833c2c2228ff..547e760a7fa8a 100644 --- a/js/gulp/closure-task.js +++ b/js/gulp/closure-task.js @@ -28,8 +28,6 @@ const path = require('path'); const sourcemaps = require('gulp-sourcemaps'); const { memoizeTask } = require('./memoize-task'); const { compileBinFiles } = require('./typescript-task'); -const ASTBuilders = require('ast-types').builders; -const transformAST = require('gulp-transform-js-ast'); const { Observable, ReplaySubject } = require('rxjs'); const closureCompiler = require('google-closure-compiler').gulp(); @@ -50,9 +48,6 @@ const closureTask = ((cache) => memoizeTask(cache, function closure(target, form ], { base: `./` }), sourcemaps.init(), closureCompiler(createClosureArgs(entry, externs)), - // Strip out closure compiler's error-throwing iterator-return methods - // see this issue: https://github.com/google/closure-compiler/issues/2728 - transformAST(iteratorReturnVisitor), // rename the sourcemaps from *.js.map files to *.min.js.map sourcemaps.write(`.`, { mapFile: (mapPath) => mapPath.replace(`.js.map`, `.${target}.min.js.map`) }), gulp.dest(out) @@ -105,24 +100,3 @@ const createClosureArgs = (entry, externs) => ({ module.exports = closureTask; module.exports.closureTask = closureTask; - -const iteratorReturnVisitor = { - visitObjectExpression(p) { - const node = p.node, value = p.value; - if (!node.properties || !(node.properties.length === 3)) { return value; } - if (!propertyIsThrowingIteratorReturn(node.properties[2])) { return value; } - value.properties = value.properties.slice(0, 2); - return value; - } -}; - -function propertyIsThrowingIteratorReturn(p) { - if (!p || !(p.kind === 'init')) { return false; } - if (!p.key || !(p.key.type === 'Identifier') || !(p.key.name === 'return')) { return false; } - if (!p.value || !(p.value.type === 'FunctionExpression') || !p.value.params || 
!(p.value.params.length === 0)) { return false; } - if (!p.value.body || !p.value.body.body || !(p.value.body.body.length === 1) || !(p.value.body.body[0].type === 'ThrowStatement')) { return false; } - if (!p.value.body.body[0].argument || !(p.value.body.body[0].argument.type === 'CallExpression')) { return false; } - if (!p.value.body.body[0].argument.arguments || !(p.value.body.body[0].argument.arguments.length === 1)) { return false; } - if (!p.value.body.body[0].argument.arguments[0] || !(p.value.body.body[0].argument.arguments[0].type === 'Literal')) { return false; } - return p.value.body.body[0].argument.arguments[0].value === 'Not yet implemented'; -} \ No newline at end of file diff --git a/js/gulp/package-task.js b/js/gulp/package-task.js index 3390da01923a8..8c0f8fb0e4767 100644 --- a/js/gulp/package-task.js +++ b/js/gulp/package-task.js @@ -27,6 +27,7 @@ const { Observable, ReplaySubject } = require('rxjs'); const gulpJsonTransform = require('gulp-json-transform'); const packageTask = ((cache) => memoizeTask(cache, function bundle(target, format) { + if (target === `src`) return Observable.empty(); const out = targetDir(target, format); const jsonTransform = gulpJsonTransform(target === npmPkgName ? createMainPackageJson(target, format) : target === `ts` ? 
createTypeScriptPackageJson(target, format) @@ -43,17 +44,19 @@ module.exports.packageTask = packageTask; const createMainPackageJson = (target, format) => (orig) => ({ ...createTypeScriptPackageJson(target, format)(orig), + bin: orig.bin, name: npmPkgName, main: mainExport, types: `${mainExport}.d.ts`, module: `${mainExport}.mjs`, unpkg: `${mainExport}.es5.min.js`, - [`@std/esm`]: { esm: `mjs`, warnings: false, sourceMap: true } + [`@std/esm`]: { mode: `all`, warnings: false, sourceMap: true } }); const createTypeScriptPackageJson = (target, format) => (orig) => ({ ...createScopedPackageJSON(target, format)(orig), main: `${mainExport}.ts`, types: `${mainExport}.ts`, + bin: undefined, dependencies: { '@types/flatbuffers': '*', '@types/node': '*', @@ -77,6 +80,6 @@ const createScopedPackageJSON = (target, format) => (({ name, ...orig }) => const conditionallyAddStandardESMEntry = (target, format) => (packageJSON) => ( format !== `esm` && format !== `cls` ? packageJSON - : { ...packageJSON, [`@std/esm`]: { esm: `js`, warnings: false, sourceMap: true } } + : { ...packageJSON, [`@std/esm`]: { mode: `js`, warnings: false, sourceMap: true } } ); \ No newline at end of file diff --git a/js/gulp/test-task.js b/js/gulp/test-task.js index 7f655548eb8ef..b0e34f8c94426 100644 --- a/js/gulp/test-task.js +++ b/js/gulp/test-task.js @@ -50,7 +50,7 @@ const testTask = ((cache, execArgv, testOptions) => memoizeTask(cache, function opts.env = { ...opts.env, TEST_TARGET: target, TEST_MODULE: format, - TEST_TS_SOURCE: !!argv.coverage, + TEST_TS_SOURCE: !!argv.coverage || (target === 'src') || (opts.env.TEST_TS_SOURCE === 'true'), JSON_PATHS: JSON.stringify(Array.isArray(argv.json_files) ? argv.json_files : [argv.json_files]), ARROW_PATHS: JSON.stringify(Array.isArray(argv.arrow_files) ? 
argv.arrow_files : [argv.arrow_files]), }; @@ -80,13 +80,18 @@ const javaFilesDir = path.join(testFilesDir, 'java'); const jsonFilesDir = path.join(testFilesDir, 'json'); async function cleanTestData() { - return await del([`${testFilesDir}/**`, `${snapshotsDir}/**`]); + return await del([ + `${cppFilesDir}/**`, + `${javaFilesDir}/**`, + `${jsonFilesDir}/**`, + `${snapshotsDir}/**` + ]); } async function createTestJSON() { await mkdirp(jsonFilesDir); await exec(`shx cp ${ARROW_INTEGRATION_DIR}/data/*.json ${jsonFilesDir}`); - await exec(`python ${ARROW_INTEGRATION_DIR}/integration_test.py --write_generated_json ${jsonFilesDir}`); + await exec(`python3 ${ARROW_INTEGRATION_DIR}/integration_test.py --write_generated_json ${jsonFilesDir}`); } async function createTestData() { diff --git a/js/gulp/typescript-task.js b/js/gulp/typescript-task.js index 0fdd1c7326650..beffab8a08ce0 100644 --- a/js/gulp/typescript-task.js +++ b/js/gulp/typescript-task.js @@ -33,7 +33,6 @@ const typescriptTask = ((cache) => memoizeTask(cache, function typescript(target const tsconfigPath = path.join(`tsconfig`, `tsconfig.${tsconfigName(target, format)}.json`); return compileTypescript(out, tsconfigPath) .merge(compileBinFiles(target, format)).takeLast(1) - .concat(maybeCopyRawJSArrowFormatFiles(target, format)) .publish(new ReplaySubject()).refCount(); }))({}); @@ -57,18 +56,3 @@ function compileTypescript(out, tsconfigPath) { module.exports = typescriptTask; module.exports.typescriptTask = typescriptTask; module.exports.compileBinFiles = compileBinFiles; - -function maybeCopyRawJSArrowFormatFiles(target, format) { - if (target !== `es5` || format !== `cls`) { - return Observable.empty(); - } - return Observable.defer(async () => { - const outFormatDir = path.join(targetDir(target, format), `fb`); - await del(path.join(outFormatDir, '*.js')); - await observableFromStreams( - gulp.src(path.join(`src`, `fb`, `*_generated.js`)), - gulpRename((p) => { p.basename = 
p.basename.replace(`_generated`, ``); }), - gulp.dest(outFormatDir) - ).toPromise(); - }); -} diff --git a/js/gulp/util.js b/js/gulp/util.js index f35a447e70830..25bf2c2371602 100644 --- a/js/gulp/util.js +++ b/js/gulp/util.js @@ -27,7 +27,10 @@ const npmOrgName = `@${npmPkgName}`; const releasesRootDir = `targets`; const knownTargets = [`es5`, `es2015`, `esnext`]; const knownModules = [`cjs`, `esm`, `cls`, `umd`]; -const moduleFormatsToSkipCombosOf = { cls: { test: true, integration: true } }; +const tasksToSkipPerTargetOrFormat = { + src: { clean: true, build: true }, + cls: { test: true, integration: true } +}; const packageJSONFields = [ `version`, `license`, `description`, `author`, `homepage`, `repository`, @@ -131,8 +134,14 @@ function* combinations(_targets, _modules) { const targets = known(knownTargets, _targets || [`all`]); const modules = known(knownModules, _modules || [`all`]); - if (_targets[0] === `all` && _modules[0] === `all`) { + if (_targets.indexOf(`src`) > -1) { + yield [`src`, ``]; + return; + } + + if (_targets.indexOf(`all`) > -1 && _modules.indexOf(`all`) > -1) { yield [`ts`, ``]; + yield [`src`, ``]; yield [npmPkgName, ``]; } @@ -143,8 +152,8 @@ function* combinations(_targets, _modules) { } function known(known, values) { - return ~values.indexOf(`all`) - ? known + return ~values.indexOf(`all`) ? known + : ~values.indexOf(`src`) ? 
[`src`] : Object.keys( values.reduce((map, arg) => (( (known.indexOf(arg) !== -1) && @@ -159,7 +168,7 @@ module.exports = { mainExport, npmPkgName, npmOrgName, metadataFiles, packageJSONFields, - knownTargets, knownModules, moduleFormatsToSkipCombosOf, + knownTargets, knownModules, tasksToSkipPerTargetOrFormat, ESKeywords, gCCLanguageNames, UMDSourceTargets, uglifyLanguageNames, taskName, packageName, tsconfigName, targetDir, combinations, observableFromStreams, diff --git a/js/gulpfile.js b/js/gulpfile.js index 891d6c79b3866..dfebfaeb8bb0e 100644 --- a/js/gulpfile.js +++ b/js/gulpfile.js @@ -29,7 +29,7 @@ const { taskName, combinations, knownTargets, knownModules, npmPkgName, UMDSourceTargets, - moduleFormatsToSkipCombosOf + tasksToSkipPerTargetOrFormat } = require('./gulp/util'); for (const [target, format] of combinations([`all`], [`all`])) { @@ -99,9 +99,8 @@ function getTasks(name) { if (targets.indexOf(`ts`) !== -1) tasks.push(`${name}:ts`); if (targets.indexOf(npmPkgName) !== -1) tasks.push(`${name}:${npmPkgName}`); for (const [target, format] of combinations(targets, modules)) { - if (moduleFormatsToSkipCombosOf[format] && moduleFormatsToSkipCombosOf[format][name]) { - continue; - } + if (tasksToSkipPerTargetOrFormat[target] && tasksToSkipPerTargetOrFormat[target][name]) continue; + if (tasksToSkipPerTargetOrFormat[format] && tasksToSkipPerTargetOrFormat[format][name]) continue; tasks.push(`${name}:${taskName(target, format)}`); } return tasks.length && tasks || [(done) => done()]; diff --git a/js/index.js b/js/index.js new file mode 100644 index 0000000000000..e42cb32d1f968 --- /dev/null +++ b/js/index.js @@ -0,0 +1,18 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +module.exports = require('./targets/apache-arrow'); \ No newline at end of file diff --git a/js/index.mjs b/js/index.mjs new file mode 100644 index 0000000000000..304353712424e --- /dev/null +++ b/js/index.mjs @@ -0,0 +1,18 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +export * from './targets/apache-arrow'; \ No newline at end of file diff --git a/js/index.ts b/js/index.ts new file mode 100644 index 0000000000000..51b8676abbd9d --- /dev/null +++ b/js/index.ts @@ -0,0 +1,18 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +export * from './src/Arrow'; \ No newline at end of file diff --git a/js/package.json b/js/package.json index 2f222e9d3e546..15869394f1ee0 100644 --- a/js/package.json +++ b/js/package.json @@ -2,6 +2,7 @@ "version": "0.3.0", "name": "apache-arrow", "description": "Apache Arrow columnar in-memory format", + "main": "./index", "bin": { "arrow2csv": "bin/arrow2csv.js" }, @@ -53,7 +54,7 @@ ], "dependencies": { "@types/flatbuffers": "1.6.5", - "@types/node": "9.3.0", + "@types/node": "10.0.8", "@types/text-encoding-utf-8": "1.0.1", "command-line-args": "5.0.1", "command-line-usage": "4.1.0", @@ -65,22 +66,20 @@ "devDependencies": { "@std/esm": "0.26.0", "@types/glob": "5.0.35", - "@types/jest": "22.1.0", - "ast-types": "0.10.1", - "babel-jest": "22.4.1", + "@types/jest": "22.2.3", + "babel-jest": "22.4.3", "benchmark": "2.1.4", "coveralls": "3.0.0", "del": "3.0.0", "glob": "7.1.2", - "google-closure-compiler": "20180101.0.0", + "google-closure-compiler": "20180506.0.0", "gulp": "github:gulpjs/gulp#6d71a658c61edb3090221579d8f97dbe086ba2ed", "gulp-json-transform": "0.4.5", "gulp-rename": "1.2.2", "gulp-sourcemaps": "2.6.3", - "gulp-transform-js-ast": "1.0.2", "gulp-typescript": "3.2.4", "ix": "2.3.4", - "jest": "22.1.4", + "jest": "22.4.3", "jest-environment-node-debug": 
"2.0.0", "json": "9.0.6", "lerna": "2.7.1", @@ -94,7 +93,8 @@ "shx": "0.2.2", "source-map-loader": "0.2.3", "trash": "4.2.1", - "ts-jest": "22.0.1", + "ts-jest": "22.4.6", + "ts-node": "6.0.3", "tslint": "5.9.1", "typedoc": "0.10.0", "typescript": "2.7.1", @@ -113,8 +113,10 @@ }, "jest": { "verbose": false, + "testEnvironment": "node", "globals": { "ts-jest": { + "skipBabel": true, "tsConfigFile": "test/tsconfig.json" } }, diff --git a/js/perf/index.js b/js/perf/index.js index 42cb6abe29cb7..2c07591925328 100644 --- a/js/perf/index.js +++ b/js/perf/index.js @@ -138,7 +138,7 @@ function createGetByIndexTest(vector, name) { function createDataFrameDirectCountTest(table, column, test, value) { let sum, colidx = table.schema.fields.findIndex((c)=>c.name === column); - if (test == 'gteq') { + if (test == 'gt') { op = function () { sum = 0; let batches = table.batches; @@ -195,8 +195,8 @@ function createDataFrameFilterCountTest(table, column, test, value) { let colidx = table.schema.fields.findIndex((c)=> c.name === column); let df; - if (test == 'gteq') { - df = table.filter(col(column).gteq(value)); + if (test == 'gt') { + df = table.filter(col(column).gt(value)); } else if (test == 'eq') { df = table.filter(col(column).eq(value)); } else { diff --git a/js/perf/table_config.js b/js/perf/table_config.js index e3c332c870f38..190908bc32869 100644 --- a/js/perf/table_config.js +++ b/js/perf/table_config.js @@ -27,9 +27,9 @@ countBys = { } counts = { "tracks": [ - {col: 'lat', test: 'gteq', value: 0 }, - {col: 'lng', test: 'gteq', value: 0 }, - {col: 'origin', test: 'eq', value: 'Seattle'}, + {col: 'lat', test: 'gt', value: 0 }, + {col: 'lng', test: 'gt', value: 0 }, + {col: 'origin', test: 'eq', value: 'Seattle'}, ] } diff --git a/js/src/Arrow.externs.js b/js/src/Arrow.externs.js index b1ebd2d3c0ba7..067a61ce1dd0d 100644 --- a/js/src/Arrow.externs.js +++ b/js/src/Arrow.externs.js @@ -65,6 +65,8 @@ Table.prototype.batches; Table.prototype.countBy; /** @type {?} */ 
Table.prototype.scan; +/** @type {?} */ +Table.prototype.serialize; var CountByResult = function() {}; /** @type {?} */ @@ -122,9 +124,13 @@ Predicate.prototype.not; Predicate.prototype.ands; var Literal = function() {}; -var TableToStringIterator = function() {}; +var PipeIterator = function() {}; +/** @type {?} */ +PipeIterator.prototype.pipe; + +var AsyncPipeIterator = function() {}; /** @type {?} */ -TableToStringIterator.prototype.pipe; +AsyncPipeIterator.prototype.pipe; var RecordBatch = function() {}; /** @type {?} */ @@ -232,6 +238,8 @@ Type.Int = function() {}; /** @type {?} */ Type.Float = function() {}; /** @type {?} */ +Type.FloatingPoint = function() {}; +/** @type {?} */ Type.Binary = function() {}; /** @type {?} */ Type.Utf8 = function() {}; @@ -252,6 +260,8 @@ Type.List = function() {}; /** @type {?} */ Type.Struct = function() {}; /** @type {?} */ +Type.Struct_ = function() {}; +/** @type {?} */ Type.Union = function() {}; /** @type {?} */ Type.FixedSizeBinary = function() {}; @@ -539,7 +549,11 @@ var Utf8Vector = function() {}; /** @type {?} */ Utf8Vector.prototype.asBinary; var ListVector = function() {}; +/** @type {?} */ +ListVector.prototype.getChildAt; var FixedSizeListVector = function() {}; +/** @type {?} */ +FixedSizeListVector.prototype.getChildAt; var MapVector = function() {}; /** @type {?} */ MapVector.prototype.asStruct; @@ -613,6 +627,10 @@ ValidityView.prototype.isValid; ValidityView.prototype.toArray; /** @type {?} */ ValidityView.prototype.set; +/** @type {?} */ +ValidityView.prototype.size; +/** @type {?} */ +ValidityView.prototype.getChildAt; var DictionaryView = function() {}; /** @type {?} */ diff --git a/js/src/Arrow.ts b/js/src/Arrow.ts index e4cf97539059a..58ec6aa47aef2 100644 --- a/js/src/Arrow.ts +++ b/js/src/Arrow.ts @@ -20,6 +20,7 @@ import * as data_ from './data'; import * as vector_ from './vector'; import * as util_int_ from './util/int'; import * as util_bit_ from './util/bit'; +import * as util_node from 
'./util/node'; import * as visitor_ from './visitor'; import * as view_ from './vector/view'; import * as predicate_ from './predicate'; @@ -27,7 +28,9 @@ import { Vector } from './vector'; import { RecordBatch } from './recordbatch'; import { Schema, Field, Type } from './type'; import { Table, DataFrame, NextFunc, BindFunc, CountByResult } from './table'; -import { read, readAsync } from './ipc/reader/arrow'; +import { fromReadableStream } from './ipc/reader/node'; +import { read, readAsync, readStream } from './ipc/reader/arrow'; +import { serializeFile, serializeStream } from './ipc/writer/binary'; export import View = vector_.View; export import VectorLike = vector_.VectorLike; @@ -36,7 +39,9 @@ export import IntBitWidth = type_.IntBitWidth; export import TimeBitWidth = type_.TimeBitWidth; export import TypedArrayConstructor = type_.TypedArrayConstructor; -export { read, readAsync }; +export { fromReadableStream }; +export { read, readAsync, readStream }; +export { serializeFile, serializeStream }; export { Table, DataFrame, NextFunc, BindFunc, CountByResult }; export { Field, Schema, RecordBatch, Vector, Type }; @@ -45,6 +50,8 @@ export namespace util { export import Int64 = util_int_.Int64; export import Int128 = util_int_.Int128; export import packBools = util_bit_.packBools; + export import PipeIterator = util_node.PipeIterator; + export import AsyncPipeIterator = util_node.AsyncPipeIterator; } export namespace data { @@ -202,6 +209,11 @@ try { Arrow['read'] = read; Arrow['readAsync'] = readAsync; + Arrow['readStream'] = readStream; + Arrow['fromReadableStream'] = fromReadableStream; + + Arrow['serializeFile'] = serializeFile; + Arrow['serializeStream'] = serializeStream; Arrow['Type'] = Type; Arrow['Field'] = Field; diff --git a/js/src/bin/arrow2csv.ts b/js/src/bin/arrow2csv.ts index 6d197c7b6b4f9..510f00740fed0 100644 --- a/js/src/bin/arrow2csv.ts +++ b/js/src/bin/arrow2csv.ts @@ -20,29 +20,65 @@ /* tslint:disable */ import * as fs from 'fs'; -import * 
as Arrow from '../Arrow'; +import { promisify } from 'util'; +import { Table, readStream } from '../Arrow'; +const readFile = promisify(fs.readFile); const { parse } = require('json-bignum'); -const optionList = [ - { - type: String, - name: 'schema', alias: 's', - optional: true, multiple: true, - typeLabel: '[underline]{columns}', - description: 'A space-delimited list of column names' - }, - { - type: String, - name: 'file', alias: 'f', - optional: false, multiple: true, - description: 'The Arrow file to read' +const argv = require(`command-line-args`)(cliOpts(), { partial: true }); +const files = [...(argv.file || []), ...(argv._unknown || [])].filter(Boolean); + +(async () => { + let hasRecords = false; + if (files.length > 0) { + hasRecords = true; + for (let input of files) { + printTable(await readFile(input)); + } + } else { + let rowOffset = 0; + let maxColumnWidths: number[] = []; + for await (const recordBatch of readStream(process.stdin)) { + hasRecords = true; + recordBatch.rowsToString(' | ', rowOffset, maxColumnWidths).pipe(process.stdout); + rowOffset += recordBatch.length; + } } -]; + return hasRecords ? 
null : print_usage(); +})().catch((e) => { console.error(e); process.exit(1); }); -const argv = require(`command-line-args`)(optionList, { partial: true }); -const files = [...argv.file, ...(argv._unknown || [])].filter(Boolean); +function printTable(input: any) { + let table: Table; + try { + table = Table.from(input); + } catch (e) { + table = Table.from(parse(input + '')); + } + if (argv.schema && argv.schema.length) { + table = table.select(...argv.schema); + } + table.rowsToString().pipe(process.stdout); +} + +function cliOpts() { + return [ + { + type: String, + name: 'schema', alias: 's', + optional: true, multiple: true, + typeLabel: '[underline]{columns}', + description: 'A space-delimited list of column names' + }, + { + type: String, + name: 'file', alias: 'f', + optional: false, multiple: true, + description: 'The Arrow file to read' + } + ]; +} -if (!files.length) { +function print_usage() { console.log(require('command-line-usage')([ { header: 'arrow2csv', @@ -60,7 +96,7 @@ if (!files.length) { { header: 'Options', optionList: [ - ...optionList, + ...cliOpts(), { name: 'help', description: 'Print this usage guide.' 
@@ -81,17 +117,4 @@ if (!files.length) { } ])); process.exit(1); -} - -files.forEach((source) => { - let table: Arrow.Table, input = fs.readFileSync(source); - try { - table = Arrow.Table.from(input); - } catch (e) { - table = Arrow.Table.from(parse(input + '')); - } - if (argv.schema && argv.schema.length) { - table = table.select(...argv.schema); - } - table.rowsToString().pipe(process.stdout); -}); +} \ No newline at end of file diff --git a/js/src/data.ts b/js/src/data.ts index 3bfb3209b696b..963a6a476bafb 100644 --- a/js/src/data.ts +++ b/js/src/data.ts @@ -17,8 +17,8 @@ import { popcnt_bit_range } from './util/bit'; import { VectorLike, Vector } from './vector'; +import { Int, Bool, FlatListType, List, Struct, Map_ } from './type'; import { VectorType, TypedArray, TypedArrayConstructor, Dictionary } from './type'; -import { Int, Bool, FlatListType, List, FixedSizeList, Struct, Map_ } from './type'; import { DataType, FlatType, ListType, NestedType, SingleNestedType, DenseUnion, SparseUnion } from './type'; export function toTypedArray(ArrayType: TypedArrayConstructor, values?: T | ArrayLike | Iterable | null): T { @@ -46,7 +46,7 @@ export interface DataTypes { /* [Type.Struct]*/ 13: NestedData; /* [Type.Union]*/ 14: UnionData; /* [Type.FixedSizeBinary]*/ 15: FlatData; -/* [Type.FixedSizeList]*/ 16: SingleNestedData>; +/* [Type.FixedSizeList]*/ 16: SingleNestedData; /* [Type.Map]*/ 17: NestedData; /* [Type.DenseUnion]*/ DenseUnion: DenseUnionData; /*[Type.SparseUnion]*/ SparseUnion: SparseUnionData; diff --git a/js/src/fb/File_generated.js b/js/src/fb/File_generated.js deleted file mode 100644 index 12aae293ea4eb..0000000000000 --- a/js/src/fb/File_generated.js +++ /dev/null @@ -1,256 +0,0 @@ -import { org } from './Schema'; -// automatically generated by the FlatBuffers compiler, do not modify - -/** - * @const - * @namespace - */ -org.apache = org.apache || {}; - -/** - * @const - * @namespace - */ -org.apache.arrow = org.apache.arrow || {}; - -/** - * 
@const - * @namespace - */ -org.apache.arrow.flatbuf = org.apache.arrow.flatbuf || {}; - -/** - * ---------------------------------------------------------------------- - * Arrow File metadata - * - * - * @constructor - */ -org.apache.arrow.flatbuf.Footer = function() { - /** - * @type {flatbuffers.ByteBuffer} - */ - this.bb = null; - - /** - * @type {number} - */ - this.bb_pos = 0; -}; - -/** - * @param {number} i - * @param {flatbuffers.ByteBuffer} bb - * @returns {org.apache.arrow.flatbuf.Footer} - */ -org.apache.arrow.flatbuf.Footer.prototype.__init = function(i, bb) { - this.bb_pos = i; - this.bb = bb; - return this; -}; - -/** - * @param {flatbuffers.ByteBuffer} bb - * @param {org.apache.arrow.flatbuf.Footer=} obj - * @returns {org.apache.arrow.flatbuf.Footer} - */ -org.apache.arrow.flatbuf.Footer.getRootAsFooter = function(bb, obj) { - return (obj || new org.apache.arrow.flatbuf.Footer).__init(bb.readInt32(bb.position()) + bb.position(), bb); -}; - -/** - * @returns {org.apache.arrow.flatbuf.MetadataVersion} - */ -org.apache.arrow.flatbuf.Footer.prototype.version = function() { - var offset = this.bb.__offset(this.bb_pos, 4); - return offset ? /** @type {org.apache.arrow.flatbuf.MetadataVersion} */ (this.bb.readInt16(this.bb_pos + offset)) : org.apache.arrow.flatbuf.MetadataVersion.V1; -}; - -/** - * @param {org.apache.arrow.flatbuf.Schema=} obj - * @returns {org.apache.arrow.flatbuf.Schema|null} - */ -org.apache.arrow.flatbuf.Footer.prototype.schema = function(obj) { - var offset = this.bb.__offset(this.bb_pos, 6); - return offset ? (obj || new org.apache.arrow.flatbuf.Schema).__init(this.bb.__indirect(this.bb_pos + offset), this.bb) : null; -}; - -/** - * @param {number} index - * @param {org.apache.arrow.flatbuf.Block=} obj - * @returns {org.apache.arrow.flatbuf.Block} - */ -org.apache.arrow.flatbuf.Footer.prototype.dictionaries = function(index, obj) { - var offset = this.bb.__offset(this.bb_pos, 8); - return offset ? 
(obj || new org.apache.arrow.flatbuf.Block).__init(this.bb.__vector(this.bb_pos + offset) + index * 24, this.bb) : null; -}; - -/** - * @returns {number} - */ -org.apache.arrow.flatbuf.Footer.prototype.dictionariesLength = function() { - var offset = this.bb.__offset(this.bb_pos, 8); - return offset ? this.bb.__vector_len(this.bb_pos + offset) : 0; -}; - -/** - * @param {number} index - * @param {org.apache.arrow.flatbuf.Block=} obj - * @returns {org.apache.arrow.flatbuf.Block} - */ -org.apache.arrow.flatbuf.Footer.prototype.recordBatches = function(index, obj) { - var offset = this.bb.__offset(this.bb_pos, 10); - return offset ? (obj || new org.apache.arrow.flatbuf.Block).__init(this.bb.__vector(this.bb_pos + offset) + index * 24, this.bb) : null; -}; - -/** - * @returns {number} - */ -org.apache.arrow.flatbuf.Footer.prototype.recordBatchesLength = function() { - var offset = this.bb.__offset(this.bb_pos, 10); - return offset ? this.bb.__vector_len(this.bb_pos + offset) : 0; -}; - -/** - * @param {flatbuffers.Builder} builder - */ -org.apache.arrow.flatbuf.Footer.startFooter = function(builder) { - builder.startObject(4); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {org.apache.arrow.flatbuf.MetadataVersion} version - */ -org.apache.arrow.flatbuf.Footer.addVersion = function(builder, version) { - builder.addFieldInt16(0, version, org.apache.arrow.flatbuf.MetadataVersion.V1); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {flatbuffers.Offset} schemaOffset - */ -org.apache.arrow.flatbuf.Footer.addSchema = function(builder, schemaOffset) { - builder.addFieldOffset(1, schemaOffset, 0); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {flatbuffers.Offset} dictionariesOffset - */ -org.apache.arrow.flatbuf.Footer.addDictionaries = function(builder, dictionariesOffset) { - builder.addFieldOffset(2, dictionariesOffset, 0); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {number} numElems - */ 
-org.apache.arrow.flatbuf.Footer.startDictionariesVector = function(builder, numElems) { - builder.startVector(24, numElems, 8); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {flatbuffers.Offset} recordBatchesOffset - */ -org.apache.arrow.flatbuf.Footer.addRecordBatches = function(builder, recordBatchesOffset) { - builder.addFieldOffset(3, recordBatchesOffset, 0); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {number} numElems - */ -org.apache.arrow.flatbuf.Footer.startRecordBatchesVector = function(builder, numElems) { - builder.startVector(24, numElems, 8); -}; - -/** - * @param {flatbuffers.Builder} builder - * @returns {flatbuffers.Offset} - */ -org.apache.arrow.flatbuf.Footer.endFooter = function(builder) { - var offset = builder.endObject(); - return offset; -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {flatbuffers.Offset} offset - */ -org.apache.arrow.flatbuf.Footer.finishFooterBuffer = function(builder, offset) { - builder.finish(offset); -}; - -/** - * @constructor - */ -org.apache.arrow.flatbuf.Block = function() { - /** - * @type {flatbuffers.ByteBuffer} - */ - this.bb = null; - - /** - * @type {number} - */ - this.bb_pos = 0; -}; - -/** - * @param {number} i - * @param {flatbuffers.ByteBuffer} bb - * @returns {org.apache.arrow.flatbuf.Block} - */ -org.apache.arrow.flatbuf.Block.prototype.__init = function(i, bb) { - this.bb_pos = i; - this.bb = bb; - return this; -}; - -/** - * Index to the start of the RecordBlock (note this is past the Message header) - * - * @returns {flatbuffers.Long} - */ -org.apache.arrow.flatbuf.Block.prototype.offset = function() { - return this.bb.readInt64(this.bb_pos); -}; - -/** - * Length of the metadata - * - * @returns {number} - */ -org.apache.arrow.flatbuf.Block.prototype.metaDataLength = function() { - return this.bb.readInt32(this.bb_pos + 8); -}; - -/** - * Length of the data (this is aligned so there can be a gap between this and - * the metatdata). 
- * - * @returns {flatbuffers.Long} - */ -org.apache.arrow.flatbuf.Block.prototype.bodyLength = function() { - return this.bb.readInt64(this.bb_pos + 16); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {flatbuffers.Long} offset - * @param {number} metaDataLength - * @param {flatbuffers.Long} bodyLength - * @returns {flatbuffers.Offset} - */ -org.apache.arrow.flatbuf.Block.createBlock = function(builder, offset, metaDataLength, bodyLength) { - builder.prep(8, 24); - builder.writeInt64(bodyLength); - builder.pad(4); - builder.writeInt32(metaDataLength); - builder.writeInt64(offset); - return builder.offset(); -}; -export { org }; - diff --git a/js/src/fb/Message_generated.js b/js/src/fb/Message_generated.js deleted file mode 100644 index ef46c98057c9f..0000000000000 --- a/js/src/fb/Message_generated.js +++ /dev/null @@ -1,497 +0,0 @@ -import { org } from './Schema'; -// automatically generated by the FlatBuffers compiler, do not modify - -/** - * @const - * @namespace - */ -org.apache = org.apache || {}; - -/** - * @const - * @namespace - */ -org.apache.arrow = org.apache.arrow || {}; - -/** - * @const - * @namespace - */ -org.apache.arrow.flatbuf = org.apache.arrow.flatbuf || {}; - -/** - * ---------------------------------------------------------------------- - * The root Message type - * This union enables us to easily send different message types without - * redundant storage, and in the future we can easily add new message types. - * - * Arrow implementations do not need to implement all of the message types, - * which may include experimental metadata types. 
For maximum compatibility, - * it is best to send data using RecordBatch - * - * @enum - */ -org.apache.arrow.flatbuf.MessageHeader = { - NONE: 0, 0: 'NONE', - Schema: 1, 1: 'Schema', - DictionaryBatch: 2, 2: 'DictionaryBatch', - RecordBatch: 3, 3: 'RecordBatch', - Tensor: 4, 4: 'Tensor', -}; - -/** - * ---------------------------------------------------------------------- - * Data structures for describing a table row batch (a collection of - * equal-length Arrow arrays) - * Metadata about a field at some level of a nested type tree (but not - * its children). - * - * For example, a List with values [[1, 2, 3], null, [4], [5, 6], null] - * would have {length: 5, null_count: 2} for its List node, and {length: 6, - * null_count: 0} for its Int16 node, as separate FieldNode structs - * - * @constructor - */ -org.apache.arrow.flatbuf.FieldNode = function() { - /** - * @type {flatbuffers.ByteBuffer} - */ - this.bb = null; - - /** - * @type {number} - */ - this.bb_pos = 0; -}; - -/** - * @param {number} i - * @param {flatbuffers.ByteBuffer} bb - * @returns {org.apache.arrow.flatbuf.FieldNode} - */ -org.apache.arrow.flatbuf.FieldNode.prototype.__init = function(i, bb) { - this.bb_pos = i; - this.bb = bb; - return this; -}; - -/** - * The number of value slots in the Arrow array at this level of a nested - * tree - * - * @returns {flatbuffers.Long} - */ -org.apache.arrow.flatbuf.FieldNode.prototype.length = function() { - return this.bb.readInt64(this.bb_pos); -}; - -/** - * The number of observed nulls. Fields with null_count == 0 may choose not - * to write their physical validity bitmap out as a materialized buffer, - * instead setting the length of the bitmap buffer to 0. 
- * - * @returns {flatbuffers.Long} - */ -org.apache.arrow.flatbuf.FieldNode.prototype.nullCount = function() { - return this.bb.readInt64(this.bb_pos + 8); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {flatbuffers.Long} length - * @param {flatbuffers.Long} null_count - * @returns {flatbuffers.Offset} - */ -org.apache.arrow.flatbuf.FieldNode.createFieldNode = function(builder, length, null_count) { - builder.prep(8, 16); - builder.writeInt64(null_count); - builder.writeInt64(length); - return builder.offset(); -}; - -/** - * A data header describing the shared memory layout of a "record" or "row" - * batch. Some systems call this a "row batch" internally and others a "record - * batch". - * - * @constructor - */ -org.apache.arrow.flatbuf.RecordBatch = function() { - /** - * @type {flatbuffers.ByteBuffer} - */ - this.bb = null; - - /** - * @type {number} - */ - this.bb_pos = 0; -}; - -/** - * @param {number} i - * @param {flatbuffers.ByteBuffer} bb - * @returns {org.apache.arrow.flatbuf.RecordBatch} - */ -org.apache.arrow.flatbuf.RecordBatch.prototype.__init = function(i, bb) { - this.bb_pos = i; - this.bb = bb; - return this; -}; - -/** - * @param {flatbuffers.ByteBuffer} bb - * @param {org.apache.arrow.flatbuf.RecordBatch=} obj - * @returns {org.apache.arrow.flatbuf.RecordBatch} - */ -org.apache.arrow.flatbuf.RecordBatch.getRootAsRecordBatch = function(bb, obj) { - return (obj || new org.apache.arrow.flatbuf.RecordBatch).__init(bb.readInt32(bb.position()) + bb.position(), bb); -}; - -/** - * number of records / rows. The arrays in the batch should all have this - * length - * - * @returns {flatbuffers.Long} - */ -org.apache.arrow.flatbuf.RecordBatch.prototype.length = function() { - var offset = this.bb.__offset(this.bb_pos, 4); - return offset ? 
this.bb.readInt64(this.bb_pos + offset) : this.bb.createLong(0, 0); -}; - -/** - * Nodes correspond to the pre-ordered flattened logical schema - * - * @param {number} index - * @param {org.apache.arrow.flatbuf.FieldNode=} obj - * @returns {org.apache.arrow.flatbuf.FieldNode} - */ -org.apache.arrow.flatbuf.RecordBatch.prototype.nodes = function(index, obj) { - var offset = this.bb.__offset(this.bb_pos, 6); - return offset ? (obj || new org.apache.arrow.flatbuf.FieldNode).__init(this.bb.__vector(this.bb_pos + offset) + index * 16, this.bb) : null; -}; - -/** - * @returns {number} - */ -org.apache.arrow.flatbuf.RecordBatch.prototype.nodesLength = function() { - var offset = this.bb.__offset(this.bb_pos, 6); - return offset ? this.bb.__vector_len(this.bb_pos + offset) : 0; -}; - -/** - * Buffers correspond to the pre-ordered flattened buffer tree - * - * The number of buffers appended to this list depends on the schema. For - * example, most primitive arrays will have 2 buffers, 1 for the validity - * bitmap and 1 for the values. For struct arrays, there will only be a - * single buffer for the validity (nulls) bitmap - * - * @param {number} index - * @param {org.apache.arrow.flatbuf.Buffer=} obj - * @returns {org.apache.arrow.flatbuf.Buffer} - */ -org.apache.arrow.flatbuf.RecordBatch.prototype.buffers = function(index, obj) { - var offset = this.bb.__offset(this.bb_pos, 8); - return offset ? (obj || new org.apache.arrow.flatbuf.Buffer).__init(this.bb.__vector(this.bb_pos + offset) + index * 16, this.bb) : null; -}; - -/** - * @returns {number} - */ -org.apache.arrow.flatbuf.RecordBatch.prototype.buffersLength = function() { - var offset = this.bb.__offset(this.bb_pos, 8); - return offset ? 
this.bb.__vector_len(this.bb_pos + offset) : 0; -}; - -/** - * @param {flatbuffers.Builder} builder - */ -org.apache.arrow.flatbuf.RecordBatch.startRecordBatch = function(builder) { - builder.startObject(3); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {flatbuffers.Long} length - */ -org.apache.arrow.flatbuf.RecordBatch.addLength = function(builder, length) { - builder.addFieldInt64(0, length, builder.createLong(0, 0)); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {flatbuffers.Offset} nodesOffset - */ -org.apache.arrow.flatbuf.RecordBatch.addNodes = function(builder, nodesOffset) { - builder.addFieldOffset(1, nodesOffset, 0); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {number} numElems - */ -org.apache.arrow.flatbuf.RecordBatch.startNodesVector = function(builder, numElems) { - builder.startVector(16, numElems, 8); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {flatbuffers.Offset} buffersOffset - */ -org.apache.arrow.flatbuf.RecordBatch.addBuffers = function(builder, buffersOffset) { - builder.addFieldOffset(2, buffersOffset, 0); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {number} numElems - */ -org.apache.arrow.flatbuf.RecordBatch.startBuffersVector = function(builder, numElems) { - builder.startVector(16, numElems, 8); -}; - -/** - * @param {flatbuffers.Builder} builder - * @returns {flatbuffers.Offset} - */ -org.apache.arrow.flatbuf.RecordBatch.endRecordBatch = function(builder) { - var offset = builder.endObject(); - return offset; -}; - -/** - * For sending dictionary encoding information. Any Field can be - * dictionary-encoded, but in this case none of its children may be - * dictionary-encoded. 
- * There is one vector / column per dictionary, but that vector / column - * may be spread across multiple dictionary batches by using the isDelta - * flag - * - * @constructor - */ -org.apache.arrow.flatbuf.DictionaryBatch = function() { - /** - * @type {flatbuffers.ByteBuffer} - */ - this.bb = null; - - /** - * @type {number} - */ - this.bb_pos = 0; -}; - -/** - * @param {number} i - * @param {flatbuffers.ByteBuffer} bb - * @returns {org.apache.arrow.flatbuf.DictionaryBatch} - */ -org.apache.arrow.flatbuf.DictionaryBatch.prototype.__init = function(i, bb) { - this.bb_pos = i; - this.bb = bb; - return this; -}; - -/** - * @param {flatbuffers.ByteBuffer} bb - * @param {org.apache.arrow.flatbuf.DictionaryBatch=} obj - * @returns {org.apache.arrow.flatbuf.DictionaryBatch} - */ -org.apache.arrow.flatbuf.DictionaryBatch.getRootAsDictionaryBatch = function(bb, obj) { - return (obj || new org.apache.arrow.flatbuf.DictionaryBatch).__init(bb.readInt32(bb.position()) + bb.position(), bb); -}; - -/** - * @returns {flatbuffers.Long} - */ -org.apache.arrow.flatbuf.DictionaryBatch.prototype.id = function() { - var offset = this.bb.__offset(this.bb_pos, 4); - return offset ? this.bb.readInt64(this.bb_pos + offset) : this.bb.createLong(0, 0); -}; - -/** - * @param {org.apache.arrow.flatbuf.RecordBatch=} obj - * @returns {org.apache.arrow.flatbuf.RecordBatch|null} - */ -org.apache.arrow.flatbuf.DictionaryBatch.prototype.data = function(obj) { - var offset = this.bb.__offset(this.bb_pos, 6); - return offset ? (obj || new org.apache.arrow.flatbuf.RecordBatch).__init(this.bb.__indirect(this.bb_pos + offset), this.bb) : null; -}; - -/** - * If isDelta is true the values in the dictionary are to be appended to a - * dictionary with the indicated id - * - * @returns {boolean} - */ -org.apache.arrow.flatbuf.DictionaryBatch.prototype.isDelta = function() { - var offset = this.bb.__offset(this.bb_pos, 8); - return offset ? 
!!this.bb.readInt8(this.bb_pos + offset) : false; -}; - -/** - * @param {flatbuffers.Builder} builder - */ -org.apache.arrow.flatbuf.DictionaryBatch.startDictionaryBatch = function(builder) { - builder.startObject(3); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {flatbuffers.Long} id - */ -org.apache.arrow.flatbuf.DictionaryBatch.addId = function(builder, id) { - builder.addFieldInt64(0, id, builder.createLong(0, 0)); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {flatbuffers.Offset} dataOffset - */ -org.apache.arrow.flatbuf.DictionaryBatch.addData = function(builder, dataOffset) { - builder.addFieldOffset(1, dataOffset, 0); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {boolean} isDelta - */ -org.apache.arrow.flatbuf.DictionaryBatch.addIsDelta = function(builder, isDelta) { - builder.addFieldInt8(2, +isDelta, +false); -}; - -/** - * @param {flatbuffers.Builder} builder - * @returns {flatbuffers.Offset} - */ -org.apache.arrow.flatbuf.DictionaryBatch.endDictionaryBatch = function(builder) { - var offset = builder.endObject(); - return offset; -}; - -/** - * @constructor - */ -org.apache.arrow.flatbuf.Message = function() { - /** - * @type {flatbuffers.ByteBuffer} - */ - this.bb = null; - - /** - * @type {number} - */ - this.bb_pos = 0; -}; - -/** - * @param {number} i - * @param {flatbuffers.ByteBuffer} bb - * @returns {org.apache.arrow.flatbuf.Message} - */ -org.apache.arrow.flatbuf.Message.prototype.__init = function(i, bb) { - this.bb_pos = i; - this.bb = bb; - return this; -}; - -/** - * @param {flatbuffers.ByteBuffer} bb - * @param {org.apache.arrow.flatbuf.Message=} obj - * @returns {org.apache.arrow.flatbuf.Message} - */ -org.apache.arrow.flatbuf.Message.getRootAsMessage = function(bb, obj) { - return (obj || new org.apache.arrow.flatbuf.Message).__init(bb.readInt32(bb.position()) + bb.position(), bb); -}; - -/** - * @returns {org.apache.arrow.flatbuf.MetadataVersion} - */ 
-org.apache.arrow.flatbuf.Message.prototype.version = function() { - var offset = this.bb.__offset(this.bb_pos, 4); - return offset ? /** @type {org.apache.arrow.flatbuf.MetadataVersion} */ (this.bb.readInt16(this.bb_pos + offset)) : org.apache.arrow.flatbuf.MetadataVersion.V1; -}; - -/** - * @returns {org.apache.arrow.flatbuf.MessageHeader} - */ -org.apache.arrow.flatbuf.Message.prototype.headerType = function() { - var offset = this.bb.__offset(this.bb_pos, 6); - return offset ? /** @type {org.apache.arrow.flatbuf.MessageHeader} */ (this.bb.readUint8(this.bb_pos + offset)) : org.apache.arrow.flatbuf.MessageHeader.NONE; -}; - -/** - * @param {flatbuffers.Table} obj - * @returns {?flatbuffers.Table} - */ -org.apache.arrow.flatbuf.Message.prototype.header = function(obj) { - var offset = this.bb.__offset(this.bb_pos, 8); - return offset ? this.bb.__union(obj, this.bb_pos + offset) : null; -}; - -/** - * @returns {flatbuffers.Long} - */ -org.apache.arrow.flatbuf.Message.prototype.bodyLength = function() { - var offset = this.bb.__offset(this.bb_pos, 10); - return offset ? 
this.bb.readInt64(this.bb_pos + offset) : this.bb.createLong(0, 0); -}; - -/** - * @param {flatbuffers.Builder} builder - */ -org.apache.arrow.flatbuf.Message.startMessage = function(builder) { - builder.startObject(4); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {org.apache.arrow.flatbuf.MetadataVersion} version - */ -org.apache.arrow.flatbuf.Message.addVersion = function(builder, version) { - builder.addFieldInt16(0, version, org.apache.arrow.flatbuf.MetadataVersion.V1); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {org.apache.arrow.flatbuf.MessageHeader} headerType - */ -org.apache.arrow.flatbuf.Message.addHeaderType = function(builder, headerType) { - builder.addFieldInt8(1, headerType, org.apache.arrow.flatbuf.MessageHeader.NONE); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {flatbuffers.Offset} headerOffset - */ -org.apache.arrow.flatbuf.Message.addHeader = function(builder, headerOffset) { - builder.addFieldOffset(2, headerOffset, 0); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {flatbuffers.Long} bodyLength - */ -org.apache.arrow.flatbuf.Message.addBodyLength = function(builder, bodyLength) { - builder.addFieldInt64(3, bodyLength, builder.createLong(0, 0)); -}; - -/** - * @param {flatbuffers.Builder} builder - * @returns {flatbuffers.Offset} - */ -org.apache.arrow.flatbuf.Message.endMessage = function(builder) { - var offset = builder.endObject(); - return offset; -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {flatbuffers.Offset} offset - */ -org.apache.arrow.flatbuf.Message.finishMessageBuffer = function(builder, offset) { - builder.finish(offset); -}; -export { org }; - diff --git a/js/src/fb/Schema_generated.js b/js/src/fb/Schema_generated.js deleted file mode 100644 index ebed8a90645c8..0000000000000 --- a/js/src/fb/Schema_generated.js +++ /dev/null @@ -1,2231 +0,0 @@ -// automatically generated by the FlatBuffers compiler, do not modify - -/** - * @const - * 
@namespace - */ -var org = org || {}; - -/** - * @const - * @namespace - */ -org.apache = org.apache || {}; - -/** - * @const - * @namespace - */ -org.apache.arrow = org.apache.arrow || {}; - -/** - * @const - * @namespace - */ -org.apache.arrow.flatbuf = org.apache.arrow.flatbuf || {}; - -/** - * @enum - */ -org.apache.arrow.flatbuf.MetadataVersion = { - /** - * 0.1.0 - */ - 'V1': 0, 0: 'V1', - - /** - * 0.2.0 - */ - 'V2': 1, 1: 'V2', - - /** - * 0.3.0 -> 0.7.1 - */ - 'V3': 2, 2: 'V3', - - /** - * >= 0.8.0 - */ - 'V4': 3, 3: 'V4' -}; - -/** - * @enum - */ -org.apache.arrow.flatbuf.UnionMode = { - 'Sparse': 0, 0: 'Sparse', - 'Dense': 1, 1: 'Dense', -}; - -/** - * @enum - */ -org.apache.arrow.flatbuf.Precision = { - 'HALF': 0, 0: 'HALF', - 'SINGLE': 1, 1: 'SINGLE', - 'DOUBLE': 2, 2: 'DOUBLE', -}; - -/** - * @enum - */ -org.apache.arrow.flatbuf.DateUnit = { - 'DAY': 0, 0: 'DAY', - 'MILLISECOND': 1, 1: 'MILLISECOND', -}; - -/** - * @enum - */ -org.apache.arrow.flatbuf.TimeUnit = { - 'SECOND': 0, 0: 'SECOND', - 'MILLISECOND': 1, 1: 'MILLISECOND', - 'MICROSECOND': 2, 2: 'MICROSECOND', - 'NANOSECOND': 3, 3: 'NANOSECOND', -}; - -/** - * @enum - */ -org.apache.arrow.flatbuf.IntervalUnit = { - 'YEAR_MONTH': 0, 0: 'YEAR_MONTH', - 'DAY_TIME': 1, 1: 'DAY_TIME', -}; - -/** - * ---------------------------------------------------------------------- - * Top-level Type value, enabling extensible type-specific metadata. 
We can - * add new logical types to Type without breaking backwards compatibility - * - * @enum - */ -org.apache.arrow.flatbuf.Type = { - 'NONE': 0, 0: 'NONE', - 'Null': 1, 1: 'Null', - 'Int': 2, 2: 'Int', - 'FloatingPoint': 3, 3: 'FloatingPoint', - 'Binary': 4, 4: 'Binary', - 'Utf8': 5, 5: 'Utf8', - 'Bool': 6, 6: 'Bool', - 'Decimal': 7, 7: 'Decimal', - 'Date': 8, 8: 'Date', - 'Time': 9, 9: 'Time', - 'Timestamp': 10, 10: 'Timestamp', - 'Interval': 11, 11: 'Interval', - 'List': 12, 12: 'List', - 'Struct_': 13, 13: 'Struct_', - 'Union': 14, 14: 'Union', - 'FixedSizeBinary': 15, 15: 'FixedSizeBinary', - 'FixedSizeList': 16, 16: 'FixedSizeList', - 'Map': 17, 17: 'Map' -}; - -/** - * ---------------------------------------------------------------------- - * The possible types of a vector - * - * @enum - */ -org.apache.arrow.flatbuf.VectorType = { - /** - * used in List type, Dense Union and variable length primitive types (String, Binary) - */ - 'OFFSET': 0, 0: 'OFFSET', - - /** - * actual data, either wixed width primitive types in slots or variable width delimited by an OFFSET vector - */ - 'DATA': 1, 1: 'DATA', - - /** - * Bit vector indicating if each value is null - */ - 'VALIDITY': 2, 2: 'VALIDITY', - - /** - * Type vector used in Union type - */ - 'TYPE': 3, 3: 'TYPE' -}; - -/** - * ---------------------------------------------------------------------- - * Endianness of the platform producing the data - * - * @enum - */ -org.apache.arrow.flatbuf.Endianness = { - 'Little': 0, 0: 'Little', - 'Big': 1, 1: 'Big', -}; - -/** - * These are stored in the flatbuffer in the Type union below - * - * @constructor - */ -org.apache.arrow.flatbuf.Null = function() { - /** - * @type {flatbuffers.ByteBuffer} - */ - this.bb = null; - - /** - * @type {number} - */ - this.bb_pos = 0; -}; - -/** - * @param {number} i - * @param {flatbuffers.ByteBuffer} bb - * @returns {org.apache.arrow.flatbuf.Null} - */ -org.apache.arrow.flatbuf.Null.prototype.__init = function(i, bb) { - 
this.bb_pos = i; - this.bb = bb; - return this; -}; - -/** - * @param {flatbuffers.ByteBuffer} bb - * @param {org.apache.arrow.flatbuf.Null=} obj - * @returns {org.apache.arrow.flatbuf.Null} - */ -org.apache.arrow.flatbuf.Null.getRootAsNull = function(bb, obj) { - return (obj || new org.apache.arrow.flatbuf.Null).__init(bb.readInt32(bb.position()) + bb.position(), bb); -}; - -/** - * @param {flatbuffers.Builder} builder - */ -org.apache.arrow.flatbuf.Null.startNull = function(builder) { - builder.startObject(0); -}; - -/** - * @param {flatbuffers.Builder} builder - * @returns {flatbuffers.Offset} - */ -org.apache.arrow.flatbuf.Null.endNull = function(builder) { - var offset = builder.endObject(); - return offset; -}; - -/** - * A Struct_ in the flatbuffer metadata is the same as an Arrow Struct - * (according to the physical memory layout). We used Struct_ here as - * Struct is a reserved word in Flatbuffers - * - * @constructor - */ -org.apache.arrow.flatbuf.Struct_ = function() { - /** - * @type {flatbuffers.ByteBuffer} - */ - this.bb = null; - - /** - * @type {number} - */ - this.bb_pos = 0; -}; - -/** - * @param {number} i - * @param {flatbuffers.ByteBuffer} bb - * @returns {org.apache.arrow.flatbuf.Struct_} - */ -org.apache.arrow.flatbuf.Struct_.prototype.__init = function(i, bb) { - this.bb_pos = i; - this.bb = bb; - return this; -}; - -/** - * @param {flatbuffers.ByteBuffer} bb - * @param {org.apache.arrow.flatbuf.Struct_=} obj - * @returns {org.apache.arrow.flatbuf.Struct_} - */ -org.apache.arrow.flatbuf.Struct_.getRootAsStruct_ = function(bb, obj) { - return (obj || new org.apache.arrow.flatbuf.Struct_).__init(bb.readInt32(bb.position()) + bb.position(), bb); -}; - -/** - * @param {flatbuffers.Builder} builder - */ -org.apache.arrow.flatbuf.Struct_.startStruct_ = function(builder) { - builder.startObject(0); -}; - -/** - * @param {flatbuffers.Builder} builder - * @returns {flatbuffers.Offset} - */ -org.apache.arrow.flatbuf.Struct_.endStruct_ = 
function(builder) { - var offset = builder.endObject(); - return offset; -}; - -/** - * @constructor - */ -org.apache.arrow.flatbuf.List = function() { - /** - * @type {flatbuffers.ByteBuffer} - */ - this.bb = null; - - /** - * @type {number} - */ - this.bb_pos = 0; -}; - -/** - * @param {number} i - * @param {flatbuffers.ByteBuffer} bb - * @returns {org.apache.arrow.flatbuf.List} - */ -org.apache.arrow.flatbuf.List.prototype.__init = function(i, bb) { - this.bb_pos = i; - this.bb = bb; - return this; -}; - -/** - * @param {flatbuffers.ByteBuffer} bb - * @param {org.apache.arrow.flatbuf.List=} obj - * @returns {org.apache.arrow.flatbuf.List} - */ -org.apache.arrow.flatbuf.List.getRootAsList = function(bb, obj) { - return (obj || new org.apache.arrow.flatbuf.List).__init(bb.readInt32(bb.position()) + bb.position(), bb); -}; - -/** - * @param {flatbuffers.Builder} builder - */ -org.apache.arrow.flatbuf.List.startList = function(builder) { - builder.startObject(0); -}; - -/** - * @param {flatbuffers.Builder} builder - * @returns {flatbuffers.Offset} - */ -org.apache.arrow.flatbuf.List.endList = function(builder) { - var offset = builder.endObject(); - return offset; -}; - -/** - * @constructor - */ -org.apache.arrow.flatbuf.FixedSizeList = function() { - /** - * @type {flatbuffers.ByteBuffer} - */ - this.bb = null; - - /** - * @type {number} - */ - this.bb_pos = 0; -}; - -/** - * @param {number} i - * @param {flatbuffers.ByteBuffer} bb - * @returns {org.apache.arrow.flatbuf.FixedSizeList} - */ -org.apache.arrow.flatbuf.FixedSizeList.prototype.__init = function(i, bb) { - this.bb_pos = i; - this.bb = bb; - return this; -}; - -/** - * @param {flatbuffers.ByteBuffer} bb - * @param {org.apache.arrow.flatbuf.FixedSizeList=} obj - * @returns {org.apache.arrow.flatbuf.FixedSizeList} - */ -org.apache.arrow.flatbuf.FixedSizeList.getRootAsFixedSizeList = function(bb, obj) { - return (obj || new org.apache.arrow.flatbuf.FixedSizeList).__init(bb.readInt32(bb.position()) + 
bb.position(), bb); -}; - -/** - * Number of list items per value - * - * @returns {number} - */ -org.apache.arrow.flatbuf.FixedSizeList.prototype.listSize = function() { - var offset = this.bb.__offset(this.bb_pos, 4); - return offset ? this.bb.readInt32(this.bb_pos + offset) : 0; -}; - -/** - * @param {flatbuffers.Builder} builder - */ -org.apache.arrow.flatbuf.FixedSizeList.startFixedSizeList = function(builder) { - builder.startObject(1); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {number} listSize - */ -org.apache.arrow.flatbuf.FixedSizeList.addListSize = function(builder, listSize) { - builder.addFieldInt32(0, listSize, 0); -}; - -/** - * @param {flatbuffers.Builder} builder - * @returns {flatbuffers.Offset} - */ -org.apache.arrow.flatbuf.FixedSizeList.endFixedSizeList = function(builder) { - var offset = builder.endObject(); - return offset; -}; - -/** - * A Map is a logical nested type that is represented as - * - * List> - * - * In this layout, the keys and values are each respectively contiguous. We do - * not constrain the key and value types, so the application is responsible - * for ensuring that the keys are hashable and unique. Whether the keys are sorted - * may be set in the metadata for this field - * - * In a Field with Map type, the Field has a child Struct field, which then - * has two children: key type and the second the value type. The names of the - * child fields may be respectively "entry", "key", and "value", but this is - * not enforced - * - * Map - * - child[0] entry: Struct - * - child[0] key: K - * - child[1] value: V - * - * Neither the "entry" field nor the "key" field may be nullable. - * - * The metadata is structured so that Arrow systems without special handling - * for Map can make Map an alias for List. The "layout" attribute for the Map - * field must have the same contents as a List. 
- * - * @constructor - */ -org.apache.arrow.flatbuf.Map = function() { - /** - * @type {flatbuffers.ByteBuffer} - */ - this.bb = null; - - /** - * @type {number} - */ - this.bb_pos = 0; -}; - -/** - * @param {number} i - * @param {flatbuffers.ByteBuffer} bb - * @returns {org.apache.arrow.flatbuf.Map} - */ -org.apache.arrow.flatbuf.Map.prototype.__init = function(i, bb) { - this.bb_pos = i; - this.bb = bb; - return this; -}; - -/** - * @param {flatbuffers.ByteBuffer} bb - * @param {org.apache.arrow.flatbuf.Map=} obj - * @returns {org.apache.arrow.flatbuf.Map} - */ -org.apache.arrow.flatbuf.Map.getRootAsMap = function(bb, obj) { - return (obj || new org.apache.arrow.flatbuf.Map).__init(bb.readInt32(bb.position()) + bb.position(), bb); -}; - -/** - * Set to true if the keys within each value are sorted - * - * @returns {boolean} - */ -org.apache.arrow.flatbuf.Map.prototype.keysSorted = function() { - var offset = this.bb.__offset(this.bb_pos, 4); - return offset ? !!this.bb.readInt8(this.bb_pos + offset) : false; -}; - -/** - * @param {flatbuffers.Builder} builder - */ -org.apache.arrow.flatbuf.Map.startMap = function(builder) { - builder.startObject(1); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {boolean} keysSorted - */ -org.apache.arrow.flatbuf.Map.addKeysSorted = function(builder, keysSorted) { - builder.addFieldInt8(0, +keysSorted, +false); -}; - -/** - * @param {flatbuffers.Builder} builder - * @returns {flatbuffers.Offset} - */ -org.apache.arrow.flatbuf.Map.endMap = function(builder) { - var offset = builder.endObject(); - return offset; -}; - -/** - * A union is a complex type with children in Field - * By default ids in the type vector refer to the offsets in the children - * optionally typeIds provides an indirection between the child offset and the type id - * for each child typeIds[offset] is the id used in the type vector - * - * @constructor - */ -org.apache.arrow.flatbuf.Union = function() { - /** - * @type {flatbuffers.ByteBuffer} - 
*/ - this.bb = null; - - /** - * @type {number} - */ - this.bb_pos = 0; -}; - -/** - * @param {number} i - * @param {flatbuffers.ByteBuffer} bb - * @returns {org.apache.arrow.flatbuf.Union} - */ -org.apache.arrow.flatbuf.Union.prototype.__init = function(i, bb) { - this.bb_pos = i; - this.bb = bb; - return this; -}; - -/** - * @param {flatbuffers.ByteBuffer} bb - * @param {org.apache.arrow.flatbuf.Union=} obj - * @returns {org.apache.arrow.flatbuf.Union} - */ -org.apache.arrow.flatbuf.Union.getRootAsUnion = function(bb, obj) { - return (obj || new org.apache.arrow.flatbuf.Union).__init(bb.readInt32(bb.position()) + bb.position(), bb); -}; - -/** - * @returns {org.apache.arrow.flatbuf.UnionMode} - */ -org.apache.arrow.flatbuf.Union.prototype.mode = function() { - var offset = this.bb.__offset(this.bb_pos, 4); - return offset ? /** @type {org.apache.arrow.flatbuf.UnionMode} */ (this.bb.readInt16(this.bb_pos + offset)) : org.apache.arrow.flatbuf.UnionMode.Sparse; -}; - -/** - * @param {number} index - * @returns {number} - */ -org.apache.arrow.flatbuf.Union.prototype.typeIds = function(index) { - var offset = this.bb.__offset(this.bb_pos, 6); - return offset ? this.bb.readInt32(this.bb.__vector(this.bb_pos + offset) + index * 4) : 0; -}; - -/** - * @returns {number} - */ -org.apache.arrow.flatbuf.Union.prototype.typeIdsLength = function() { - var offset = this.bb.__offset(this.bb_pos, 6); - return offset ? this.bb.__vector_len(this.bb_pos + offset) : 0; -}; - -/** - * @returns {Int32Array} - */ -org.apache.arrow.flatbuf.Union.prototype.typeIdsArray = function() { - var offset = this.bb.__offset(this.bb_pos, 6); - return offset ? 
new Int32Array(this.bb.bytes().buffer, this.bb.bytes().byteOffset + this.bb.__vector(this.bb_pos + offset), this.bb.__vector_len(this.bb_pos + offset)) : null; -}; - -/** - * @param {flatbuffers.Builder} builder - */ -org.apache.arrow.flatbuf.Union.startUnion = function(builder) { - builder.startObject(2); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {org.apache.arrow.flatbuf.UnionMode} mode - */ -org.apache.arrow.flatbuf.Union.addMode = function(builder, mode) { - builder.addFieldInt16(0, mode, org.apache.arrow.flatbuf.UnionMode.Sparse); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {flatbuffers.Offset} typeIdsOffset - */ -org.apache.arrow.flatbuf.Union.addTypeIds = function(builder, typeIdsOffset) { - builder.addFieldOffset(1, typeIdsOffset, 0); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {Array.} data - * @returns {flatbuffers.Offset} - */ -org.apache.arrow.flatbuf.Union.createTypeIdsVector = function(builder, data) { - builder.startVector(4, data.length, 4); - for (var i = data.length - 1; i >= 0; i--) { - builder.addInt32(data[i]); - } - return builder.endVector(); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {number} numElems - */ -org.apache.arrow.flatbuf.Union.startTypeIdsVector = function(builder, numElems) { - builder.startVector(4, numElems, 4); -}; - -/** - * @param {flatbuffers.Builder} builder - * @returns {flatbuffers.Offset} - */ -org.apache.arrow.flatbuf.Union.endUnion = function(builder) { - var offset = builder.endObject(); - return offset; -}; - -/** - * @constructor - */ -org.apache.arrow.flatbuf.Int = function() { - /** - * @type {flatbuffers.ByteBuffer} - */ - this.bb = null; - - /** - * @type {number} - */ - this.bb_pos = 0; -}; - -/** - * @param {number} i - * @param {flatbuffers.ByteBuffer} bb - * @returns {org.apache.arrow.flatbuf.Int} - */ -org.apache.arrow.flatbuf.Int.prototype.__init = function(i, bb) { - this.bb_pos = i; - this.bb = bb; - return this; -}; - -/** - 
* @param {flatbuffers.ByteBuffer} bb - * @param {org.apache.arrow.flatbuf.Int=} obj - * @returns {org.apache.arrow.flatbuf.Int} - */ -org.apache.arrow.flatbuf.Int.getRootAsInt = function(bb, obj) { - return (obj || new org.apache.arrow.flatbuf.Int).__init(bb.readInt32(bb.position()) + bb.position(), bb); -}; - -/** - * @returns {number} - */ -org.apache.arrow.flatbuf.Int.prototype.bitWidth = function() { - var offset = this.bb.__offset(this.bb_pos, 4); - return offset ? this.bb.readInt32(this.bb_pos + offset) : 0; -}; - -/** - * @returns {boolean} - */ -org.apache.arrow.flatbuf.Int.prototype.isSigned = function() { - var offset = this.bb.__offset(this.bb_pos, 6); - return offset ? !!this.bb.readInt8(this.bb_pos + offset) : false; -}; - -/** - * @param {flatbuffers.Builder} builder - */ -org.apache.arrow.flatbuf.Int.startInt = function(builder) { - builder.startObject(2); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {number} bitWidth - */ -org.apache.arrow.flatbuf.Int.addBitWidth = function(builder, bitWidth) { - builder.addFieldInt32(0, bitWidth, 0); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {boolean} isSigned - */ -org.apache.arrow.flatbuf.Int.addIsSigned = function(builder, isSigned) { - builder.addFieldInt8(1, +isSigned, +false); -}; - -/** - * @param {flatbuffers.Builder} builder - * @returns {flatbuffers.Offset} - */ -org.apache.arrow.flatbuf.Int.endInt = function(builder) { - var offset = builder.endObject(); - return offset; -}; - -/** - * @constructor - */ -org.apache.arrow.flatbuf.FloatingPoint = function() { - /** - * @type {flatbuffers.ByteBuffer} - */ - this.bb = null; - - /** - * @type {number} - */ - this.bb_pos = 0; -}; - -/** - * @param {number} i - * @param {flatbuffers.ByteBuffer} bb - * @returns {org.apache.arrow.flatbuf.FloatingPoint} - */ -org.apache.arrow.flatbuf.FloatingPoint.prototype.__init = function(i, bb) { - this.bb_pos = i; - this.bb = bb; - return this; -}; - -/** - * @param 
{flatbuffers.ByteBuffer} bb - * @param {org.apache.arrow.flatbuf.FloatingPoint=} obj - * @returns {org.apache.arrow.flatbuf.FloatingPoint} - */ -org.apache.arrow.flatbuf.FloatingPoint.getRootAsFloatingPoint = function(bb, obj) { - return (obj || new org.apache.arrow.flatbuf.FloatingPoint).__init(bb.readInt32(bb.position()) + bb.position(), bb); -}; - -/** - * @returns {org.apache.arrow.flatbuf.Precision} - */ -org.apache.arrow.flatbuf.FloatingPoint.prototype.precision = function() { - var offset = this.bb.__offset(this.bb_pos, 4); - return offset ? /** @type {org.apache.arrow.flatbuf.Precision} */ (this.bb.readInt16(this.bb_pos + offset)) : org.apache.arrow.flatbuf.Precision.HALF; -}; - -/** - * @param {flatbuffers.Builder} builder - */ -org.apache.arrow.flatbuf.FloatingPoint.startFloatingPoint = function(builder) { - builder.startObject(1); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {org.apache.arrow.flatbuf.Precision} precision - */ -org.apache.arrow.flatbuf.FloatingPoint.addPrecision = function(builder, precision) { - builder.addFieldInt16(0, precision, org.apache.arrow.flatbuf.Precision.HALF); -}; - -/** - * @param {flatbuffers.Builder} builder - * @returns {flatbuffers.Offset} - */ -org.apache.arrow.flatbuf.FloatingPoint.endFloatingPoint = function(builder) { - var offset = builder.endObject(); - return offset; -}; - -/** - * Unicode with UTF-8 encoding - * - * @constructor - */ -org.apache.arrow.flatbuf.Utf8 = function() { - /** - * @type {flatbuffers.ByteBuffer} - */ - this.bb = null; - - /** - * @type {number} - */ - this.bb_pos = 0; -}; - -/** - * @param {number} i - * @param {flatbuffers.ByteBuffer} bb - * @returns {org.apache.arrow.flatbuf.Utf8} - */ -org.apache.arrow.flatbuf.Utf8.prototype.__init = function(i, bb) { - this.bb_pos = i; - this.bb = bb; - return this; -}; - -/** - * @param {flatbuffers.ByteBuffer} bb - * @param {org.apache.arrow.flatbuf.Utf8=} obj - * @returns {org.apache.arrow.flatbuf.Utf8} - */ 
-org.apache.arrow.flatbuf.Utf8.getRootAsUtf8 = function(bb, obj) { - return (obj || new org.apache.arrow.flatbuf.Utf8).__init(bb.readInt32(bb.position()) + bb.position(), bb); -}; - -/** - * @param {flatbuffers.Builder} builder - */ -org.apache.arrow.flatbuf.Utf8.startUtf8 = function(builder) { - builder.startObject(0); -}; - -/** - * @param {flatbuffers.Builder} builder - * @returns {flatbuffers.Offset} - */ -org.apache.arrow.flatbuf.Utf8.endUtf8 = function(builder) { - var offset = builder.endObject(); - return offset; -}; - -/** - * @constructor - */ -org.apache.arrow.flatbuf.Binary = function() { - /** - * @type {flatbuffers.ByteBuffer} - */ - this.bb = null; - - /** - * @type {number} - */ - this.bb_pos = 0; -}; - -/** - * @param {number} i - * @param {flatbuffers.ByteBuffer} bb - * @returns {org.apache.arrow.flatbuf.Binary} - */ -org.apache.arrow.flatbuf.Binary.prototype.__init = function(i, bb) { - this.bb_pos = i; - this.bb = bb; - return this; -}; - -/** - * @param {flatbuffers.ByteBuffer} bb - * @param {org.apache.arrow.flatbuf.Binary=} obj - * @returns {org.apache.arrow.flatbuf.Binary} - */ -org.apache.arrow.flatbuf.Binary.getRootAsBinary = function(bb, obj) { - return (obj || new org.apache.arrow.flatbuf.Binary).__init(bb.readInt32(bb.position()) + bb.position(), bb); -}; - -/** - * @param {flatbuffers.Builder} builder - */ -org.apache.arrow.flatbuf.Binary.startBinary = function(builder) { - builder.startObject(0); -}; - -/** - * @param {flatbuffers.Builder} builder - * @returns {flatbuffers.Offset} - */ -org.apache.arrow.flatbuf.Binary.endBinary = function(builder) { - var offset = builder.endObject(); - return offset; -}; - -/** - * @constructor - */ -org.apache.arrow.flatbuf.FixedSizeBinary = function() { - /** - * @type {flatbuffers.ByteBuffer} - */ - this.bb = null; - - /** - * @type {number} - */ - this.bb_pos = 0; -}; - -/** - * @param {number} i - * @param {flatbuffers.ByteBuffer} bb - * @returns {org.apache.arrow.flatbuf.FixedSizeBinary} - */ 
-org.apache.arrow.flatbuf.FixedSizeBinary.prototype.__init = function(i, bb) { - this.bb_pos = i; - this.bb = bb; - return this; -}; - -/** - * @param {flatbuffers.ByteBuffer} bb - * @param {org.apache.arrow.flatbuf.FixedSizeBinary=} obj - * @returns {org.apache.arrow.flatbuf.FixedSizeBinary} - */ -org.apache.arrow.flatbuf.FixedSizeBinary.getRootAsFixedSizeBinary = function(bb, obj) { - return (obj || new org.apache.arrow.flatbuf.FixedSizeBinary).__init(bb.readInt32(bb.position()) + bb.position(), bb); -}; - -/** - * Number of bytes per value - * - * @returns {number} - */ -org.apache.arrow.flatbuf.FixedSizeBinary.prototype.byteWidth = function() { - var offset = this.bb.__offset(this.bb_pos, 4); - return offset ? this.bb.readInt32(this.bb_pos + offset) : 0; -}; - -/** - * @param {flatbuffers.Builder} builder - */ -org.apache.arrow.flatbuf.FixedSizeBinary.startFixedSizeBinary = function(builder) { - builder.startObject(1); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {number} byteWidth - */ -org.apache.arrow.flatbuf.FixedSizeBinary.addByteWidth = function(builder, byteWidth) { - builder.addFieldInt32(0, byteWidth, 0); -}; - -/** - * @param {flatbuffers.Builder} builder - * @returns {flatbuffers.Offset} - */ -org.apache.arrow.flatbuf.FixedSizeBinary.endFixedSizeBinary = function(builder) { - var offset = builder.endObject(); - return offset; -}; - -/** - * @constructor - */ -org.apache.arrow.flatbuf.Bool = function() { - /** - * @type {flatbuffers.ByteBuffer} - */ - this.bb = null; - - /** - * @type {number} - */ - this.bb_pos = 0; -}; - -/** - * @param {number} i - * @param {flatbuffers.ByteBuffer} bb - * @returns {org.apache.arrow.flatbuf.Bool} - */ -org.apache.arrow.flatbuf.Bool.prototype.__init = function(i, bb) { - this.bb_pos = i; - this.bb = bb; - return this; -}; - -/** - * @param {flatbuffers.ByteBuffer} bb - * @param {org.apache.arrow.flatbuf.Bool=} obj - * @returns {org.apache.arrow.flatbuf.Bool} - */ 
-org.apache.arrow.flatbuf.Bool.getRootAsBool = function(bb, obj) { - return (obj || new org.apache.arrow.flatbuf.Bool).__init(bb.readInt32(bb.position()) + bb.position(), bb); -}; - -/** - * @param {flatbuffers.Builder} builder - */ -org.apache.arrow.flatbuf.Bool.startBool = function(builder) { - builder.startObject(0); -}; - -/** - * @param {flatbuffers.Builder} builder - * @returns {flatbuffers.Offset} - */ -org.apache.arrow.flatbuf.Bool.endBool = function(builder) { - var offset = builder.endObject(); - return offset; -}; - -/** - * @constructor - */ -org.apache.arrow.flatbuf.Decimal = function() { - /** - * @type {flatbuffers.ByteBuffer} - */ - this.bb = null; - - /** - * @type {number} - */ - this.bb_pos = 0; -}; - -/** - * @param {number} i - * @param {flatbuffers.ByteBuffer} bb - * @returns {org.apache.arrow.flatbuf.Decimal} - */ -org.apache.arrow.flatbuf.Decimal.prototype.__init = function(i, bb) { - this.bb_pos = i; - this.bb = bb; - return this; -}; - -/** - * @param {flatbuffers.ByteBuffer} bb - * @param {org.apache.arrow.flatbuf.Decimal=} obj - * @returns {org.apache.arrow.flatbuf.Decimal} - */ -org.apache.arrow.flatbuf.Decimal.getRootAsDecimal = function(bb, obj) { - return (obj || new org.apache.arrow.flatbuf.Decimal).__init(bb.readInt32(bb.position()) + bb.position(), bb); -}; - -/** - * Total number of decimal digits - * - * @returns {number} - */ -org.apache.arrow.flatbuf.Decimal.prototype.precision = function() { - var offset = this.bb.__offset(this.bb_pos, 4); - return offset ? this.bb.readInt32(this.bb_pos + offset) : 0; -}; - -/** - * Number of digits after the decimal point "." - * - * @returns {number} - */ -org.apache.arrow.flatbuf.Decimal.prototype.scale = function() { - var offset = this.bb.__offset(this.bb_pos, 6); - return offset ? 
this.bb.readInt32(this.bb_pos + offset) : 0; -}; - -/** - * @param {flatbuffers.Builder} builder - */ -org.apache.arrow.flatbuf.Decimal.startDecimal = function(builder) { - builder.startObject(2); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {number} precision - */ -org.apache.arrow.flatbuf.Decimal.addPrecision = function(builder, precision) { - builder.addFieldInt32(0, precision, 0); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {number} scale - */ -org.apache.arrow.flatbuf.Decimal.addScale = function(builder, scale) { - builder.addFieldInt32(1, scale, 0); -}; - -/** - * @param {flatbuffers.Builder} builder - * @returns {flatbuffers.Offset} - */ -org.apache.arrow.flatbuf.Decimal.endDecimal = function(builder) { - var offset = builder.endObject(); - return offset; -}; - -/** - * Date is either a 32-bit or 64-bit type representing elapsed time since UNIX - * epoch (1970-01-01), stored in either of two units: - * - * * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no - * leap seconds), where the values are evenly divisible by 86400000 - * * Days (32 bits) since the UNIX epoch - * - * @constructor - */ -org.apache.arrow.flatbuf.Date = function() { - /** - * @type {flatbuffers.ByteBuffer} - */ - this.bb = null; - - /** - * @type {number} - */ - this.bb_pos = 0; -}; - -/** - * @param {number} i - * @param {flatbuffers.ByteBuffer} bb - * @returns {org.apache.arrow.flatbuf.Date} - */ -org.apache.arrow.flatbuf.Date.prototype.__init = function(i, bb) { - this.bb_pos = i; - this.bb = bb; - return this; -}; - -/** - * @param {flatbuffers.ByteBuffer} bb - * @param {org.apache.arrow.flatbuf.Date=} obj - * @returns {org.apache.arrow.flatbuf.Date} - */ -org.apache.arrow.flatbuf.Date.getRootAsDate = function(bb, obj) { - return (obj || new org.apache.arrow.flatbuf.Date).__init(bb.readInt32(bb.position()) + bb.position(), bb); -}; - -/** - * @returns {org.apache.arrow.flatbuf.DateUnit} - */ 
-org.apache.arrow.flatbuf.Date.prototype.unit = function() { - var offset = this.bb.__offset(this.bb_pos, 4); - return offset ? /** @type {org.apache.arrow.flatbuf.DateUnit} */ (this.bb.readInt16(this.bb_pos + offset)) : org.apache.arrow.flatbuf.DateUnit.MILLISECOND; -}; - -/** - * @param {flatbuffers.Builder} builder - */ -org.apache.arrow.flatbuf.Date.startDate = function(builder) { - builder.startObject(1); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {org.apache.arrow.flatbuf.DateUnit} unit - */ -org.apache.arrow.flatbuf.Date.addUnit = function(builder, unit) { - builder.addFieldInt16(0, unit, org.apache.arrow.flatbuf.DateUnit.MILLISECOND); -}; - -/** - * @param {flatbuffers.Builder} builder - * @returns {flatbuffers.Offset} - */ -org.apache.arrow.flatbuf.Date.endDate = function(builder) { - var offset = builder.endObject(); - return offset; -}; - -/** - * Time type. The physical storage type depends on the unit - * - SECOND and MILLISECOND: 32 bits - * - MICROSECOND and NANOSECOND: 64 bits - * - * @constructor - */ -org.apache.arrow.flatbuf.Time = function() { - /** - * @type {flatbuffers.ByteBuffer} - */ - this.bb = null; - - /** - * @type {number} - */ - this.bb_pos = 0; -}; - -/** - * @param {number} i - * @param {flatbuffers.ByteBuffer} bb - * @returns {org.apache.arrow.flatbuf.Time} - */ -org.apache.arrow.flatbuf.Time.prototype.__init = function(i, bb) { - this.bb_pos = i; - this.bb = bb; - return this; -}; - -/** - * @param {flatbuffers.ByteBuffer} bb - * @param {org.apache.arrow.flatbuf.Time=} obj - * @returns {org.apache.arrow.flatbuf.Time} - */ -org.apache.arrow.flatbuf.Time.getRootAsTime = function(bb, obj) { - return (obj || new org.apache.arrow.flatbuf.Time).__init(bb.readInt32(bb.position()) + bb.position(), bb); -}; - -/** - * @returns {org.apache.arrow.flatbuf.TimeUnit} - */ -org.apache.arrow.flatbuf.Time.prototype.unit = function() { - var offset = this.bb.__offset(this.bb_pos, 4); - return offset ? 
/** @type {org.apache.arrow.flatbuf.TimeUnit} */ (this.bb.readInt16(this.bb_pos + offset)) : org.apache.arrow.flatbuf.TimeUnit.MILLISECOND; -}; - -/** - * @returns {number} - */ -org.apache.arrow.flatbuf.Time.prototype.bitWidth = function() { - var offset = this.bb.__offset(this.bb_pos, 6); - return offset ? this.bb.readInt32(this.bb_pos + offset) : 32; -}; - -/** - * @param {flatbuffers.Builder} builder - */ -org.apache.arrow.flatbuf.Time.startTime = function(builder) { - builder.startObject(2); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {org.apache.arrow.flatbuf.TimeUnit} unit - */ -org.apache.arrow.flatbuf.Time.addUnit = function(builder, unit) { - builder.addFieldInt16(0, unit, org.apache.arrow.flatbuf.TimeUnit.MILLISECOND); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {number} bitWidth - */ -org.apache.arrow.flatbuf.Time.addBitWidth = function(builder, bitWidth) { - builder.addFieldInt32(1, bitWidth, 32); -}; - -/** - * @param {flatbuffers.Builder} builder - * @returns {flatbuffers.Offset} - */ -org.apache.arrow.flatbuf.Time.endTime = function(builder) { - var offset = builder.endObject(); - return offset; -}; - -/** - * Time elapsed from the Unix epoch, 00:00:00.000 on 1 January 1970, excluding - * leap seconds, as a 64-bit integer. Note that UNIX time does not include - * leap seconds. - * - * The Timestamp metadata supports both "time zone naive" and "time zone - * aware" timestamps. 
Read about the timezone attribute for more detail - * - * @constructor - */ -org.apache.arrow.flatbuf.Timestamp = function() { - /** - * @type {flatbuffers.ByteBuffer} - */ - this.bb = null; - - /** - * @type {number} - */ - this.bb_pos = 0; -}; - -/** - * @param {number} i - * @param {flatbuffers.ByteBuffer} bb - * @returns {org.apache.arrow.flatbuf.Timestamp} - */ -org.apache.arrow.flatbuf.Timestamp.prototype.__init = function(i, bb) { - this.bb_pos = i; - this.bb = bb; - return this; -}; - -/** - * @param {flatbuffers.ByteBuffer} bb - * @param {org.apache.arrow.flatbuf.Timestamp=} obj - * @returns {org.apache.arrow.flatbuf.Timestamp} - */ -org.apache.arrow.flatbuf.Timestamp.getRootAsTimestamp = function(bb, obj) { - return (obj || new org.apache.arrow.flatbuf.Timestamp).__init(bb.readInt32(bb.position()) + bb.position(), bb); -}; - -/** - * @returns {org.apache.arrow.flatbuf.TimeUnit} - */ -org.apache.arrow.flatbuf.Timestamp.prototype.unit = function() { - var offset = this.bb.__offset(this.bb_pos, 4); - return offset ? /** @type {org.apache.arrow.flatbuf.TimeUnit} */ (this.bb.readInt16(this.bb_pos + offset)) : org.apache.arrow.flatbuf.TimeUnit.SECOND; -}; - -/** - * The time zone is a string indicating the name of a time zone, one of: - * - * * As used in the Olson time zone database (the "tz database" or - * "tzdata"), such as "America/New_York" - * * An absolute time zone offset of the form +XX:XX or -XX:XX, such as +07:30 - * - * Whether a timezone string is present indicates different semantics about - * the data: - * - * * If the time zone is null or equal to an empty string, the data is "time - * zone naive" and shall be displayed *as is* to the user, not localized - * to the locale of the user. 
This data can be though of as UTC but - * without having "UTC" as the time zone, it is not considered to be - * localized to any time zone - * - * * If the time zone is set to a valid value, values can be displayed as - * "localized" to that time zone, even though the underlying 64-bit - * integers are identical to the same data stored in UTC. Converting - * between time zones is a metadata-only operation and does not change the - * underlying values - * - * @param {flatbuffers.Encoding=} optionalEncoding - * @returns {string|Uint8Array|null} - */ -org.apache.arrow.flatbuf.Timestamp.prototype.timezone = function(optionalEncoding) { - var offset = this.bb.__offset(this.bb_pos, 6); - return offset ? this.bb.__string(this.bb_pos + offset, optionalEncoding) : null; -}; - -/** - * @param {flatbuffers.Builder} builder - */ -org.apache.arrow.flatbuf.Timestamp.startTimestamp = function(builder) { - builder.startObject(2); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {org.apache.arrow.flatbuf.TimeUnit} unit - */ -org.apache.arrow.flatbuf.Timestamp.addUnit = function(builder, unit) { - builder.addFieldInt16(0, unit, org.apache.arrow.flatbuf.TimeUnit.SECOND); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {flatbuffers.Offset} timezoneOffset - */ -org.apache.arrow.flatbuf.Timestamp.addTimezone = function(builder, timezoneOffset) { - builder.addFieldOffset(1, timezoneOffset, 0); -}; - -/** - * @param {flatbuffers.Builder} builder - * @returns {flatbuffers.Offset} - */ -org.apache.arrow.flatbuf.Timestamp.endTimestamp = function(builder) { - var offset = builder.endObject(); - return offset; -}; - -/** - * @constructor - */ -org.apache.arrow.flatbuf.Interval = function() { - /** - * @type {flatbuffers.ByteBuffer} - */ - this.bb = null; - - /** - * @type {number} - */ - this.bb_pos = 0; -}; - -/** - * @param {number} i - * @param {flatbuffers.ByteBuffer} bb - * @returns {org.apache.arrow.flatbuf.Interval} - */ 
-org.apache.arrow.flatbuf.Interval.prototype.__init = function(i, bb) { - this.bb_pos = i; - this.bb = bb; - return this; -}; - -/** - * @param {flatbuffers.ByteBuffer} bb - * @param {org.apache.arrow.flatbuf.Interval=} obj - * @returns {org.apache.arrow.flatbuf.Interval} - */ -org.apache.arrow.flatbuf.Interval.getRootAsInterval = function(bb, obj) { - return (obj || new org.apache.arrow.flatbuf.Interval).__init(bb.readInt32(bb.position()) + bb.position(), bb); -}; - -/** - * @returns {org.apache.arrow.flatbuf.IntervalUnit} - */ -org.apache.arrow.flatbuf.Interval.prototype.unit = function() { - var offset = this.bb.__offset(this.bb_pos, 4); - return offset ? /** @type {org.apache.arrow.flatbuf.IntervalUnit} */ (this.bb.readInt16(this.bb_pos + offset)) : org.apache.arrow.flatbuf.IntervalUnit.YEAR_MONTH; -}; - -/** - * @param {flatbuffers.Builder} builder - */ -org.apache.arrow.flatbuf.Interval.startInterval = function(builder) { - builder.startObject(1); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {org.apache.arrow.flatbuf.IntervalUnit} unit - */ -org.apache.arrow.flatbuf.Interval.addUnit = function(builder, unit) { - builder.addFieldInt16(0, unit, org.apache.arrow.flatbuf.IntervalUnit.YEAR_MONTH); -}; - -/** - * @param {flatbuffers.Builder} builder - * @returns {flatbuffers.Offset} - */ -org.apache.arrow.flatbuf.Interval.endInterval = function(builder) { - var offset = builder.endObject(); - return offset; -}; - -/** - * ---------------------------------------------------------------------- - * represents the physical layout of a buffer - * buffers have fixed width slots of a given type - * - * @constructor - */ -org.apache.arrow.flatbuf.VectorLayout = function() { - /** - * @type {flatbuffers.ByteBuffer} - */ - this.bb = null; - - /** - * @type {number} - */ - this.bb_pos = 0; -}; - -/** - * @param {number} i - * @param {flatbuffers.ByteBuffer} bb - * @returns {org.apache.arrow.flatbuf.VectorLayout} - */ 
-org.apache.arrow.flatbuf.VectorLayout.prototype.__init = function(i, bb) { - this.bb_pos = i; - this.bb = bb; - return this; -}; - -/** - * @param {flatbuffers.ByteBuffer} bb - * @param {org.apache.arrow.flatbuf.VectorLayout=} obj - * @returns {org.apache.arrow.flatbuf.VectorLayout} - */ -org.apache.arrow.flatbuf.VectorLayout.getRootAsVectorLayout = function(bb, obj) { - return (obj || new org.apache.arrow.flatbuf.VectorLayout).__init(bb.readInt32(bb.position()) + bb.position(), bb); -}; - -/** - * the width of a slot in the buffer (typically 1, 8, 16, 32 or 64) - * - * @returns {number} - */ -org.apache.arrow.flatbuf.VectorLayout.prototype.bitWidth = function() { - var offset = this.bb.__offset(this.bb_pos, 4); - return offset ? this.bb.readInt16(this.bb_pos + offset) : 0; -}; - -/** - * the purpose of the vector - * - * @returns {org.apache.arrow.flatbuf.VectorType} - */ -org.apache.arrow.flatbuf.VectorLayout.prototype.type = function() { - var offset = this.bb.__offset(this.bb_pos, 6); - return offset ? 
/** @type {org.apache.arrow.flatbuf.VectorType} */ (this.bb.readInt16(this.bb_pos + offset)) : org.apache.arrow.flatbuf.VectorType.OFFSET; -}; - -/** - * @param {flatbuffers.Builder} builder - */ -org.apache.arrow.flatbuf.VectorLayout.startVectorLayout = function(builder) { - builder.startObject(2); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {number} bitWidth - */ -org.apache.arrow.flatbuf.VectorLayout.addBitWidth = function(builder, bitWidth) { - builder.addFieldInt16(0, bitWidth, 0); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {org.apache.arrow.flatbuf.VectorType} type - */ -org.apache.arrow.flatbuf.VectorLayout.addType = function(builder, type) { - builder.addFieldInt16(1, type, org.apache.arrow.flatbuf.VectorType.OFFSET); -}; - -/** - * @param {flatbuffers.Builder} builder - * @returns {flatbuffers.Offset} - */ -org.apache.arrow.flatbuf.VectorLayout.endVectorLayout = function(builder) { - var offset = builder.endObject(); - return offset; -}; - -/** - * ---------------------------------------------------------------------- - * user defined key value pairs to add custom metadata to arrow - * key namespacing is the responsibility of the user - * - * @constructor - */ -org.apache.arrow.flatbuf.KeyValue = function() { - /** - * @type {flatbuffers.ByteBuffer} - */ - this.bb = null; - - /** - * @type {number} - */ - this.bb_pos = 0; -}; - -/** - * @param {number} i - * @param {flatbuffers.ByteBuffer} bb - * @returns {org.apache.arrow.flatbuf.KeyValue} - */ -org.apache.arrow.flatbuf.KeyValue.prototype.__init = function(i, bb) { - this.bb_pos = i; - this.bb = bb; - return this; -}; - -/** - * @param {flatbuffers.ByteBuffer} bb - * @param {org.apache.arrow.flatbuf.KeyValue=} obj - * @returns {org.apache.arrow.flatbuf.KeyValue} - */ -org.apache.arrow.flatbuf.KeyValue.getRootAsKeyValue = function(bb, obj) { - return (obj || new org.apache.arrow.flatbuf.KeyValue).__init(bb.readInt32(bb.position()) + bb.position(), bb); -}; - -/** - * 
@param {flatbuffers.Encoding=} optionalEncoding - * @returns {string|Uint8Array|null} - */ -org.apache.arrow.flatbuf.KeyValue.prototype.key = function(optionalEncoding) { - var offset = this.bb.__offset(this.bb_pos, 4); - return offset ? this.bb.__string(this.bb_pos + offset, optionalEncoding) : null; -}; - -/** - * @param {flatbuffers.Encoding=} optionalEncoding - * @returns {string|Uint8Array|null} - */ -org.apache.arrow.flatbuf.KeyValue.prototype.value = function(optionalEncoding) { - var offset = this.bb.__offset(this.bb_pos, 6); - return offset ? this.bb.__string(this.bb_pos + offset, optionalEncoding) : null; -}; - -/** - * @param {flatbuffers.Builder} builder - */ -org.apache.arrow.flatbuf.KeyValue.startKeyValue = function(builder) { - builder.startObject(2); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {flatbuffers.Offset} keyOffset - */ -org.apache.arrow.flatbuf.KeyValue.addKey = function(builder, keyOffset) { - builder.addFieldOffset(0, keyOffset, 0); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {flatbuffers.Offset} valueOffset - */ -org.apache.arrow.flatbuf.KeyValue.addValue = function(builder, valueOffset) { - builder.addFieldOffset(1, valueOffset, 0); -}; - -/** - * @param {flatbuffers.Builder} builder - * @returns {flatbuffers.Offset} - */ -org.apache.arrow.flatbuf.KeyValue.endKeyValue = function(builder) { - var offset = builder.endObject(); - return offset; -}; - -/** - * ---------------------------------------------------------------------- - * Dictionary encoding metadata - * - * @constructor - */ -org.apache.arrow.flatbuf.DictionaryEncoding = function() { - /** - * @type {flatbuffers.ByteBuffer} - */ - this.bb = null; - - /** - * @type {number} - */ - this.bb_pos = 0; -}; - -/** - * @param {number} i - * @param {flatbuffers.ByteBuffer} bb - * @returns {org.apache.arrow.flatbuf.DictionaryEncoding} - */ -org.apache.arrow.flatbuf.DictionaryEncoding.prototype.__init = function(i, bb) { - this.bb_pos = i; - this.bb 
= bb; - return this; -}; - -/** - * @param {flatbuffers.ByteBuffer} bb - * @param {org.apache.arrow.flatbuf.DictionaryEncoding=} obj - * @returns {org.apache.arrow.flatbuf.DictionaryEncoding} - */ -org.apache.arrow.flatbuf.DictionaryEncoding.getRootAsDictionaryEncoding = function(bb, obj) { - return (obj || new org.apache.arrow.flatbuf.DictionaryEncoding).__init(bb.readInt32(bb.position()) + bb.position(), bb); -}; - -/** - * The known dictionary id in the application where this data is used. In - * the file or streaming formats, the dictionary ids are found in the - * DictionaryBatch messages - * - * @returns {flatbuffers.Long} - */ -org.apache.arrow.flatbuf.DictionaryEncoding.prototype.id = function() { - var offset = this.bb.__offset(this.bb_pos, 4); - return offset ? this.bb.readInt64(this.bb_pos + offset) : this.bb.createLong(0, 0); -}; - -/** - * The dictionary indices are constrained to be positive integers. If this - * field is null, the indices must be signed int32 - * - * @param {org.apache.arrow.flatbuf.Int=} obj - * @returns {org.apache.arrow.flatbuf.Int|null} - */ -org.apache.arrow.flatbuf.DictionaryEncoding.prototype.indexType = function(obj) { - var offset = this.bb.__offset(this.bb_pos, 6); - return offset ? (obj || new org.apache.arrow.flatbuf.Int).__init(this.bb.__indirect(this.bb_pos + offset), this.bb) : null; -}; - -/** - * By default, dictionaries are not ordered, or the order does not have - * semantic meaning. In some statistical, applications, dictionary-encoding - * is used to represent ordered categorical data, and we provide a way to - * preserve that metadata here - * - * @returns {boolean} - */ -org.apache.arrow.flatbuf.DictionaryEncoding.prototype.isOrdered = function() { - var offset = this.bb.__offset(this.bb_pos, 8); - return offset ? 
!!this.bb.readInt8(this.bb_pos + offset) : false; -}; - -/** - * @param {flatbuffers.Builder} builder - */ -org.apache.arrow.flatbuf.DictionaryEncoding.startDictionaryEncoding = function(builder) { - builder.startObject(3); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {flatbuffers.Long} id - */ -org.apache.arrow.flatbuf.DictionaryEncoding.addId = function(builder, id) { - builder.addFieldInt64(0, id, builder.createLong(0, 0)); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {flatbuffers.Offset} indexTypeOffset - */ -org.apache.arrow.flatbuf.DictionaryEncoding.addIndexType = function(builder, indexTypeOffset) { - builder.addFieldOffset(1, indexTypeOffset, 0); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {boolean} isOrdered - */ -org.apache.arrow.flatbuf.DictionaryEncoding.addIsOrdered = function(builder, isOrdered) { - builder.addFieldInt8(2, +isOrdered, +false); -}; - -/** - * @param {flatbuffers.Builder} builder - * @returns {flatbuffers.Offset} - */ -org.apache.arrow.flatbuf.DictionaryEncoding.endDictionaryEncoding = function(builder) { - var offset = builder.endObject(); - return offset; -}; - -/** - * ---------------------------------------------------------------------- - * A field represents a named column in a record / row batch or child of a - * nested type. 
- * - * - children is only for nested Arrow arrays - * - For primitive types, children will have length 0 - * - nullable should default to true in general - * - * @constructor - */ -org.apache.arrow.flatbuf.Field = function() { - /** - * @type {flatbuffers.ByteBuffer} - */ - this.bb = null; - - /** - * @type {number} - */ - this.bb_pos = 0; -}; - -/** - * @param {number} i - * @param {flatbuffers.ByteBuffer} bb - * @returns {org.apache.arrow.flatbuf.Field} - */ -org.apache.arrow.flatbuf.Field.prototype.__init = function(i, bb) { - this.bb_pos = i; - this.bb = bb; - return this; -}; - -/** - * @param {flatbuffers.ByteBuffer} bb - * @param {org.apache.arrow.flatbuf.Field=} obj - * @returns {org.apache.arrow.flatbuf.Field} - */ -org.apache.arrow.flatbuf.Field.getRootAsField = function(bb, obj) { - return (obj || new org.apache.arrow.flatbuf.Field).__init(bb.readInt32(bb.position()) + bb.position(), bb); -}; - -/** - * @param {flatbuffers.Encoding=} optionalEncoding - * @returns {string|Uint8Array|null} - */ -org.apache.arrow.flatbuf.Field.prototype.name = function(optionalEncoding) { - var offset = this.bb.__offset(this.bb_pos, 4); - return offset ? this.bb.__string(this.bb_pos + offset, optionalEncoding) : null; -}; - -/** - * @returns {boolean} - */ -org.apache.arrow.flatbuf.Field.prototype.nullable = function() { - var offset = this.bb.__offset(this.bb_pos, 6); - return offset ? !!this.bb.readInt8(this.bb_pos + offset) : false; -}; - -/** - * @returns {org.apache.arrow.flatbuf.Type} - */ -org.apache.arrow.flatbuf.Field.prototype.typeType = function() { - var offset = this.bb.__offset(this.bb_pos, 8); - return offset ? /** @type {org.apache.arrow.flatbuf.Type} */ (this.bb.readUint8(this.bb_pos + offset)) : org.apache.arrow.flatbuf.Type.NONE; -}; - -/** - * @param {flatbuffers.Table} obj - * @returns {?flatbuffers.Table} - */ -org.apache.arrow.flatbuf.Field.prototype.type = function(obj) { - var offset = this.bb.__offset(this.bb_pos, 10); - return offset ? 
this.bb.__union(obj, this.bb_pos + offset) : null; -}; - -/** - * @param {org.apache.arrow.flatbuf.DictionaryEncoding=} obj - * @returns {org.apache.arrow.flatbuf.DictionaryEncoding|null} - */ -org.apache.arrow.flatbuf.Field.prototype.dictionary = function(obj) { - var offset = this.bb.__offset(this.bb_pos, 12); - return offset ? (obj || new org.apache.arrow.flatbuf.DictionaryEncoding).__init(this.bb.__indirect(this.bb_pos + offset), this.bb) : null; -}; - -/** - * @param {number} index - * @param {org.apache.arrow.flatbuf.Field=} obj - * @returns {org.apache.arrow.flatbuf.Field} - */ -org.apache.arrow.flatbuf.Field.prototype.children = function(index, obj) { - var offset = this.bb.__offset(this.bb_pos, 14); - return offset ? (obj || new org.apache.arrow.flatbuf.Field).__init(this.bb.__indirect(this.bb.__vector(this.bb_pos + offset) + index * 4), this.bb) : null; -}; - -/** - * @returns {number} - */ -org.apache.arrow.flatbuf.Field.prototype.childrenLength = function() { - var offset = this.bb.__offset(this.bb_pos, 14); - return offset ? this.bb.__vector_len(this.bb_pos + offset) : 0; -}; - -/** - * layout of buffers produced for this type (as derived from the Type) - * does not include children - * each recordbatch will return instances of those Buffers. - * - * @param {number} index - * @param {org.apache.arrow.flatbuf.VectorLayout=} obj - * @returns {org.apache.arrow.flatbuf.VectorLayout} - */ -org.apache.arrow.flatbuf.Field.prototype.layout = function(index, obj) { - var offset = this.bb.__offset(this.bb_pos, 16); - return offset ? (obj || new org.apache.arrow.flatbuf.VectorLayout).__init(this.bb.__indirect(this.bb.__vector(this.bb_pos + offset) + index * 4), this.bb) : null; -}; - -/** - * @returns {number} - */ -org.apache.arrow.flatbuf.Field.prototype.layoutLength = function() { - var offset = this.bb.__offset(this.bb_pos, 16); - return offset ? 
this.bb.__vector_len(this.bb_pos + offset) : 0; -}; - -/** - * @param {number} index - * @param {org.apache.arrow.flatbuf.KeyValue=} obj - * @returns {org.apache.arrow.flatbuf.KeyValue} - */ -org.apache.arrow.flatbuf.Field.prototype.customMetadata = function(index, obj) { - var offset = this.bb.__offset(this.bb_pos, 18); - return offset ? (obj || new org.apache.arrow.flatbuf.KeyValue).__init(this.bb.__indirect(this.bb.__vector(this.bb_pos + offset) + index * 4), this.bb) : null; -}; - -/** - * @returns {number} - */ -org.apache.arrow.flatbuf.Field.prototype.customMetadataLength = function() { - var offset = this.bb.__offset(this.bb_pos, 18); - return offset ? this.bb.__vector_len(this.bb_pos + offset) : 0; -}; - -/** - * @param {flatbuffers.Builder} builder - */ -org.apache.arrow.flatbuf.Field.startField = function(builder) { - builder.startObject(8); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {flatbuffers.Offset} nameOffset - */ -org.apache.arrow.flatbuf.Field.addName = function(builder, nameOffset) { - builder.addFieldOffset(0, nameOffset, 0); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {boolean} nullable - */ -org.apache.arrow.flatbuf.Field.addNullable = function(builder, nullable) { - builder.addFieldInt8(1, +nullable, +false); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {org.apache.arrow.flatbuf.Type} typeType - */ -org.apache.arrow.flatbuf.Field.addTypeType = function(builder, typeType) { - builder.addFieldInt8(2, typeType, org.apache.arrow.flatbuf.Type.NONE); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {flatbuffers.Offset} typeOffset - */ -org.apache.arrow.flatbuf.Field.addType = function(builder, typeOffset) { - builder.addFieldOffset(3, typeOffset, 0); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {flatbuffers.Offset} dictionaryOffset - */ -org.apache.arrow.flatbuf.Field.addDictionary = function(builder, dictionaryOffset) { - builder.addFieldOffset(4, 
dictionaryOffset, 0); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {flatbuffers.Offset} childrenOffset - */ -org.apache.arrow.flatbuf.Field.addChildren = function(builder, childrenOffset) { - builder.addFieldOffset(5, childrenOffset, 0); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {Array.} data - * @returns {flatbuffers.Offset} - */ -org.apache.arrow.flatbuf.Field.createChildrenVector = function(builder, data) { - builder.startVector(4, data.length, 4); - for (var i = data.length - 1; i >= 0; i--) { - builder.addOffset(data[i]); - } - return builder.endVector(); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {number} numElems - */ -org.apache.arrow.flatbuf.Field.startChildrenVector = function(builder, numElems) { - builder.startVector(4, numElems, 4); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {flatbuffers.Offset} layoutOffset - */ -org.apache.arrow.flatbuf.Field.addLayout = function(builder, layoutOffset) { - builder.addFieldOffset(6, layoutOffset, 0); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {Array.} data - * @returns {flatbuffers.Offset} - */ -org.apache.arrow.flatbuf.Field.createLayoutVector = function(builder, data) { - builder.startVector(4, data.length, 4); - for (var i = data.length - 1; i >= 0; i--) { - builder.addOffset(data[i]); - } - return builder.endVector(); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {number} numElems - */ -org.apache.arrow.flatbuf.Field.startLayoutVector = function(builder, numElems) { - builder.startVector(4, numElems, 4); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {flatbuffers.Offset} customMetadataOffset - */ -org.apache.arrow.flatbuf.Field.addCustomMetadata = function(builder, customMetadataOffset) { - builder.addFieldOffset(7, customMetadataOffset, 0); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {Array.} data - * @returns {flatbuffers.Offset} - */ 
-org.apache.arrow.flatbuf.Field.createCustomMetadataVector = function(builder, data) { - builder.startVector(4, data.length, 4); - for (var i = data.length - 1; i >= 0; i--) { - builder.addOffset(data[i]); - } - return builder.endVector(); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {number} numElems - */ -org.apache.arrow.flatbuf.Field.startCustomMetadataVector = function(builder, numElems) { - builder.startVector(4, numElems, 4); -}; - -/** - * @param {flatbuffers.Builder} builder - * @returns {flatbuffers.Offset} - */ -org.apache.arrow.flatbuf.Field.endField = function(builder) { - var offset = builder.endObject(); - return offset; -}; - -/** - * ---------------------------------------------------------------------- - * A Buffer represents a single contiguous memory segment - * - * @constructor - */ -org.apache.arrow.flatbuf.Buffer = function() { - /** - * @type {flatbuffers.ByteBuffer} - */ - this.bb = null; - - /** - * @type {number} - */ - this.bb_pos = 0; -}; - -/** - * @param {number} i - * @param {flatbuffers.ByteBuffer} bb - * @returns {org.apache.arrow.flatbuf.Buffer} - */ -org.apache.arrow.flatbuf.Buffer.prototype.__init = function(i, bb) { - this.bb_pos = i; - this.bb = bb; - return this; -}; - -/** - * The relative offset into the shared memory page where the bytes for this - * buffer starts - * - * @returns {flatbuffers.Long} - */ -org.apache.arrow.flatbuf.Buffer.prototype.offset = function() { - return this.bb.readInt64(this.bb_pos); -}; - -/** - * The absolute length (in bytes) of the memory buffer. The memory is found - * from offset (inclusive) to offset + length (non-inclusive). 
- * - * @returns {flatbuffers.Long} - */ -org.apache.arrow.flatbuf.Buffer.prototype.length = function() { - return this.bb.readInt64(this.bb_pos + 8); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {flatbuffers.Long} offset - * @param {flatbuffers.Long} length - * @returns {flatbuffers.Offset} - */ -org.apache.arrow.flatbuf.Buffer.createBuffer = function(builder, offset, length) { - builder.prep(8, 16); - builder.writeInt64(length); - builder.writeInt64(offset); - return builder.offset(); -}; - -/** - * ---------------------------------------------------------------------- - * A Schema describes the columns in a row batch - * - * @constructor - */ -org.apache.arrow.flatbuf.Schema = function() { - /** - * @type {flatbuffers.ByteBuffer} - */ - this.bb = null; - - /** - * @type {number} - */ - this.bb_pos = 0; -}; - -/** - * @param {number} i - * @param {flatbuffers.ByteBuffer} bb - * @returns {org.apache.arrow.flatbuf.Schema} - */ -org.apache.arrow.flatbuf.Schema.prototype.__init = function(i, bb) { - this.bb_pos = i; - this.bb = bb; - return this; -}; - -/** - * @param {flatbuffers.ByteBuffer} bb - * @param {org.apache.arrow.flatbuf.Schema=} obj - * @returns {org.apache.arrow.flatbuf.Schema} - */ -org.apache.arrow.flatbuf.Schema.getRootAsSchema = function(bb, obj) { - return (obj || new org.apache.arrow.flatbuf.Schema).__init(bb.readInt32(bb.position()) + bb.position(), bb); -}; - -/** - * endianness of the buffer - * it is Little Endian by default - * if endianness doesn't match the underlying system then the vectors need to be converted - * - * @returns {org.apache.arrow.flatbuf.Endianness} - */ -org.apache.arrow.flatbuf.Schema.prototype.endianness = function() { - var offset = this.bb.__offset(this.bb_pos, 4); - return offset ? 
/** @type {org.apache.arrow.flatbuf.Endianness} */ (this.bb.readInt16(this.bb_pos + offset)) : org.apache.arrow.flatbuf.Endianness.Little; -}; - -/** - * @param {number} index - * @param {org.apache.arrow.flatbuf.Field=} obj - * @returns {org.apache.arrow.flatbuf.Field} - */ -org.apache.arrow.flatbuf.Schema.prototype.fields = function(index, obj) { - var offset = this.bb.__offset(this.bb_pos, 6); - return offset ? (obj || new org.apache.arrow.flatbuf.Field).__init(this.bb.__indirect(this.bb.__vector(this.bb_pos + offset) + index * 4), this.bb) : null; -}; - -/** - * @returns {number} - */ -org.apache.arrow.flatbuf.Schema.prototype.fieldsLength = function() { - var offset = this.bb.__offset(this.bb_pos, 6); - return offset ? this.bb.__vector_len(this.bb_pos + offset) : 0; -}; - -/** - * @param {number} index - * @param {org.apache.arrow.flatbuf.KeyValue=} obj - * @returns {org.apache.arrow.flatbuf.KeyValue} - */ -org.apache.arrow.flatbuf.Schema.prototype.customMetadata = function(index, obj) { - var offset = this.bb.__offset(this.bb_pos, 8); - return offset ? (obj || new org.apache.arrow.flatbuf.KeyValue).__init(this.bb.__indirect(this.bb.__vector(this.bb_pos + offset) + index * 4), this.bb) : null; -}; - -/** - * @returns {number} - */ -org.apache.arrow.flatbuf.Schema.prototype.customMetadataLength = function() { - var offset = this.bb.__offset(this.bb_pos, 8); - return offset ? 
this.bb.__vector_len(this.bb_pos + offset) : 0; -}; - -/** - * @param {flatbuffers.Builder} builder - */ -org.apache.arrow.flatbuf.Schema.startSchema = function(builder) { - builder.startObject(3); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {org.apache.arrow.flatbuf.Endianness} endianness - */ -org.apache.arrow.flatbuf.Schema.addEndianness = function(builder, endianness) { - builder.addFieldInt16(0, endianness, org.apache.arrow.flatbuf.Endianness.Little); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {flatbuffers.Offset} fieldsOffset - */ -org.apache.arrow.flatbuf.Schema.addFields = function(builder, fieldsOffset) { - builder.addFieldOffset(1, fieldsOffset, 0); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {Array.} data - * @returns {flatbuffers.Offset} - */ -org.apache.arrow.flatbuf.Schema.createFieldsVector = function(builder, data) { - builder.startVector(4, data.length, 4); - for (var i = data.length - 1; i >= 0; i--) { - builder.addOffset(data[i]); - } - return builder.endVector(); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {number} numElems - */ -org.apache.arrow.flatbuf.Schema.startFieldsVector = function(builder, numElems) { - builder.startVector(4, numElems, 4); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {flatbuffers.Offset} customMetadataOffset - */ -org.apache.arrow.flatbuf.Schema.addCustomMetadata = function(builder, customMetadataOffset) { - builder.addFieldOffset(2, customMetadataOffset, 0); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {Array.} data - * @returns {flatbuffers.Offset} - */ -org.apache.arrow.flatbuf.Schema.createCustomMetadataVector = function(builder, data) { - builder.startVector(4, data.length, 4); - for (var i = data.length - 1; i >= 0; i--) { - builder.addOffset(data[i]); - } - return builder.endVector(); -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {number} numElems - */ 
-org.apache.arrow.flatbuf.Schema.startCustomMetadataVector = function(builder, numElems) { - builder.startVector(4, numElems, 4); -}; - -/** - * @param {flatbuffers.Builder} builder - * @returns {flatbuffers.Offset} - */ -org.apache.arrow.flatbuf.Schema.endSchema = function(builder) { - var offset = builder.endObject(); - return offset; -}; - -/** - * @param {flatbuffers.Builder} builder - * @param {flatbuffers.Offset} offset - */ -org.apache.arrow.flatbuf.Schema.finishSchemaBuffer = function(builder, offset) { - builder.finish(offset); -}; -export { org }; - diff --git a/js/src/ipc/magic.ts b/js/src/ipc/magic.ts new file mode 100644 index 0000000000000..0688d1a2d1e19 --- /dev/null +++ b/js/src/ipc/magic.ts @@ -0,0 +1,53 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +import { flatbuffers } from 'flatbuffers'; +import ByteBuffer = flatbuffers.ByteBuffer; + +export const PADDING = 4; +export const MAGIC_STR = 'ARROW1'; +export const MAGIC = new Uint8Array(MAGIC_STR.length); + +for (let i = 0; i < MAGIC_STR.length; i += 1 | 0) { + MAGIC[i] = MAGIC_STR.charCodeAt(i); +} + +export function checkForMagicArrowString(buffer: Uint8Array, index = 0) { + for (let i = -1, n = MAGIC.length; ++i < n;) { + if (MAGIC[i] !== buffer[index + i]) { + return false; + } + } + return true; +} + +export function isValidArrowFile(bb: ByteBuffer) { + let fileLength = bb.capacity(), footerLength: number, lengthOffset: number; + if ((fileLength < magicX2AndPadding /* Arrow buffer too small */) || + (!checkForMagicArrowString(bb.bytes(), 0) /* Missing magic start */) || + (!checkForMagicArrowString(bb.bytes(), fileLength - magicLength) /* Missing magic end */) || + (/* Invalid footer length */ + (footerLength = bb.readInt32(lengthOffset = fileLength - magicAndPadding)) < 1 && + (footerLength + lengthOffset > fileLength))) { + return false; + } + return true; +} + +export const magicLength = MAGIC.length; +export const magicAndPadding = magicLength + PADDING; +export const magicX2AndPadding = magicLength * 2 + PADDING; diff --git a/js/src/ipc/metadata.ts b/js/src/ipc/metadata.ts index 88b7e52983b8e..25b94b1dd5577 100644 --- a/js/src/ipc/metadata.ts +++ b/js/src/ipc/metadata.ts @@ -25,7 +25,12 @@ export class Footer { } export class FileBlock { - constructor(public metaDataLength: number, public bodyLength: Long, public offset: Long) {} + public offset: number; + public bodyLength: number; + constructor(public metaDataLength: number, bodyLength: Long | number, offset: Long | number) { + this.offset = typeof offset === 'number' ? offset : offset.low; + this.bodyLength = typeof bodyLength === 'number' ? 
bodyLength : bodyLength.low; + } } export class Message { @@ -46,8 +51,11 @@ export class RecordBatchMetadata extends Message { public length: number; public nodes: FieldMetadata[]; public buffers: BufferMetadata[]; - constructor(version: MetadataVersion, length: Long | number, nodes: FieldMetadata[], buffers: BufferMetadata[]) { - super(version, buffers.reduce((s, b) => align(s + b.length + (b.offset - s), 8), 0), MessageHeader.RecordBatch); + constructor(version: MetadataVersion, length: Long | number, nodes: FieldMetadata[], buffers: BufferMetadata[], bodyLength?: Long | number) { + if (bodyLength === void(0)) { + bodyLength = buffers.reduce((s, b) => align(s + b.length + (b.offset - s), 8), 0); + } + super(version, bodyLength, MessageHeader.RecordBatch); this.nodes = nodes; this.buffers = buffers; this.length = typeof length === 'number' ? length : length.low; diff --git a/js/src/ipc/reader/arrow.ts b/js/src/ipc/reader/arrow.ts index af535900cbf46..1847c9c2eb628 100644 --- a/js/src/ipc/reader/arrow.ts +++ b/js/src/ipc/reader/arrow.ts @@ -16,6 +16,7 @@ // under the License. 
import { readJSON } from './json';
+import { fromReadableStream } from './node';
 import { RecordBatch } from '../../recordbatch';
 import { readBuffers, readBuffersAsync } from './binary';
 import { readRecordBatches, readRecordBatchesAsync, TypeDataLoader } from './vector';
@@ -46,3 +47,9 @@ export async function* readAsync(sources: AsyncIterable fileLength))) {
+    if (!isValidArrowFile(bb)) {
         return null;
     }
-    bb.setPosition(footerOffset - footerLength);
+    let fileLength = bb.capacity();
+    let lengthOffset = fileLength - magicAndPadding; // offset of the int32 that is read below as the footer length
+    let footerLength = bb.readInt32(lengthOffset);
+    bb.setPosition(lengthOffset - footerLength); // step back footerLength bytes before decoding the footer
     return footerFromByteBuffer(bb);
 }
 
 function readFileMessages(footer: Footer) {
     return function* (bb: ByteBuffer) {
+        let message: RecordBatchMetadata | DictionaryBatch;
         for (let i = -1, batches = footer.dictionaryBatches, n = batches.length; ++i < n;) {
-            bb.setPosition(batches[i].offset.low);
-            yield readMessage(bb, bb.readInt32(bb.position())) as DictionaryBatch;
+            bb.setPosition(batches[i].offset); // offset is used as a plain number here (no `.low`)
+            if (message = readMessage(bb, bb.readInt32(bb.position())) as DictionaryBatch) { // assignment is intentional: falsy results are not yielded
+                yield message;
+            }
         }
         for (let i = -1, batches = footer.recordBatches, n = batches.length; ++i < n;) {
-            bb.setPosition(batches[i].offset.low);
-            yield readMessage(bb, bb.readInt32(bb.position())) as RecordBatchMetadata;
+            bb.setPosition(batches[i].offset); // offset is used as a plain number here (no `.low`)
+            if (message = readMessage(bb, bb.readInt32(bb.position())) as RecordBatchMetadata) { // assignment is intentional: falsy results are not yielded
+                yield message;
+            }
         }
     };
 }
@@ -267,8 +250,8 @@ function messageFromByteBuffer(bb: ByteBuffer) {
     const m = _Message.getRootAsMessage(bb)!, type = m.headerType(), version = m.version();
     switch (type) {
         case MessageHeader.Schema: return schemaFromMessage(version, m.header(new _Schema())!, new Map());
-        case MessageHeader.RecordBatch: return recordBatchFromMessage(version, m.header(new _RecordBatch())!);
-        case MessageHeader.DictionaryBatch: return dictionaryBatchFromMessage(version, m.header(new _DictionaryBatch())!);
+        case MessageHeader.RecordBatch: return recordBatchFromMessage(version, m, m.header(new _RecordBatch())!); // the Message `m` is now passed along with its header
+        case MessageHeader.DictionaryBatch: return dictionaryBatchFromMessage(version, m, m.header(new _DictionaryBatch())!); // the Message `m` is now passed along with its header
     }
     return null;
     // throw new Error(`Unrecognized Message type '${type}'`);
@@ -278,12 +261,12 @@ function schemaFromMessage(version: MetadataVersion, s: _Schema, dictionaryField
     return new Schema(fieldsFromSchema(s, dictionaryFields), customMetadata(s), version, dictionaryFields);
 }
 
-function recordBatchFromMessage(version: MetadataVersion, b: _RecordBatch) {
-    return new RecordBatchMetadata(version, b.length(), fieldNodesFromRecordBatch(b), buffersFromRecordBatch(b, version));
+function recordBatchFromMessage(version: MetadataVersion, m: _Message, b: _RecordBatch) {
+    return new RecordBatchMetadata(version, b.length(), fieldNodesFromRecordBatch(b), buffersFromRecordBatch(b, version), m.bodyLength()); // forwards the enclosing Message's bodyLength as the last constructor argument
 }
 
-function dictionaryBatchFromMessage(version: MetadataVersion, d: _DictionaryBatch) {
-    return new DictionaryBatch(version, recordBatchFromMessage(version, d.data()!), d.id(), d.isDelta());
+function dictionaryBatchFromMessage(version: MetadataVersion, m: _Message, d: _DictionaryBatch) {
+    return new DictionaryBatch(version, recordBatchFromMessage(version, m, d.data()!), d.id(), d.isDelta()); // the same Message `m` is handed down for the nested record batch
 }
 
 function dictionaryBatchesFromFooter(f: _Footer) {
diff --git a/js/src/ipc/reader/node.ts b/js/src/ipc/reader/node.ts
new file mode 100644
index 0000000000000..7fbd7bfbbfc2c
--- /dev/null
+++ b/js/src/ipc/reader/node.ts
@@ -0,0 +1,74 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.
You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import { flatbuffers } from 'flatbuffers';
+import * as Message_ from '../../fb/Message';
+import ByteBuffer = flatbuffers.ByteBuffer;
+import _Message = Message_.org.apache.arrow.flatbuf.Message;
+import { PADDING, isValidArrowFile, checkForMagicArrowString } from '../magic';
+
+/**
+ * Re-chunk a Node.js readable stream of Arrow IPC bytes.
+ *
+ * Incoming chunks are appended to an internal buffer. If the buffered bytes
+ * start with the Arrow magic string the input is treated as an Arrow file:
+ * nothing is yielded until `isValidArrowFile` accepts the buffer, then the
+ * whole buffer is yielded once and the generator returns. Otherwise the input
+ * is treated as a sequence of length-prefixed messages, and one `Uint8Array`
+ * is yielded per message, covering the int32 length prefix, the metadata and
+ * the body (`bodyLength` as reported by the decoded flatbuffers Message).
+ *
+ * @param stream Readable stream producing the bytes. String chunks are
+ *               accepted and converted one char code per byte.
+ * @returns An async generator of `Uint8Array`s (views over the internal buffer).
+ * @throws Error when no Message can be decoded at the current position.
+ */
+export async function* fromReadableStream(stream: NodeJS.ReadableStream) {
+
+    let bytesRead = 0, bytes = new Uint8Array(0);
+    let messageLength = 0, message: _Message | null = null;
+
+    for await (const chunk of (stream as any as AsyncIterable<Uint8Array | string>)) {
+
+        // Append the chunk to the bytes that are still buffered. The buffered
+        // bytes have to be carried over for string chunks as well; previously
+        // they were only copied for binary chunks.
+        const grown = new Uint8Array(bytes.byteLength + chunk.length);
+        grown.set(bytes, 0);
+        if (typeof chunk !== 'string') {
+            grown.set(chunk, bytes.byteLength);
+        } else {
+            for (let i = -1, j = bytes.byteLength, n = chunk.length; ++i < n;) {
+                grown[i + j] = chunk.charCodeAt(i);
+            }
+        }
+
+        bytes = grown;
+
+        // If we're reading in an Arrow File, just concatenate the bytes until
+        // the file is fully read in
+        if (checkForMagicArrowString(bytes)) {
+            if (!isValidArrowFile(new ByteBuffer(bytes))) {
+                continue;
+            }
+            return yield bytes;
+        }
+
+        if (messageLength <= 0) {
+            // Wait for the complete int32 length prefix; reading it from a
+            // shorter buffer would throw a RangeError.
+            if (bytes.byteLength < PADDING) {
+                continue;
+            }
+            messageLength = readInt32LE(bytes);
+        }
+
+        // A message is complete once the length prefix plus `messageLength`
+        // bytes are buffered; anything less would yield a truncated view.
+        while (bytes.byteLength >= messageLength + PADDING) {
+            if (!message) {
+                const bb = new ByteBuffer(bytes);
+                bb.setPosition(PADDING);
+                if (message = _Message.getRootAsMessage(bb)) {
+                    // The prefix only counts the metadata; add the body length.
+                    messageLength += message.bodyLength().low;
+                    continue;
+                }
+                throw new Error(`Invalid message at position ${bytesRead}`);
+            }
+            bytesRead += messageLength + PADDING;
+            yield bytes.subarray(0, messageLength + PADDING);
+            bytes = bytes.subarray(messageLength + PADDING);
+            // Read the next prefix only when all four of its bytes are here;
+            // otherwise it is read after the next chunk has been appended.
+            messageLength = bytes.byteLength < PADDING ? 0 : readInt32LE(bytes);
+            message = null;
+        }
+    }
+}
+
+/** Read the little-endian int32 at the start of `bytes`, honouring the view's own byteOffset. */
+function readInt32LE(bytes: Uint8Array) {
+    return new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength).getInt32(0, true);
+}
diff --git a/js/src/ipc/reader/vector.ts b/js/src/ipc/reader/vector.ts
index b8c4871ebac2f..c4688f5e2b851 100644
--- a/js/src/ipc/reader/vector.ts
+++ b/js/src/ipc/reader/vector.ts
@@ -126,6 +126,6 @@ export abstract class TypeDataLoader extends TypeVisitor {
     protected visitUnionType(type: DenseUnion | SparseUnion, { length, nullCount }: FieldMetadata = this.getFieldMetadata()) {
         return type.mode === UnionMode.Sparse ?
             new SparseUnionData(type as SparseUnion, length, this.readNullBitmap(type, nullCount), this.readTypeIds(type), this.visitFields(type.children), 0, nullCount) :
-            new DenseUnionData(type as DenseUnion, length, this.readNullBitmap(type, nullCount), this.readOffsets(type), this.readTypeIds(type), this.visitFields(type.children), 0, nullCount);
+            new DenseUnionData(type as DenseUnion, length, this.readNullBitmap(type, nullCount), this.readTypeIds(type), this.readOffsets(type), this.visitFields(type.children), 0, nullCount);
     }
 }
diff --git a/js/src/ipc/writer/arrow.ts b/js/src/ipc/writer/arrow.ts
new file mode 100644
index 0000000000000..4ff82a61d2f8d
--- /dev/null
+++ b/js/src/ipc/writer/arrow.ts
@@ -0,0 +1,40 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.
You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import { Table } from '../../table';
+import { serializeStream, serializeFile } from './binary';
+
+/**
+ * Serialize `table` into one contiguous buffer.
+ *
+ * @param table  Table to serialize.
+ * @param stream When `true` (the default) the chunks come from
+ *               `serializeStream`, otherwise from `serializeFile`.
+ * @returns A single `Uint8Array` holding every serialized chunk in order.
+ */
+export function writeTableBinary(table: Table, stream = true) {
+    return concatBuffers(stream ? serializeStream(table) : serializeFile(table));
+}
+
+/**
+ * Copy every chunk of `messages` back-to-back into a newly allocated buffer.
+ *
+ * @param messages Chunks to join; the iterable is consumed exactly once.
+ * @returns A `Uint8Array` whose length is the sum of the chunk lengths
+ *          (empty when `messages` yields nothing).
+ */
+function concatBuffers(messages: Iterable<Uint8Array>) {
+
+    const buffers: Uint8Array[] = [];
+    let byteLength = 0;
+
+    // First pass: drain the iterable (it may be a one-shot generator) and
+    // measure the total size so the output is allocated only once.
+    for (const message of messages) {
+        buffers.push(message);
+        byteLength += message.byteLength;
+    }
+
+    // Second pass: copy each chunk to its running offset.
+    const combined = new Uint8Array(byteLength);
+    let byteOffset = 0;
+    for (const bytes of buffers) {
+        combined.set(bytes, byteOffset);
+        byteOffset += bytes.byteLength;
+    }
+
+    return combined;
+}
diff --git a/js/src/ipc/writer/binary.ts b/js/src/ipc/writer/binary.ts
new file mode 100644
index 0000000000000..d8b1d7eb2374c
--- /dev/null
+++ b/js/src/ipc/writer/binary.ts
@@ -0,0 +1,705 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import { Table } from '../../table'; +import { DenseUnionData } from '../../data'; +import { RecordBatch } from '../../recordbatch'; +import { VectorVisitor, TypeVisitor } from '../../visitor'; +import { MAGIC, magicLength, magicAndPadding, PADDING } from '../magic'; +import { align, getBool, packBools, iterateBits } from '../../util/bit'; +import { Vector, UnionVector, DictionaryVector, NestedVector, ListVector } from '../../vector'; +import { BufferMetadata, FieldMetadata, Footer, FileBlock, Message, RecordBatchMetadata, DictionaryBatch } from '../metadata'; +import { + Schema, Field, TypedArray, MetadataVersion, + DataType, + Dictionary, + Null, Int, Float, + Binary, Bool, Utf8, Decimal, + Date_, Time, Timestamp, Interval, + List, Struct, Union, FixedSizeBinary, FixedSizeList, Map_, + FlatType, FlatListType, NestedType, UnionMode, SparseUnion, DenseUnion, SingleNestedType, +} from '../../type'; + +export function* serializeStream(table: Table) { + yield serializeMessage(table.schema).buffer; + for (const [id, field] of table.schema.dictionaries) { + const vec = table.getColumn(field.name) as DictionaryVector; + if (vec && vec.dictionary) { + yield serializeDictionaryBatch(vec.dictionary, id).buffer; + } + } + for (const recordBatch of table.batches) { + yield serializeRecordBatch(recordBatch).buffer; + } +} + +export function* serializeFile(table: Table) { + + const recordBatches = []; + const dictionaryBatches = []; + + // First yield the magic string (aligned) + let buffer = new Uint8Array(align(magicLength, 8)); + let metadataLength, 
byteLength = buffer.byteLength; + buffer.set(MAGIC, 0); + yield buffer; + + // Then yield the schema + ({ metadataLength, buffer } = serializeMessage(table.schema)); + byteLength += buffer.byteLength; + yield buffer; + + for (const [id, field] of table.schema.dictionaries) { + const vec = table.getColumn(field.name) as DictionaryVector; + if (vec && vec.dictionary) { + ({ metadataLength, buffer } = serializeDictionaryBatch(vec.dictionary, id)); + dictionaryBatches.push(new FileBlock(metadataLength, buffer.byteLength, byteLength)); + byteLength += buffer.byteLength; + yield buffer; + } + } + for (const recordBatch of table.batches) { + ({ metadataLength, buffer } = serializeRecordBatch(recordBatch)); + recordBatches.push(new FileBlock(metadataLength, buffer.byteLength, byteLength)); + byteLength += buffer.byteLength; + yield buffer; + } + + // Then yield the footer metadata (not aligned) + ({ metadataLength, buffer } = serializeFooter(new Footer(dictionaryBatches, recordBatches, table.schema))); + yield buffer; + + // Last, yield the footer length + terminating magic arrow string (aligned) + buffer = new Uint8Array(magicAndPadding); + new DataView(buffer.buffer).setInt32(0, metadataLength, platformIsLittleEndian); + buffer.set(MAGIC, buffer.byteLength - magicLength); + yield buffer; +} + +export function serializeRecordBatch(recordBatch: RecordBatch) { + const { byteLength, fieldNodes, buffers, buffersMeta } = new RecordBatchSerializer().visitRecordBatch(recordBatch); + const rbMeta = new RecordBatchMetadata(MetadataVersion.V4, recordBatch.length, fieldNodes, buffersMeta); + const rbData = concatBuffersWithMetadata(byteLength, buffers, buffersMeta); + return serializeMessage(rbMeta, rbData); +} + +export function serializeDictionaryBatch(dictionary: Vector, id: Long | number, isDelta: boolean = false) { + const { byteLength, fieldNodes, buffers, buffersMeta } = new RecordBatchSerializer().visitRecordBatch(RecordBatch.from([dictionary])); + const rbMeta = new 
RecordBatchMetadata(MetadataVersion.V4, dictionary.length, fieldNodes, buffersMeta); + const dbMeta = new DictionaryBatch(MetadataVersion.V4, rbMeta, id, isDelta); + const rbData = concatBuffersWithMetadata(byteLength, buffers, buffersMeta); + return serializeMessage(dbMeta, rbData); +} + +export function serializeMessage(message: Message, data?: Uint8Array) { + const b = new Builder(); + _Message.finishMessageBuffer(b, writeMessage(b, message)); + // Slice out the buffer that contains the message metadata + const metadataBytes = b.asUint8Array(); + // Reserve 4 bytes for writing the message size at the front. + // Metadata length includes the metadata byteLength + the 4 + // bytes for the length, and rounded up to the nearest 8 bytes. + const metadataLength = align(PADDING + metadataBytes.byteLength, 8); + // + the length of the optional data buffer at the end, padded + const dataByteLength = data ? data.byteLength : 0; + // ensure the entire message is aligned to an 8-byte boundary + const messageBytes = new Uint8Array(align(metadataLength + dataByteLength, 8)); + // Write the metadata length into the first 4 bytes, but subtract the + // bytes we use to hold the length itself. 
+ new DataView(messageBytes.buffer).setInt32(0, metadataLength - PADDING, platformIsLittleEndian); + // Copy the metadata bytes into the message buffer + messageBytes.set(metadataBytes, PADDING); + // Copy the optional data buffer after the metadata bytes + (data && dataByteLength > 0) && messageBytes.set(data, metadataLength); + // if (messageBytes.byteLength % 8 !== 0) { debugger; } + // Return the metadata length because we need to write it into each FileBlock also + return { metadataLength, buffer: messageBytes }; +} + +export function serializeFooter(footer: Footer) { + const b = new Builder(); + _Footer.finishFooterBuffer(b, writeFooter(b, footer)); + // Slice out the buffer that contains the footer metadata + const footerBytes = b.asUint8Array(); + const metadataLength = footerBytes.byteLength; + return { metadataLength, buffer: footerBytes }; +} + +export class RecordBatchSerializer extends VectorVisitor { + public byteLength = 0; + public buffers: TypedArray[] = []; + public fieldNodes: FieldMetadata[] = []; + public buffersMeta: BufferMetadata[] = []; + public visitRecordBatch(recordBatch: RecordBatch) { + this.buffers = []; + this.byteLength = 0; + this.fieldNodes = []; + this.buffersMeta = []; + for (let vector: Vector, index = -1, numCols = recordBatch.numCols; ++index < numCols;) { + if (vector = recordBatch.getChildAt(index)!) { + this.visit(vector); + } + } + return this; + } + public visit(vector: Vector) { + if (!DataType.isDictionary(vector.type)) { + const { data, length, nullCount } = vector; + if (length > 2147483647) { + throw new RangeError('Cannot write arrays larger than 2^31 - 1 in length'); + } + this.fieldNodes.push(new FieldMetadata(length, nullCount)); + this.addBuffer(nullCount <= 0 + ? new Uint8Array(0) // placeholder validity buffer + : this.getTruncatedBitmap(data.offset, length, data.nullBitmap!) 
+ ); + } + return super.visit(vector); + } + public visitNull (_nullz: Vector) { return this; } + public visitBool (vector: Vector) { return this.visitBoolVector(vector); } + public visitInt (vector: Vector) { return this.visitFlatVector(vector); } + public visitFloat (vector: Vector) { return this.visitFlatVector(vector); } + public visitUtf8 (vector: Vector) { return this.visitFlatListVector(vector); } + public visitBinary (vector: Vector) { return this.visitFlatListVector(vector); } + public visitDate (vector: Vector) { return this.visitFlatVector(vector); } + public visitTimestamp (vector: Vector) { return this.visitFlatVector(vector); } + public visitTime (vector: Vector