Skip to content

Commit

Permalink
Faster row transpose
Browse files Browse the repository at this point in the history
  • Loading branch information
platypii committed May 15, 2024
1 parent 9f95eff commit 034e9cd
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 25 deletions.
7 changes: 3 additions & 4 deletions src/column.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import { concat } from './utils.js'
* @param {ColumnMetaData} columnMetadata column metadata
* @param {SchemaTree[]} schemaPath schema path for the column
* @param {Compressors} [compressors] custom decompressors
* @returns {ArrayLike<any>} array of values
* @returns {any[]} array of values
*/
export function readColumn(arrayBuffer, columnOffset, rowGroup, columnMetadata, schemaPath, compressors) {
/** @type {ArrayLike<any> | undefined} */
Expand All @@ -43,9 +43,8 @@ export function readColumn(arrayBuffer, columnOffset, rowGroup, columnMetadata,
}

// read compressed_page_size bytes starting at offset
const compressedBytes = new Uint8Array(arrayBuffer).subarray(
columnOffset + reader.offset,
columnOffset + reader.offset + header.compressed_page_size
const compressedBytes = new Uint8Array(
arrayBuffer, columnOffset + reader.offset, header.compressed_page_size
)

// parse page data by type
Expand Down
28 changes: 8 additions & 20 deletions src/read.js
Original file line number Diff line number Diff line change
Expand Up @@ -109,10 +109,9 @@ async function readRowGroup(options, rowGroup, groupStart) {
}

/** @type {any[][]} */
const groupData = []
const groupColumnData = []
const promises = []
const maps = new Map()
let outputColumnIndex = 0
// read column data
for (let columnIndex = 0; columnIndex < rowGroup.columns.length; columnIndex++) {
const columnMetadata = rowGroup.columns[columnIndex].meta_data
Expand Down Expand Up @@ -149,7 +148,7 @@ async function readRowGroup(options, rowGroup, groupStart) {
// read column data async
promises.push(buffer.then(arrayBuffer => {
const schemaPath = getSchemaPath(metadata.schema, columnMetadata.path_in_schema)
/** @type {ArrayLike<any> | undefined} */
/** @type {any[] | undefined} */
let columnData = readColumn(
arrayBuffer, bufferOffset, rowGroup, columnMetadata, schemaPath, compressors
)
Expand Down Expand Up @@ -207,25 +206,14 @@ async function readRowGroup(options, rowGroup, groupStart) {
rowStart: groupStart,
rowEnd: groupStart + columnData.length,
})
// add column data to group data only if onComplete is defined
if (options.onComplete) addColumn(groupData, outputColumnIndex, columnData)
outputColumnIndex++
// save column data only if onComplete is defined
if (options.onComplete) groupColumnData.push(columnData)
}))
}
await Promise.all(promises)
return groupData
}

/**
* Add a column to rows.
*
* @param {any[][]} rows rows to add column data to
* @param {number} columnIndex column index to add
* @param {ArrayLike<any>} columnData column data to add
*/
function addColumn(rows, columnIndex, columnData) {
for (let i = 0; i < columnData.length; i++) {
if (!rows[i]) rows[i] = []
rows[i][columnIndex] = columnData[i]
if (options.onComplete) {
// transpose columns into rows
return groupColumnData[0].map((_, row) => groupColumnData.map(col => col[row]))
}
return []
}
2 changes: 1 addition & 1 deletion src/types.d.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
type Awaitable<T> = T | Promise<T>
export type Awaitable<T> = T | Promise<T>

/**
* File-like object that can read slices of a file asynchronously.
Expand Down

0 comments on commit 034e9cd

Please sign in to comment.