Skip to content
5 changes: 3 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,17 @@

### Changed

- **Breaking** the `OrderBy` type is now an array of column sorts: `{ column: string; direction: 'ascending' | 'descending' }[]`. If empty, the data is not sorted. If it contains one element, the data is sorted along the column, in the specified direction. If it contains multiple elements, the first column is used to sort, then the second one is used for the rows with the same value, and so on ([#67](https://github.com/hyparam/hightable/pull/67), [#68](https://github.com/hyparam/hightable/pull/68)).
- **Breaking** the `OrderBy` type is now an array of column sorts: `{ column: string; direction: 'ascending' | 'descending' }[]`. If empty, the data is not sorted. If it contains one element, the data is sorted along the column, in the specified direction. If it contains multiple elements, the first column is used to sort, then the second one is used to handle the ties, and so on ([#67](https://github.com/hyparam/hightable/pull/67), [#68](https://github.com/hyparam/hightable/pull/68), [#69](https://github.com/hyparam/hightable/pull/69)).
- **Breaking** the `orderBy` property in `rows` method uses the new `OrderBy` type. If `data.sortable` is `true`, the data frame is able to sort along the columns as described above.
- **Breaking** the `orderBy` property in `HighTable` and `TableHeader` uses the new `OrderBy` type.
- **Breaking** the `onOrderByChange` property in `HighTable` and `TableHeader` now takes the new `OrderBy` argument.
- **Breaking** successive clicks on a column header follow a new behavior: instead of toggling between ascending sort and no sort, it now cycles through ascending, descending, and no sort ([#68](https://github.com/hyparam/hightable/pull/68)).
- **Breaking** click on a column header has a new behavior: it sorts along that column first, and uses the other columns of `orderBy` as secondary sorts. If the column was already the first column, it follows the cycle ascending -> descending -> no sort ([#69](https://github.com/hyparam/hightable/pull/69)).
- **Breaking** the top left cell of the table now handles the checkbox to select all the rows (and the absolutely positioned div is removed). It can affect overridden CSS ([#70](https://github.com/hyparam/hightable/pull/70)).
- **Breaking** all CSS classes have been removed. Use the `className` prop to apply custom styles ([#75](https://github.com/hyparam/hightable/pull/75)).
- changed the format of the keys in local storage when storing the column widths. Each column now has its own key ([#71](https://github.com/hyparam/hightable/pull/71)).
- split the CSS styles into mandatory functional styles and optional theme styles ([#75](https://github.com/hyparam/hightable/pull/75)).
- the selection checkboxes are now disabled while the data is being loaded ([#77](https://github.com/hyparam/hightable/pull/77)).
- sortableDataFrame now supports sorting along multiple columns ([#69](https://github.com/hyparam/hightable/pull/69)).

### Refactored

Expand Down
12 changes: 6 additions & 6 deletions src/components/HighTable/HighTable.tsx
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { MouseEvent, useCallback, useEffect, useMemo, useRef, useState } from 'react'
import { DataFrame } from '../../helpers/dataframe.js'
import { PartialRow } from '../../helpers/row.js'
import { Selection, SortIndex, areAllSelected, isSelected, toggleAll, toggleIndexInSelection, toggleRangeInSelection, toggleRangeInTable } from '../../helpers/selection.js'
import { Selection, areAllSelected, isSelected, toggleAll, toggleIndexInSelection, toggleRangeInSelection, toggleRangeInTable } from '../../helpers/selection.js'
import { OrderBy, areEqualOrderBy } from '../../helpers/sort.js'
import { cellStyle } from '../../helpers/width.js'
import { useInputState } from '../../hooks/useInputState.js'
Expand Down Expand Up @@ -93,7 +93,7 @@ export default function HighTable({

// TODO(SL): remove this state and only rely on the data frame for these operations?
// ie. cache the previous sort indexes in the data frame itself
const [sortIndexes, setSortIndexes] = useState<Map<string, SortIndex>>(() => new Map())
const [ranksMap, setRanksMap] = useState<Map<string, Promise<number[]>>>(() => new Map())

// Sorting is disabled if the data is not sortable
const {
Expand Down Expand Up @@ -159,8 +159,8 @@ export default function HighTable({
tableIndex,
orderBy,
data,
sortIndexes,
setSortIndexes,
ranksMap,
setRanksMap,
})
if (requestId === pendingSelectionRequest.current) {
// only update the selection if the request is still the last one
Expand All @@ -170,7 +170,7 @@ export default function HighTable({
return (event: MouseEvent): void => {
void onSelectRowClick(event)
}
}, [onSelectionChange, selection, data, orderBy, sortIndexes])
}, [data, onSelectionChange, orderBy, ranksMap, selection])
const allRowsSelected = useMemo(() => {
if (!selection) return false
const { ranges } = selection
Expand Down Expand Up @@ -198,7 +198,7 @@ export default function HighTable({
// reset the flag, the column widths will be recalculated
setHasCompleteRow(false)
// delete the cached sort indexes
setSortIndexes(new Map())
setRanksMap(new Map())
// if uncontrolled, reset the selection (if controlled, it's the responsibility of the parent to do it)
if (!isSelectionControlled) {
onSelectionChange({ ranges: [], anchor: undefined })
Expand Down
93 changes: 68 additions & 25 deletions src/helpers/dataframe.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,17 +55,61 @@ export function getGetColumn(data: DataFrame): GetColumn {
}
}

export async function getColumnIndex({ data, column }: {data: DataFrame, column: string}): Promise<number[]> {
// return the column ranks in ascending order
// we can get the descending order replacing the rank with numRows - rank - 1. It's not exactly the rank of
// the descending order, because the rank is the first, not the last, of the ties. But it's enough for the
// purpose of sorting.
export async function getRanks({ data, column }: {data: DataFrame, column: string}): Promise<number[]> {
if (!data.header.includes(column)) {
throw new Error(`Invalid column: ${column}`)
}
const getColumn = getGetColumn(data)
const values = await getColumn({ column })
return Array.from(values.keys()).sort((a, b) => {
if (values[a] < values[b]) return -1
if (values[a] > values[b]) return 1
const valuesWithIndex = (await getColumn({ column })).map((value, index) => ({ value, index }))
const sortedValuesWithIndex = Array.from(valuesWithIndex).sort(({ value: a }, { value: b }) => {
if (a < b) return -1
if (a > b) return 1
return 0
})
const numRows = sortedValuesWithIndex.length
const ascendingRanks = sortedValuesWithIndex.reduce(({ lastValue, lastRank, ranks }, { value, index }, rank) => {
if (value === lastValue) {
ranks[index] = lastRank
return { ranks, lastValue, lastRank }
} else {
ranks[index] = rank
return { ranks, lastValue: value, lastRank: rank }
}
}, { ranks: Array(numRows).fill(-1), lastValue: undefined, lastRank: 0 }).ranks
return ascendingRanks
}

/**
 * Compute the order of the rows for a list of sort criteria.
 *
 * Each criterion gives, for every row index, the rank of that row along one column,
 * together with a direction. The criteria are applied in order: the first one decides,
 * and each following one is only consulted when all the previous ones tie.
 *
 * @param orderBy list of criteria (direction + ranks), by decreasing priority. The number
 *   of rows is taken from the length of the ranks of the first criterion.
 * @returns the row indexes in sorted order: the element at position 0 is the index of the
 *   first row of the sorted table.
 * @throws Error if `orderBy` has no element at index 0, or if a rank is missing for one of
 *   the two rows being compared.
 */
export function computeDataIndexes(orderBy: { direction: 'ascending' | 'descending', ranks: number[] }[]): number[] {
  if (!(0 in orderBy)) {
    throw new Error('orderBy should have at least one element')
  }
  const rowCount = orderBy[0].ranks.length

  // Compare two row indexes, criterion after criterion, until one of them separates the rows.
  function compareRows(left: number, right: number): number {
    for (const criterion of orderBy) {
      const leftRank = criterion.ranks[left]
      const rightRank = criterion.ranks[right]
      if (leftRank === undefined || rightRank === undefined) {
        throw new Error('Invalid ranks')
      }
      // any direction other than 'ascending' is handled as descending
      const isAscending = criterion.direction === 'ascending'
      if (leftRank < rightRank) return isAscending ? -1 : 1
      if (leftRank > rightRank) return isAscending ? 1 : -1
      // same rank along this column: let the next criterion decide
    }
    // the rows tie along every criterion
    return 0
  }

  // start from the natural order 0, 1, ..., rowCount - 1
  const sortedRows: number[] = []
  for (let row = 0; row < rowCount; row++) {
    sortedRows.push(row)
  }
  return sortedRows.sort(compareRows)
}

/**
 * Build the ranks of the rows when no column sort applies: the rank of a row is its own index.
 *
 * @param data the data frame; only its `numRows` property is read
 * @returns a promise resolved with the array [0, 1, ..., numRows - 1]
 */
export function getUnsortedRanks({ data }: { data: DataFrame }): Promise<number[]> {
  // the keys of an array of length numRows are exactly 0, 1, ..., numRows - 1
  const placeholder = Array.from({ length: data.numRows })
  const identityRanks = Array.from(placeholder.keys())
  return Promise.resolve(identityRanks)
}

/**
Expand All @@ -86,33 +130,32 @@ export async function getColumnIndex({ data, column }: {data: DataFrame, column:
export function sortableDataFrame(data: DataFrame): DataFrame {
if (data.sortable) return data // already sortable

const indexesByColumn = new Map<string, Promise<number[]>>()
// TODO(SL): call addGetColumn() to cache the rows if needed
// TODO(SL): create another type (DataFrameWithRanks?) that provides the cached ranks (and/or the cached data indexes for a given orderBy)

const ranksByColumn = new Map<string, Promise<number[]>>()
return {
...data,
rows({ start, end, orderBy }): AsyncRow[] {
if (orderBy && orderBy.length > 0) {
if (!(0 in orderBy)) {
throw new Error('orderBy should have at least one element')
}
// TODO(SL): support multiple columns
const { column, direction } = orderBy[0]
if (!data.header.includes(column)) {
throw new Error(`Invalid orderBy field: ${column}`)
}
const columnIndexes = indexesByColumn.get(column) ?? getColumnIndex({ data, column })
if (!indexesByColumn.has(column)) {
indexesByColumn.set(column, columnIndexes)
if (orderBy.some(({ column }) => !data.header.includes(column)) ){
throw new Error(`Invalid orderBy field: ${orderBy.map(({ column }) => column).join(', ')}`)
}
const indexesSlice = columnIndexes.then(indexes => {
if (direction === 'ascending') {
return indexes.slice(start, end)
} else {
// descending order
const newStart = data.numRows - end
const newEnd = data.numRows - start
return indexes.slice(newStart, newEnd).reverse()
// TODO(SL): only fetch ranks if needed?
// To get a consistent order in case of ties, we append a fake column orderby, to sort by the ascending indexes of the rows in the last case
const orderByWithDefaultSort = [...orderBy, { column: '', direction: 'ascending' as const }]
const orderByWithRanks = orderByWithDefaultSort.map(async ({ column, direction }) => {
const ranksPromise = ranksByColumn.get(column) ?? (column === '' ? getUnsortedRanks({ data }) : getRanks({ data, column }))
if (!ranksByColumn.has(column)) {
ranksByColumn.set(column, ranksPromise)
}
const ranks = await ranksPromise
return { column, direction, ranks }
})
// We cannot slice directly, because columns can have ties in the borders of the slice
// TODO(SL): avoid sorting along the whole columns, maybe sort only the slice, and expand if needed
const indexes = Promise.all(orderByWithRanks).then(computeDataIndexes)
const indexesSlice = indexes.then(indexes => indexes.slice(start, end))
const rowsSlice = indexesSlice.then(indexes => Promise.all(
// TODO(SL): optimize to fetch groups of rows instead of individual rows?
// if so: maybe the 'reverse' above should be done after fetching the rows
Expand Down
Loading