Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: pileup calculation works when start and end fields are the same #891

Merged
merged 1 commit into from
May 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
107 changes: 106 additions & 1 deletion src/core/utils/data-transform.test.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { filterData, calculateData, aggregateData, splitExon, inferSvType } from './data-transform';
import { filterData, calculateData, aggregateData, splitExon, inferSvType, displace } from './data-transform';
import { scaleLinear } from 'd3-scale';

describe('Data Transformation', () => {
it('Filter', () => {
Expand All @@ -12,6 +13,110 @@ describe('Data Transformation', () => {
expect(filtered.filter(d => d['c'] === 'b')).toHaveLength(0);
expect(filtered.filter(d => d['q'] === 4)).toHaveLength(0);
});
// Exercises displace() with method 'pile': each datum receives a stringified row index in `row`.
// The snapshots below list the records ordered by their `s` value.
it('Pile', () => {
// Four intervals, intentionally not ordered by start; two of them share the same start (s = 1).
const data = [
{ s: 2, e: 3 },
{ s: 4, e: 6 },
{ s: 1, e: 2 },
{ s: 1, e: 3 }
];
// Linear scale passed to displace() as its `scale` argument.
const scale = scaleLinear().domain([1, 10]).range([1, 1000]);
// Case 1: distinct start/end fields and no padding.
// The first three records each take a different row; the record starting at 4 is placed back in row "0".
expect(
displace(
{ type: 'displace', method: 'pile', boundingBox: { startField: 's', endField: 'e' }, newField: 'row' },
data,
scale
)
).toMatchInlineSnapshot(`
[
{
"e": 2,
"row": "0",
"s": 1,
},
{
"e": 3,
"row": "1",
"s": 1,
},
{
"e": 3,
"row": "2",
"s": 2,
},
{
"e": 6,
"row": "0",
"s": 4,
},
]
`);
// Case 2: the same field ('s') is used as both start and end — the scenario named in this fix.
// Only the two records sharing s = 1 are separated into rows "0" and "1"; the others land in row "0".
expect(
displace(
{ type: 'displace', method: 'pile', boundingBox: { startField: 's', endField: 's' }, newField: 'row' },
data,
scale
)
).toMatchInlineSnapshot(`
[
{
"e": 2,
"row": "0",
"s": 1,
},
{
"e": 3,
"row": "1",
"s": 1,
},
{
"e": 3,
"row": "0",
"s": 2,
},
{
"e": 6,
"row": "0",
"s": 4,
},
]
`);
// Case 3: padding of 1 with isPaddingBP set, so displace() uses the padding value directly
// rather than deriving it from the scale. Every record ends up in its own row ("0" to "3").
expect(
displace(
{
type: 'displace',
method: 'pile',
boundingBox: { startField: 's', endField: 'e', padding: 1, isPaddingBP: true },
newField: 'row'
},
data,
scale
)
).toMatchInlineSnapshot(`
[
{
"e": 2,
"row": "0",
"s": 1,
},
{
"e": 3,
"row": "1",
"s": 1,
},
{
"e": 3,
"row": "2",
"s": 2,
},
{
"e": 6,
"row": "3",
"s": 4,
},
]
`);
});
it('SV', () => {
const svTypes = inferSvType(
{
Expand Down
133 changes: 49 additions & 84 deletions src/core/utils/data-transform.ts
Original file line number Diff line number Diff line change
Expand Up @@ -228,21 +228,26 @@ export function aggregateCoverage(
return output;
}

/**
* Mark displacement transform functions.
* @param t An object that contains data transformation spec.
* @param data An array of objects that contain data.
* @param scale A d3's linear scale that map between data-level values to screen-level values (px).
* @returns
*/
export function displace(
t: DisplaceTransform,
data: Datum[],
scale: d3.ScaleContinuousNumeric<number, number>
): Datum[] {
// Logging.recordTime('displace()');

const { boundingBox, method, newField } = t;
const { startField, endField, groupField } = boundingBox;

let padding = 0; // This is a pixel value.
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The original code comment ("This is a pixel value") was wrong: this value is actually in base pairs (BP), so the variable is renamed to paddingInBp.

let paddingInBp = 0;
if (boundingBox.padding && scale && !boundingBox.isPaddingBP) {
padding = Math.abs(scale.invert(boundingBox.padding) - scale.invert(0));
paddingInBp = Math.abs(scale.invert(boundingBox.padding) - scale.invert(0));
} else if (boundingBox.padding && boundingBox.isPaddingBP) {
padding = boundingBox.padding;
paddingInBp = boundingBox.padding;
}

// Check whether we have sufficient information.
Expand All @@ -255,90 +260,50 @@ export function displace(
}

if (method === 'pile') {
const oldAlgorithm = false;

if (oldAlgorithm) {
// This will be deprecated soon.
const { maxRows } = t;
const boundingBoxes: { start: number; end: number; row: number }[] = [];

base.sort((a: Datum, b: Datum) => (a[startField] as number) - (b[startField] as number)).forEach(
(d: Datum) => {
const start = (d[startField] as number) - padding;
const end = (d[endField] as number) + padding;

const overlapped = boundingBoxes.filter(
box =>
(box.start === start && end === box.end) ||
(box.start <= start && start < box.end) ||
(box.start < end && end <= box.end) ||
(start < box.start && box.end < end)
);
// This piling algorithm is heavily based on
// https://github.com/higlass/higlass-pileup/blob/8538a34c6d884c28455d6178377ee1ea2c2c90ae/src/bam-fetcher-worker.js#L626
const { maxRows } = t;
const occupiedSpaceInRows: { [group: string]: { start: number; end: number }[] } = {};

// find the lowest non overlapped row
const uniqueRows = [
...Array.from(new Set(boundingBoxes.map(d => d.row))),
Math.max(...boundingBoxes.map(d => d.row)) + 1
];
const overlappedRows = overlapped.map(d => d.row);
const lowestNonOverlappedRow = Math.min(
...uniqueRows.filter(d => overlappedRows.indexOf(d) === -1)
);
const sorted = base.sort((a: Datum, b: Datum) => (a[startField] as number) - (b[startField] as number));

// row index starts from zero
const row: number = overlapped.length === 0 ? 0 : lowestNonOverlappedRow;
sorted.forEach((d: Datum) => {
const start = +d[startField] - paddingInBp;
const end = +d[endField] + paddingInBp;
Comment on lines +271 to +272
Copy link
Member Author

@sehilyi sehilyi May 5, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

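These are the lines that solve the issue. Previously, d[endField] was not actually converted to a number: `as number` is only a compile-time type assertion, so a string value remained a string at runtime, whereas the unary `+` performs a real numeric conversion.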

This line was previously const end = (d[endField] as number) + paddingInBp; which led to assigning a string value to end, e.g., "3145234.102033".


d[newField] = `${maxRows && maxRows <= row ? maxRows - 1 : row}`;
// Create object if none
const group = groupField ? d[groupField] : '__NO_GROUP__';
if (!occupiedSpaceInRows[group]) {
occupiedSpaceInRows[group] = [];
}

boundingBoxes.push({ start, end, row });
// Find a row to place this segment
let rowIndex = occupiedSpaceInRows[group].findIndex(d => {
// Find a space and update the occupancy info.
if (end < d.start) {
d.start = start;
return true;
} else if (d.end < start) {
d.end = end;
return true;
}
);
} else {
// This piling algorithm is heavily based on
// https://github.com/higlass/higlass-pileup/blob/8538a34c6d884c28455d6178377ee1ea2c2c90ae/src/bam-fetcher-worker.js#L626
const { maxRows } = t;
const occupiedSpaceInRows: { [group: string]: { start: number; end: number }[] } = {};

const sorted = base.sort((a: Datum, b: Datum) => (a[startField] as number) - (b[startField] as number));

sorted.forEach((d: Datum) => {
const start = (d[startField] as number) - padding;
const end = (d[endField] as number) + padding;

// Create object if none
const group = groupField ? d[groupField] : '__NO_GROUP__';
if (!occupiedSpaceInRows[group]) {
occupiedSpaceInRows[group] = [];
}

// Find a row to place this segment
let rowIndex = occupiedSpaceInRows[group].findIndex(d => {
// Find a space and update the occupancy info.
if (end < d.start) {
d.start = start;
return true;
} else if (d.end < start) {
d.end = end;
return true;
}
return false;
});
return false;
});

if (rowIndex === -1) {
// We did not find sufficient space from the existing rows, so add a new row.
occupiedSpaceInRows[group].push({ start, end });
rowIndex = occupiedSpaceInRows[group].length - 1;
}
if (rowIndex === -1) {
// We did not find sufficient space from the existing rows, so add a new row.
occupiedSpaceInRows[group].push({ start, end });
rowIndex = occupiedSpaceInRows[group].length - 1;
}

d[newField] = `${maxRows && maxRows <= rowIndex ? maxRows - 1 : rowIndex}`;
});
}
d[newField] = `${maxRows && maxRows <= rowIndex ? maxRows - 1 : rowIndex}`;
});
} else if (method === 'spread') {
const boundingBoxes: { start: number; end: number }[] = [];

base.sort((a: Datum, b: Datum) => (a[startField] as number) - (b[startField] as number)).forEach((d: Datum) => {
let start = (d[startField] as number) - padding;
let end = (d[endField] as number) + padding;
let start = (d[startField] as number) - paddingInBp;
let end = (d[endField] as number) + paddingInBp;

let overlapped = boundingBoxes.filter(
box =>
Expand All @@ -360,20 +325,20 @@ export function displace(
);
if (overlapped.length > 0) {
if (trial % 2 === 0) {
start += padding * trial;
end += padding * trial;
start += paddingInBp * trial;
end += paddingInBp * trial;
} else {
start -= padding * trial;
end -= padding * trial;
start -= paddingInBp * trial;
end -= paddingInBp * trial;
}
}
trial++;
// TODO: do not go outside of a tile.
} while (overlapped.length > 0 && trial < 1000);
}

d[`${newField}Start`] = `${start + padding}`;
d[`${newField}Etart`] = `${end - padding}`;
d[`${newField}Start`] = `${start + paddingInBp}`;
d[`${newField}Etart`] = `${end - paddingInBp}`;

boundingBoxes.push({ start, end });
});
Expand Down