From 8051e5c1c7efae1c4becf4267d4072c7eca95bc3 Mon Sep 17 00:00:00 2001 From: Dan Vanderkam Date: Wed, 11 Mar 2015 12:37:02 -0400 Subject: [PATCH 1/4] Parse bigBed headers using jBinary. --- package.json | 2 + src/BigBed.js | 319 ++++++++++++++++++++++++++++++++++++++ src/ReadableView.js | 15 +- src/RemoteFile.js | 1 + src/main.js | 6 + test/BigBed-test.js | 4 + test/ReadableView-test.js | 8 + test/jbinary-test.js | 59 +++++++ 8 files changed, 409 insertions(+), 5 deletions(-) create mode 100644 src/BigBed.js create mode 100644 test/BigBed-test.js create mode 100644 test/jbinary-test.js diff --git a/package.json b/package.json index 28db7f47..9bc196f3 100644 --- a/package.json +++ b/package.json @@ -16,6 +16,8 @@ "dependencies": { "backbone": "^1.1.2", "d3": "^3.5.5", + "jbinary": "^2.1.2", + "pako": "^0.2.5", "q": "^1.1.2", "react": "^0.12.2", "underscore": "^1.7.0" diff --git a/src/BigBed.js b/src/BigBed.js new file mode 100644 index 00000000..6ee1cc3a --- /dev/null +++ b/src/BigBed.js @@ -0,0 +1,319 @@ +/** + * Parser for bigBed format. 
+ * Based on UCSC's src/inc/bbiFile.h + */ +'use strict'; + +var Q = require('q'), + _ = require('underscore'), + jBinary = require('jbinary'), + pako = require('pako'); // for gzip inflation + + +var ReadableView = require('./ReadableView'), + RemoteFile = require('./RemoteFile'); + +function typeAtOffset(typeName, offsetFieldName) { + return jBinary.Template({ + baseType: typeName, + read: function(context) { + if (+context[offsetFieldName] == 0) { + return null; + } else { + return this.binary.read(this.baseType, +context[offsetFieldName]); + } + } + }); +} + +var BigBedTypeSet = { + 'jBinary.all': 'File', + 'jBinary.littleEndian': true, + + 'File': { + _magic: ['const', 'uint32', 0x8789F2EB, true], + version: ['const', 'uint16', 4, true], + zoomLevels: 'uint16', + chromosomeTreeOffset: 'uint64', + unzoomedDataOffset: 'uint64', + unzoomedIndexOffset: 'uint64', + fieldCount: 'uint16', + definedFieldCount: 'uint16', + // 0 if no autoSql information + autoSqlOffset: 'uint64', + totalSummaryOffset: 'uint64', + // Size of uncompression buffer. 0 if uncompressed. 
+ uncompressBufSize: 'uint32', + // Offset to header extension 0 if no such extension + // TODO: support extended headers (not used in ensGene.bb) + extensionOffset: 'uint64', + zoomHeaders: ['array', 'ZoomHeader', 'zoomLevels'], + + totalSummary: typeAtOffset('TotalSummary', 'totalSummaryOffset'), + chromosomeTree: typeAtOffset('BPlusTree', 'chromosomeTreeOffset') + }, + + 'TotalSummary': { + basesCovered: 'uint64', + minVal: 'float64', // for bigBed minimum depth of coverage + maxVal: 'float64', // for bigBed maximum depth of coverage + sumData: 'float64', // for bigBed sum of coverage + sumSquared: 'float64' // for bigBed sum of coverage squared + }, + + 'ZoomHeader': { + reductionLevel: 'uint32', + _reserved: 'uint32', + dataOffset: 'uint64', + indexOffset: 'uint64' + }, + + 'BPlusTree': { + magic: ['const', 'uint32', 0x78CA8C91, true], + // Number of children per block (not byte size of block) + blockSize: 'uint32', + // Number of significant bytes in key + keySize: 'uint32', + // Number of bytes in value + valSize: 'uint32', + // Number of items in index + itemCount: 'uint64', + _reserved2: ['skip', 4], + _reserved3: ['skip', 4], + nodes: 'BPlusTreeNode' // ['array', 'BPlusTreeNode', 'itemCount'] + }, + + 'BPlusTreeNode': { + isLeaf: 'uint8', // 1 = yes, 0 = no + _reserved: 'uint8', + count: 'uint16', + contents: ['array', ['if', 'isLeaf', { + key: ['string', 'keySize'], + // Note: bigBed allows more general values; this is what Ensembl uses. 
+ // value: ['string', 'valSize'] + id: 'uint32', + size: 'uint32' + }, { + key: ['string', 'keySize'], + offset: 'uint64' + }], 'count'] + } +}; + +var CirTreeTypeSet = { + 'jBinary.all': 'File', + 'jBinary.littleEndian': true, + + 'File': { + _magic: ['const', 'uint32', 0x2468ACE0, true], + blockSize: 'uint32', + itemCount: 'uint64', + startChromIx: 'uint32', + startBase: 'uint32', + endChromIx: 'uint32', + endBase: 'uint32', + fileSize: 'uint64', + itemsPerSlot: 'uint32', + _reserved: ['skip', 4], + blocks: 'CirNode' + }, + + 'CirNode': { + isLeaf: 'uint8', // 1 = yes, 0 = no + _reserved: 'uint8', + count: 'uint16', + contents: ['array', ['if', 'isLeaf', { + startChromIx: 'uint32', + startBase: 'uint32', + endChromIx: 'uint32', + endBase: 'uint32', + offset: 'uint64', + size: 'uint64' + }, { + startChromIx: 'uint32', + startBase: 'uint32', + endChromIx: 'uint32', + endBase: 'uint32', + offset: 'uint64', + }], 'count'] + } +}; + + +var BigBedBlock = { + 'jBinary.all': 'File', + 'jBinary.littleEndian': true, + + 'File': ['array', 'BedEntry'], + 'BedEntry': { + 'chrId': 'uint32', + 'start': 'uint32', + 'end': 'uint32', + 'rest': 'string0' + } +}; + + +function parseHeader(dataView: DataView) { + // TODO: check Endianness using magic. Possibly use jDataView.littleEndian + // to flip the endianness for jBinary consumption. + // NB: dalliance doesn't support big endian formats. + var jb = new jBinary(dataView.buffer, BigBedTypeSet); + var header = jb.readAll(); + console.log(header); + + return header; +} + +function parseCirTree(dataView: DataView) { + var jb = new jBinary(dataView.buffer, CirTreeTypeSet); + var cirTree = jb.readAll(); + console.log(cirTree); + + return cirTree; +} + +function generateContigMap(twoBitHeader): {[key:string]: number} { + // Just assume it's a flat "tree" for now. 
+ var nodes = twoBitHeader.chromosomeTree.nodes.contents; + if (!nodes) { + throw 'Invalid chromosome tree'; + } + return _.object(nodes.map(function({id, key}) { + // remove trailing nulls from the key string + return [key.replace(/\0.*/, ''), id]; + })); +} + +function getContigId(contigMap, contig) { + return contigMap[contig] || contigMap['chr' + contig] || null; +} + +function intersectIntervals(intervals: Array<[number, number]>): [number, number] { + if (!intervals.length) { + throw 'Tried to intersect zero intervals'; + } + var result = intervals[0]; + intervals.slice(1).forEach(function([a, b]) { + result[0] = Math.min(a, result[0]); + result[1] = Math.max(b, result[1]); + }); + return result; +} + +// TODO: factor out into module +var lessOrEqual = function(c1, p1, c2, p2) { + return c1 < c2 || (c1 == c2 && p1 <= p2); +}; +var contains = function(startContig, startPos, endContig, endPos, contig, pos) { + return lessOrEqual(startContig, startPos, contig, pos) && + lessOrEqual(contig, pos, endContig, endPos); +}; + +var overlaps = function(startContig, startBase, endContig, endBase, contig, start, stop) { + return contains(startContig, startBase, endContig, endBase, contig, start) || + contains(startContig, startBase, endContig, endBase, contig, stop); +}; + +// Get a byte range in the file containing a superset of the interval. +function findByteRange(twoBitHeader, cirTree, contigIx: number, start: number, stop: number): ?[number, number] { + + // Do a recursive search through the index tree + var matchingIntervals = []; + var find = function(node) { + if (node.contents) { + node.contents.forEach(find); + } else { + if (overlaps(node.startChromIx, node.startBase, + node.endChromIx, node.endBase, + contigIx, start, stop)) { + matchingIntervals.push(node); + } + } + }; + find(cirTree.blocks); + + return matchingIntervals; + + // Intersect the intervals. + // XXX UCSC allows discontiguous intervals. When would this ever happen? 
+ return intersectIntervals( + matchingIntervals.map(n => [+n.offset, n.offset+n.size])); +} + +function extractFeaturesInRange(dataView, dataRange, blocks, contigIx, start, stop) { + console.log('Fetched ', dataRange); + var buffer = dataView.buffer; + + return _.flatten(blocks.map(block => { + var blockOffset = block.offset - dataRange[0], + blockLimit = blockOffset + block.size, + // TODO: where does the +2 come from? (I copied it from dalliance) + blockBuffer = buffer.slice(blockOffset + 2, blockLimit); + // TODO: only inflate if necessary + var inflatedBuffer = pako.inflateRaw(new Uint8Array(blockBuffer)); + + var jb = new jBinary(inflatedBuffer, BigBedBlock); + // TODO: parse only one record at a time, as many as is necessary. + var beds = jb.readAll(); + + console.log(beds); + + beds = beds.filter(function(bed) { + return overlaps(bed.chrId, bed.start, bed.chrId, bed.end, contigIx, start, stop); + }); + + return beds; + })); +} + + +class BigBed { + remoteFile: RemoteFile; + header: Q.Promise; + cirTree: Q.Promise; + + constructor(url: string) { + this.remoteFile = new RemoteFile(url); + this.header = this.remoteFile.getBytes(0, 64*1024).then(parseHeader); + this.contigMap = this.header.then(generateContigMap); + + // Next: fetch [header.unzoomedIndexOffset, zoomHeaders[0].dataOffset] and parse + // the "CIR" tree. + this.cirTree = this.header.then(header => { + // zoomHeaders[0].dataOffset is the next entry in the file. + // We assume the "cirTree" section goes all the way to that point. + var start = header.unzoomedIndexOffset, + length = header.zoomHeaders[0].dataOffset - start; + return this.remoteFile.getBytes(start, length).then(parseCirTree); + }); + + // XXX: are these necessary? what's the right way to propagate errors? + this.header.done(); + this.contigMap.done(); + this.cirTree.done(); + } + + // Returns all BED entries which overlap the range. 
+ // TODO: factor logic out into a helper + getFeaturesInRange(contig: string, start: number, stop: number): Q.Promise { + return Q.spread([this.header, this.cirTree, this.contigMap], + (header, cirTree, contigMap) => { + var contigIx = getContigId(contigMap, contig); + if (contigIx === null) { + throw `Invalid contig ${contig}`; + } + + var blocks = findByteRange(header, cirTree, contigIx, start, stop); + if (!blocks) { + return null; // XXX better to throw? + } + console.log(blocks); + var range = intersectIntervals(blocks.map(n => [+n.offset, n.offset+n.size])); + return this.remoteFile.getBytes(range[0], range[1] - range[0]) + .then(dataView => extractFeaturesInRange(dataView, range, blocks, contigIx, start, stop)); + }); + } +} + +module.exports = BigBed; diff --git a/src/ReadableView.js b/src/ReadableView.js index baf125e9..fb4ebab3 100644 --- a/src/ReadableView.js +++ b/src/ReadableView.js @@ -19,13 +19,18 @@ class ReadableView { return num; } + // Read an unsigned 16-bit integer and advance the current position. + readUint16(): number { + return this.readUint8() + + this.readUint8() * (1 << 8); + } + // Read an unsigned 32-bit integer and advance the current position. readUint32(): number { - var num = this.readUint8() | - this.readUint8() * (1 << 8 ) | - this.readUint8() * (1 << 16) | - this.readUint8() * (1 << 24); - return num; + return this.readUint8() + + this.readUint8() * (1 << 8 ) + + this.readUint8() * (1 << 16) + + this.readUint8() * (1 << 24); } // Read a sequence of 32-bit integers and advance the current position. diff --git a/src/RemoteFile.js b/src/RemoteFile.js index 78b32a99..492e014f 100644 --- a/src/RemoteFile.js +++ b/src/RemoteFile.js @@ -24,6 +24,7 @@ class RemoteFile { this.chunks = []; } + // TODO: return a buffer, not a DataView getBytes(start: number, length: number): Q.Promise { var stop = start + length; // First check the cache. 
diff --git a/src/main.js b/src/main.js index a1c2688c..90e84fd2 100644 --- a/src/main.js +++ b/src/main.js @@ -1,6 +1,7 @@ /* @flow */ var React = require('react'), TwoBit = require('./TwoBit'), + BigBed = require('./BigBed'), Root = require('./Root'), createTwoBitDataSource = require('./TwoBitDataSource'); @@ -23,3 +24,8 @@ genome.getFeaturesInRange('chr1', 123000, 124000).done(); var root = React.render(, document.getElementById('root')); + +var ensembl = new BigBed('/ensGene.bb'); + +window.ensembl = ensembl; +window.genome = genome; diff --git a/test/BigBed-test.js b/test/BigBed-test.js new file mode 100644 index 00000000..b82b1b99 --- /dev/null +++ b/test/BigBed-test.js @@ -0,0 +1,4 @@ +// Things to test: +// - getFeatures which return no features +// - getFeatures which crosses a block boundary +// - getFeatures which crosses a contig boundary (not currently possible) diff --git a/test/ReadableView-test.js b/test/ReadableView-test.js index 01291a56..c2caff32 100644 --- a/test/ReadableView-test.js +++ b/test/ReadableView-test.js @@ -61,4 +61,12 @@ describe('ReadableView', function() { expect(bytes.tell()).to.equal(20); expect(bytes.bytesRemaining()).to.equal(0); }); + + it('should read a large uint32', function() { + var u32 = new Uint32Array(1); + u32[0] = 0xebf28987; + + var bytes = new ReadableView(new DataView(u32.buffer)); + expect(bytes.readUint32()).to.equal(0xebf28987); + }); }); diff --git a/test/jbinary-test.js b/test/jbinary-test.js new file mode 100644 index 00000000..975fc2e3 --- /dev/null +++ b/test/jbinary-test.js @@ -0,0 +1,59 @@ +// This is a playground to ensure that I understand how jBinary works. 
+var chai = require('chai'); +var expect = chai.expect; + +var jBinary = require('jbinary'); + +describe('jBinary', function() { + it('should read two-bit headers', function() { + var twoBitTypeSet = { + 'jBinary.all': 'File', + 'jBinary.littleEndian': true, + 'File': { + magic: ['const', 'uint32', 0x1A412743, true], + version: ['const', 'uint32', 0, true], + sequenceCount: 'uint32', + reserved: 'uint32' + } + }; + + var byteArray = [ + 0x43, 0x27, 0x41, 0x1a, + 0x00, 0x00, 0x00, 0x00, + 0x5d, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00]; + var u8array = new Uint8Array(byteArray.length); + byteArray.forEach((val, idx) => { u8array[idx] = val; }); + + var jb = new jBinary(u8array.buffer, twoBitTypeSet); + var header = jb.readAll(); + console.log(header); + + expect(header.magic).to.equal(0x1A412743); // two bit magic + expect(header.version).to.equal(0); + expect(header.sequenceCount).to.equal(93); + expect(header.reserved).to.equal(0); + }); + + it('should advance through a sequence', function() { + var uint8TypeSet = { + 'jBinary.all': 'File', + 'jBinary.littleEndian': true, + 'File': { + value: 'uint8' + } + }; + + var u8array = new Uint8Array(16); + for (var i = 0; i < 16; i++) { + u8array[i] = i * i; + } + var buffer = u8array.buffer; + + var jb = new jBinary(buffer, uint8TypeSet); + while (jb.tell() < buffer.byteLength) { + var x = jb.read({value: 'uint8'}); + console.log(jb.tell(), x); + } + }); +}); From abb8f8adf9f94896cdf08c850a2fe20961f8d008 Mon Sep 17 00:00:00 2001 From: Dan Vanderkam Date: Fri, 13 Mar 2015 11:49:25 -0400 Subject: [PATCH 2/4] BigBed test --- package.json | 4 +- src/BigBed.js | 268 ++++++++------------------------ src/RemoteFile.js | 12 +- src/TwoBit.js | 9 +- src/formats/bbi.js | 127 +++++++++++++++ src/formats/helpers.js | 19 +++ test/BigBed-test.js | 46 ++++++ test/FakeXMLHttpRequest-test.js | 54 ------- test/FakeXMLHttpRequest.js | 92 ----------- test/RemoteFile-test.js | 44 +++--- test/coverage.html | 2 +- test/data/0to9.txt | 1 + 
test/data/hello.txt | 2 + test/data/itemRgb.bb | Bin 0 -> 26695 bytes test/data/itemRgb.bed | 22 +++ test/jbinary-test.js | 7 +- test/runner.html | 2 +- 17 files changed, 325 insertions(+), 386 deletions(-) create mode 100644 src/formats/bbi.js create mode 100644 src/formats/helpers.js delete mode 100644 test/FakeXMLHttpRequest-test.js delete mode 100644 test/FakeXMLHttpRequest.js create mode 100644 test/data/0to9.txt create mode 100644 test/data/hello.txt create mode 100644 test/data/itemRgb.bb create mode 100644 test/data/itemRgb.bed diff --git a/package.json b/package.json index 9bc196f3..beae556e 100644 --- a/package.json +++ b/package.json @@ -23,6 +23,7 @@ "underscore": "^1.7.0" }, "devDependencies": { + "arraybuffer-slice": "^0.1.2", "chai": "^2.0.0", "coveralls": "^2.11.2", "es5-shim": "^4.1.0", @@ -45,7 +46,6 @@ "react-tools": "^0.12.2", "reactify": "^1.0.0", "sinon": "^1.12.2", - "source-map": "^0.3.0", - "text-encoding": "^0.5.2" + "source-map": "^0.3.0" } } diff --git a/src/BigBed.js b/src/BigBed.js index 6ee1cc3a..a6e275fa 100644 --- a/src/BigBed.js +++ b/src/BigBed.js @@ -11,164 +11,26 @@ var Q = require('q'), var ReadableView = require('./ReadableView'), - RemoteFile = require('./RemoteFile'); - -function typeAtOffset(typeName, offsetFieldName) { - return jBinary.Template({ - baseType: typeName, - read: function(context) { - if (+context[offsetFieldName] == 0) { - return null; - } else { - return this.binary.read(this.baseType, +context[offsetFieldName]); - } - } - }); -} - -var BigBedTypeSet = { - 'jBinary.all': 'File', - 'jBinary.littleEndian': true, - - 'File': { - _magic: ['const', 'uint32', 0x8789F2EB, true], - version: ['const', 'uint16', 4, true], - zoomLevels: 'uint16', - chromosomeTreeOffset: 'uint64', - unzoomedDataOffset: 'uint64', - unzoomedIndexOffset: 'uint64', - fieldCount: 'uint16', - definedFieldCount: 'uint16', - // 0 if no autoSql information - autoSqlOffset: 'uint64', - totalSummaryOffset: 'uint64', - // Size of uncompression 
buffer. 0 if uncompressed. - uncompressBufSize: 'uint32', - // Offset to header extension 0 if no such extension - // TODO: support extended headers (not used in ensGene.bb) - extensionOffset: 'uint64', - zoomHeaders: ['array', 'ZoomHeader', 'zoomLevels'], - - totalSummary: typeAtOffset('TotalSummary', 'totalSummaryOffset'), - chromosomeTree: typeAtOffset('BPlusTree', 'chromosomeTreeOffset') - }, - - 'TotalSummary': { - basesCovered: 'uint64', - minVal: 'float64', // for bigBed minimum depth of coverage - maxVal: 'float64', // for bigBed maximum depth of coverage - sumData: 'float64', // for bigBed sum of coverage - sumSquared: 'float64' // for bigBed sum of coverage squared - }, - - 'ZoomHeader': { - reductionLevel: 'uint32', - _reserved: 'uint32', - dataOffset: 'uint64', - indexOffset: 'uint64' - }, - - 'BPlusTree': { - magic: ['const', 'uint32', 0x78CA8C91, true], - // Number of children per block (not byte size of block) - blockSize: 'uint32', - // Number of significant bytes in key - keySize: 'uint32', - // Number of bytes in value - valSize: 'uint32', - // Number of items in index - itemCount: 'uint64', - _reserved2: ['skip', 4], - _reserved3: ['skip', 4], - nodes: 'BPlusTreeNode' // ['array', 'BPlusTreeNode', 'itemCount'] - }, - - 'BPlusTreeNode': { - isLeaf: 'uint8', // 1 = yes, 0 = no - _reserved: 'uint8', - count: 'uint16', - contents: ['array', ['if', 'isLeaf', { - key: ['string', 'keySize'], - // Note: bigBed allows more general values; this is what Ensembl uses. 
- // value: ['string', 'valSize'] - id: 'uint32', - size: 'uint32' - }, { - key: ['string', 'keySize'], - offset: 'uint64' - }], 'count'] - } -}; - -var CirTreeTypeSet = { - 'jBinary.all': 'File', - 'jBinary.littleEndian': true, - - 'File': { - _magic: ['const', 'uint32', 0x2468ACE0, true], - blockSize: 'uint32', - itemCount: 'uint64', - startChromIx: 'uint32', - startBase: 'uint32', - endChromIx: 'uint32', - endBase: 'uint32', - fileSize: 'uint64', - itemsPerSlot: 'uint32', - _reserved: ['skip', 4], - blocks: 'CirNode' - }, - - 'CirNode': { - isLeaf: 'uint8', // 1 = yes, 0 = no - _reserved: 'uint8', - count: 'uint16', - contents: ['array', ['if', 'isLeaf', { - startChromIx: 'uint32', - startBase: 'uint32', - endChromIx: 'uint32', - endBase: 'uint32', - offset: 'uint64', - size: 'uint64' - }, { - startChromIx: 'uint32', - startBase: 'uint32', - endChromIx: 'uint32', - endBase: 'uint32', - offset: 'uint64', - }], 'count'] - } -}; + RemoteFile = require('./RemoteFile'), + Interval = require('./Interval'), + ContigInterval = require('./ContigInterval'), + utils = require('./utils.js'), + bbi = require('./formats/bbi'); -var BigBedBlock = { - 'jBinary.all': 'File', - 'jBinary.littleEndian': true, - - 'File': ['array', 'BedEntry'], - 'BedEntry': { - 'chrId': 'uint32', - 'start': 'uint32', - 'end': 'uint32', - 'rest': 'string0' - } -}; - - -function parseHeader(dataView: DataView) { +function parseHeader(buffer) { // TODO: check Endianness using magic. Possibly use jDataView.littleEndian // to flip the endianness for jBinary consumption. // NB: dalliance doesn't support big endian formats. 
- var jb = new jBinary(dataView.buffer, BigBedTypeSet); - var header = jb.readAll(); - console.log(header); + var jb = new jBinary(buffer, bbi.TYPE_SET); + var header = jb.read('Header'); return header; } -function parseCirTree(dataView: DataView) { - var jb = new jBinary(dataView.buffer, CirTreeTypeSet); - var cirTree = jb.readAll(); - console.log(cirTree); +function parseCirTree(buffer) { + var jb = new jBinary(buffer, bbi.TYPE_SET); + var cirTree = jb.read('CirTree'); return cirTree; } @@ -186,81 +48,63 @@ function generateContigMap(twoBitHeader): {[key:string]: number} { } function getContigId(contigMap, contig) { - return contigMap[contig] || contigMap['chr' + contig] || null; + if (contig in contigMap) { + return contigMap[contig]; + } + var chr = 'chr' + contig; + if (chr in contigMap) { + return contigMap[chr]; + } + return null; } -function intersectIntervals(intervals: Array<[number, number]>): [number, number] { - if (!intervals.length) { - throw 'Tried to intersect zero intervals'; - } - var result = intervals[0]; - intervals.slice(1).forEach(function([a, b]) { - result[0] = Math.min(a, result[0]); - result[1] = Math.max(b, result[1]); +function reverseContigMap(contigMap: {[key:string]: number}): Array { + var ary = []; + _.forEach(contigMap, (index, name) => { + ary[index] = name; }); - return result; + return ary; } -// TODO: factor out into module -var lessOrEqual = function(c1, p1, c2, p2) { - return c1 < c2 || (c1 == c2 && p1 <= p2); -}; -var contains = function(startContig, startPos, endContig, endPos, contig, pos) { - return lessOrEqual(startContig, startPos, contig, pos) && - lessOrEqual(contig, pos, endContig, endPos); -}; - -var overlaps = function(startContig, startBase, endContig, endBase, contig, start, stop) { - return contains(startContig, startBase, endContig, endBase, contig, start) || - contains(startContig, startBase, endContig, endBase, contig, stop); -}; - -// Get a byte range in the file containing a superset of the interval. 
-function findByteRange(twoBitHeader, cirTree, contigIx: number, start: number, stop: number): ?[number, number] { - +// Get all blocks in the file containing features which intersect with contigRange. +function findOverlappingBlocks(twoBitHeader, cirTree, contigRange) { // Do a recursive search through the index tree - var matchingIntervals = []; + var matchingBlocks = []; + var tupleRange = [[contigRange.contig, contigRange.start()], + [contigRange.contig, contigRange.stop()]]; var find = function(node) { if (node.contents) { node.contents.forEach(find); } else { - if (overlaps(node.startChromIx, node.startBase, - node.endChromIx, node.endBase, - contigIx, start, stop)) { - matchingIntervals.push(node); + var nodeRange = [[node.startChromIx, node.startBase], + [node.endChromIx, node.endBase]]; + if (utils.tupleRangeOverlaps(nodeRange, tupleRange)) { + matchingBlocks.push(node); } } }; find(cirTree.blocks); - return matchingIntervals; - - // Intersect the intervals. - // XXX UCSC allows discontiguous intervals. When would this ever happen? - return intersectIntervals( - matchingIntervals.map(n => [+n.offset, n.offset+n.size])); + return matchingBlocks; } -function extractFeaturesInRange(dataView, dataRange, blocks, contigIx, start, stop) { - console.log('Fetched ', dataRange); - var buffer = dataView.buffer; - +function extractFeaturesInRange(buffer, dataRange, blocks, contigRange) { return _.flatten(blocks.map(block => { - var blockOffset = block.offset - dataRange[0], + var blockOffset = block.offset - dataRange.start, blockLimit = blockOffset + block.size, // TODO: where does the +2 come from? (I copied it from dalliance) blockBuffer = buffer.slice(blockOffset + 2, blockLimit); // TODO: only inflate if necessary var inflatedBuffer = pako.inflateRaw(new Uint8Array(blockBuffer)); - var jb = new jBinary(inflatedBuffer, BigBedBlock); - // TODO: parse only one record at a time, as many as is necessary. 
- var beds = jb.readAll(); - - console.log(beds); + var jb = new jBinary(inflatedBuffer, bbi.TYPE_SET); + // TODO: parse only one BedEntry at a time, as many as is necessary. + var beds = jb.read('BedBlock'); beds = beds.filter(function(bed) { - return overlaps(bed.chrId, bed.start, bed.chrId, bed.end, contigIx, start, stop); + var bedInterval = new ContigInterval(bed.chrId, bed.start, bed.stop); + var r = contigRange.intersects(bedInterval); + return r; }); return beds; @@ -283,8 +127,10 @@ class BigBed { this.cirTree = this.header.then(header => { // zoomHeaders[0].dataOffset is the next entry in the file. // We assume the "cirTree" section goes all the way to that point. + // Lacking zoom headers, assume it's 4k. var start = header.unzoomedIndexOffset, - length = header.zoomHeaders[0].dataOffset - start; + zoomHeader = header.zoomHeaders[0], + length = zoomHeader ? zoomHeader.dataOffset - start : 4096; return this.remoteFile.getBytes(start, length).then(parseCirTree); }); @@ -303,15 +149,25 @@ class BigBed { if (contigIx === null) { throw `Invalid contig ${contig}`; } + var contigRange = new ContigInterval(contigIx, start, stop); - var blocks = findByteRange(header, cirTree, contigIx, start, stop); - if (!blocks) { - return null; // XXX better to throw? 
+ var blocks = findOverlappingBlocks(header, cirTree, contigRange); + if (blocks.length == 0) { + return []; } - console.log(blocks); - var range = intersectIntervals(blocks.map(n => [+n.offset, n.offset+n.size])); - return this.remoteFile.getBytes(range[0], range[1] - range[0]) - .then(dataView => extractFeaturesInRange(dataView, range, blocks, contigIx, start, stop)); + + var range = Interval.boundingInterval( + blocks.map(n => new Interval(+n.offset, n.offset+n.size))); + return this.remoteFile.getBytes(range.start, range.length()) + .then(buffer => { + var reverseMap = reverseContigMap(contigMap); + var features = extractFeaturesInRange(buffer, range, blocks, contigRange) + features.forEach(f => { + f.contig = reverseMap[f.chrId]; + delete f.chrId; + }); + return features; + }); }); } } diff --git a/src/RemoteFile.js b/src/RemoteFile.js index 492e014f..6033dc59 100644 --- a/src/RemoteFile.js +++ b/src/RemoteFile.js @@ -17,21 +17,22 @@ class RemoteFile { url: string; fileLength: number; chunks: Array; // regions of file that have already been loaded. + numNetworkRequests: number; // track this for debugging/testing constructor(url: string) { this.url = url; this.fileLength = -1; // unknown this.chunks = []; + this.numNetworkRequests = 0; } - // TODO: return a buffer, not a DataView - getBytes(start: number, length: number): Q.Promise { + getBytes(start: number, length: number): Q.Promise { var stop = start + length; // First check the cache. 
for (var i = 0; i < this.chunks.length; i++) { var chunk = this.chunks[i]; if (chunk.start <= start && chunk.stop >= stop) { - return Q.when(new DataView(chunk.buffer, start - chunk.start, length)); + return Q.when(chunk.buffer.slice(start - chunk.start, stop - chunk.start)); } } @@ -41,7 +42,7 @@ class RemoteFile { return this.getFromNetwork(start, start + length - 1); } - getFromNetwork(start: number, stop: number): Q.Promise { + getFromNetwork(start: number, stop: number): Q.Promise { var deferred = Q.defer(); var xhr = new XMLHttpRequest(); @@ -56,10 +57,11 @@ class RemoteFile { var newChunk = { start, stop: start + buffer.byteLength - 1, buffer }; remoteFile.chunks.push(newChunk); - deferred.resolve(new DataView(buffer)); + deferred.resolve(buffer); }; // TODO: `reject`, `notify` on progress + this.numNetworkRequests++; xhr.send(); return deferred.promise; diff --git a/src/TwoBit.js b/src/TwoBit.js index 991163f6..a7496977 100644 --- a/src/TwoBit.js +++ b/src/TwoBit.js @@ -166,8 +166,8 @@ class TwoBit { this.header = deferredHeader.promise; // TODO: if 16k is insufficient, fetch the right amount. - this.remoteFile.getBytes(0, 16*1024).then(function(dataView) { - var header = parseHeader(dataView); + this.remoteFile.getBytes(0, 16*1024).then(function(buffer) { + var header = parseHeader(new DataView(buffer)); deferredHeader.resolve(header); }).done(); } @@ -184,7 +184,8 @@ class TwoBit { var dnaOffset = header.offset + header.dnaOffsetFromHeader; var offset = Math.floor(dnaOffset + start/4); var byteLength = Math.ceil((stop - start + 1) / 4) + 1; - return this.remoteFile.getBytes(offset, byteLength).then(dataView => { + return this.remoteFile.getBytes(offset, byteLength).then(buffer => { + var dataView = new DataView(buffer); return markUnknownDNA( unpackDNA(dataView, start % 4, stop - start + 1), start, header) .join(''); @@ -208,7 +209,7 @@ class TwoBit { // TODO: if 4k is insufficient, fetch the right amount. 
return this.remoteFile.getBytes(seq.offset, 4095).then( - dataView => parseSequenceRecord(dataView, seq.offset)); + buf => parseSequenceRecord(new DataView(buf), seq.offset)); }); } } diff --git a/src/formats/bbi.js b/src/formats/bbi.js new file mode 100644 index 00000000..d67b5d46 --- /dev/null +++ b/src/formats/bbi.js @@ -0,0 +1,127 @@ +/** + * BBI is the shared structure between bigBed and bigWig. + * These structures are based on UCSC's src/inc/bbiFile.h + */ + +'use strict'; + +var jBinary = require('jbinary'), + {typeAtOffset} = require('./helpers'); + +var TYPE_SET = { + 'jBinary.littleEndian': true, + + 'Header': { + _magic: ['const', 'uint32', 0x8789F2EB, true], + version: ['const', 'uint16', 4, true], + zoomLevels: 'uint16', + chromosomeTreeOffset: 'uint64', + unzoomedDataOffset: 'uint64', + unzoomedIndexOffset: 'uint64', + fieldCount: 'uint16', + definedFieldCount: 'uint16', + // 0 if no autoSql information + autoSqlOffset: 'uint64', + totalSummaryOffset: 'uint64', + // Size of uncompression buffer. 0 if uncompressed. 
+ uncompressBufSize: 'uint32', + // Offset to header extension 0 if no such extension + // TODO: support extended headers (not used in ensGene.bb) + extensionOffset: 'uint64', + zoomHeaders: ['array', 'ZoomHeader', 'zoomLevels'], + + totalSummary: typeAtOffset('TotalSummary', 'totalSummaryOffset'), + chromosomeTree: typeAtOffset('BPlusTree', 'chromosomeTreeOffset') + }, + + 'TotalSummary': { + basesCovered: 'uint64', + minVal: 'float64', // for bigBed minimum depth of coverage + maxVal: 'float64', // for bigBed maximum depth of coverage + sumData: 'float64', // for bigBed sum of coverage + sumSquared: 'float64' // for bigBed sum of coverage squared + }, + + 'ZoomHeader': { + reductionLevel: 'uint32', + _reserved: 'uint32', + dataOffset: 'uint64', + indexOffset: 'uint64' + }, + + 'BPlusTree': { + magic: ['const', 'uint32', 0x78CA8C91, true], + // Number of children per block (not byte size of block) + blockSize: 'uint32', + // Number of significant bytes in key + keySize: 'uint32', + // Number of bytes in value + valSize: 'uint32', + // Number of items in index + itemCount: 'uint64', + _reserved2: ['skip', 4], + _reserved3: ['skip', 4], + nodes: 'BPlusTreeNode' // ['array', 'BPlusTreeNode', 'itemCount'] + }, + + 'BPlusTreeNode': { + isLeaf: 'uint8', // 1 = yes, 0 = no + _reserved: 'uint8', + count: 'uint16', + contents: ['array', ['if', 'isLeaf', { + key: ['string', 'keySize'], + // Note: bigBed allows more general values; this is what Ensembl uses. 
+ // value: ['string', 'valSize'] + id: 'uint32', + size: 'uint32' + }, { + key: ['string', 'keySize'], + offset: 'uint64' + }], 'count'] + }, + + 'CirTree': { + _magic: ['const', 'uint32', 0x2468ACE0, true], + blockSize: 'uint32', + itemCount: 'uint64', + startChromIx: 'uint32', + startBase: 'uint32', + endChromIx: 'uint32', + endBase: 'uint32', + fileSize: 'uint64', + itemsPerSlot: 'uint32', + _reserved: ['skip', 4], + blocks: 'CirNode' + }, + + 'CirNode': { + isLeaf: 'uint8', // 1 = yes, 0 = no + _reserved: 'uint8', + count: 'uint16', + contents: ['array', ['if', 'isLeaf', { + startChromIx: 'uint32', + startBase: 'uint32', + endChromIx: 'uint32', + endBase: 'uint32', + offset: 'uint64', + size: 'uint64' + }, { + startChromIx: 'uint32', + startBase: 'uint32', + endChromIx: 'uint32', + endBase: 'uint32', + offset: 'uint64', + }], 'count'] + }, + + 'BedEntry': { + 'chrId': 'uint32', + 'start': 'uint32', + 'stop': 'uint32', + 'rest': 'string0' + }, + + 'BedBlock': ['array', 'BedEntry'], +}; + +module.exports = {TYPE_SET}; diff --git a/src/formats/helpers.js b/src/formats/helpers.js new file mode 100644 index 00000000..235f4a8d --- /dev/null +++ b/src/formats/helpers.js @@ -0,0 +1,19 @@ +/** + * Helpers for specifying file formats using jBinary. 
+ */ +var jBinary = require('jbinary'); + +function typeAtOffset(typeName, offsetFieldName) { + return jBinary.Template({ + baseType: typeName, + read: function(context) { + if (+context[offsetFieldName] == 0) { + return null; + } else { + return this.binary.read(this.baseType, +context[offsetFieldName]); + } + } + }); +} + +module.exports = {typeAtOffset}; diff --git a/test/BigBed-test.js b/test/BigBed-test.js index b82b1b99..56081a92 100644 --- a/test/BigBed-test.js +++ b/test/BigBed-test.js @@ -2,3 +2,49 @@ // - getFeatures which return no features // - getFeatures which crosses a block boundary // - getFeatures which crosses a contig boundary (not currently possible) + +var chai = require('chai'); +var expect = chai.expect; +var assert = chai.assert; + +var BigBed = require('../src/BigBed'); + +describe('BigBed', function() { + function getTestBigBed() { + // This file was generated using UCSC tools: + // cd kent/src/utils/bedToBigBed/tests; make + // This file is compressed, little endian and contains autoSQL. 
+ return new BigBed('/test/data/itemRgb.bb'); + } + + it('should extract features in a range', function(done) { + var bb = getTestBigBed(); + + bb.getFeaturesInRange('chrX', 151077036, 151078532) + .then(features => { + // chrX 151077031 151078198 MID_BLUE 0 - 151077031 151078198 0,0,128 + // chrX 151078198 151079365 VIOLET_RED1 0 - 151078198 151079365 255,62,150 + expect(features).to.have.length(2); + expect(features[0].contig).to.equal('chrX'); + expect(features[0].start).to.equal(151077031); + expect(features[0].stop).to.equal(151078198); + expect(features[1].contig).to.equal('chrX'); + expect(features[1].start).to.equal(151078198); + expect(features[1].stop).to.equal(151079365); + + var rest0 = features[0].rest.split('\t'); + expect(rest0).to.have.length(6) + expect(rest0[0]).to.equal('MID_BLUE'); + expect(rest0[2]).to.equal('-'); + expect(rest0[5]).to.equal('0,0,128'); + + var rest1 = features[1].rest.split('\t'); + expect(rest1).to.have.length(6) + expect(rest1[0]).to.equal('VIOLET_RED1'); + expect(rest1[2]).to.equal('-'); + expect(rest1[5]).to.equal('255,62,150'); + done(); + }) + .done(); + }); +}); diff --git a/test/FakeXMLHttpRequest-test.js b/test/FakeXMLHttpRequest-test.js deleted file mode 100644 index 6af47f5c..00000000 --- a/test/FakeXMLHttpRequest-test.js +++ /dev/null @@ -1,54 +0,0 @@ -var sinon = require('sinon'), - chai = require('chai'), - expect = chai.expect; - -var FakeXHR = require('./FakeXMLHttpRequest'); - -describe('FakeXMLHttpRequest', () => { - beforeEach(() => { - FakeXHR.install(); - }); - afterEach(() => { - FakeXHR.restore(); - }); - - it('should intercept simple XHRs', (done) => { - FakeXHR.addResponse('http://example.com/file.txt', 'hello'); - - var xhr = new XMLHttpRequest(); - xhr.open('GET', 'http://example.com/file.txt'); - xhr.onload = function(e) { - expect(this.response).to.equal('hello'); - done(); - }; - xhr.onerror = function(e) { - throw e; - done(); - }; - xhr.send(); - }); - - it('should intercept arraybuffer XHRs', 
(done) => { - var buf = new Uint8Array(4); - buf[0] = 1; - buf[1] = 2; - buf[2] = 3; - buf[3] = 4; - FakeXHR.addResponse('http://example.com/file.txt', buf.buffer); - - var xhr = new XMLHttpRequest(); - xhr.open('GET', 'http://example.com/file.txt'); - xhr.responseType = 'arraybuffer'; - xhr.onload = function(e) { - var buf = this.response; - expect(buf).to.be.an.instanceof(ArrayBuffer); - expect(buf.byteLength).to.equal(4); - done(); - }; - xhr.onerror = function(e) { - throw e; - done(); - }; - xhr.send(); - }); -}); diff --git a/test/FakeXMLHttpRequest.js b/test/FakeXMLHttpRequest.js deleted file mode 100644 index 71f490de..00000000 --- a/test/FakeXMLHttpRequest.js +++ /dev/null @@ -1,92 +0,0 @@ -/** @flow */ - -/** - * Tiny fake for just the portions of XHR level 2 that pileup.js needs. - * This should be deleted once FauxJax or Sinon support XHR2. - * (Specifically, we need to let xhr.response be an ArrayBuffer.) - */ -class FakeXMLHttpRequest { - method: string; - url: string; - responseType: string; - requestHeaders: Object; - response: any; - onload: (e: Object) => void; - onerror: (e: any) => void; - - constructor() { - this.method = ''; - this.url = ''; - this.responseType = ''; - this.requestHeaders = {}; - this.response = null; - this.onload = e => {}; - this.onerror = e => { - throw e; - }; - } - - open(method: string, url: string) { - this.method = method; - this.url = url; - } - - setRequestHeader(header: string, value: string) { - this.requestHeaders[header] = value; - } - - send(): void { - if (!this.method || !this.url) { - throw 'must call open() before send()'; - } - FakeXMLHttpRequest.numRequests++; - var rs = FakeXMLHttpRequest.responses; - for (var i = 0; i < rs.length; i++) { - var url = rs[i][0], response = rs[i][1]; - if (url == this.url) { - this.response = response; - window.setTimeout(() => { - this.onload.call(this, {}); - }, 0); - break; - } - } - - if (!this.response) { - this.onerror.call(this, 'Unable to find response for ' + 
this.url); - } - } - - static responses: Array<[string, any]>; - static addResponse(url: string, response: any) { - var rs = FakeXMLHttpRequest.responses; - if (!rs) { - FakeXMLHttpRequest.responses = rs = []; - } - rs.push([url, response]); - } - - static _origXhr: any; - static numRequests: number; - static install(): void { - if (FakeXMLHttpRequest._origXhr) { - throw "Can't double-install FakeXMLHttpRequest"; - } - FakeXMLHttpRequest._origXhr = XMLHttpRequest; - XMLHttpRequest = FakeXMLHttpRequest; - FakeXMLHttpRequest.numRequests = 0; - } - - static restore(): void { - if (!FakeXMLHttpRequest._origXhr) { - throw "Can't restore XMLHttpRequest without installing FakeXMLHttpRequest"; - } - - XMLHttpRequest = FakeXMLHttpRequest._origXhr; - FakeXMLHttpRequest._origXhr = null; - FakeXMLHttpRequest.responses = []; - } -} - - -module.exports = FakeXMLHttpRequest; diff --git a/test/RemoteFile-test.js b/test/RemoteFile-test.js index f3b78c0d..83d5ff4f 100644 --- a/test/RemoteFile-test.js +++ b/test/RemoteFile-test.js @@ -1,30 +1,38 @@ var chai = require('chai'), - expect = chai.expect; - -var FakeXHR = require('./FakeXMLHttpRequest'); + expect = chai.expect, + jBinary = require('jbinary'); var RemoteFile = require('../src/RemoteFile'); describe('RemoteFile', () => { - beforeEach(() => { - FakeXHR.install(); - }); - afterEach(() => { - FakeXHR.restore(); - }); - - it('should fetch a subset of a file', (done) => { - FakeXHR.addResponse('http://example.com/file.txt', - new TextEncoder('utf-8').encode('01234567890').buffer); + function bufferToText(buf) { + return new jBinary(buf).read('string'); + } - var f = new RemoteFile('http://example.com/file.txt'); - var promisedData = f.getBytes(10, 11); + it('should fetch a subset of a file', done => { + var f = new RemoteFile('/test/data/0to9.txt'); + var promisedData = f.getBytes(4, 5); - expect(FakeXHR.numRequests).to.equal(1); - // expect(req.requestHeaders.Range).to.equal('bytes=10-29'); + 
expect(f.numNetworkRequests).to.equal(1); promisedData.then(buf => { - expect(buf.byteLength).to.equal(11); + expect(buf.byteLength).to.equal(5); + expect(bufferToText(buf)).to.equal('45678'); done(); }).done(); }); + + it('should fetch subsets from cache', done => { + var f = new RemoteFile('/test/data/0to9.txt'); + f.getBytes(0, 10).then(buf => { + expect(buf.byteLength).to.equal(10); + expect(bufferToText(buf)).to.equal('0123456789'); + expect(f.numNetworkRequests).to.equal(1); + f.getBytes(4, 5).then(buf => { + expect(buf.byteLength).to.equal(5); + expect(bufferToText(buf)).to.equal('45678'); + expect(f.numNetworkRequests).to.equal(1); // it was cached + done(); + }).done(); + }).done(); + }); }); diff --git a/test/coverage.html b/test/coverage.html index 74b456af..577ab1e6 100644 --- a/test/coverage.html +++ b/test/coverage.html @@ -10,7 +10,7 @@ - + diff --git a/test/data/0to9.txt b/test/data/0to9.txt new file mode 100644 index 00000000..11f11f9b --- /dev/null +++ b/test/data/0to9.txt @@ -0,0 +1 @@ +0123456789 diff --git a/test/data/hello.txt b/test/data/hello.txt new file mode 100644 index 00000000..94954abd --- /dev/null +++ b/test/data/hello.txt @@ -0,0 +1,2 @@ +hello +world diff --git a/test/data/itemRgb.bb b/test/data/itemRgb.bb new file mode 100644 index 0000000000000000000000000000000000000000..c31b73c9497e7d8bec275fed7678cd6187b53679 GIT binary patch literal 26695 zcmeI*eN5D57y$4dAaKkL1#yyd@N$QIOxenTK|m%P&S5ls3F60`9Ctmr2zPKTf{p>_ z5Moe4AcD%~2Z-Zi22BJJ73K%Zhz?YQA<#`J=2X3PrRl1$m2kMS{_$Bt!O4CaqvIr=fX>wmMPq%m;> ziczkS5b_9uq%4FIjXIemG>Ax{2oYlB-HgN zK}{+trCNoQDt5On!|O+VtdcY57SjD2kKrb(lExF!~yZ{fAyhuy&Co zAxBbD=McUoh8fa%#W?Qi=I-t$7PC`uwZQD74WEwR=CNyR-(=S1&#W`OZosVk$F_aT z_bEp7vCI=}PXCy(VdK$T=zC@vv#q<;mrswEGf(!eoJ8ZPTwqGcsw<7EEB@2X5^gr1 z(zfk)c{-Zc)*+HC@+#Y67kX&UBJVmMM~9B;?5Ls`Td5PEG_RsmBMG zwY_-iSv(e-J@!Fa;liO1Nw!7ix`4rE-(Mo#ZB=h?>$4SNH$5aj91$ja6z3(sbG^9w zs>KU$x0pkx*EcC-k{k_gSMTT7&kM?Obu~KL5WpLIpYNBpbo#ra(f5yw0tMEc*Y@8P 
zZauJoU+FzE64`swJJ6;8Um>zeO_#Mff5?BY8y>FwI}iGlr_T8HnP2;BFe;NIe_b3cFPfHlsWv@6PMci587 z-Rmy2H9MLNe~xbbc=nREeyqQU~e~Jx{L*`Q<*QKBg0~;82snTh+0S7=7y*-l09N zJU(+b%ITl}<-@&q=ukG2iD7QkqRl+MCHJBV;F7$GWpjjbFR&TR0nk7I1V8`;KmY_l z00ck)1V8`;KmY_l00ck)1V8`;KmY{Bg@747OS;$)9UXnFwB>?r%*6}K9n5=i&#o^Y zYE@CVsZm~!t$oiUI}Kj*W@ZCx9LVKmY_l00ck)1V8`; zKmY_l00ck)1V8`;KmY_l00cl_+zK$C#l0#^)I0ClKCGRYccv}cvf1qTjizbl{<`wR zPdn!4Hzr=P$+WdadnI{VhfTbNU*FEOHaEs*_1lE#D-ebs*%?+Bc6f0>Z>T^-a50i` zYY7tv0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X& - + From e156fa3f943e9f01cfe71704c4781df48e83d440 Mon Sep 17 00:00:00 2001 From: Dan Vanderkam Date: Sat, 14 Mar 2015 00:07:45 -0400 Subject: [PATCH 3/4] Port TwoBit to jBinary; kill ReadableView --- src/BigBed.js | 3 +- src/ReadableView.js | 72 ------------------------------------- src/TwoBit.js | 73 +++++++++++--------------------------- src/formats/twoBitTypes.js | 43 ++++++++++++++++++++++ test/ReadableView-test.js | 72 ------------------------------------- 5 files changed, 65 insertions(+), 198 deletions(-) delete mode 100644 src/ReadableView.js create mode 100644 src/formats/twoBitTypes.js delete mode 100644 test/ReadableView-test.js diff --git a/src/BigBed.js b/src/BigBed.js index a6e275fa..38676884 100644 --- a/src/BigBed.js +++ b/src/BigBed.js @@ -10,8 +10,7 @@ var Q = require('q'), pako = require('pako'); // for gzip inflation -var ReadableView = require('./ReadableView'), - RemoteFile = require('./RemoteFile'), +var RemoteFile = require('./RemoteFile'), Interval = require('./Interval'), ContigInterval = require('./ContigInterval'), utils = require('./utils.js'), diff --git a/src/ReadableView.js b/src/ReadableView.js deleted file mode 100644 index fb4ebab3..00000000 --- a/src/ReadableView.js +++ /dev/null @@ -1,72 +0,0 @@ -/** @flow */ - -/** - * Wrapper around an ArrayBuffer which facilitates reading different types of - * values from it, from start to finish. 
- */ -class ReadableView { - offset: number; - dataView: DataView; - constructor(dataView: DataView) { - this.offset = 0; - this.dataView = dataView; - } - - // Read an unsigned 8-bit integer and advance the current position. - readUint8(): number { - var num = this.dataView.getUint8(this.offset); - this.offset++; - return num; - } - - // Read an unsigned 16-bit integer and advance the current position. - readUint16(): number { - return this.readUint8() + - this.readUint8() * (1 << 8); - } - - // Read an unsigned 32-bit integer and advance the current position. - readUint32(): number { - return this.readUint8() + - this.readUint8() * (1 << 8 ) + - this.readUint8() * (1 << 16) + - this.readUint8() * (1 << 24); - } - - // Read a sequence of 32-bit integers and advance the current position. - readUint32Array(n: number): number[] { - var result: number[] = []; - for (var i = 0; i < n; i++) { - result.push(this.readUint32()); - } - return result; - } - - /** - * Extract a sequence of ASCII characters as a string. - * This throws if any non-ASCII characters are encountered. - */ - readAscii(length: number): string { - var result = ''; - for (var i = 0; i < length; i++) { - var c = this.readUint8(); - if (c > 127) { - throw 'Encountered non-ASCII character ' + c; - } - result += String.fromCharCode(c); - } - return result; - } - - // Returns the number of bytes remaining in the buffer. - bytesRemaining(): number { - return this.dataView.byteLength - this.offset; - } - - // Returns the current offset in the buffer. 
- tell(): number { - return this.offset; - } -} - -module.exports = ReadableView; diff --git a/src/TwoBit.js b/src/TwoBit.js index a7496977..1979e50f 100644 --- a/src/TwoBit.js +++ b/src/TwoBit.js @@ -6,10 +6,11 @@ 'use strict'; var Q = require('q'), - _ = require('underscore'); + _ = require('underscore'), + jBinary = require('jbinary'); -var ReadableView = require('./ReadableView'), - RemoteFile = require('./RemoteFile'); +var RemoteFile = require('./RemoteFile'), + twoBitTypes = require('./formats/twoBitTypes'); var BASE_PAIRS = [ 'T', // 0=00 @@ -40,33 +41,22 @@ type TwoBitHeader = { sequences: Array; } -var TWO_BIT_MAGIC = 0x1A412743; - /** * Parses a single SequenceRecord from the start of the ArrayBuffer. * fileOffset is the position of this sequence within the 2bit file. */ -function parseSequenceRecord(dataView: DataView, fileOffset: number): SequenceRecord { - var bytes = new ReadableView(dataView); - var dnaSize = bytes.readUint32(), - nBlockCount = bytes.readUint32(), - nBlockStarts = bytes.readUint32Array(nBlockCount), - nBlockSizes = bytes.readUint32Array(nBlockCount), - // The masks can be quite large (~2MB for chr1) and we mostly don't care - // about them. So we ignore them, but we do need to know their length. 
- maskBlockCount = bytes.readUint32(); - // maskBlockCount maskBlockStarts = bytes.readUint32Array(maskBlockCount), - // maskBlockSizes = bytes.readUint32Array(maskBlockCount), - // reserved = bytes.readUint32(); - - var dnaOffset = bytes.tell() + 8 * maskBlockCount + 4; +function parseSequenceRecord(buffer: ArrayBuffer, fileOffset: number): SequenceRecord { + var jb = new jBinary(buffer, twoBitTypes.TYPE_SET); + var header = jb.read('SequenceRecord'); + + var dnaOffset = jb.tell() + 8 * header.maskBlockCount + 4; return { - numBases: dnaSize, - unknownBlockStarts: nBlockStarts, - unknownBlockLengths: nBlockSizes, - numMaskBlocks: maskBlockCount, + numBases: header.dnaSize, + unknownBlockStarts: header.nBlockStarts, + unknownBlockLengths: header.nBlockSizes, + numMaskBlocks: header.maskBlockCount, maskBlockStarts: [], maskBlockLengths: [], dnaOffsetFromHeader: dnaOffset, @@ -75,35 +65,13 @@ function parseSequenceRecord(dataView: DataView, fileOffset: number): SequenceRe } -/** - * Parses the 2bit file header. 
- */ -function parseHeader(dataView: DataView): TwoBitHeader { - var bytes = new ReadableView(dataView); - var magic = bytes.readUint32(); - if (magic != TWO_BIT_MAGIC) { - throw 'Invalid magic'; - } - var version = bytes.readUint32(); - if (version != 0) { - throw 'Unknown version of 2bit'; - } - var sequenceCount = bytes.readUint32(), - reserved = bytes.readUint32(); - - var sequences: Array = []; - for (var i = 0; i < sequenceCount; i++) { - var nameSize = bytes.readUint8(); - var name = bytes.readAscii(nameSize); - var offset = bytes.readUint32(); - sequences.push({name, offset}); - } - // hg19 header is 1671 bytes to this point +function parseHeader(buffer: ArrayBuffer): TwoBitHeader { + var jb = new jBinary(buffer, twoBitTypes.TYPE_SET); + var header = jb.read('Header'); return { - sequenceCount, - reserved, - sequences + sequenceCount: header.sequenceCount, + sequences: header.sequences }; } @@ -115,6 +83,7 @@ function parseHeader(dataView: DataView): TwoBitHeader { * modification. */ function unpackDNA(dataView: DataView, startBasePair: number, numBasePairs: number): Array { + // TODO: use jBinary bitfield for this var basePairs: Array = []; basePairs.length = dataView.byteLength * 4; // pre-allocate var basePairIdx = -startBasePair; @@ -167,7 +136,7 @@ class TwoBit { // TODO: if 16k is insufficient, fetch the right amount. this.remoteFile.getBytes(0, 16*1024).then(function(buffer) { - var header = parseHeader(new DataView(buffer)); + var header = parseHeader(buffer); deferredHeader.resolve(header); }).done(); } @@ -209,7 +178,7 @@ class TwoBit { // TODO: if 4k is insufficient, fetch the right amount. 
return this.remoteFile.getBytes(seq.offset, 4095).then( - buf => parseSequenceRecord(new DataView(buf), seq.offset)); + buf => parseSequenceRecord(buf, seq.offset)); }); } } diff --git a/src/formats/twoBitTypes.js b/src/formats/twoBitTypes.js new file mode 100644 index 00000000..2d71c2c4 --- /dev/null +++ b/src/formats/twoBitTypes.js @@ -0,0 +1,43 @@ +/** + * TwoBit is a packed genome format. + * See http://genome.ucsc.edu/FAQ/FAQformat.html#format7 + */ + +'use strict'; + +var jBinary = require('jbinary'); + +var TYPE_SET = { + 'jBinary.littleEndian': true, + + 'Header': { + magic: ['const', 'uint32', 0x1A412743, true], + version: ['const', 'uint32', 0, true], + + sequenceCount: 'uint32', + reserved: 'uint32', + + sequences: ['array', 'SequenceHeader', 'sequenceCount'] + }, + + 'SequenceHeader': { + nameSize: 'uint8', + name: ['string', 'nameSize'], + offset: 'uint32' + }, + + 'SequenceRecord': { + dnaSize: 'uint32', + nBlockCount: 'uint32', + nBlockStarts: ['array', 'uint32', 'nBlockCount'], + nBlockSizes: ['array', 'uint32', 'nBlockCount'], + // The masks can be quite large (~2MB for chr1) and we mostly don't care + // about them. So we ignore them, but we do need to know their length. 
+ maskBlockCount: 'uint32', + // maskBlockStarts: ['array', 'uint32', 'maskBlockCount'] + // maskBlockSizes: ['array', 'uint32', 'maskBlockCount'] + // reserved: 'uint32' + } +}; + +module.exports = {TYPE_SET}; diff --git a/test/ReadableView-test.js b/test/ReadableView-test.js deleted file mode 100644 index c2caff32..00000000 --- a/test/ReadableView-test.js +++ /dev/null @@ -1,72 +0,0 @@ -var chai = require('chai'); -var expect = chai.expect; - -var ReadableView = require('../src/ReadableView'); - -describe('ReadableView', function() { - it('should read 8-bit unsigned ints', function() { - var u8 = new Uint8Array(5); - u8[0] = 100; - u8[1] = 255; - u8[2] = 0; - u8[3] = 33; - u8[4] = 127; - - var bytes = new ReadableView(new DataView(u8.buffer)); - expect(bytes.tell()).to.equal(0); - expect(bytes.bytesRemaining()).to.equal(5); - expect(bytes.readUint8()).to.equal(100); - expect(bytes.tell()).to.equal(1); - expect(bytes.bytesRemaining()).to.equal(4); - expect(bytes.readUint8()).to.equal(255); - expect(bytes.readUint8()).to.equal(0); - expect(bytes.readUint8()).to.equal(33); - expect(bytes.readUint8()).to.equal(127); - expect(bytes.bytesRemaining()).to.equal(0); - expect(bytes.tell()).to.equal(5); - }); - - it('should read strings', function() { - var u8 = new Uint8Array(6); - u8[0] = 4; - u8[1] = '2'.charCodeAt(0); - u8[2] = 'b'.charCodeAt(0); - u8[3] = 'i'.charCodeAt(0); - u8[4] = 't'.charCodeAt(0); - u8[5] = '?'.charCodeAt(0); - - var bytes = new ReadableView(new DataView(u8.buffer)); - expect(bytes.tell()).to.equal(0); - expect(bytes.bytesRemaining()).to.equal(6); - expect(bytes.readUint8()).to.equal(4); - expect(bytes.readAscii(4)).to.equal('2bit'); - expect(bytes.tell()).to.equal(5); - expect(bytes.bytesRemaining()).to.equal(1); - expect(bytes.readAscii(1)).to.equal('?'); - expect(bytes.bytesRemaining()).to.equal(0); - }); - - it('should read uint32 arrays', function() { - var u32 = new Uint32Array(5); - u32[0] = 1; - u32[1] = 2; - u32[2] = 12345678; - u32[3] = 
1234567890; - u32[4] = 3; - - var bytes = new ReadableView(new DataView(u32.buffer)); - expect(bytes.tell()).to.equal(0); - expect(bytes.bytesRemaining()).to.equal(20); - expect(bytes.readUint32Array(5)).to.deep.equal([1,2,12345678,1234567890,3]); - expect(bytes.tell()).to.equal(20); - expect(bytes.bytesRemaining()).to.equal(0); - }); - - it('should read a large uint32', function() { - var u32 = new Uint32Array(1); - u32[0] = 0xebf28987; - - var bytes = new ReadableView(new DataView(u32.buffer)); - expect(bytes.readUint32()).to.equal(0xebf28987); - }); -}); From a2e43f3c0374cb39a031bc1db0eadaf1eae489a6 Mon Sep 17 00:00:00 2001 From: Dan Vanderkam Date: Sat, 14 Mar 2015 11:21:43 -0400 Subject: [PATCH 4/4] Expand BigBed tests & cleanup --- src/BigBed.js | 114 +++++++++++++++++++++++-------------- src/Controls.js | 11 ++-- src/TwoBit.js | 1 - src/formats/helpers.js | 1 + src/formats/twoBitTypes.js | 1 - src/main.js | 6 -- test/BigBed-test.js | 62 +++++++++++++++++--- test/jbinary-test.js | 60 ------------------- types/types.js | 4 +- 9 files changed, 134 insertions(+), 126 deletions(-) delete mode 100644 test/jbinary-test.js diff --git a/src/BigBed.js b/src/BigBed.js index 38676884..5f1d0dd5 100644 --- a/src/BigBed.js +++ b/src/BigBed.js @@ -21,19 +21,16 @@ function parseHeader(buffer) { // TODO: check Endianness using magic. Possibly use jDataView.littleEndian // to flip the endianness for jBinary consumption. // NB: dalliance doesn't support big endian formats. - var jb = new jBinary(buffer, bbi.TYPE_SET); - var header = jb.read('Header'); - - return header; + return new jBinary(buffer, bbi.TYPE_SET).read('Header'); } +// The "CIR" tree contains a mapping from sequence -> block offsets. 
+// It stands for "Chromosome Index R tree" function parseCirTree(buffer) { - var jb = new jBinary(buffer, bbi.TYPE_SET); - var cirTree = jb.read('CirTree'); - - return cirTree; + return new jBinary(buffer, bbi.TYPE_SET).read('CirTree'); } +// Extract a map from contig name --> contig ID from the bigBed header. function generateContigMap(twoBitHeader): {[key:string]: number} { // Just assume it's a flat "tree" for now. var nodes = twoBitHeader.chromosomeTree.nodes.contents; @@ -46,6 +43,16 @@ function generateContigMap(twoBitHeader): {[key:string]: number} { })); } +// Generate the reverse map from contig ID --> contig name. +function reverseContigMap(contigMap: {[key:string]: number}): Array { + var ary = []; + _.forEach(contigMap, (index, name) => { + ary[index] = name; + }); + return ary; +} + +// Map contig name to contig ID. Leading "chr" is optional. function getContigId(contigMap, contig) { if (contig in contigMap) { return contigMap[contig]; @@ -57,15 +64,7 @@ function getContigId(contigMap, contig) { return null; } -function reverseContigMap(contigMap: {[key:string]: number}): Array { - var ary = []; - _.forEach(contigMap, (index, name) => { - ary[index] = name; - }); - return ary; -} - -// Get all blocks in the file containing features which intersect with contigRange. +// Find all blocks containing features which intersect with contigRange. function findOverlappingBlocks(twoBitHeader, cirTree, contigRange) { // Do a recursive search through the index tree var matchingBlocks = []; @@ -101,32 +100,74 @@ function extractFeaturesInRange(buffer, dataRange, blocks, contigRange) { var beds = jb.read('BedBlock'); beds = beds.filter(function(bed) { - var bedInterval = new ContigInterval(bed.chrId, bed.start, bed.stop); - var r = contigRange.intersects(bedInterval); - return r; + // Note: BED intervals are explicitly half-open. + // The "- 1" converts them to closed intervals for ContigInterval. 
+ var bedInterval = new ContigInterval(bed.chrId, bed.start, bed.stop - 1); + return contigRange.intersects(bedInterval); }); return beds; })); } +// Fetch the relevant blocks from the bigBed file and extract the features +// which overlap the given range. +function fetchFeatures(contigRange, header, cirTree, contigMap, remoteFile) { + var blocks = findOverlappingBlocks(header, cirTree, contigRange); + if (blocks.length == 0) { + return []; + } + + // Find the range in the file which contains all relevant blocks. + // In theory there could be gaps between blocks, but it's hard to see how. + var range = Interval.boundingInterval( + blocks.map(n => new Interval(+n.offset, n.offset+n.size))); + + return remoteFile.getBytes(range.start, range.length()) + .then(buffer => { + var reverseMap = reverseContigMap(contigMap); + var features = extractFeaturesInRange(buffer, range, blocks, contigRange) + features.forEach(f => { + f.contig = reverseMap[f.chrId]; + delete f.chrId; + }); + return features; + }); +} + + +type BedRow = { + // Half-open interval for the BED row. + contig: string; + start: number; + stop: number; + // Remaining fields in the BED row (typically tab-delimited) + rest: string; +} + class BigBed { remoteFile: RemoteFile; header: Q.Promise; cirTree: Q.Promise; + contigMap: Q.Promise<{[key:string]: number}>; + /** + * Prepare to request features from a remote bigBed file. + * The remote source must support HTTP Range headers. + * This will kick off several async requests for portions of the file. + */ constructor(url: string) { this.remoteFile = new RemoteFile(url); this.header = this.remoteFile.getBytes(0, 64*1024).then(parseHeader); this.contigMap = this.header.then(generateContigMap); - // Next: fetch [header.unzoomedIndexOffset, zoomHeaders[0].dataOffset] and parse - // the "CIR" tree. + // Next: fetch the block index and parse out the "CIR" tree. this.cirTree = this.header.then(header => { // zoomHeaders[0].dataOffset is the next entry in the file. 
// We assume the "cirTree" section goes all the way to that point. // Lacking zoom headers, assume it's 4k. + // TODO: fetch more than 4k if necessary var start = header.unzoomedIndexOffset, zoomHeader = header.zoomHeaders[0], length = zoomHeader ? zoomHeader.dataOffset - start : 4096; @@ -139,9 +180,13 @@ class BigBed { this.cirTree.done(); } - // Returns all BED entries which overlap the range. - // TODO: factor logic out into a helper - getFeaturesInRange(contig: string, start: number, stop: number): Q.Promise { + /** + * Returns all BED entries which overlap the range. + * Note: while the requested range is inclusive on both ends, ranges in + * bigBed format files are half-open (inclusive at the start, exclusive at + * the end). + */ + getFeaturesInRange(contig: string, start: number, stop: number): Q.Promise> { return Q.spread([this.header, this.cirTree, this.contigMap], (header, cirTree, contigMap) => { var contigIx = getContigId(contigMap, contig); @@ -149,24 +194,7 @@ class BigBed { throw `Invalid contig ${contig}`; } var contigRange = new ContigInterval(contigIx, start, stop); - - var blocks = findOverlappingBlocks(header, cirTree, contigRange); - if (blocks.length == 0) { - return []; - } - - var range = Interval.boundingInterval( - blocks.map(n => new Interval(+n.offset, n.offset+n.size))); - return this.remoteFile.getBytes(range.start, range.length()) - .then(buffer => { - var reverseMap = reverseContigMap(contigMap); - var features = extractFeaturesInRange(buffer, range, blocks, contigRange) - features.forEach(f => { - f.contig = reverseMap[f.chrId]; - delete f.chrId; - }); - return features; - }); + return fetchFeatures(contigRange, header, cirTree, contigMap, this.remoteFile); }); } } diff --git a/src/Controls.js b/src/Controls.js index 3801d09f..c6a9879d 100644 --- a/src/Controls.js +++ b/src/Controls.js @@ -14,8 +14,7 @@ var Controls = React.createClass({ // XXX: can we be more specific than this with Flow? 
onChange: React.PropTypes.func.isRequired }, - makeRange: function() { - // XXX Removing the Number() should lead to type errors, but doesn't. + makeRange: function(): GenomeRange { return { contig: this.refs.contig.getDOMNode().value, start: Number(this.refs.start.getDOMNode().value), @@ -35,8 +34,10 @@ var Controls = React.createClass({ this.refs.start.getDOMNode().value = r.start; this.refs.stop.getDOMNode().value = r.stop; - var contigIdx = this.props.contigList.indexOf(r.contig); - this.refs.contig.getDOMNode().selectedIndex = contigIdx; + if (this.props.contigList) { + var contigIdx = this.props.contigList.indexOf(r.contig); + this.refs.contig.getDOMNode().selectedIndex = contigIdx; + } }, render: function(): any { var contigOptions = this.props.contigList @@ -56,7 +57,7 @@ var Controls = React.createClass({ ); }, - componentDidUpdate: function(prevProps, prevState) { + componentDidUpdate: function(prevProps: Object) { if (!_.isEqual(prevProps.range, this.props.range)) { this.updateRangeUI(); } diff --git a/src/TwoBit.js b/src/TwoBit.js index 1979e50f..d8552e7c 100644 --- a/src/TwoBit.js +++ b/src/TwoBit.js @@ -37,7 +37,6 @@ type SequenceRecord = { type TwoBitHeader = { sequenceCount: number; - reserved: number; sequences: Array; } diff --git a/src/formats/helpers.js b/src/formats/helpers.js index 235f4a8d..cdf508cb 100644 --- a/src/formats/helpers.js +++ b/src/formats/helpers.js @@ -3,6 +3,7 @@ */ var jBinary = require('jbinary'); +// Read a jBinary type at an offset in the buffer specified by another field. 
function typeAtOffset(typeName, offsetFieldName) { return jBinary.Template({ baseType: typeName, diff --git a/src/formats/twoBitTypes.js b/src/formats/twoBitTypes.js index 2d71c2c4..74934bae 100644 --- a/src/formats/twoBitTypes.js +++ b/src/formats/twoBitTypes.js @@ -13,7 +13,6 @@ var TYPE_SET = { 'Header': { magic: ['const', 'uint32', 0x1A412743, true], version: ['const', 'uint32', 0, true], - sequenceCount: 'uint32', reserved: 'uint32', diff --git a/src/main.js b/src/main.js index 90e84fd2..a1c2688c 100644 --- a/src/main.js +++ b/src/main.js @@ -1,7 +1,6 @@ /* @flow */ var React = require('react'), TwoBit = require('./TwoBit'), - BigBed = require('./BigBed'), Root = require('./Root'), createTwoBitDataSource = require('./TwoBitDataSource'); @@ -24,8 +23,3 @@ genome.getFeaturesInRange('chr1', 123000, 124000).done(); var root = React.render(, document.getElementById('root')); - -var ensembl = new BigBed('/ensGene.bb'); - -window.ensembl = ensembl; -window.genome = genome; diff --git a/test/BigBed-test.js b/test/BigBed-test.js index 56081a92..90436bd1 100644 --- a/test/BigBed-test.js +++ b/test/BigBed-test.js @@ -1,19 +1,15 @@ -// Things to test: -// - getFeatures which return no features -// - getFeatures which crosses a block boundary -// - getFeatures which crosses a contig boundary (not currently possible) - var chai = require('chai'); var expect = chai.expect; var assert = chai.assert; +var Q = require('q'); var BigBed = require('../src/BigBed'); describe('BigBed', function() { function getTestBigBed() { // This file was generated using UCSC tools: // cd kent/src/utils/bedToBigBed/tests; make - // This file is compressed, little endian and contains autoSQL. + // It is compressed, little endian, has autoSQL and two blocks. 
return new BigBed('/test/data/itemRgb.bb'); } @@ -22,8 +18,9 @@ describe('BigBed', function() { bb.getFeaturesInRange('chrX', 151077036, 151078532) .then(features => { - // chrX 151077031 151078198 MID_BLUE 0 - 151077031 151078198 0,0,128 - // chrX 151078198 151079365 VIOLET_RED1 0 - 151078198 151079365 255,62,150 + // Here's what these two lines in the file look like: + // chrX 151077031 151078198 MID_BLUE 0 - 151077031 151078198 0,0,128 + // chrX 151078198 151079365 VIOLET_RED1 0 - 151078198 151079365 255,62,150 expect(features).to.have.length(2); expect(features[0].contig).to.equal('chrX'); expect(features[0].start).to.equal(151077031); @@ -47,4 +44,53 @@ describe('BigBed', function() { }) .done(); }); + + it('should have inclusive ranges', function(done) { + // The matches look like this: + // chrX 151071196 151072363 RED + // chrX 151094536 151095703 PeachPuff + var red = [151071196, 151072362], // note: stop is inclusive + peachpuff = [151094536, 151095702]; + + var bb = getTestBigBed(); + var expectN = n => features => { + expect(features).to.have.length(n); + }; + + Q.all([ + // request for precisely one row from the file. + bb.getFeaturesInRange('chrX', red[0], red[1]) + .then(expectN(1)), + // the additional base in the range hits another row. + bb.getFeaturesInRange('chrX', red[0], 1 + red[1]) + .then(expectN(2)), + // this overlaps exactly one base pair of the first feature. + bb.getFeaturesInRange('chrX', red[0] - 1000, red[0]) + .then(expectN(1)), + // but this range ends one base pair before it.
+ bb.getFeaturesInRange('chrX', red[0] - 1000, red[0] - 1) + .then(expectN(0)) + ]).then(() => { + done(); + }).done(); + }); + + it('should add "chr" to contig names', function(done) { + var bb = getTestBigBed(); + + bb.getFeaturesInRange('X', 151077036, 151078532) + .then(features => { + // (same as 'should extract features in a range' test) + expect(features).to.have.length(2); + expect(features[0].contig).to.equal('chrX'); + expect(features[1].contig).to.equal('chrX'); + done(); + }) + .done(); + }); + + // Things left to test: + // - getFeatures which crosses a block boundary + // - uncompressed bigBed file. }); + diff --git a/test/jbinary-test.js b/test/jbinary-test.js deleted file mode 100644 index b597bd27..00000000 --- a/test/jbinary-test.js +++ /dev/null @@ -1,60 +0,0 @@ -// This is a playground to ensure that I understand how jBinary works. -var chai = require('chai'); -var expect = chai.expect; - -var jBinary = require('jbinary'); - -describe('jBinary', function() { - it('should read two-bit headers', function() { - var twoBitTypeSet = { - 'jBinary.all': 'File', - 'jBinary.littleEndian': true, - 'File': { - magic: ['const', 'uint32', 0x1A412743, true], - version: ['const', 'uint32', 0, true], - sequenceCount: 'uint32', - reserved: 'uint32' - } - }; - - var byteArray = [ - 0x43, 0x27, 0x41, 0x1a, - 0x00, 0x00, 0x00, 0x00, - 0x5d, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00]; - var u8array = new Uint8Array(byteArray.length); - byteArray.forEach((val, idx) => { u8array[idx] = val; }); - - var jb = new jBinary(u8array.buffer, twoBitTypeSet); - var header = jb.readAll(); - - expect(header.magic).to.equal(0x1A412743); // two bit magic - expect(header.version).to.equal(0); - expect(header.sequenceCount).to.equal(93); - expect(header.reserved).to.equal(0); - }); - - it('should advance through a sequence', function() { - var uint8TypeSet = { - 'jBinary.all': 'File', - 'jBinary.littleEndian': true, - 'File': { - value: 'uint8' - } - }; - - var u8array = new 
Uint8Array(16); - for (var i = 0; i < 16; i++) { - u8array[i] = i * i; - } - var buffer = u8array.buffer; - - var jb = new jBinary(buffer, uint8TypeSet); - var num = 0; - while (jb.tell() < buffer.byteLength) { - var x = jb.read('File'); - expect(x).to.deep.equal({value: num * num}); - num++; - } - }); -}); diff --git a/types/types.js b/types/types.js index 5daabf22..3357c71d 100644 --- a/types/types.js +++ b/types/types.js @@ -1,5 +1,5 @@ declare class GenomeRange { contig: string; - start: number; - stop: number; // XXX inclusive or exclusive? + start: number; // inclusive + stop: number; // inclusive }