Merge e8ace77 into a2f4298
danvk committed Mar 14, 2015
2 parents: a2f4298 + e8ace77 · commit bdef8e1
Showing 21 changed files with 567 additions and 357 deletions.
6 changes: 4 additions & 2 deletions package.json
@@ -16,11 +16,14 @@
   "dependencies": {
     "backbone": "^1.1.2",
     "d3": "^3.5.5",
+    "jbinary": "^2.1.2",
+    "pako": "^0.2.5",
     "q": "^1.1.2",
     "react": "^0.12.2",
     "underscore": "^1.7.0"
   },
   "devDependencies": {
+    "arraybuffer-slice": "^0.1.2",
     "chai": "^2.0.0",
     "coveralls": "^2.11.2",
     "es5-shim": "^4.1.0",
@@ -43,7 +46,6 @@
     "react-tools": "^0.12.2",
     "reactify": "^1.0.0",
     "sinon": "^1.12.2",
-    "source-map": "^0.3.0",
-    "text-encoding": "^0.5.2"
+    "source-map": "^0.3.0"
   }
 }
174 changes: 174 additions & 0 deletions src/BigBed.js
@@ -0,0 +1,174 @@
/**
* Parser for bigBed format.
* Based on UCSC's src/inc/bbiFile.h
*/
'use strict';

var Q = require('q'),
_ = require('underscore'),
jBinary = require('jbinary'),
pako = require('pako'); // for gzip inflation


var RemoteFile = require('./RemoteFile'),
Interval = require('./Interval'),
ContigInterval = require('./ContigInterval'),
utils = require('./utils.js'),
bbi = require('./formats/bbi');


function parseHeader(buffer) {
// TODO: check endianness using magic. Possibly use jDataView.littleEndian
// to flip the endianness for jBinary consumption.
// NB: dalliance doesn't support big endian formats.
var jb = new jBinary(buffer, bbi.TYPE_SET);
var header = jb.read('Header');

return header;
}

function parseCirTree(buffer) {
var jb = new jBinary(buffer, bbi.TYPE_SET);
var cirTree = jb.read('CirTree');

return cirTree;
}

function generateContigMap(header): {[key:string]: number} {
// Just assume it's a flat "tree" for now.
var nodes = header.chromosomeTree.nodes.contents;
if (!nodes) {
throw new Error('Invalid chromosome tree');
}
return _.object(nodes.map(function({id, key}) {
// remove trailing nulls from the key string
return [key.replace(/\0.*/, ''), id];
}));
}

function getContigId(contigMap, contig) {
if (contig in contigMap) {
return contigMap[contig];
}
var chr = 'chr' + contig;
if (chr in contigMap) {
return contigMap[chr];
}
return null;
}

function reverseContigMap(contigMap: {[key:string]: number}): Array<string> {
var ary = [];
_.forEach(contigMap, (index, name) => {
ary[index] = name;
});
return ary;
}

// Get all blocks in the file containing features which intersect with contigRange.
function findOverlappingBlocks(header, cirTree, contigRange) {
// Do a recursive search through the index tree
var matchingBlocks = [];
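// Ranges are (contig ID, base position) tuples, compared lexicographically
// to match the index's (startChromIx, startBase) ordering.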
var tupleRange = [[contigRange.contig, contigRange.start()],
[contigRange.contig, contigRange.stop()]];
var find = function(node) {
if (node.contents) {
node.contents.forEach(find);
} else {
var nodeRange = [[node.startChromIx, node.startBase],
[node.endChromIx, node.endBase]];
if (utils.tupleRangeOverlaps(nodeRange, tupleRange)) {
matchingBlocks.push(node);
}
}
};
find(cirTree.blocks);

return matchingBlocks;
}

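// Inflate the overlapping blocks and parse out their BED entries, keeping
// only those which intersect contigRange.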
function extractFeaturesInRange(buffer, dataRange, blocks, contigRange) {
return _.flatten(blocks.map(block => {
var blockOffset = block.offset - dataRange.start,
blockLimit = blockOffset + block.size,
// TODO: where does the +2 come from? (I copied it from dalliance)
blockBuffer = buffer.slice(blockOffset + 2, blockLimit);
// TODO: only inflate if necessary
var inflatedBuffer = pako.inflateRaw(new Uint8Array(blockBuffer));

var jb = new jBinary(inflatedBuffer, bbi.TYPE_SET);
// TODO: parse only one BedEntry at a time, as many as is necessary.
var beds = jb.read('BedBlock');

beds = beds.filter(function(bed) {
var bedInterval = new ContigInterval(bed.chrId, bed.start, bed.stop);
return contigRange.intersects(bedInterval);
});

return beds;
}));
}


class BigBed {
remoteFile: RemoteFile;
header: Q.Promise<any>;
contigMap: Q.Promise<any>;
cirTree: Q.Promise<any>;

constructor(url: string) {
this.remoteFile = new RemoteFile(url);
this.header = this.remoteFile.getBytes(0, 64*1024).then(parseHeader);
this.contigMap = this.header.then(generateContigMap);

// Next: fetch [header.unzoomedIndexOffset, zoomHeaders[0].dataOffset] and parse
// the "CIR" tree.
this.cirTree = this.header.then(header => {
// zoomHeaders[0].dataOffset is the next entry in the file.
// We assume the "cirTree" section goes all the way to that point.
// Lacking zoom headers, assume it's 4k.
var start = header.unzoomedIndexOffset,
zoomHeader = header.zoomHeaders[0],
length = zoomHeader ? zoomHeader.dataOffset - start : 4096;
return this.remoteFile.getBytes(start, length).then(parseCirTree);
});

// XXX: are these necessary? what's the right way to propagate errors?
this.header.done();
this.contigMap.done();
this.cirTree.done();
}

// Returns all BED entries which overlap the range.
// TODO: factor logic out into a helper
getFeaturesInRange(contig: string, start: number, stop: number): Q.Promise<any> {
return Q.spread([this.header, this.cirTree, this.contigMap],
(header, cirTree, contigMap) => {
var contigIx = getContigId(contigMap, contig);
if (contigIx === null) {
throw new Error(`Invalid contig ${contig}`);
}
var contigRange = new ContigInterval(contigIx, start, stop);

var blocks = findOverlappingBlocks(header, cirTree, contigRange);
if (blocks.length === 0) {
return [];
}

var range = Interval.boundingInterval(
blocks.map(n => new Interval(+n.offset, n.offset+n.size)));
return this.remoteFile.getBytes(range.start, range.length())
.then(buffer => {
var reverseMap = reverseContigMap(contigMap);
var features = extractFeaturesInRange(buffer, range, blocks, contigRange);
features.forEach(f => {
f.contig = reverseMap[f.chrId];
delete f.chrId;
});
return features;
});
});
}
}

module.exports = BigBed;
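For context, here is a minimal usage sketch of the new class (not part of this commit; the URL and range are hypothetical). getFeaturesInRange resolves once the header, contig map and CIR tree fetches have all completed:

// Hypothetical example: any bigBed file served with HTTP range support works.
var BigBed = require('./src/BigBed');

var bb = new BigBed('http://example.com/genes.bb');  // hypothetical URL
bb.getFeaturesInRange('chr17', 123456, 136117)
  .then(features => {
    // Each entry carries contig/start/stop; chrId has been mapped back to a name.
    features.forEach(f => console.log(f.contig, f.start, f.stop));
  })
  .done();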
67 changes: 0 additions & 67 deletions src/ReadableView.js

This file was deleted.

11 changes: 7 additions & 4 deletions src/RemoteFile.js
@@ -17,20 +17,22 @@ class RemoteFile {
   url: string;
   fileLength: number;
   chunks: Array<Chunk>;  // regions of file that have already been loaded.
+  numNetworkRequests: number;  // track this for debugging/testing

   constructor(url: string) {
     this.url = url;
     this.fileLength = -1;  // unknown
     this.chunks = [];
+    this.numNetworkRequests = 0;
   }

-  getBytes(start: number, length: number): Q.Promise<DataView> {
+  getBytes(start: number, length: number): Q.Promise<ArrayBuffer> {
     var stop = start + length;
     // First check the cache.
     for (var i = 0; i < this.chunks.length; i++) {
       var chunk = this.chunks[i];
       if (chunk.start <= start && chunk.stop >= stop) {
-        return Q.when(new DataView(chunk.buffer, start - chunk.start, length));
+        return Q.when(chunk.buffer.slice(start - chunk.start, stop - chunk.start));
       }
     }
@@ -40,7 +42,7 @@
     return this.getFromNetwork(start, start + length - 1);
   }

-  getFromNetwork(start: number, stop: number): Q.Promise<DataView> {
+  getFromNetwork(start: number, stop: number): Q.Promise<ArrayBuffer> {
     var deferred = Q.defer();

     var xhr = new XMLHttpRequest();
@@ -55,10 +57,11 @@

       var newChunk = { start, stop: start + buffer.byteLength - 1, buffer };
       remoteFile.chunks.push(newChunk);
-      deferred.resolve(new DataView(buffer));
+      deferred.resolve(buffer);
     };

     // TODO: `reject`, `notify` on progress
+    this.numNetworkRequests++;
     xhr.send();

     return deferred.promise;
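The chunk cache plus the new counter make the caching behavior observable: a second read that falls inside an already-fetched chunk is served from memory without another request. A sketch (not part of the commit; the file path is hypothetical):

// Hypothetical example: the second read lands inside the cached [0, 1024) chunk.
var RemoteFile = require('./src/RemoteFile');

var f = new RemoteFile('/test/data/chr17.bb');  // hypothetical path
f.getBytes(0, 1024).then(buf => {
  console.log(buf.byteLength, f.numNetworkRequests);  // 1024, 1
  return f.getBytes(512, 128);
}).then(buf => {
  console.log(buf.byteLength, f.numNetworkRequests);  // 128, still 1
}).done();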