Skip to content

Commit

Permalink
Use index chunks in BaiFile
Browse files Browse the repository at this point in the history
  • Loading branch information
danvk committed Apr 15, 2015
1 parent cbadd5e commit e2962f7
Show file tree
Hide file tree
Showing 4 changed files with 108 additions and 32 deletions.
83 changes: 54 additions & 29 deletions src/bai.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@

import type * as RemoteFile from './RemoteFile';
import type * as ContigInterval from './ContigInterval';
import type * as Q from 'q';
import type * as VirtualOffset from './VirtualOffset';

var bamTypes = require('./formats/bamTypes');
var jBinary = require('jbinary');
var jDataView = require('jdataview');
var _ = require('underscore');
var jBinary = require('jbinary'),
jDataView = require('jdataview'),
_ = require('underscore'),
Q = require('q'),
bamTypes = require('./formats/bamTypes');


// In the event that index chunks aren't available from an external source, it
Expand Down Expand Up @@ -115,45 +115,66 @@ function optimizeChunkList(chunkList: Chunk[], minimumOffset: VirtualOffset): Ch
return newChunks;
}

// This version of BaiFile is not completely immediate, but it does guarantee
// that the index chunks are available.
class ImmediateBaiFile {
buffer: ArrayBuffer;
buffer: ?ArrayBuffer;
remoteFile: RemoteFile;
indexChunks: Object;

// Builds the index wrapper from either a fully loaded BAI buffer or a set of
// previously computed index chunks.
//  - buffer:      the complete BAI contents, or null/undefined if not loaded.
//  - remoteFile:  stored on the instance as this.remoteFile.
//  - indexChunks: optional precomputed chunks, used only when buffer is falsy.
// When a buffer is given, indexChunks is derived with computeIndexChunks().
// When neither a buffer nor indexChunks is given, a string (not an Error
// object) is thrown.
// NOTE(review): this span is a rendered diff hunk; the two adjacent signature
// lines and the unconditional computeIndexChunks() assignment appear to be
// the pre-change lines shown next to their replacements.
constructor(buffer: ArrayBuffer) {
constructor(buffer: ?ArrayBuffer, remoteFile: RemoteFile, indexChunks?: Object) {
this.buffer = buffer;
this.indexChunks = computeIndexChunks(buffer);
this.remoteFile = remoteFile;
if (buffer) {
this.indexChunks = computeIndexChunks(buffer);
} else {
if (indexChunks) {
this.indexChunks = indexChunks;
} else {
throw 'Without index chunks, the entire BAI buffer must be loaded';
}
}
}

// Finds the chunks listed in the index for the given interval.
// Steps, as visible below:
//  1. Reject contigs outside the index by throwing a string.
//  2. Compute candidate bins with reg2bins(start, stop + 1).
//  3. Load the per-contig index, keep only bins in the candidate list, read
//     their chunks and flatten them into one list.
//  4. Look up the linear-index entry at floor(start / 16384) and pass it to
//     optimizeChunkList() as the minimum offset.
// NOTE(review): the guard compares with `>` against chunks.length, so
// contig === chunks.length passes the check and chunks[contig] is then
// undefined in indexForContig(); `>=` looks intended — confirm.
// NOTE(review): this span is a rendered diff hunk; the synchronous body and
// the promise-returning body (inside the .then callback) are both shown.
getChunksForInterval(range: ContigInterval<number>): Chunk[] {
getChunksForInterval(range: ContigInterval<number>): Q.Promise<Chunk[]> {
if (range.contig < 0 || range.contig > this.indexChunks.chunks.length) {
throw `Invalid contig ${range.contig}`;
}

var bins = reg2bins(range.start(), range.stop() + 1);

var contigIndex = this.indexForContig(range.contig);
return this.indexForContig(range.contig).then(contigIndex => {
var chunks = _.chain(contigIndex.bins)
.filter(b => bins.indexOf(b.bin) >= 0)
.map(b => readChunks(b.chunks))
.flatten()
.value();

var chunks = _.chain(contigIndex.bins)
.filter(b => bins.indexOf(b.bin) >= 0)
.map(b => readChunks(b.chunks))
.flatten()
.value();
var linearIndex = readIntervals(contigIndex.intervals);
var startIdx = Math.max(0, Math.floor(range.start() / 16384));
var minimumOffset = linearIndex[startIdx];

var linearIndex = readIntervals(contigIndex.intervals);
var startIdx = Math.max(0, Math.floor(range.start() / 16384));
var minimumOffset = linearIndex[startIdx];
chunks = optimizeChunkList(chunks, minimumOffset);

chunks = optimizeChunkList(chunks, minimumOffset);

return chunks;
return chunks;
});
}

// Retrieve and parse the index for a particular contig.
// TODO: make this async
// The [start, stop] byte range for the contig comes from
// this.indexChunks.chunks[contig]. The promise-returning form obtains those
// bytes through getSlice() and parses them as a 'BaiIndex' using
// bamTypes.TYPE_SET.
// NOTE(review): this span is a rendered diff hunk; the synchronous lines that
// slice this.buffer directly appear to be the pre-change version, which would
// make the TODO above a removed line as well — confirm against the repository.
indexForContig(contig: number): Object {
indexForContig(contig: number): Q.Promise<Object> {
var [start, stop] = this.indexChunks.chunks[contig];
var jb = new jBinary(this.buffer.slice(start, stop), bamTypes.TYPE_SET);
return jb.read('BaiIndex');
return this.getSlice(start, stop).then(buffer => {
var jb = new jBinary(buffer, bamTypes.TYPE_SET);
return jb.read('BaiIndex');
});
}

getSlice(start: number, stop: number): Q.Promise<ArrayBuffer> {
if (this.buffer) {
return Q.when(this.buffer.slice(start, stop));
} else {
return this.remoteFile.getBytes(start, stop - start + 1);
}
}
}

Expand All @@ -162,11 +183,15 @@ class BaiFile {
remoteFile: RemoteFile;
immediate: Q.Promise<ImmediateBaiFile>;

// Stores the remote BAI file and sets up this.immediate, a promise for an
// ImmediateBaiFile.
//  - With indexChunks: the ImmediateBaiFile is created right away with a null
//    buffer, so remoteFile.getAll() is not called on this path.
//  - Without indexChunks: remoteFile.getAll() is called and the resulting
//    buffer is handed to ImmediateBaiFile.
// The chain is then ended with .done().
// NOTE(review): this span is a rendered diff hunk; the first signature line and
// the unconditional getAll() assignment appear to be the pre-change lines.
constructor(remoteFile: RemoteFile) {
constructor(remoteFile: RemoteFile, indexChunks?: Object) {
this.remoteFile = remoteFile;
this.immediate = remoteFile.getAll().then(buf => {
return new ImmediateBaiFile(buf);
});
if (indexChunks) {
this.immediate = Q.when(new ImmediateBaiFile(null, remoteFile, indexChunks));
} else {
this.immediate = remoteFile.getAll().then(buf => {
return new ImmediateBaiFile(buf, remoteFile, indexChunks);
});
}
this.immediate.done();
}

Expand Down
6 changes: 4 additions & 2 deletions src/bam.js
Original file line number Diff line number Diff line change
Expand Up @@ -160,9 +160,11 @@ class Bam {
index: ?BaiFile;
header: Q.Promise<Object>;

constructor(remoteFile: RemoteFile, remoteIndexFile?: RemoteFile) {
constructor(remoteFile: RemoteFile,
remoteIndexFile?: RemoteFile,
indexChunks?: Object) {
this.remoteFile = remoteFile;
this.index = remoteIndexFile ? new BaiFile(remoteIndexFile) : null;
this.index = remoteIndexFile ? new BaiFile(remoteIndexFile, indexChunks) : null;

var sizePromise = this.index ? this.index.getHeaderSize() : Q.when(2 * 65535);
this.header = sizePromise.then(size => {
Expand Down
27 changes: 27 additions & 0 deletions test/RecordedRemoteFile.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/**
* This is a thin wrapper around RemoteFile which records all network requests.
* @flow
*/

'use strict';

import type * as Q from 'q';

var RemoteFile = require('../src/RemoteFile'),
Interval = require('../src/Interval');

class RecordedRemoteFile extends RemoteFile {
requests: Array<Interval>;

constructor(url: string) {
super(url);
this.requests = [];
}

getFromNetwork(start: number, stop: number): Q.Promise<ArrayBuffer> {
this.requests.push(new Interval(start, stop));
return super.getFromNetwork(start, stop);
}
}

module.exports = RecordedRemoteFile;
24 changes: 23 additions & 1 deletion test/bai-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ var jBinary = require('jbinary');
var BaiFile = require('../src/bai'),
bamTypes = require('../src/formats/bamTypes'),
ContigInterval = require('../src/ContigInterval'),
RemoteFile = require('../src/RemoteFile');
RemoteFile = require('../src/RemoteFile'),
RecordedRemoteFile = require('./RecordedRemoteFile');

// Formats a chunk as "<chunk_beg>-<chunk_end>", using each endpoint's string
// conversion.
function chunkToString(chunk) {
  var {chunk_beg, chunk_end} = chunk;
  return `${chunk_beg}-${chunk_end}`;
}
}).done();
});

// When index chunks are supplied up front, looking up an interval should
// produce exactly one recorded network request, for the [8, 144] range that
// matches the first entry of the supplied chunks.
it('should use index chunks', function(done) {
  var recordedFile = new RecordedRemoteFile('/test/data/index_test.bam.bai');
  var precomputedChunks = {
    'chunks': [[8, 144], [144, 13776]],
    'minBlockIndex': 65536
  };
  var bai = new BaiFile(recordedFile, precomputedChunks);

  // contig 0 = chrM
  var query = new ContigInterval(0, 10400, 10600);

  bai.getChunksForInterval(query).then(function(chunks) {
    expect(chunks).to.have.length(1);
    expect(chunkToString(chunks[0])).to.equal('0:8384-0:11328');

    // Only the bytes of the first contig's index should have been requested.
    var recorded = recordedFile.requests;
    expect(recorded).to.have.length(1);
    expect(recorded[0].toString()).to.equal('[8, 144]');

    done();
  }).done();
});

it('should compute index chunks', function(done) {
var bai = new BaiFile(new RemoteFile('/test/data/index_test.bam.bai'));
bai.immediate.then(imm => {
Expand Down

0 comments on commit e2962f7

Please sign in to comment.