Skip to content

Commit

Permalink
Merge 616c3c1 into 6a58a5c
Browse files Browse the repository at this point in the history
  • Loading branch information
danvk committed Mar 18, 2015
2 parents 6a58a5c + 616c3c1 commit b464bed
Show file tree
Hide file tree
Showing 13 changed files with 238 additions and 99 deletions.
1 change: 1 addition & 0 deletions .flowconfig
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
[ignore]
.*node_modules/flow-bin.*
.*node_modules/jsxhint.*
.*node_modules/.*mocha.*
.*build.*

[include]
Expand Down
4 changes: 2 additions & 2 deletions lib/q.js
Original file line number Diff line number Diff line change
Expand Up @@ -164,10 +164,10 @@ declare module "q" {
}

// If no value provided, returned promise will be of void type
declare function when(): Promise<void>;
declare function when<T, U>(): Promise<void>;

// if no fulfill, reject, or progress provided, returned promise will be of same type
declare function when<T>(value: T | IPromise<T>): Promise<T>;
declare function when<T, U>(value: T | IPromise<T>): Promise<T>;

// If a non-promise value is provided, it will not reject or progress
declare function when<T, U>(value: T | IPromise<T>, onFulfilled: (val: T) => U | IPromise<U>, onRejected?: (reason: any) => U | IPromise<U>, onProgress?: (progress: any) => any): Promise<U>;
Expand Down
4 changes: 4 additions & 0 deletions lib/underscore.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,9 @@ declare module "underscore" {
declare function flatten<S>(a: S[][]): S[];

declare function chain<S>(obj: S): any;
declare function any<T>(list: Array<T>, pred: (el: T)=>boolean): boolean;

declare function each<T>(o: {[key:string]: T}, iteratee: (val: T, key: string)=>void): void;
declare function each<T>(a: T[], iteratee: (val: T, key: string)=>void): void;
}

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
"chai": "^2.0.0",
"coveralls": "^2.11.2",
"es5-shim": "^4.1.0",
"flow-bin": "^0.4.0",
"flow-bin": "^0.6.0",
"grunt": "^0.4.5",
"grunt-browserify": "^3.3.0",
"grunt-contrib-connect": "^0.9.0",
Expand Down
119 changes: 88 additions & 31 deletions src/BigBed.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ function parseCirTree(buffer) {
return new jBinary(buffer, bbi.TYPE_SET).read('CirTree');
}

// TODO: create an "ImmediateTwoBit" class and make most of the following
// functions methods on it.

// Extract a map from contig name --> contig ID from the bigBed header.
function generateContigMap(twoBitHeader): {[key:string]: number} {
// Just assume it's a flat "tree" for now.
Expand Down Expand Up @@ -86,36 +89,36 @@ function findOverlappingBlocks(twoBitHeader, cirTree, contigRange) {
return matchingBlocks;
}

function extractFeaturesInRange(buffer, dataRange, blocks, contigRange) {
return _.flatten(blocks.map(block => {
var blockOffset = block.offset - dataRange.start,
blockLimit = blockOffset + block.size,
// TODO: where does the +2 come from? (I copied it from dalliance)
blockBuffer = buffer.slice(blockOffset + 2, blockLimit);
// TODO: only inflate if necessary
var inflatedBuffer = pako.inflateRaw(new Uint8Array(blockBuffer));

var jb = new jBinary(inflatedBuffer, bbi.TYPE_SET);
// TODO: parse only one BedEntry at a time, as many as is necessary.
var beds = jb.read('BedBlock');

beds = beds.filter(function(bed) {
// Note: BED intervals are explicitly half-open.
// The "- 1" converts them to closed intervals for ContigInterval.
var bedInterval = new ContigInterval(bed.chrId, bed.start, bed.stop - 1);
return contigRange.intersects(bedInterval);
});
function extractFeaturesFromBlock(buffer, dataRange, block) {
var blockOffset = block.offset - dataRange.start,
blockLimit = blockOffset + block.size,
// TODO: where does the +2 come from? (I copied it from dalliance)
blockBuffer = buffer.slice(blockOffset + 2, blockLimit);
// TODO: only inflate if necessary
var inflatedBuffer = pako.inflateRaw(new Uint8Array(blockBuffer));

return beds;
}));
var jb = new jBinary(inflatedBuffer, bbi.TYPE_SET);
// TODO: parse only one BedEntry at a time & use an iterator.
return jb.read('BedBlock');
}

// Fetch the relevant blocks from the bigBed file and extract the features
// which overlap the given range.
function fetchFeatures(contigRange, header, cirTree, contigMap, remoteFile) {
// Replaces the numeric `chrId` on each BED row with a `contig` property,
// looked up through the reversed contig map.
// The rows are mutated in place; the same array is returned for convenience.
function attachContigToBedRows(beds, contigMap) {
  var idToContig = reverseContigMap(contigMap);
  beds.forEach(function(row) {
    var chrId = row.chrId;
    delete row.chrId;
    row.contig = idToContig[chrId];
  });
  return beds;
}


// Internal function for fetching features by block.
function fetchFeaturesByBlock(contigRange, header, cirTree, remoteFile): Array<BedBlock> {
var blocks = findOverlappingBlocks(header, cirTree, contigRange);
if (blocks.length == 0) {
return [];
return Q.when([]);
}

// Find the range in the file which contains all relevant blocks.
Expand All @@ -125,13 +128,36 @@ function fetchFeatures(contigRange, header, cirTree, contigMap, remoteFile) {

return remoteFile.getBytes(range.start, range.length())
.then(buffer => {
var reverseMap = reverseContigMap(contigMap);
var features = extractFeaturesInRange(buffer, range, blocks, contigRange)
features.forEach(f => {
f.contig = reverseMap[f.chrId];
delete f.chrId;
return blocks.map(block => {
var beds = extractFeaturesFromBlock(buffer, range, block);
if (block.startChromIx != block.endChromIx) {
throw `Can't handle blocks which span chromosomes!`;
}

return {
range: new ContigInterval(block.startChromIx, block.startBase, block.endBase),
rows: beds
};
});
return features;
});
}


// Loads every block overlapping `contigRange`, keeps only the BED rows that
// themselves overlap the range, and tags each surviving row with its contig.
function fetchFeatures(contigRange, header, cirTree, contigMap, remoteFile) {
  // BED intervals are half-open; subtracting one from `stop` yields the
  // closed interval that ContigInterval expects.
  var overlapsRequest = row =>
      contigRange.intersects(new ContigInterval(row.chrId, row.start, row.stop - 1));

  return fetchFeaturesByBlock(contigRange, header, cirTree, remoteFile)
      .then(blocks => {
        var allRows = _.flatten(_.pluck(blocks, 'rows'));
        return attachContigToBedRows(allRows.filter(overlapsRequest), contigMap);
      });
}

Expand All @@ -145,6 +171,12 @@ type BedRow = {
rest: string;
}

// All features found in one data block of the bigBed file.
type BedBlock = {
// Interval built from the block's start/end bases. fetchFeaturesByBlock
// creates it with a chromosome index as the contig;
// getFeatureBlocksOverlapping replaces that with the contig name before
// handing blocks to callers.
range: ContigInterval<string>;
// The BED rows read from the block (not filtered to any requested range).
rows: BedRow[];
}


class BigBed {
remoteFile: RemoteFile;
Expand Down Expand Up @@ -197,6 +229,31 @@ class BigBed {
return fetchFeatures(contigRange, header, cirTree, contigMap, this.remoteFile);
});
}

/**
* Returns all features in blocks overlapping the given range.
* Because these features must all be fetched, decompressed and parsed
* anyway, this can be helpful for upstream caching.
*/
getFeatureBlocksOverlapping(range: ContigInterval<string>): Q.Promise<Array<BedBlock>> {
return Q.spread([this.header, this.cirTree, this.contigMap],
(header, cirTree, contigMap) => {
var contigIx = getContigId(contigMap, range.contig);
if (contigIx === null) {
throw `Invalid contig ${contig}`;
}
var indexRange = new ContigInterval(contigIx, range.start(), range.stop());
return fetchFeaturesByBlock(indexRange, header, cirTree, this.remoteFile)
.then(featureBlocks => {
// Convert chrIds to contig strings.
featureBlocks.forEach(fb => {
fb.range.contig = reverseContigMap(contigMap)[fb.range.contig];
attachContigToBedRows(fb.rows, contigMap);
});
return featureBlocks;
});
});
}
}

module.exports = BigBed;
58 changes: 41 additions & 17 deletions src/BigBedDataSource.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ var ContigInterval = require('./ContigInterval'),


type Gene = {
position: ContigInterval;
position: ContigInterval<string>;
id: string; // transcript ID, e.g. "ENST00000269305"
strand: string; // '+' or '-'
codingRegion: Interval; // locus of coding start
Expand All @@ -30,26 +30,33 @@ type BedRow = {
// Remaining fields in the BED row (typically tab-delimited)
rest: string;
}
type BedBlock = {
range: ContigInterval<string>;
rows: BedRow[];
}

declare class BigBed {
getFeaturesInRange: (contig: string, start: number, stop: number) => Q.Promise<Array<BedRow>>;
getFeatureBlocksOverlapping(range: ContigInterval): Q.Promise<Array<BedBlock>>;
}


// Flow type for export.
type BigBedSource = {
rangeChanged: (newRange: GenomeRange) => void;
getGenesInRange: (range: ContigInterval) => Gene[];
getGenesInRange: (range: ContigInterval<string>) => Gene[];
on: (event: string, handler: Function) => void;
off: (event: string) => void;
trigger: (event: string, ...args:any) => void;
}

// The fields are described at http://genome.ucsc.edu/FAQ/FAQformat#format1
function parseBedFeature(f): Gene {
var position = new ContigInterval(f.contig, f.start, f.stop),
x = f.rest.split('\t'),
exonLengths = x[7].split(',').map(Number),
exonStarts = x[8].split(',').map(Number),
// exons arrays sometimes have trailing commas
exonLengths = x[7].replace(/,*$/, '').split(',').map(Number),
exonStarts = x[8].replace(/,*$/, '').split(',').map(Number),
exons = _.zip(exonStarts, exonLengths)
.map(function([start, length]) {
return new Interval(f.start + start, f.start + start + length);
Expand All @@ -69,27 +76,43 @@ function parseBedFeature(f): Gene {

function createBigBedDataSource(remoteSource: BigBed): BigBedSource {
// Collection of genes that have already been loaded.
var genes: Array<Gene> = [];
window.genes = genes;
var genes: {[key:string]: Gene} = {};

// Ranges for which we have complete information -- no need to hit network.
var coveredRanges: Array<ContigInterval<string>> = []

function addGene(newGene) {
if (!_.findWhere(genes, {id: newGene.id})) {
genes.push(newGene);
if (!genes[newGene.id]) {
genes[newGene.id] = newGene;
}
}

function getGenesInRange(range: ContigInterval): Gene[] {
function getGenesInRange(range: ContigInterval<string>): Gene[] {
if (!range) return [];
return genes.filter(gene => range.intersects(gene.position));
var results = [];
_.each(genes, gene => {
if (range.intersects(gene.position)) {
results.push(gene);
}
});
return results;
}

function fetch(range: GenomeRange) {
// TODO: add an API for requesting the entire block of genes.
return remoteSource.getFeaturesInRange(range.contig, range.start, range.stop)
.then(features => {
var genes = features.map(parseBedFeature);
genes.forEach(gene => addGene(gene));
});
var interval = new ContigInterval(range.contig, range.start, range.stop);

// Check if this interval is already in the cache.
if (_.any(coveredRanges, r => r.intersects(interval))) {
return Q.when();
}

return remoteSource.getFeatureBlocksOverlapping(interval).then(featureBlocks => {
featureBlocks.forEach(fb => {
coveredRanges.push(fb.range);
var genes = fb.rows.map(parseBedFeature);
genes.forEach(gene => addGene(gene));
});
});
}

var o = {
Expand All @@ -102,7 +125,8 @@ function createBigBedDataSource(remoteSource: BigBed): BigBedSource {

// These are here to make Flow happy.
on: () => {},
off: () => {}
off: () => {},
trigger: () => {}
};
_.extend(o, Events); // Make this an event emitter

Expand Down
8 changes: 4 additions & 4 deletions src/ContigInterval.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@ var Interval = require('./Interval');
* The contig may be either a string ("chr22") or a number (in case the contigs
* are indexed, for example).
*/
class ContigInterval {
contig: string|number;
class ContigInterval<T: (number|string)> {
contig: T;
interval: Interval;

constructor(contig: string|number, start: number, stop: number) {
constructor(contig: T, start: number, stop: number) {
this.contig = contig;
this.interval = new Interval(start, stop);
}
Expand All @@ -28,7 +28,7 @@ class ContigInterval {
return this.interval.length();
}

intersects(other: ContigInterval): boolean {
intersects(other: ContigInterval<T>): boolean {
return (this.contig === other.contig &&
this.interval.intersects(other.interval));
}
Expand Down
Loading

0 comments on commit b464bed

Please sign in to comment.