From 8051e5c1c7efae1c4becf4267d4072c7eca95bc3 Mon Sep 17 00:00:00 2001
From: Dan Vanderkam <danvdk@gmail.com>
Date: Wed, 11 Mar 2015 12:37:02 -0400
Subject: [PATCH 1/4] Parse bigBed headers using jBinary.

---
 package.json              |   2 +
 src/BigBed.js             | 319 ++++++++++++++++++++++++++++++++++++++
 src/ReadableView.js       |  15 +-
 src/RemoteFile.js         |   1 +
 src/main.js               |   6 +
 test/BigBed-test.js       |   4 +
 test/ReadableView-test.js |   8 +
 test/jbinary-test.js      |  59 +++++++
 8 files changed, 409 insertions(+), 5 deletions(-)
 create mode 100644 src/BigBed.js
 create mode 100644 test/BigBed-test.js
 create mode 100644 test/jbinary-test.js

diff --git a/package.json b/package.json
index 28db7f47..9bc196f3 100644
--- a/package.json
+++ b/package.json
@@ -16,6 +16,8 @@
   "dependencies": {
     "backbone": "^1.1.2",
     "d3": "^3.5.5",
+    "jbinary": "^2.1.2",
+    "pako": "^0.2.5",
     "q": "^1.1.2",
     "react": "^0.12.2",
     "underscore": "^1.7.0"
diff --git a/src/BigBed.js b/src/BigBed.js
new file mode 100644
index 00000000..6ee1cc3a
--- /dev/null
+++ b/src/BigBed.js
@@ -0,0 +1,319 @@
+/**
+ * Parser for bigBed format.
+ * Based on UCSC's src/inc/bbiFile.h
+ */
+'use strict';
+
+var Q = require('q'),
+    _ = require('underscore'),
+    jBinary = require('jbinary'),
+    pako = require('pako');  // for gzip inflation
+    
+
+var ReadableView = require('./ReadableView'),
+    RemoteFile = require('./RemoteFile');
+
+function typeAtOffset(typeName, offsetFieldName) {
+  return jBinary.Template({
+      baseType: typeName,
+      read: function(context) {
+        if (+context[offsetFieldName] == 0) {
+          return null;
+        } else {
+          return this.binary.read(this.baseType, +context[offsetFieldName]);
+        }
+      }
+    });
+}
+
+var BigBedTypeSet = {
+  'jBinary.all': 'File',
+  'jBinary.littleEndian': true,
+
+  'File': {
+    _magic: ['const', 'uint32', 0x8789F2EB, true],
+    version: ['const', 'uint16', 4, true],
+    zoomLevels: 'uint16',
+    chromosomeTreeOffset: 'uint64',
+    unzoomedDataOffset: 'uint64',
+    unzoomedIndexOffset: 'uint64',
+    fieldCount: 'uint16',
+    definedFieldCount: 'uint16',
+    // 0 if no autoSql information
+    autoSqlOffset: 'uint64',
+    totalSummaryOffset: 'uint64',
+    // Size of uncompression buffer.  0 if uncompressed.
+    uncompressBufSize: 'uint32',
+    // Offset to header extension 0 if no such extension
+    // TODO: support extended headers (not used in ensGene.bb)
+    extensionOffset: 'uint64',
+    zoomHeaders: ['array', 'ZoomHeader', 'zoomLevels'],
+
+    totalSummary: typeAtOffset('TotalSummary', 'totalSummaryOffset'),
+    chromosomeTree: typeAtOffset('BPlusTree', 'chromosomeTreeOffset')
+  },
+
+  'TotalSummary': {
+    basesCovered: 'uint64',
+    minVal: 'float64',     // for bigBed minimum depth of coverage
+    maxVal: 'float64',     // for bigBed maximum depth of coverage
+    sumData: 'float64',    // for bigBed sum of coverage
+    sumSquared: 'float64'  // for bigBed sum of coverage squared
+  },
+
+  'ZoomHeader': {
+    reductionLevel: 'uint32',
+    _reserved: 'uint32',
+    dataOffset: 'uint64',
+    indexOffset: 'uint64'
+  },
+
+  'BPlusTree': {
+    magic: ['const', 'uint32', 0x78CA8C91, true],
+    // Number of children per block (not byte size of block)
+    blockSize: 'uint32',
+    // Number of significant bytes in key
+    keySize: 'uint32',
+    // Number of bytes in value
+    valSize: 'uint32',
+    // Number of items in index
+    itemCount: 'uint64',
+    _reserved2: ['skip', 4],
+    _reserved3: ['skip', 4],
+    nodes: 'BPlusTreeNode'  // ['array', 'BPlusTreeNode', 'itemCount']
+  },
+  
+  'BPlusTreeNode': {
+    isLeaf: 'uint8',  // 1 = yes, 0 = no
+    _reserved: 'uint8',
+    count: 'uint16',
+    contents: ['array', ['if', 'isLeaf', {
+      key: ['string', 'keySize'],
+      // Note: bigBed allows more general values; this is what Ensembl uses.
+      // value: ['string', 'valSize']
+      id: 'uint32',
+      size: 'uint32'
+    }, {
+      key: ['string', 'keySize'],
+      offset: 'uint64'
+    }], 'count']
+  }
+};
+
+var CirTreeTypeSet = {
+  'jBinary.all': 'File',
+  'jBinary.littleEndian': true,
+
+  'File': {
+    _magic: ['const', 'uint32', 0x2468ACE0, true],
+    blockSize: 'uint32',
+    itemCount: 'uint64',
+    startChromIx: 'uint32',
+    startBase: 'uint32',
+    endChromIx: 'uint32',
+    endBase: 'uint32',
+    fileSize: 'uint64',
+    itemsPerSlot: 'uint32',
+    _reserved: ['skip', 4],
+    blocks: 'CirNode'
+  },
+
+  'CirNode': {
+    isLeaf: 'uint8',  // 1 = yes, 0 = no
+    _reserved: 'uint8',
+    count: 'uint16',
+    contents: ['array', ['if', 'isLeaf', {
+      startChromIx: 'uint32',
+      startBase: 'uint32',
+      endChromIx: 'uint32',
+      endBase: 'uint32',
+      offset: 'uint64',
+      size: 'uint64'
+    }, {
+      startChromIx: 'uint32',
+      startBase: 'uint32',
+      endChromIx: 'uint32',
+      endBase: 'uint32',
+      offset: 'uint64',
+    }], 'count']
+  }
+};
+
+
+var BigBedBlock = {
+  'jBinary.all': 'File',
+  'jBinary.littleEndian': true,
+
+  'File': ['array', 'BedEntry'],
+  'BedEntry': {
+    'chrId': 'uint32',
+    'start': 'uint32',
+    'end': 'uint32',
+    'rest': 'string0'
+  }
+};
+
+
+function parseHeader(dataView: DataView) {
+  // TODO: check Endianness using magic. Possibly use jDataView.littleEndian
+  // to flip the endianness for jBinary consumption.
+  // NB: dalliance doesn't support big endian formats.
+  var jb = new jBinary(dataView.buffer, BigBedTypeSet);
+  var header = jb.readAll();
+  console.log(header);
+
+  return header;
+}
+
+function parseCirTree(dataView: DataView) {
+  var jb = new jBinary(dataView.buffer, CirTreeTypeSet);
+  var cirTree = jb.readAll();
+  console.log(cirTree);
+
+  return cirTree;
+}
+
+function generateContigMap(twoBitHeader): {[key:string]: number} {
+  // Builds {contig name: ID}. Just assume it's a flat "tree" for now.
+  var tree = twoBitHeader.chromosomeTree;  // null if the header has no tree offset
+  if (!tree || !tree.nodes || !tree.nodes.contents) {
+    throw new Error('Invalid chromosome tree');
+  }
+  return _.object(tree.nodes.contents.map(function({id, key}) {
+    // Keys are fixed-width (keySize); drop everything from the first NUL on.
+    return [key.replace(/\0.*/, ''), id];
+  }));
+}
+
+function getContigId(contigMap, contig) {
+  return contigMap[contig] || contigMap['chr' + contig] || null;
+}
+
+function intersectIntervals(intervals: Array<[number, number]>): [number, number] {
+  if (!intervals.length) {
+    throw 'Tried to intersect zero intervals';
+  }
+  var result = intervals[0];
+  intervals.slice(1).forEach(function([a, b]) {
+    result[0] = Math.min(a, result[0]);
+    result[1] = Math.max(b, result[1]);
+  });
+  return result;
+}
+
+// TODO: factor out into module
+var lessOrEqual = function(c1, p1, c2, p2) {
+  return c1 < c2 || (c1 == c2 && p1 <= p2);
+};
+var contains = function(startContig, startPos, endContig, endPos, contig, pos) {
+  return lessOrEqual(startContig, startPos, contig, pos) &&
+         lessOrEqual(contig, pos, endContig, endPos);
+};
+
+var overlaps = function(startContig, startBase, endContig, endBase, contig, start, stop) {
+  return contains(startContig, startBase, endContig, endBase, contig, start) ||
+         contains(startContig, startBase, endContig, endBase, contig, stop);
+};
+
+// Get a byte range in the file containing a superset of the interval.
+function findByteRange(twoBitHeader, cirTree, contigIx: number, start: number, stop: number): ?[number, number] {
+
+  // Do a recursive search through the index tree
+  var matchingIntervals = [];
+  var find = function(node) {
+    if (node.contents) {
+      node.contents.forEach(find);
+    } else {
+      if (overlaps(node.startChromIx, node.startBase,
+                   node.endChromIx, node.endBase,
+                   contigIx, start, stop)) {
+        matchingIntervals.push(node);
+      }
+    }
+  };
+  find(cirTree.blocks);
+
+  return matchingIntervals;
+
+  // Intersect the intervals.
+  // XXX UCSC allows discontiguous intervals. When would this ever happen?
+  return intersectIntervals(
+      matchingIntervals.map(n => [+n.offset, n.offset+n.size]));
+}
+
+function extractFeaturesInRange(dataView, dataRange, blocks, contigIx, start, stop) {
+  console.log('Fetched ', dataRange);
+  var buffer = dataView.buffer;
+
+  return _.flatten(blocks.map(block => {
+    var blockOffset = block.offset - dataRange[0],
+        blockLimit = blockOffset + block.size,
+        // TODO: where does the +2 come from? (I copied it from dalliance)
+        blockBuffer = buffer.slice(blockOffset + 2, blockLimit);
+    // TODO: only inflate if necessary
+    var inflatedBuffer = pako.inflateRaw(new Uint8Array(blockBuffer));
+
+    var jb = new jBinary(inflatedBuffer, BigBedBlock);
+    // TODO: parse only one record at a time, as many as is necessary.
+    var beds = jb.readAll();
+
+    console.log(beds);
+
+    beds = beds.filter(function(bed) {
+      return overlaps(bed.chrId, bed.start, bed.chrId, bed.end, contigIx, start, stop);
+    });
+
+    return beds;
+  }));
+}
+
+
+class BigBed {
+  remoteFile: RemoteFile;  // byte source for the bigBed file at `url`
+  header: Q.Promise<any>;  // resolves to the parsed file header
+  cirTree: Q.Promise<any>;  // resolves to the parsed index ("CIR" tree)
+
+  constructor(url: string) {
+    this.remoteFile = new RemoteFile(url);
+    this.header = this.remoteFile.getBytes(0, 64*1024).then(parseHeader);  // first 64k
+    this.contigMap = this.header.then(generateContigMap);  // contig name -> numeric ID
+
+    // Next: fetch [header.unzoomedIndexOffset, zoomHeaders[0].dataOffset] and parse
+    // the "CIR" tree.
+    this.cirTree = this.header.then(header => {
+      // zoomHeaders[0].dataOffset is the next entry in the file.
+      // We assume the "cirTree" section goes all the way to that point.
+      var start = header.unzoomedIndexOffset,
+          length = header.zoomHeaders[0].dataOffset - start;
+      return this.remoteFile.getBytes(start, length).then(parseCirTree);
+    });
+    
+    // XXX: better error propagation? For now done() makes Q rethrow unhandled rejections.
+    this.header.done();
+    this.contigMap.done();
+    this.cirTree.done();
+  }
+
+  // Returns a promise for all BED entries overlapping the range; rejects on an unknown contig.
+  // TODO: factor logic out into a helper
+  getFeaturesInRange(contig: string, start: number, stop: number): Q.Promise<any> {
+    return Q.spread([this.header, this.cirTree, this.contigMap],
+                    (header, cirTree, contigMap) => {
+      var contigIx = getContigId(contigMap, contig);
+      if (contigIx === null) {
+        throw `Invalid contig ${contig}`;
+      }
+
+      var blocks = findByteRange(header, cirTree, contigIx, start, stop);
+      if (!blocks) {
+        return null;  // XXX better to throw?
+      }
+      console.log(blocks);
+      var range = intersectIntervals(blocks.map(n => [+n.offset, n.offset+n.size]));
+      return this.remoteFile.getBytes(range[0], range[1] - range[0])
+          .then(dataView => extractFeaturesInRange(dataView, range, blocks, contigIx, start, stop));
+    });
+  }
+}
+
+module.exports = BigBed;
diff --git a/src/ReadableView.js b/src/ReadableView.js
index baf125e9..fb4ebab3 100644
--- a/src/ReadableView.js
+++ b/src/ReadableView.js
@@ -19,13 +19,18 @@ class ReadableView {
     return num;
   }
 
+  // Read a little-endian unsigned 16-bit integer, advancing the current position.
+  readUint16(): number {
+    var lo = this.readUint8(), hi = this.readUint8();
+    return lo + hi * (1 << 8);
+  }
+
   // Read an unsigned 32-bit integer and advance the current position.
   readUint32(): number {
-    var num = this.readUint8()             |
-              this.readUint8() * (1 << 8 ) |
-              this.readUint8() * (1 << 16) |
-              this.readUint8() * (1 << 24);
-    return num;
+    return this.readUint8()             +
+           this.readUint8() * (1 << 8 ) +
+           this.readUint8() * (1 << 16) +
+           this.readUint8() * (1 << 24);  // summed, since '|' would yield a signed int32
   }
 
   // Read a sequence of 32-bit integers and advance the current position.
diff --git a/src/RemoteFile.js b/src/RemoteFile.js
index 78b32a99..492e014f 100644
--- a/src/RemoteFile.js
+++ b/src/RemoteFile.js
@@ -24,6 +24,7 @@ class RemoteFile {
     this.chunks = [];
   }
 
+  // TODO: return a buffer, not a DataView
   getBytes(start: number, length: number): Q.Promise<DataView> {
     var stop = start + length;
     // First check the cache.
diff --git a/src/main.js b/src/main.js
index a1c2688c..90e84fd2 100644
--- a/src/main.js
+++ b/src/main.js
@@ -1,6 +1,7 @@
 /* @flow */
 var React = require('react'),
     TwoBit = require('./TwoBit'),
+    BigBed = require('./BigBed'),
     Root = require('./Root'),
     createTwoBitDataSource = require('./TwoBitDataSource');
 
@@ -23,3 +24,8 @@ genome.getFeaturesInRange('chr1', 123000, 124000).done();
 
 var root = React.render(<Root referenceSource={dataSource} />,
                         document.getElementById('root'));
+
+var ensembl = new BigBed('/ensGene.bb');
+// Expose both sources as globals (presumably for poking at them from the console).
+window.ensembl = ensembl;
+window.genome = genome;
diff --git a/test/BigBed-test.js b/test/BigBed-test.js
new file mode 100644
index 00000000..b82b1b99
--- /dev/null
+++ b/test/BigBed-test.js
@@ -0,0 +1,4 @@
+// Things to test:
+// - getFeatures which return no features
+// - getFeatures which crosses a block boundary
+// - getFeatures which crosses a contig boundary (not currently possible)
diff --git a/test/ReadableView-test.js b/test/ReadableView-test.js
index 01291a56..c2caff32 100644
--- a/test/ReadableView-test.js
+++ b/test/ReadableView-test.js
@@ -61,4 +61,12 @@ describe('ReadableView', function() {
     expect(bytes.tell()).to.equal(20);
     expect(bytes.bytesRemaining()).to.equal(0);
   });
+
+  it('should read a large uint32', function() {
+    var view = new DataView(new ArrayBuffer(4));
+    // Write little-endian explicitly; a Uint32Array would use the host's byte order.
+    view.setUint32(0, 0xebf28987, true);
+    var bytes = new ReadableView(view);
+    expect(bytes.readUint32()).to.equal(0xebf28987);  // >= 2^31 must stay positive
+  });
 });
diff --git a/test/jbinary-test.js b/test/jbinary-test.js
new file mode 100644
index 00000000..975fc2e3
--- /dev/null
+++ b/test/jbinary-test.js
@@ -0,0 +1,59 @@
+// This is a playground to ensure that I understand how jBinary works.
+var chai = require('chai');
+var expect = chai.expect;
+
+var jBinary = require('jbinary');
+
+describe('jBinary', function() {
+  it('should read two-bit headers', function() {
+    var twoBitTypeSet = {
+      'jBinary.all': 'File',
+      'jBinary.littleEndian': true,
+      'File': {
+        magic: ['const', 'uint32', 0x1A412743, true],
+        version: ['const', 'uint32', 0, true],
+        sequenceCount: 'uint32',
+        reserved: 'uint32'
+      }
+    };
+
+    var byteArray = [
+        0x43, 0x27, 0x41, 0x1a,
+        0x00, 0x00, 0x00, 0x00,
+        0x5d, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00];
+    var u8array = new Uint8Array(byteArray.length);
+    byteArray.forEach((val, idx) => { u8array[idx] = val; });
+
+    var jb = new jBinary(u8array.buffer, twoBitTypeSet);
+    var header = jb.readAll();
+    console.log(header);
+
+    expect(header.magic).to.equal(0x1A412743);  // two bit magic
+    expect(header.version).to.equal(0);
+    expect(header.sequenceCount).to.equal(93);
+    expect(header.reserved).to.equal(0);
+  });
+
+  it('should advance through a sequence', function() {
+    var uint8TypeSet = {
+      'jBinary.all': 'File',
+      'jBinary.littleEndian': true,
+      'File': {
+        value: 'uint8'
+      }
+    };
+
+    var u8array = new Uint8Array(16);
+    for (var i = 0; i < 16; i++) {
+      u8array[i] = i * i;
+    }
+    var buffer = u8array.buffer;
+
+    var jb = new jBinary(buffer, uint8TypeSet);
+    while (jb.tell() < buffer.byteLength) {
+      var x = jb.read({value: 'uint8'});
+      console.log(jb.tell(), x);
+    }
+  });
+});

From abb8f8adf9f94896cdf08c850a2fe20961f8d008 Mon Sep 17 00:00:00 2001
From: Dan Vanderkam <danvdk@gmail.com>
Date: Fri, 13 Mar 2015 11:49:25 -0400
Subject: [PATCH 2/4] BigBed test

---
 package.json                    |   4 +-
 src/BigBed.js                   | 268 ++++++++------------------------
 src/RemoteFile.js               |  12 +-
 src/TwoBit.js                   |   9 +-
 src/formats/bbi.js              | 127 +++++++++++++++
 src/formats/helpers.js          |  19 +++
 test/BigBed-test.js             |  46 ++++++
 test/FakeXMLHttpRequest-test.js |  54 -------
 test/FakeXMLHttpRequest.js      |  92 -----------
 test/RemoteFile-test.js         |  44 +++---
 test/coverage.html              |   2 +-
 test/data/0to9.txt              |   1 +
 test/data/hello.txt             |   2 +
 test/data/itemRgb.bb            | Bin 0 -> 26695 bytes
 test/data/itemRgb.bed           |  22 +++
 test/jbinary-test.js            |   7 +-
 test/runner.html                |   2 +-
 17 files changed, 325 insertions(+), 386 deletions(-)
 create mode 100644 src/formats/bbi.js
 create mode 100644 src/formats/helpers.js
 delete mode 100644 test/FakeXMLHttpRequest-test.js
 delete mode 100644 test/FakeXMLHttpRequest.js
 create mode 100644 test/data/0to9.txt
 create mode 100644 test/data/hello.txt
 create mode 100644 test/data/itemRgb.bb
 create mode 100644 test/data/itemRgb.bed

diff --git a/package.json b/package.json
index 9bc196f3..beae556e 100644
--- a/package.json
+++ b/package.json
@@ -23,6 +23,7 @@
     "underscore": "^1.7.0"
   },
   "devDependencies": {
+    "arraybuffer-slice": "^0.1.2",
     "chai": "^2.0.0",
     "coveralls": "^2.11.2",
     "es5-shim": "^4.1.0",
@@ -45,7 +46,6 @@
     "react-tools": "^0.12.2",
     "reactify": "^1.0.0",
     "sinon": "^1.12.2",
-    "source-map": "^0.3.0",
-    "text-encoding": "^0.5.2"
+    "source-map": "^0.3.0"
   }
 }
diff --git a/src/BigBed.js b/src/BigBed.js
index 6ee1cc3a..a6e275fa 100644
--- a/src/BigBed.js
+++ b/src/BigBed.js
@@ -11,164 +11,26 @@ var Q = require('q'),
     
 
 var ReadableView = require('./ReadableView'),
-    RemoteFile = require('./RemoteFile');
-
-function typeAtOffset(typeName, offsetFieldName) {
-  return jBinary.Template({
-      baseType: typeName,
-      read: function(context) {
-        if (+context[offsetFieldName] == 0) {
-          return null;
-        } else {
-          return this.binary.read(this.baseType, +context[offsetFieldName]);
-        }
-      }
-    });
-}
-
-var BigBedTypeSet = {
-  'jBinary.all': 'File',
-  'jBinary.littleEndian': true,
-
-  'File': {
-    _magic: ['const', 'uint32', 0x8789F2EB, true],
-    version: ['const', 'uint16', 4, true],
-    zoomLevels: 'uint16',
-    chromosomeTreeOffset: 'uint64',
-    unzoomedDataOffset: 'uint64',
-    unzoomedIndexOffset: 'uint64',
-    fieldCount: 'uint16',
-    definedFieldCount: 'uint16',
-    // 0 if no autoSql information
-    autoSqlOffset: 'uint64',
-    totalSummaryOffset: 'uint64',
-    // Size of uncompression buffer.  0 if uncompressed.
-    uncompressBufSize: 'uint32',
-    // Offset to header extension 0 if no such extension
-    // TODO: support extended headers (not used in ensGene.bb)
-    extensionOffset: 'uint64',
-    zoomHeaders: ['array', 'ZoomHeader', 'zoomLevels'],
-
-    totalSummary: typeAtOffset('TotalSummary', 'totalSummaryOffset'),
-    chromosomeTree: typeAtOffset('BPlusTree', 'chromosomeTreeOffset')
-  },
-
-  'TotalSummary': {
-    basesCovered: 'uint64',
-    minVal: 'float64',     // for bigBed minimum depth of coverage
-    maxVal: 'float64',     // for bigBed maximum depth of coverage
-    sumData: 'float64',    // for bigBed sum of coverage
-    sumSquared: 'float64'  // for bigBed sum of coverage squared
-  },
-
-  'ZoomHeader': {
-    reductionLevel: 'uint32',
-    _reserved: 'uint32',
-    dataOffset: 'uint64',
-    indexOffset: 'uint64'
-  },
-
-  'BPlusTree': {
-    magic: ['const', 'uint32', 0x78CA8C91, true],
-    // Number of children per block (not byte size of block)
-    blockSize: 'uint32',
-    // Number of significant bytes in key
-    keySize: 'uint32',
-    // Number of bytes in value
-    valSize: 'uint32',
-    // Number of items in index
-    itemCount: 'uint64',
-    _reserved2: ['skip', 4],
-    _reserved3: ['skip', 4],
-    nodes: 'BPlusTreeNode'  // ['array', 'BPlusTreeNode', 'itemCount']
-  },
-  
-  'BPlusTreeNode': {
-    isLeaf: 'uint8',  // 1 = yes, 0 = no
-    _reserved: 'uint8',
-    count: 'uint16',
-    contents: ['array', ['if', 'isLeaf', {
-      key: ['string', 'keySize'],
-      // Note: bigBed allows more general values; this is what Ensembl uses.
-      // value: ['string', 'valSize']
-      id: 'uint32',
-      size: 'uint32'
-    }, {
-      key: ['string', 'keySize'],
-      offset: 'uint64'
-    }], 'count']
-  }
-};
-
-var CirTreeTypeSet = {
-  'jBinary.all': 'File',
-  'jBinary.littleEndian': true,
-
-  'File': {
-    _magic: ['const', 'uint32', 0x2468ACE0, true],
-    blockSize: 'uint32',
-    itemCount: 'uint64',
-    startChromIx: 'uint32',
-    startBase: 'uint32',
-    endChromIx: 'uint32',
-    endBase: 'uint32',
-    fileSize: 'uint64',
-    itemsPerSlot: 'uint32',
-    _reserved: ['skip', 4],
-    blocks: 'CirNode'
-  },
-
-  'CirNode': {
-    isLeaf: 'uint8',  // 1 = yes, 0 = no
-    _reserved: 'uint8',
-    count: 'uint16',
-    contents: ['array', ['if', 'isLeaf', {
-      startChromIx: 'uint32',
-      startBase: 'uint32',
-      endChromIx: 'uint32',
-      endBase: 'uint32',
-      offset: 'uint64',
-      size: 'uint64'
-    }, {
-      startChromIx: 'uint32',
-      startBase: 'uint32',
-      endChromIx: 'uint32',
-      endBase: 'uint32',
-      offset: 'uint64',
-    }], 'count']
-  }
-};
+    RemoteFile = require('./RemoteFile'),
+    Interval = require('./Interval'),
+    ContigInterval = require('./ContigInterval'),
+    utils = require('./utils.js'),
+    bbi = require('./formats/bbi');
 
 
-var BigBedBlock = {
-  'jBinary.all': 'File',
-  'jBinary.littleEndian': true,
-
-  'File': ['array', 'BedEntry'],
-  'BedEntry': {
-    'chrId': 'uint32',
-    'start': 'uint32',
-    'end': 'uint32',
-    'rest': 'string0'
-  }
-};
-
-
-function parseHeader(dataView: DataView) {
+function parseHeader(buffer) {  // buffer: ArrayBuffer holding the start of the file
   // TODO: check Endianness using magic. Possibly use jDataView.littleEndian
   // to flip the endianness for jBinary consumption.
   // NB: dalliance doesn't support big endian formats.
-  var jb = new jBinary(dataView.buffer, BigBedTypeSet);
-  var header = jb.readAll();
-  console.log(header);
+  var jb = new jBinary(buffer, bbi.TYPE_SET);
+  var header = jb.read('Header');  // 'Header' is declared in bbi.TYPE_SET
 
   return header;
 }
 
-function parseCirTree(dataView: DataView) {
-  var jb = new jBinary(dataView.buffer, CirTreeTypeSet);
-  var cirTree = jb.readAll();
-  console.log(cirTree);
+function parseCirTree(buffer) {  // buffer: ArrayBuffer starting at header.unzoomedIndexOffset
+  var jb = new jBinary(buffer, bbi.TYPE_SET);
+  var cirTree = jb.read('CirTree');  // NOTE(review): 'CirTree' presumably lives in bbi.TYPE_SET — confirm
 
   return cirTree;
 }
@@ -186,81 +48,63 @@ function generateContigMap(twoBitHeader): {[key:string]: number} {
 }
 
 function getContigId(contigMap, contig) {
-  return contigMap[contig] || contigMap['chr' + contig] || null;
+  // Try the name as given, then with a 'chr' prefix ('17' -> 'chr17').
+  // Membership is tested with `in` because 0 is a valid (falsy) contig ID.
+  var candidates = [contig, 'chr' + contig];
+  for (var i = 0; i < candidates.length; i++) {
+    var name = candidates[i];
+    if (name in contigMap) return contigMap[name];
+  }
+  return null;
 }
 
-function intersectIntervals(intervals: Array<[number, number]>): [number, number] {
-  if (!intervals.length) {
-    throw 'Tried to intersect zero intervals';
-  }
-  var result = intervals[0];
-  intervals.slice(1).forEach(function([a, b]) {
-    result[0] = Math.min(a, result[0]);
-    result[1] = Math.max(b, result[1]);
+function reverseContigMap(contigMap: {[key:string]: number}): Array<string> {  // ID -> name
+  var ary = [];  // indexed by contig ID; has holes if the IDs are not contiguous
+  _.forEach(contigMap, (index, name) => {
+    ary[index] = name;
   });
-  return result;
+  return ary;
 }
 
-// TODO: factor out into module
-var lessOrEqual = function(c1, p1, c2, p2) {
-  return c1 < c2 || (c1 == c2 && p1 <= p2);
-};
-var contains = function(startContig, startPos, endContig, endPos, contig, pos) {
-  return lessOrEqual(startContig, startPos, contig, pos) &&
-         lessOrEqual(contig, pos, endContig, endPos);
-};
-
-var overlaps = function(startContig, startBase, endContig, endBase, contig, start, stop) {
-  return contains(startContig, startBase, endContig, endBase, contig, start) ||
-         contains(startContig, startBase, endContig, endBase, contig, stop);
-};
-
-// Get a byte range in the file containing a superset of the interval.
-function findByteRange(twoBitHeader, cirTree, contigIx: number, start: number, stop: number): ?[number, number] {
-
+// Get all leaf blocks of the index tree whose [chrom, base] span overlaps contigRange.
+function findOverlappingBlocks(twoBitHeader, cirTree, contigRange) {  // twoBitHeader is unused
   // Do a recursive search through the index tree
-  var matchingIntervals = [];
+  var matchingBlocks = [];
+  var tupleRange = [[contigRange.contig, contigRange.start()],  // [contig, position] pairs
+                    [contigRange.contig, contigRange.stop()]];
   var find = function(node) {
     if (node.contents) {
       node.contents.forEach(find);
     } else {
-      if (overlaps(node.startChromIx, node.startBase,
-                   node.endChromIx, node.endBase,
-                   contigIx, start, stop)) {
-        matchingIntervals.push(node);
+      var nodeRange = [[node.startChromIx, node.startBase],  // a block may span contigs
+                       [node.endChromIx, node.endBase]];
+      if (utils.tupleRangeOverlaps(nodeRange, tupleRange)) {
+        matchingBlocks.push(node);
       }
     }
   };
   find(cirTree.blocks);
 
-  return matchingIntervals;
-
-  // Intersect the intervals.
-  // XXX UCSC allows discontiguous intervals. When would this ever happen?
-  return intersectIntervals(
-      matchingIntervals.map(n => [+n.offset, n.offset+n.size]));
+  return matchingBlocks;  // index entries; callers read their offset and size
 }
 
-function extractFeaturesInRange(dataView, dataRange, blocks, contigIx, start, stop) {
-  console.log('Fetched ', dataRange);
-  var buffer = dataView.buffer;
-
+function extractFeaturesInRange(buffer, dataRange, blocks, contigRange) {  // NOTE(review): the "+2" below presumably skips the zlib header for inflateRaw — confirm
   return _.flatten(blocks.map(block => {
-    var blockOffset = block.offset - dataRange[0],
+    var blockOffset = block.offset - dataRange.start,  // file offset -> offset within buffer
         blockLimit = blockOffset + block.size,
         // TODO: where does the +2 come from? (I copied it from dalliance)
         blockBuffer = buffer.slice(blockOffset + 2, blockLimit);
     // TODO: only inflate if necessary
     var inflatedBuffer = pako.inflateRaw(new Uint8Array(blockBuffer));
 
-    var jb = new jBinary(inflatedBuffer, BigBedBlock);
-    // TODO: parse only one record at a time, as many as is necessary.
-    var beds = jb.readAll();
-
-    console.log(beds);
+    var jb = new jBinary(inflatedBuffer, bbi.TYPE_SET);
+    // TODO: parse only one BedEntry at a time, as many as is necessary.
+    var beds = jb.read('BedBlock');
 
     beds = beds.filter(function(bed) {
-      return overlaps(bed.chrId, bed.start, bed.chrId, bed.end, contigIx, start, stop);
+      var bedInterval = new ContigInterval(bed.chrId, bed.start, bed.stop);  // NOTE(review): field was 'end' before this patch — confirm BedEntry now defines 'stop'
+      var r = contigRange.intersects(bedInterval);
+      return r;
     });
 
     return beds;
@@ -283,8 +127,10 @@ class BigBed {
     this.cirTree = this.header.then(header => {
       // zoomHeaders[0].dataOffset is the next entry in the file.
       // We assume the "cirTree" section goes all the way to that point.
+      // Lacking zoom headers, assume it's 4k.
       var start = header.unzoomedIndexOffset,
-          length = header.zoomHeaders[0].dataOffset - start;
+          zoomHeader = header.zoomHeaders[0],
+          length = zoomHeader ? zoomHeader.dataOffset - start : 4096;
       return this.remoteFile.getBytes(start, length).then(parseCirTree);
     });
     
@@ -303,15 +149,25 @@ class BigBed {
       if (contigIx === null) {
         throw `Invalid contig ${contig}`;
       }
+      var contigRange = new ContigInterval(contigIx, start, stop);
 
-      var blocks = findByteRange(header, cirTree, contigIx, start, stop);
-      if (!blocks) {
-        return null;  // XXX better to throw?
+      var blocks = findOverlappingBlocks(header, cirTree, contigRange);
+      if (blocks.length === 0) {
+        return [];  // nothing indexed for this range, so skip the network fetch
       }
-      console.log(blocks);
-      var range = intersectIntervals(blocks.map(n => [+n.offset, n.offset+n.size]));
-      return this.remoteFile.getBytes(range[0], range[1] - range[0])
-          .then(dataView => extractFeaturesInRange(dataView, range, blocks, contigIx, start, stop));
+      // Fetch a single byte range that spans every matching block.
+      var range = Interval.boundingInterval(
+          blocks.map(n => new Interval(+n.offset, n.offset+n.size)));
+      return this.remoteFile.getBytes(range.start, range.length())
+          .then(buffer => {
+            var reverseMap = reverseContigMap(contigMap);
+            var features = extractFeaturesInRange(buffer, range, blocks, contigRange);
+            features.forEach(f => {
+              f.contig = reverseMap[f.chrId];  // expose the contig name, not its ID
+              delete f.chrId;
+            });
+            return features;
+          });
     });
   }
 }
diff --git a/src/RemoteFile.js b/src/RemoteFile.js
index 492e014f..6033dc59 100644
--- a/src/RemoteFile.js
+++ b/src/RemoteFile.js
@@ -17,21 +17,22 @@ class RemoteFile {
   url: string;
   fileLength: number;
   chunks: Array<Chunk>;  // regions of file that have already been loaded.
+  numNetworkRequests: number;  // track this for debugging/testing
 
   constructor(url: string) {
     this.url = url;
     this.fileLength = -1;  // unknown
     this.chunks = [];
+    this.numNetworkRequests = 0;
   }
 
-  // TODO: return a buffer, not a DataView
-  getBytes(start: number, length: number): Q.Promise<DataView> {
+  getBytes(start: number, length: number): Q.Promise<ArrayBuffer> {
     var stop = start + length;
     // First check the cache.
     for (var i = 0; i < this.chunks.length; i++) {
       var chunk = this.chunks[i];
       if (chunk.start <= start && chunk.stop >= stop) {
-        return Q.when(new DataView(chunk.buffer, start - chunk.start, length));
+        return Q.when(chunk.buffer.slice(start - chunk.start, stop - chunk.start));
       }
     }
 
@@ -41,7 +42,7 @@ class RemoteFile {
     return this.getFromNetwork(start, start + length - 1);
   }
 
-  getFromNetwork(start: number, stop: number): Q.Promise<DataView> {
+  getFromNetwork(start: number, stop: number): Q.Promise<ArrayBuffer> {
     var deferred = Q.defer();
 
     var xhr = new XMLHttpRequest();
@@ -56,10 +57,11 @@ class RemoteFile {
 
       var newChunk = { start, stop: start + buffer.byteLength - 1, buffer };
       remoteFile.chunks.push(newChunk);
-      deferred.resolve(new DataView(buffer));
+      deferred.resolve(buffer);
     };
 
     // TODO: `reject`, `notify` on progress
+    this.numNetworkRequests++;
     xhr.send();
 
     return deferred.promise;
diff --git a/src/TwoBit.js b/src/TwoBit.js
index 991163f6..a7496977 100644
--- a/src/TwoBit.js
+++ b/src/TwoBit.js
@@ -166,8 +166,8 @@ class TwoBit {
     this.header = deferredHeader.promise;
 
     // TODO: if 16k is insufficient, fetch the right amount.
-    this.remoteFile.getBytes(0, 16*1024).then(function(dataView) {
-        var header = parseHeader(dataView);
+    this.remoteFile.getBytes(0, 16*1024).then(function(buffer) {
+        var header = parseHeader(new DataView(buffer));
         deferredHeader.resolve(header);
       }).done();
   }
@@ -184,7 +184,8 @@ class TwoBit {
       var dnaOffset = header.offset + header.dnaOffsetFromHeader;
       var offset = Math.floor(dnaOffset + start/4);
       var byteLength = Math.ceil((stop - start + 1) / 4) + 1;
-      return this.remoteFile.getBytes(offset, byteLength).then(dataView => {
+      return this.remoteFile.getBytes(offset, byteLength).then(buffer => {
+        var dataView = new DataView(buffer);
         return markUnknownDNA(
             unpackDNA(dataView, start % 4, stop - start + 1), start, header)
             .join('');
@@ -208,7 +209,7 @@ class TwoBit {
 
       // TODO: if 4k is insufficient, fetch the right amount.
       return this.remoteFile.getBytes(seq.offset, 4095).then(
-          dataView => parseSequenceRecord(dataView, seq.offset));
+          buf => parseSequenceRecord(new DataView(buf), seq.offset));
     });
   }
 }
diff --git a/src/formats/bbi.js b/src/formats/bbi.js
new file mode 100644
index 00000000..d67b5d46
--- /dev/null
+++ b/src/formats/bbi.js
@@ -0,0 +1,127 @@
+/**
+ * BBI is the shared structure between bigBed and bigWig.
+ * These structures are based on UCSC's src/inc/bbiFile.h
+ */
+
+'use strict';
+
+var jBinary = require('jbinary'),
+    {typeAtOffset} = require('./helpers');
+
+var TYPE_SET = {
+  'jBinary.littleEndian': true,  // multi-byte fields below default to little-endian
+
+  'Header': {
+    _magic: ['const', 'uint32', 0x8789F2EB, true],  // bigBed signature; NOTE(review): bigWig uses another value
+    version: ['const', 'uint16', 4, true],  // only version 4 is matched
+    zoomLevels: 'uint16',  // number of ZoomHeader records (see zoomHeaders)
+    chromosomeTreeOffset: 'uint64',
+    unzoomedDataOffset: 'uint64',
+    unzoomedIndexOffset: 'uint64',
+    fieldCount: 'uint16',
+    definedFieldCount: 'uint16',
+    // 0 if no autoSql information
+    autoSqlOffset: 'uint64',
+    totalSummaryOffset: 'uint64',
+    // Size of the decompression buffer; 0 if the data is uncompressed.
+    uncompressBufSize: 'uint32',
+    // Offset to header extension; 0 if no such extension.
+    // TODO: support extended headers (not used in ensGene.bb)
+    extensionOffset: 'uint64',
+    zoomHeaders: ['array', 'ZoomHeader', 'zoomLevels'],
+    // Out-of-line sections: read at the named offset field, or null when that offset is 0.
+    totalSummary: typeAtOffset('TotalSummary', 'totalSummaryOffset'),
+    chromosomeTree: typeAtOffset('BPlusTree', 'chromosomeTreeOffset')
+  },
+
+  'TotalSummary': {
+    basesCovered: 'uint64',
+    minVal: 'float64',     // for bigBed minimum depth of coverage
+    maxVal: 'float64',     // for bigBed maximum depth of coverage
+    sumData: 'float64',    // for bigBed sum of coverage
+    sumSquared: 'float64'  // for bigBed sum of coverage squared
+  },
+
+  'ZoomHeader': {  // one record per zoom level; Header.zoomLevels of these follow the fixed fields
+    reductionLevel: 'uint32',
+    _reserved: 'uint32',
+    dataOffset: 'uint64',
+    indexOffset: 'uint64'
+  },
+
+  'BPlusTree': {
+    magic: ['const', 'uint32', 0x78CA8C91, true],  // B+ tree signature
+    // Number of children per block (not byte size of block)
+    blockSize: 'uint32',
+    // Number of significant bytes in key
+    keySize: 'uint32',
+    // Number of bytes in value
+    valSize: 'uint32',
+    // Number of items in index
+    itemCount: 'uint64',
+    _reserved2: ['skip', 4],  // 8 reserved bytes in total,
+    _reserved3: ['skip', 4],  // skipped as two 4-byte fields
+    nodes: 'BPlusTreeNode'  // reads one node only; alternative: ['array', 'BPlusTreeNode', 'itemCount']
+  },
+  
+  'BPlusTreeNode': {
+    isLeaf: 'uint8',  // 1 = yes, 0 = no
+    _reserved: 'uint8',
+    count: 'uint16',  // number of entries in `contents`
+    contents: ['array', ['if', 'isLeaf', {
+      key: ['string', 'keySize'],  // presumably resolved from the enclosing BPlusTree's keySize -- confirm
+      // Note: bigBed allows more general values; this is what Ensembl uses.
+      // value: ['string', 'valSize']
+      id: 'uint32',
+      size: 'uint32'
+    }, {  // entry layout when isLeaf is 0
+      key: ['string', 'keySize'],
+      offset: 'uint64'
+    }], 'count']
+  },
+
+  'CirTree': {
+    _magic: ['const', 'uint32', 0x2468ACE0, true],  // CIR tree signature
+    blockSize: 'uint32',
+    itemCount: 'uint64',
+    startChromIx: 'uint32',
+    startBase: 'uint32',
+    endChromIx: 'uint32',
+    endBase: 'uint32',
+    fileSize: 'uint64',
+    itemsPerSlot: 'uint32',
+    _reserved: ['skip', 4],
+    blocks: 'CirNode'  // reads the single node that follows this header
+  },
+
+  'CirNode': {
+    isLeaf: 'uint8',  // 1 = yes, 0 = no
+    _reserved: 'uint8',
+    count: 'uint16',  // number of entries in `contents`
+    contents: ['array', ['if', 'isLeaf', {
+      startChromIx: 'uint32',
+      startBase: 'uint32',
+      endChromIx: 'uint32',
+      endBase: 'uint32',
+      offset: 'uint64',
+      size: 'uint64'
+    }, {  // entry layout when isLeaf is 0: same range fields, but no size
+      startChromIx: 'uint32',
+      startBase: 'uint32',
+      endChromIx: 'uint32',
+      endBase: 'uint32',
+      offset: 'uint64',
+    }], 'count']
+  },
+
+  'BedEntry': {
+    'chrId': 'uint32',
+    'start': 'uint32',
+    'stop': 'uint32',
+    'rest': 'string0'  // NUL-terminated text holding the remaining BED columns
+  },
+
+  'BedBlock': ['array', 'BedEntry'],  // no length given: presumably reads entries to the end of the data
+};
+
+module.exports = {TYPE_SET};
diff --git a/src/formats/helpers.js b/src/formats/helpers.js
new file mode 100644
index 00000000..235f4a8d
--- /dev/null
+++ b/src/formats/helpers.js
@@ -0,0 +1,19 @@
+/**
+ * Helpers for specifying file formats using jBinary.
+ */
+var jBinary = require('jbinary');
+
+function typeAtOffset(typeName, offsetFieldName) {
+  return jBinary.Template({
+      baseType: typeName,
+      read: function(context) {
+        if (+context[offsetFieldName] == 0) {
+          return null;
+        } else {
+          return this.binary.read(this.baseType, +context[offsetFieldName]);
+        }
+      }
+    });
+}
+
+module.exports = {typeAtOffset};
diff --git a/test/BigBed-test.js b/test/BigBed-test.js
index b82b1b99..56081a92 100644
--- a/test/BigBed-test.js
+++ b/test/BigBed-test.js
@@ -2,3 +2,49 @@
 // - getFeatures which return no features
 // - getFeatures which crosses a block boundary
 // - getFeatures which crosses a contig boundary (not currently possible)
+
+var chai = require('chai');
+var expect = chai.expect;
+var assert = chai.assert;
+
+var BigBed = require('../src/BigBed');
+
+describe('BigBed', function() {
+  function getTestBigBed() {
+    // This file was generated using UCSC tools:
+    // cd kent/src/utils/bedToBigBed/tests; make
+    // This file is compressed, little endian and contains autoSQL.
+    return new BigBed('/test/data/itemRgb.bb');
+  }
+
+  it('should extract features in a range', function(done) {
+    var bb = getTestBigBed();
+
+    bb.getFeaturesInRange('chrX', 151077036, 151078532)
+        .then(features => {
+          // chrX	151077031	151078198	MID_BLUE	0	-	151077031	151078198	0,0,128
+          // chrX	151078198	151079365	VIOLET_RED1	0	-	151078198	151079365	255,62,150
+          expect(features).to.have.length(2);
+          expect(features[0].contig).to.equal('chrX');
+          expect(features[0].start).to.equal(151077031);
+          expect(features[0].stop).to.equal(151078198);
+          expect(features[1].contig).to.equal('chrX');
+          expect(features[1].start).to.equal(151078198);
+          expect(features[1].stop).to.equal(151079365);
+
+          var rest0 = features[0].rest.split('\t');
+          expect(rest0).to.have.length(6)
+          expect(rest0[0]).to.equal('MID_BLUE');
+          expect(rest0[2]).to.equal('-');
+          expect(rest0[5]).to.equal('0,0,128');
+
+          var rest1 = features[1].rest.split('\t');
+          expect(rest1).to.have.length(6)
+          expect(rest1[0]).to.equal('VIOLET_RED1');
+          expect(rest1[2]).to.equal('-');
+          expect(rest1[5]).to.equal('255,62,150');
+          done();
+        })
+        .done();
+  });
+});
diff --git a/test/FakeXMLHttpRequest-test.js b/test/FakeXMLHttpRequest-test.js
deleted file mode 100644
index 6af47f5c..00000000
--- a/test/FakeXMLHttpRequest-test.js
+++ /dev/null
@@ -1,54 +0,0 @@
-var sinon = require('sinon'),
-    chai = require('chai'),
-    expect = chai.expect;
-
-var FakeXHR = require('./FakeXMLHttpRequest');
-
-describe('FakeXMLHttpRequest', () => {
-  beforeEach(() => {
-    FakeXHR.install();
-  });
-  afterEach(() => {
-    FakeXHR.restore();
-  });
-
-  it('should intercept simple XHRs', (done) => {
-    FakeXHR.addResponse('http://example.com/file.txt', 'hello');
-
-    var xhr = new XMLHttpRequest();
-    xhr.open('GET', 'http://example.com/file.txt');
-    xhr.onload = function(e) {
-      expect(this.response).to.equal('hello');
-      done();
-    };
-    xhr.onerror = function(e) {
-      throw e;
-      done();
-    };
-    xhr.send();
-  });
-
-  it('should intercept arraybuffer XHRs', (done) => {
-    var buf = new Uint8Array(4);
-    buf[0] = 1;
-    buf[1] = 2;
-    buf[2] = 3;
-    buf[3] = 4;
-    FakeXHR.addResponse('http://example.com/file.txt', buf.buffer);
-
-    var xhr = new XMLHttpRequest();
-    xhr.open('GET', 'http://example.com/file.txt');
-    xhr.responseType = 'arraybuffer';
-    xhr.onload = function(e) {
-      var buf = this.response;
-      expect(buf).to.be.an.instanceof(ArrayBuffer);
-      expect(buf.byteLength).to.equal(4);
-      done();
-    };
-    xhr.onerror = function(e) {
-      throw e;
-      done();
-    };
-    xhr.send();
-  });
-});
diff --git a/test/FakeXMLHttpRequest.js b/test/FakeXMLHttpRequest.js
deleted file mode 100644
index 71f490de..00000000
--- a/test/FakeXMLHttpRequest.js
+++ /dev/null
@@ -1,92 +0,0 @@
-/** @flow */
-
-/**
- * Tiny fake for just the portions of XHR level 2 that pileup.js needs.
- * This should be deleted once FauxJax or Sinon support XHR2.
- * (Specifically, we need to let xhr.response be an ArrayBuffer.)
- */
-class FakeXMLHttpRequest {
-  method: string;
-  url: string;
-  responseType: string;
-  requestHeaders: Object;
-  response: any;
-  onload: (e: Object) => void;
-  onerror: (e: any) => void;
-
-  constructor() {
-    this.method = '';
-    this.url = '';
-    this.responseType = '';
-    this.requestHeaders = {};
-    this.response = null;
-    this.onload = e => {};
-    this.onerror = e => {
-      throw e;
-    };
-  }
-
-  open(method: string, url: string) {
-    this.method = method;
-    this.url = url;
-  }
-
-  setRequestHeader(header: string, value: string) {
-    this.requestHeaders[header] = value;
-  }
-
-  send(): void {
-    if (!this.method || !this.url) {
-      throw 'must call open() before send()';
-    }
-    FakeXMLHttpRequest.numRequests++;
-    var rs = FakeXMLHttpRequest.responses;
-    for (var i = 0; i < rs.length; i++) {
-      var url = rs[i][0], response = rs[i][1];
-      if (url == this.url) {
-        this.response = response;
-        window.setTimeout(() => {
-          this.onload.call(this, {});
-        }, 0);
-        break;
-      }
-    }
-
-    if (!this.response) {
-      this.onerror.call(this, 'Unable to find response for ' + this.url);
-    }
-  }
-
-  static responses: Array<[string, any]>;
-  static addResponse(url: string, response: any) {
-    var rs = FakeXMLHttpRequest.responses;
-    if (!rs) {
-      FakeXMLHttpRequest.responses = rs = [];
-    }
-    rs.push([url, response]);
-  }
-
-  static _origXhr: any;
-  static numRequests: number;
-  static install(): void {
-    if (FakeXMLHttpRequest._origXhr) {
-      throw "Can't double-install FakeXMLHttpRequest";
-    }
-    FakeXMLHttpRequest._origXhr = XMLHttpRequest;
-    XMLHttpRequest = FakeXMLHttpRequest;
-    FakeXMLHttpRequest.numRequests = 0;
-  }
-  
-  static restore(): void {
-    if (!FakeXMLHttpRequest._origXhr) {
-      throw "Can't restore XMLHttpRequest without installing FakeXMLHttpRequest";
-    }
-
-    XMLHttpRequest = FakeXMLHttpRequest._origXhr;
-    FakeXMLHttpRequest._origXhr = null;
-    FakeXMLHttpRequest.responses = [];
-  }
-}
-
-
-module.exports = FakeXMLHttpRequest;
diff --git a/test/RemoteFile-test.js b/test/RemoteFile-test.js
index f3b78c0d..83d5ff4f 100644
--- a/test/RemoteFile-test.js
+++ b/test/RemoteFile-test.js
@@ -1,30 +1,38 @@
 var chai = require('chai'),
-    expect = chai.expect;
-
-var FakeXHR = require('./FakeXMLHttpRequest');
+    expect = chai.expect,
+    jBinary = require('jbinary');
 
 var RemoteFile = require('../src/RemoteFile');
 
 describe('RemoteFile', () => {
-  beforeEach(() => {
-    FakeXHR.install();
-  });
-  afterEach(() => {
-    FakeXHR.restore();
-  });
-
-  it('should fetch a subset of a file', (done) => {
-    FakeXHR.addResponse('http://example.com/file.txt',
-                        new TextEncoder('utf-8').encode('01234567890').buffer);
+  function bufferToText(buf) {  // Decode a fetched buffer as a string so it can be compared to a literal.
+    return new jBinary(buf).read('string');
+  }
 
-    var f = new RemoteFile('http://example.com/file.txt');
-    var promisedData = f.getBytes(10, 11);
+  it('should fetch a subset of a file', done => {
+    var f = new RemoteFile('/test/data/0to9.txt');
+    var promisedData = f.getBytes(4, 5);
 
-    expect(FakeXHR.numRequests).to.equal(1);
-    // expect(req.requestHeaders.Range).to.equal('bytes=10-29');
+    expect(f.numNetworkRequests).to.equal(1);
     promisedData.then(buf => {
-      expect(buf.byteLength).to.equal(11);
+      expect(buf.byteLength).to.equal(5);
+      expect(bufferToText(buf)).to.equal('45678');
       done();
     }).done();
   });
+
+  it('should fetch subsets from cache', done => {
+    var f = new RemoteFile('/test/data/0to9.txt');
+    f.getBytes(0, 10).then(buf => {  // first read: expected to issue the only network request
+      expect(buf.byteLength).to.equal(10);
+      expect(bufferToText(buf)).to.equal('0123456789');
+      expect(f.numNetworkRequests).to.equal(1);
+      f.getBytes(4, 5).then(buf => {  // sub-range of the bytes fetched above
+        expect(buf.byteLength).to.equal(5);
+        expect(bufferToText(buf)).to.equal('45678');
+        expect(f.numNetworkRequests).to.equal(1);  // still 1: the second read was served from the cache
+        done();
+      }).done();
+    }).done();
+  });
 });
diff --git a/test/coverage.html b/test/coverage.html
index 74b456af..577ab1e6 100644
--- a/test/coverage.html
+++ b/test/coverage.html
@@ -10,7 +10,7 @@
   <!-- Polyfills for PhantomJS -->
   <script src="../node_modules/es5-shim/es5-shim.min.js"></script>
   <script src="../node_modules/es5-shim/es5-sham.min.js"></script>
-  <script src="../node_modules/text-encoding/lib/encoding.js"></script>
+  <script src="../node_modules/arraybuffer-slice/index.js"></script>
 
   <!-- Mocha -->
   <script src="../node_modules/mocha/mocha.js"></script>
diff --git a/test/data/0to9.txt b/test/data/0to9.txt
new file mode 100644
index 00000000..11f11f9b
--- /dev/null
+++ b/test/data/0to9.txt
@@ -0,0 +1 @@
+0123456789
diff --git a/test/data/hello.txt b/test/data/hello.txt
new file mode 100644
index 00000000..94954abd
--- /dev/null
+++ b/test/data/hello.txt
@@ -0,0 +1,2 @@
+hello
+world
diff --git a/test/data/itemRgb.bb b/test/data/itemRgb.bb
new file mode 100644
index 0000000000000000000000000000000000000000..c31b73c9497e7d8bec275fed7678cd6187b53679
GIT binary patch
literal 26695
zcmeI*eN5D57y$4dAaKkL1#yyd@N$QIOxenTK|m%P&S5ls3F60`9Ctmr2zPKTf{p>_
z5Moe4AcD%~2Z-Zi22BJJ73K%Zhz?YQA<i!pCWB3JGIZ$LYfo=8G0XlVd*0;wzHOgA
zefy(L|9H|c@Mv?k35H><#`J=2X3PrRl1$m2kMS{_$Bt!O4CaqvIr=fX>wmMPq%m;>
ziczkS5b_9uq%4FIjXIemG>Ax{2o<ShcFU!dl#4+W9VE0sbaT-yNlK$stwFL_jXK_!
z*^H1doKO%NLM0;*NhBue2D<%fQXNkqwFZ$gsY0QSle3AElq!loOc%0*l4>YlB-HgN
zK}{+trCNoQDt5On!|O+VtdcY57SjD2kKrb(lExF<T#+!CUO{RFQc^_x-!hXjwMHK%
zT*9m%1$~hScXP*aoDG%5N;NCpS0OsFl$`?-&Sb98%^(#OtCX$fE{vWeduj9;lVfQm
zBv+CNankjOj8!Ma$x#e}bn(cAWGIkKEmx~t|Cx=uVD6ItO)ho9>!~yZ{fAyhuy&Co
zAxBbD=McUoh8fa%#W?Qi=I-t$7PC`uwZQD74WEwR=CNyR-(=S1&#W`OZosVk$F_aT
z_bEp7vCI=}PXCy(VdK$T=zC@vv#q<;mrswEGf(!eoJ8ZPTwqGcsw<7EEB@2X5^gr1
z<Ow_~bB1z%O?B4J?=IQ6NpLXua-|V}abj$tOVHBF${4A%zqwgd^Kfe1TqA)Xx9RLy
zT2WV)%bDaq(lqn>(zfk)c{-Zc)*+HC@+#Y67kX&UBJVmMM~9<jl8WMUvo=~%FU%#M
z93bxoM_U#|<cCcy4d4wn6t<P74*6uvUc9-s!YpW#|LAj>B;?5Ls`Td5PEG_RsmBMG
zwY_-iSv(e-J@!Fa;liO1Nw!7ix`4rE-(Mo#ZB=h?>$4SNH$5aj91$ja6z3(sbG^9w
zs>KU$x0pkx*EcC-k{k_gSMTT7&kM?Obu~KL5WpLIpYNBpbo#ra(f5yw0tMEc*Y@8P
zZauJoU+FzE64`swJJ6;8Um>zeO_#Mff5?BY<dT{5mtw(4?wOCuGZ&`!J^yC!gcS*U
zRzEvmQgm=wxcm4JKN-unnpor*Hu$aahEQuCN+tdxjn~=kYr}JLn_c}@-H8KDr`-J9
zvP4nOoe$@_MvC`czT&8Kt#KBFR4I~bQyUM}-8j~DULJnG&Ow`GT_Q4hT<Sm_j(gyB
z@sXzEt7+YN)$M75aO>8y>FwI}iGlr_T8HnP2;BFe;NIe_b3cFPfHlsWv@6PMci587
z-Rmy2H9MLNe~xbbc=nREeyqQ<e8Wn0?zPUOK8YZ|useL9Xu;0n0gQL!S+C`%ry9D0
zJ3DIaW_QH<#^c3SGsb)p1Z2fnNKmhTW6xxtimKMCmfvIATiUYA{`9R~IXAr~tbKLV
zjO%zxiA<i6F$Ha-o{T)SyyJQ6>U~e~Jx{L*`Q<*QKBg0~;82snTh+0S7=7y*-l09N
zJU(+b%ITl}<-@&q=ukG2iD7QkqRl+MCHJBV;F7$GWpjjbFR&TR0nk7I1V8`;KmY_l
z00ck)1V8`;KmY_l00ck)1V8`;KmY{Bg@747OS;$)9UXnFwB>?r%*6}K9n5=i&#o^Y
zYE@CVsZm~!t$oiU<Xtp}Ue^1K9=jKyaoWl(*E+dtoJiCnsHxwGi;PV&9+%^&#iJy_
zNQ&7P%)<5M3FZ{~rY}#VOvbP&_T1K&r3be7U>I}Kj*W@ZCx9LVKmY_l00ck)1V8`;
zKmY_l00ck)1V8`;KmY_l00cl_+zK$C#l0#^)I0ClKCGRYccv}cvf1qTjizbl{<`wR
zPdn!4Hzr=P$+WdadnI{VhfTbNU*FEOHaEs*_1lE#D-ebs*%?+Bc6f0>Z>T^-a50i`
zYY7tv0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X&<ijn0fP#J!b;RPl!CJN
zeP_3&^+|)4`QRbKT#T2!{N!Qhj)@9=!GWtM=yDkr9Lz$w^lvCQTnk}i+__NbKmY_l
p00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1jetx0R1n7{sP|FWm^CM

literal 0
HcmV?d00001

diff --git a/test/data/itemRgb.bed b/test/data/itemRgb.bed
new file mode 100644
index 00000000..c40179c1
--- /dev/null
+++ b/test/data/itemRgb.bed
@@ -0,0 +1,22 @@
+chrX	151071196	151072363	RED	0	+	151071196	151072363	255,0,0
+chrX	151072363	151073530	GREEN	0	+	151072363	151073530	0,255,0
+chrX	151073530	151074697	BLUE	0	+	151073530	151074697	0,0,255
+chrX	151074697	151075864	MID_RED	0	+	151074697	151075864	128,0,0
+chrX	151075864	151077031	MID_GREEN	0	-	151075864	151077031	0,128,0
+chrX	151077031	151078198	MID_BLUE	0	-	151077031	151078198	0,0,128
+chrX	151078198	151079365	VIOLET_RED1	0	-	151078198	151079365	255,62,150
+chrX	151079365	151080532	ORANGE_RED1	0	+	151079365	151080532	255,69,0
+chrX	151080532	151081699	CHOCOLATE1	0	-	151080532	151081699	255,127,36
+chrX	151081699	151082866	GOLD1	0	-	151081699	151082866	255,215,0
+chrX	151082866	151084033	AQUAMARINE1	0	-	151082866	151084033	127,255,212
+chrX	151084033	151085200	TURQUOISE1	0	+	151084033	151085200	0,245,255
+chrX	151085200	151086367	DEEPSKYBLUE1	0	-	151085200	151086367	0,191,255
+chrX	151086367	151087534	DODGERBLUE1	0	-	151086367	151087534	30,144,255
+chrX	151087534	151088701	DarkViolet	0	+	151087534	151088701	148,0,211
+chrX	151088701	151089868	firebrick	0	-	151088701	151089868	178,34,34
+chrX	151089868	151091035	YELLOW	0	-	151089868	151091035	255,255,0
+chrX	151091035	151092202	LawnGreen	0	+	151091035	151092202	124,252,0
+chrX	151092202	151093369	CadetBlue	0	+	151092202	151093369	95,158,160
+chrX	151093369	151094536	LightSlateGray	0	-	151093369	151094536	119,136,153
+chrX	151094536	151095703	PeachPuff	0	-	151094536	151095703	255,218,185
+chrY	0	100	YCOLOR	0	+	25	75	255,0,0
diff --git a/test/jbinary-test.js b/test/jbinary-test.js
index 975fc2e3..b597bd27 100644
--- a/test/jbinary-test.js
+++ b/test/jbinary-test.js
@@ -27,7 +27,6 @@ describe('jBinary', function() {
 
     var jb = new jBinary(u8array.buffer, twoBitTypeSet);
     var header = jb.readAll();
-    console.log(header);
 
     expect(header.magic).to.equal(0x1A412743);  // two bit magic
     expect(header.version).to.equal(0);
@@ -51,9 +50,11 @@ describe('jBinary', function() {
     var buffer = u8array.buffer;
 
     var jb = new jBinary(buffer, uint8TypeSet);
+    var num = 0;
     while (jb.tell() < buffer.byteLength) {
-      var x = jb.read({value: 'uint8'});
-      console.log(jb.tell(), x);
+      var x = jb.read('File');
+      expect(x).to.deep.equal({value: num * num});
+      num++;
     }
   });
 });
diff --git a/test/runner.html b/test/runner.html
index 4d91458b..e2b1a275 100644
--- a/test/runner.html
+++ b/test/runner.html
@@ -10,7 +10,7 @@
   <!-- Polyfills for PhantomJS -->
   <script src="../node_modules/es5-shim/es5-shim.min.js"></script>
   <script src="../node_modules/es5-shim/es5-sham.min.js"></script>
-  <script src="../node_modules/text-encoding/lib/encoding.js"></script>
+  <script src="../node_modules/arraybuffer-slice/index.js"></script>
 
   <!-- Mocha -->
   <script src="../node_modules/mocha/mocha.js"></script>

From e156fa3f943e9f01cfe71704c4781df48e83d440 Mon Sep 17 00:00:00 2001
From: Dan Vanderkam <danvdk@gmail.com>
Date: Sat, 14 Mar 2015 00:07:45 -0400
Subject: [PATCH 3/4] Port TwoBit to jBinary; kill ReadableView

---
 src/BigBed.js              |  3 +-
 src/ReadableView.js        | 72 -------------------------------------
 src/TwoBit.js              | 73 +++++++++++---------------------------
 src/formats/twoBitTypes.js | 43 ++++++++++++++++++++++
 test/ReadableView-test.js  | 72 -------------------------------------
 5 files changed, 65 insertions(+), 198 deletions(-)
 delete mode 100644 src/ReadableView.js
 create mode 100644 src/formats/twoBitTypes.js
 delete mode 100644 test/ReadableView-test.js

diff --git a/src/BigBed.js b/src/BigBed.js
index a6e275fa..38676884 100644
--- a/src/BigBed.js
+++ b/src/BigBed.js
@@ -10,8 +10,7 @@ var Q = require('q'),
     pako = require('pako');  // for gzip inflation
     
 
-var ReadableView = require('./ReadableView'),
-    RemoteFile = require('./RemoteFile'),
+var RemoteFile = require('./RemoteFile'),
     Interval = require('./Interval'),
     ContigInterval = require('./ContigInterval'),
     utils = require('./utils.js'),
diff --git a/src/ReadableView.js b/src/ReadableView.js
deleted file mode 100644
index fb4ebab3..00000000
--- a/src/ReadableView.js
+++ /dev/null
@@ -1,72 +0,0 @@
-/** @flow */
-
-/**
- * Wrapper around an ArrayBuffer which facilitates reading different types of
- * values from it, from start to finish.
- */
-class ReadableView {
-  offset: number;
-  dataView: DataView;
-  constructor(dataView: DataView) {
-    this.offset = 0;
-    this.dataView = dataView;
-  }
-
-  // Read an unsigned 8-bit integer and advance the current position.
-  readUint8(): number {
-    var num = this.dataView.getUint8(this.offset);
-    this.offset++;
-    return num;
-  }
-
-  // Read an unsigned 16-bit integer and advance the current position.
-  readUint16(): number {
-    return this.readUint8() +
-           this.readUint8() * (1 << 8);
-  }
-
-  // Read an unsigned 32-bit integer and advance the current position.
-  readUint32(): number {
-    return this.readUint8()             +
-           this.readUint8() * (1 << 8 ) +
-           this.readUint8() * (1 << 16) +
-           this.readUint8() * (1 << 24);
-  }
-
-  // Read a sequence of 32-bit integers and advance the current position.
-  readUint32Array(n: number): number[] {
-    var result: number[] = [];
-    for (var i = 0; i < n; i++) {
-      result.push(this.readUint32());
-    }
-    return result;
-  }
-
-  /**
-   * Extract a sequence of ASCII characters as a string.
-   * This throws if any non-ASCII characters are encountered.
-   */
-  readAscii(length: number): string {
-    var result = '';
-    for (var i = 0; i < length; i++) {
-      var c = this.readUint8();
-      if (c > 127) {
-        throw 'Encountered non-ASCII character ' + c;
-      }
-      result += String.fromCharCode(c);
-    }
-    return result;
-  }
-
-  // Returns the number of bytes remaining in the buffer.
-  bytesRemaining(): number {
-    return this.dataView.byteLength - this.offset;
-  }
-
-  // Returns the current offset in the buffer.
-  tell(): number {
-    return this.offset;
-  }
-}
-
-module.exports = ReadableView;
diff --git a/src/TwoBit.js b/src/TwoBit.js
index a7496977..1979e50f 100644
--- a/src/TwoBit.js
+++ b/src/TwoBit.js
@@ -6,10 +6,11 @@
 'use strict';
 
 var Q = require('q'),
-    _ = require('underscore');
+    _ = require('underscore'),
+    jBinary = require('jbinary');
 
-var ReadableView = require('./ReadableView'),
-    RemoteFile = require('./RemoteFile');
+var RemoteFile = require('./RemoteFile'),
+    twoBitTypes = require('./formats/twoBitTypes');
 
 var BASE_PAIRS = [
   'T',  // 0=00
@@ -40,33 +41,22 @@ type TwoBitHeader = {
   sequences: Array<FileIndexEntry>;
 }
 
-var TWO_BIT_MAGIC = 0x1A412743;
-
 
 /**
  * Parses a single SequenceRecord from the start of the ArrayBuffer.
  * fileOffset is the position of this sequence within the 2bit file.
  */
-function parseSequenceRecord(dataView: DataView, fileOffset: number): SequenceRecord {
-  var bytes = new ReadableView(dataView);
-  var dnaSize = bytes.readUint32(),
-      nBlockCount = bytes.readUint32(),
-      nBlockStarts = bytes.readUint32Array(nBlockCount),
-      nBlockSizes = bytes.readUint32Array(nBlockCount),
-      // The masks can be quite large (~2MB for chr1) and we mostly don't care
-      // about them.  So we ignore them, but we do need to know their length.
-      maskBlockCount = bytes.readUint32();
-      // maskBlockCount maskBlockStarts = bytes.readUint32Array(maskBlockCount),
-      // maskBlockSizes = bytes.readUint32Array(maskBlockCount),
-      // reserved = bytes.readUint32();
-
-  var dnaOffset = bytes.tell() + 8 * maskBlockCount + 4;
+function parseSequenceRecord(buffer: ArrayBuffer, fileOffset: number): SequenceRecord {
+  var jb = new jBinary(buffer, twoBitTypes.TYPE_SET);
+  var header = jb.read('SequenceRecord');
+
+  var dnaOffset = jb.tell() + 8 * header.maskBlockCount + 4;
 
   return {
-    numBases: dnaSize,
-    unknownBlockStarts: nBlockStarts,
-    unknownBlockLengths: nBlockSizes,
-    numMaskBlocks: maskBlockCount,
+    numBases: header.dnaSize,
+    unknownBlockStarts: header.nBlockStarts,
+    unknownBlockLengths: header.nBlockSizes,
+    numMaskBlocks: header.maskBlockCount,
     maskBlockStarts: [],
     maskBlockLengths: [],
     dnaOffsetFromHeader: dnaOffset,
@@ -75,35 +65,13 @@ function parseSequenceRecord(dataView: DataView, fileOffset: number): SequenceRe
 }
 
 
-/**
- * Parses the 2bit file header.
- */
-function parseHeader(dataView: DataView): TwoBitHeader {
-  var bytes = new ReadableView(dataView);
-  var magic = bytes.readUint32();
-  if (magic != TWO_BIT_MAGIC) {
-    throw 'Invalid magic';
-  }
-  var version = bytes.readUint32();
-  if (version != 0) {
-    throw 'Unknown version of 2bit';
-  }
-  var sequenceCount = bytes.readUint32(),
-      reserved = bytes.readUint32();
-
-  var sequences: Array<FileIndexEntry> = [];
-  for (var i = 0; i < sequenceCount; i++) {
-    var nameSize = bytes.readUint8();
-    var name = bytes.readAscii(nameSize);
-    var offset = bytes.readUint32();
-    sequences.push({name, offset});
-  }
-  // hg19 header is 1671 bytes to this point
+function parseHeader(buffer: ArrayBuffer): TwoBitHeader {
+  var jb = new jBinary(buffer, twoBitTypes.TYPE_SET);
+  var header = jb.read('Header');
 
   return {
-    sequenceCount,
-    reserved,
-    sequences
+    sequenceCount: header.sequenceCount,
+    sequences: header.sequences
   };
 }
 
@@ -115,6 +83,7 @@ function parseHeader(dataView: DataView): TwoBitHeader {
  * modification.
  */
 function unpackDNA(dataView: DataView, startBasePair: number, numBasePairs: number): Array<string> {
+  // TODO: use jBinary bitfield for this
   var basePairs: Array<string> = [];
   basePairs.length = dataView.byteLength * 4;  // pre-allocate
   var basePairIdx = -startBasePair;
@@ -167,7 +136,7 @@ class TwoBit {
 
     // TODO: if 16k is insufficient, fetch the right amount.
     this.remoteFile.getBytes(0, 16*1024).then(function(buffer) {
-        var header = parseHeader(new DataView(buffer));
+        var header = parseHeader(buffer);
         deferredHeader.resolve(header);
       }).done();
   }
@@ -209,7 +178,7 @@ class TwoBit {
 
       // TODO: if 4k is insufficient, fetch the right amount.
       return this.remoteFile.getBytes(seq.offset, 4095).then(
-          buf => parseSequenceRecord(new DataView(buf), seq.offset));
+          buf => parseSequenceRecord(buf, seq.offset));
     });
   }
 }
diff --git a/src/formats/twoBitTypes.js b/src/formats/twoBitTypes.js
new file mode 100644
index 00000000..2d71c2c4
--- /dev/null
+++ b/src/formats/twoBitTypes.js
@@ -0,0 +1,43 @@
+/**
+ * TwoBit is a packed genome format.
+ * See http://genome.ucsc.edu/FAQ/FAQformat.html#format7
+ */
+
+'use strict';
+
+var jBinary = require('jbinary');
+
+var TYPE_SET = {
+  'jBinary.littleEndian': true,
+
+  'Header': {
+    magic: ['const', 'uint32', 0x1A412743, true],  // 2bit signature
+    version: ['const', 'uint32', 0, true],  // only version 0 is matched
+
+    sequenceCount: 'uint32',  // number of SequenceHeader index entries below
+    reserved: 'uint32',
+
+    sequences: ['array', 'SequenceHeader', 'sequenceCount']
+  },
+
+  'SequenceHeader': {  // one index entry: length-prefixed name plus file offset
+    nameSize: 'uint8',
+    name: ['string', 'nameSize'],
+    offset: 'uint32'  // position of this sequence's SequenceRecord within the 2bit file
+  },
+
+  'SequenceRecord': {
+    dnaSize: 'uint32',  // number of bases in the sequence
+    nBlockCount: 'uint32',  // length of the two nBlock arrays below
+    nBlockStarts: ['array', 'uint32', 'nBlockCount'],
+    nBlockSizes: ['array', 'uint32', 'nBlockCount'],
+    // The masks can be quite large (~2MB for chr1) and we mostly don't care
+    // about them.  So we ignore them, but we do need to know their length.
+    maskBlockCount: 'uint32',
+    // Not parsed; the reader skips 8 * maskBlockCount + 4 bytes to cover the following:
+    // maskBlockStarts: ['array', 'uint32', 'maskBlockCount'], maskBlockSizes: (same shape)
+    // reserved: 'uint32'
+  }
+};
+
+module.exports = {TYPE_SET};
diff --git a/test/ReadableView-test.js b/test/ReadableView-test.js
deleted file mode 100644
index c2caff32..00000000
--- a/test/ReadableView-test.js
+++ /dev/null
@@ -1,72 +0,0 @@
-var chai = require('chai');
-var expect = chai.expect;
-
-var ReadableView = require('../src/ReadableView');
-
-describe('ReadableView', function() {
-  it('should read 8-bit unsigned ints', function() {
-    var u8 = new Uint8Array(5);
-    u8[0] = 100;
-    u8[1] = 255;
-    u8[2] = 0;
-    u8[3] = 33;
-    u8[4] = 127;
-
-    var bytes = new ReadableView(new DataView(u8.buffer));
-    expect(bytes.tell()).to.equal(0);
-    expect(bytes.bytesRemaining()).to.equal(5);
-    expect(bytes.readUint8()).to.equal(100);
-    expect(bytes.tell()).to.equal(1);
-    expect(bytes.bytesRemaining()).to.equal(4);
-    expect(bytes.readUint8()).to.equal(255);
-    expect(bytes.readUint8()).to.equal(0);
-    expect(bytes.readUint8()).to.equal(33);
-    expect(bytes.readUint8()).to.equal(127);
-    expect(bytes.bytesRemaining()).to.equal(0);
-    expect(bytes.tell()).to.equal(5);
-  });
-
-  it('should read strings', function() {
-    var u8 = new Uint8Array(6);
-    u8[0] = 4;
-    u8[1] = '2'.charCodeAt(0);
-    u8[2] = 'b'.charCodeAt(0);
-    u8[3] = 'i'.charCodeAt(0);
-    u8[4] = 't'.charCodeAt(0);
-    u8[5] = '?'.charCodeAt(0);
-
-    var bytes = new ReadableView(new DataView(u8.buffer));
-    expect(bytes.tell()).to.equal(0);
-    expect(bytes.bytesRemaining()).to.equal(6);
-    expect(bytes.readUint8()).to.equal(4);
-    expect(bytes.readAscii(4)).to.equal('2bit');
-    expect(bytes.tell()).to.equal(5);
-    expect(bytes.bytesRemaining()).to.equal(1);
-    expect(bytes.readAscii(1)).to.equal('?');
-    expect(bytes.bytesRemaining()).to.equal(0);
-  });
-
-  it('should read uint32 arrays', function() {
-    var u32 = new Uint32Array(5);
-    u32[0] = 1;
-    u32[1] = 2;
-    u32[2] = 12345678;
-    u32[3] = 1234567890;
-    u32[4] = 3;
-
-    var bytes = new ReadableView(new DataView(u32.buffer));
-    expect(bytes.tell()).to.equal(0);
-    expect(bytes.bytesRemaining()).to.equal(20);
-    expect(bytes.readUint32Array(5)).to.deep.equal([1,2,12345678,1234567890,3]);
-    expect(bytes.tell()).to.equal(20);
-    expect(bytes.bytesRemaining()).to.equal(0);
-  });
-
-  it('should read a large uint32', function() {
-    var u32 = new Uint32Array(1);
-    u32[0] = 0xebf28987;
-
-    var bytes = new ReadableView(new DataView(u32.buffer));
-    expect(bytes.readUint32()).to.equal(0xebf28987);
-  });
-});

From a2e43f3c0374cb39a031bc1db0eadaf1eae489a6 Mon Sep 17 00:00:00 2001
From: Dan Vanderkam <danvdk@gmail.com>
Date: Sat, 14 Mar 2015 11:21:43 -0400
Subject: [PATCH 4/4] Expand BigBed tests & cleanup

---
 src/BigBed.js              | 114 +++++++++++++++++++++++--------------
 src/Controls.js            |  11 ++--
 src/TwoBit.js              |   1 -
 src/formats/helpers.js     |   1 +
 src/formats/twoBitTypes.js |   1 -
 src/main.js                |   6 --
 test/BigBed-test.js        |  62 +++++++++++++++++---
 test/jbinary-test.js       |  60 -------------------
 types/types.js             |   4 +-
 9 files changed, 134 insertions(+), 126 deletions(-)
 delete mode 100644 test/jbinary-test.js

diff --git a/src/BigBed.js b/src/BigBed.js
index 38676884..5f1d0dd5 100644
--- a/src/BigBed.js
+++ b/src/BigBed.js
@@ -21,19 +21,16 @@ function parseHeader(buffer) {
   // TODO: check Endianness using magic. Possibly use jDataView.littleEndian
   // to flip the endianness for jBinary consumption.
   // NB: dalliance doesn't support big endian formats.
-  var jb = new jBinary(buffer, bbi.TYPE_SET);
-  var header = jb.read('Header');
-
-  return header;
+  return new jBinary(buffer, bbi.TYPE_SET).read('Header');
 }
 
+// The "CIR" tree contains a mapping from sequence -> block offsets.
+// It stands for "Chromosome Index R tree"
 function parseCirTree(buffer) {
-  var jb = new jBinary(buffer, bbi.TYPE_SET);
-  var cirTree = jb.read('CirTree');
-
-  return cirTree;
+  return new jBinary(buffer, bbi.TYPE_SET).read('CirTree');
 }
 
+// Extract a map from contig name --> contig ID from the bigBed header.
 function generateContigMap(twoBitHeader): {[key:string]: number} {
   // Just assume it's a flat "tree" for now.
   var nodes = twoBitHeader.chromosomeTree.nodes.contents;
@@ -46,6 +43,16 @@ function generateContigMap(twoBitHeader): {[key:string]: number} {
   }));
 }
 
+// Invert a contig name --> contig ID map into an array of names indexed by contig ID.
+function reverseContigMap(contigMap: {[key:string]: number}): Array<string> {
+  var ary = [];
+  _.forEach(contigMap, (index, name) => {
+    ary[index] = name;
+  });
+  return ary;
+}
+
+// Map contig name to contig ID. Leading "chr" is optional.
 function getContigId(contigMap, contig) {
   if (contig in contigMap) {
     return contigMap[contig];
@@ -57,15 +64,7 @@ function getContigId(contigMap, contig) {
   return null;
 }
 
-function reverseContigMap(contigMap: {[key:string]: number}): Array<string> {
-  var ary = [];
-  _.forEach(contigMap, (index, name) => {
-    ary[index] = name;
-  });
-  return ary;
-}
-
-// Get all blocks in the file containing features which intersect with contigRange.
+// Find all blocks containing features which intersect with contigRange.
 function findOverlappingBlocks(twoBitHeader, cirTree, contigRange) {
   // Do a recursive search through the index tree
   var matchingBlocks = [];
@@ -101,32 +100,74 @@ function extractFeaturesInRange(buffer, dataRange, blocks, contigRange) {
     var beds = jb.read('BedBlock');
 
     beds = beds.filter(function(bed) {
-      var bedInterval = new ContigInterval(bed.chrId, bed.start, bed.stop);
-      var r = contigRange.intersects(bedInterval);
-      return r;
+      // Note: BED intervals are explicitly half-open.
+      // The "- 1" converts them to closed intervals for ContigInterval.
+      var bedInterval = new ContigInterval(bed.chrId, bed.start, bed.stop - 1);
+      return contigRange.intersects(bedInterval);
     });
 
     return beds;
   }));
 }
 
+// Fetch the byte range spanning all blocks which overlap contigRange and extract the
+// overlapping features. Returns [] directly if no block matches, else a promise.
+function fetchFeatures(contigRange, header, cirTree, contigMap, remoteFile) {
+  var blocks = findOverlappingBlocks(header, cirTree, contigRange);
+  if (blocks.length === 0) {
+    return [];
+  }
+
+  // Find the range in the file which contains all relevant blocks.
+  // In theory there could be gaps between blocks, but it's hard to see how.
+  var range = Interval.boundingInterval(
+      blocks.map(n => new Interval(+n.offset, n.offset+n.size)));
+
+  return remoteFile.getBytes(range.start, range.length())
+      .then(buffer => {
+        var reverseMap = reverseContigMap(contigMap);
+        var features = extractFeaturesInRange(buffer, range, blocks, contigRange);
+        features.forEach(f => {
+          f.contig = reverseMap[f.chrId];  // replace the numeric contig ID with its name
+          delete f.chrId;
+        });
+        return features;
+      });
+}
+
+
+type BedRow = {
+  // Half-open interval [start, stop) for the BED row, on the named contig.
+  contig: string;
+  start: number;
+  stop: number;
+  // Remaining fields in the BED row (typically tab-delimited)
+  rest: string;
+}
+
 
 class BigBed {
   remoteFile: RemoteFile;
   header: Q.Promise<any>;
   cirTree: Q.Promise<any>;
+  contigMap: Q.Promise<{[key:string]: number}>;
 
+  /**
+   * Prepare to request features from a remote bigBed file.
+   * The remote source must support HTTP Range headers.
+   * This will kick off several async requests for portions of the file.
+   */
   constructor(url: string) {
     this.remoteFile = new RemoteFile(url);
     this.header = this.remoteFile.getBytes(0, 64*1024).then(parseHeader);
     this.contigMap = this.header.then(generateContigMap);
 
-    // Next: fetch [header.unzoomedIndexOffset, zoomHeaders[0].dataOffset] and parse
-    // the "CIR" tree.
+    // Next: fetch the block index and parse out the "CIR" tree.
     this.cirTree = this.header.then(header => {
       // zoomHeaders[0].dataOffset is the next entry in the file.
       // We assume the "cirTree" section goes all the way to that point.
       // Lacking zoom headers, assume it's 4k.
+      // TODO: fetch more than 4k if necessary
       var start = header.unzoomedIndexOffset,
           zoomHeader = header.zoomHeaders[0],
           length = zoomHeader ? zoomHeader.dataOffset - start : 4096;
@@ -139,9 +180,13 @@ class BigBed {
     this.cirTree.done();
   }
 
-  // Returns all BED entries which overlap the range.
-  // TODO: factor logic out into a helper
-  getFeaturesInRange(contig: string, start: number, stop: number): Q.Promise<any> {
+  /**
+   * Returns all BED entries which overlap the range.
+   * Note: while the requested range is inclusive on both ends, ranges in
+   * bigBed format files are half-open (inclusive at the start, exclusive at
+   * the end).
+   */
+  getFeaturesInRange(contig: string, start: number, stop: number): Q.Promise<Array<BedRow>> {
     return Q.spread([this.header, this.cirTree, this.contigMap],
                     (header, cirTree, contigMap) => {
       var contigIx = getContigId(contigMap, contig);
@@ -149,24 +194,7 @@ class BigBed {
         throw `Invalid contig ${contig}`;
       }
       var contigRange = new ContigInterval(contigIx, start, stop);
-
-      var blocks = findOverlappingBlocks(header, cirTree, contigRange);
-      if (blocks.length == 0) {
-        return [];
-      }
-
-      var range = Interval.boundingInterval(
-          blocks.map(n => new Interval(+n.offset, n.offset+n.size)));
-      return this.remoteFile.getBytes(range.start, range.length())
-          .then(buffer => {
-            var reverseMap = reverseContigMap(contigMap);
-            var features = extractFeaturesInRange(buffer, range, blocks, contigRange)
-            features.forEach(f => {
-              f.contig = reverseMap[f.chrId];
-              delete f.chrId;
-            });
-            return features;
-          });
+      return fetchFeatures(contigRange, header, cirTree, contigMap, this.remoteFile);
     });
   }
 }
diff --git a/src/Controls.js b/src/Controls.js
index 3801d09f..c6a9879d 100644
--- a/src/Controls.js
+++ b/src/Controls.js
@@ -14,8 +14,7 @@ var Controls = React.createClass({
     // XXX: can we be more specific than this with Flow?
     onChange: React.PropTypes.func.isRequired
   },
-  makeRange: function() {
-    // XXX Removing the Number() should lead to type errors, but doesn't.
+  makeRange: function(): GenomeRange {
     return {
       contig: this.refs.contig.getDOMNode().value,
       start: Number(this.refs.start.getDOMNode().value),
@@ -35,8 +34,10 @@ var Controls = React.createClass({
     this.refs.start.getDOMNode().value = r.start;
     this.refs.stop.getDOMNode().value = r.stop;
 
-    var contigIdx = this.props.contigList.indexOf(r.contig);
-    this.refs.contig.getDOMNode().selectedIndex = contigIdx;
+    if (this.props.contigList) {
+      var contigIdx = this.props.contigList.indexOf(r.contig);
+      this.refs.contig.getDOMNode().selectedIndex = contigIdx;
+    }
   },
   render: function(): any {
     var contigOptions = this.props.contigList
@@ -56,7 +57,7 @@ var Controls = React.createClass({
       </form>
     );
   },
-  componentDidUpdate: function(prevProps, prevState) {
+  componentDidUpdate: function(prevProps: Object) {
     if (!_.isEqual(prevProps.range, this.props.range)) {
       this.updateRangeUI();
     }
diff --git a/src/TwoBit.js b/src/TwoBit.js
index 1979e50f..d8552e7c 100644
--- a/src/TwoBit.js
+++ b/src/TwoBit.js
@@ -37,7 +37,6 @@ type SequenceRecord = {
 
 type TwoBitHeader = {
   sequenceCount: number;
-  reserved: number;
   sequences: Array<FileIndexEntry>;
 }
 
diff --git a/src/formats/helpers.js b/src/formats/helpers.js
index 235f4a8d..cdf508cb 100644
--- a/src/formats/helpers.js
+++ b/src/formats/helpers.js
@@ -3,6 +3,7 @@
  */
 var jBinary = require('jbinary');
 
+// Read a jBinary type at an offset in the buffer specified by another field.
 function typeAtOffset(typeName, offsetFieldName) {
   return jBinary.Template({
       baseType: typeName,
diff --git a/src/formats/twoBitTypes.js b/src/formats/twoBitTypes.js
index 2d71c2c4..74934bae 100644
--- a/src/formats/twoBitTypes.js
+++ b/src/formats/twoBitTypes.js
@@ -13,7 +13,6 @@ var TYPE_SET = {
   'Header': {
     magic: ['const', 'uint32', 0x1A412743, true],
     version: ['const', 'uint32', 0, true],
-
     sequenceCount: 'uint32',
     reserved: 'uint32',
 
diff --git a/src/main.js b/src/main.js
index 90e84fd2..a1c2688c 100644
--- a/src/main.js
+++ b/src/main.js
@@ -1,7 +1,6 @@
 /* @flow */
 var React = require('react'),
     TwoBit = require('./TwoBit'),
-    BigBed = require('./BigBed'),
     Root = require('./Root'),
     createTwoBitDataSource = require('./TwoBitDataSource');
 
@@ -24,8 +23,3 @@ genome.getFeaturesInRange('chr1', 123000, 124000).done();
 
 var root = React.render(<Root referenceSource={dataSource} />,
                         document.getElementById('root'));
-
-var ensembl = new BigBed('/ensGene.bb');
-
-window.ensembl = ensembl;
-window.genome = genome;
diff --git a/test/BigBed-test.js b/test/BigBed-test.js
index 56081a92..90436bd1 100644
--- a/test/BigBed-test.js
+++ b/test/BigBed-test.js
@@ -1,19 +1,15 @@
-// Things to test:
-// - getFeatures which return no features
-// - getFeatures which crosses a block boundary
-// - getFeatures which crosses a contig boundary (not currently possible)
-
 var chai = require('chai');
 var expect = chai.expect;
 var assert = chai.assert;
 
+var Q = require('q');
 var BigBed = require('../src/BigBed');
 
 describe('BigBed', function() {
   function getTestBigBed() {
     // This file was generated using UCSC tools:
     // cd kent/src/utils/bedToBigBed/tests; make
-    // This file is compressed, little endian and contains autoSQL.
+    // It is compressed, little endian, has autoSQL and two blocks.
     return new BigBed('/test/data/itemRgb.bb');
   }
 
@@ -22,8 +18,9 @@ describe('BigBed', function() {
 
     bb.getFeaturesInRange('chrX', 151077036, 151078532)
         .then(features => {
-          // chrX	151077031	151078198	MID_BLUE	0	-	151077031	151078198	0,0,128
-          // chrX	151078198	151079365	VIOLET_RED1	0	-	151078198	151079365	255,62,150
+          // Here's what these two lines in the file look like:
+          // chrX 151077031 151078198 MID_BLUE 0 - 151077031 151078198 0,0,128
+          // chrX 151078198 151079365 VIOLET_RED1 0 - 151078198 151079365 255,62,150
           expect(features).to.have.length(2);
           expect(features[0].contig).to.equal('chrX');
           expect(features[0].start).to.equal(151077031);
@@ -47,4 +44,53 @@ describe('BigBed', function() {
         })
         .done();
   });
+
+  it('should have inclusive ranges', function(done) {
+    // The matches look like this:
+    // chrX 151071196 151072363 RED
+    // chrX 151094536 151095703 PeachPuff
+    var red = [151071196, 151072362],  // note: stop is inclusive
+        peachpuff = [151094536, 151095702];
+
+    var bb = getTestBigBed();
+    var expectN = n => features => {
+        expect(features).to.have.length(n);
+      };
+
+    Q.all([
+        // request for precisely one row from the file.
+        bb.getFeaturesInRange('chrX', red[0], red[1])
+            .then(expectN(1)),
+        // the additional base in the range hits another row.
+        bb.getFeaturesInRange('chrX', red[0], 1 + red[1])
+            .then(expectN(2)),
+        // this overlaps exactly one base pair of the first feature.
+        bb.getFeaturesInRange('chrX', red[0] - 1000, red[0])
+            .then(expectN(1)),
+        // but this range ends one base pair before it.
+        bb.getFeaturesInRange('chrX', red[0] - 1000, red[0] - 1)
+            .then(expectN(0))
+    ]).then(() => {
+      done();
+    }).done();
+  });
+
+  it('should add "chr" to contig names', function(done) {
+    var bb = getTestBigBed();
+
+    bb.getFeaturesInRange('X', 151077036, 151078532)
+        .then(features => {
+          // (same as 'should extract features in a range' test)
+          expect(features).to.have.length(2);
+          expect(features[0].contig).to.equal('chrX');
+          expect(features[1].contig).to.equal('chrX');
+          done();
+        })
+        .done();
+  });
+
+  // Things left to test:
+  // - getFeatures which crosses a block boundary
+  // - uncompressed bigBed file.
 });
+
diff --git a/test/jbinary-test.js b/test/jbinary-test.js
deleted file mode 100644
index b597bd27..00000000
--- a/test/jbinary-test.js
+++ /dev/null
@@ -1,60 +0,0 @@
-// This is a playground to ensure that I understand how jBinary works.
-var chai = require('chai');
-var expect = chai.expect;
-
-var jBinary = require('jbinary');
-
-describe('jBinary', function() {
-  it('should read two-bit headers', function() {
-    var twoBitTypeSet = {
-      'jBinary.all': 'File',
-      'jBinary.littleEndian': true,
-      'File': {
-        magic: ['const', 'uint32', 0x1A412743, true],
-        version: ['const', 'uint32', 0, true],
-        sequenceCount: 'uint32',
-        reserved: 'uint32'
-      }
-    };
-
-    var byteArray = [
-        0x43, 0x27, 0x41, 0x1a,
-        0x00, 0x00, 0x00, 0x00,
-        0x5d, 0x00, 0x00, 0x00,
-        0x00, 0x00, 0x00, 0x00];
-    var u8array = new Uint8Array(byteArray.length);
-    byteArray.forEach((val, idx) => { u8array[idx] = val; });
-
-    var jb = new jBinary(u8array.buffer, twoBitTypeSet);
-    var header = jb.readAll();
-
-    expect(header.magic).to.equal(0x1A412743);  // two bit magic
-    expect(header.version).to.equal(0);
-    expect(header.sequenceCount).to.equal(93);
-    expect(header.reserved).to.equal(0);
-  });
-
-  it('should advance through a sequence', function() {
-    var uint8TypeSet = {
-      'jBinary.all': 'File',
-      'jBinary.littleEndian': true,
-      'File': {
-        value: 'uint8'
-      }
-    };
-
-    var u8array = new Uint8Array(16);
-    for (var i = 0; i < 16; i++) {
-      u8array[i] = i * i;
-    }
-    var buffer = u8array.buffer;
-
-    var jb = new jBinary(buffer, uint8TypeSet);
-    var num = 0;
-    while (jb.tell() < buffer.byteLength) {
-      var x = jb.read('File');
-      expect(x).to.deep.equal({value: num * num});
-      num++;
-    }
-  });
-});
diff --git a/types/types.js b/types/types.js
index 5daabf22..3357c71d 100644
--- a/types/types.js
+++ b/types/types.js
@@ -1,5 +1,5 @@
 declare class GenomeRange {
   contig: string;
-  start: number;
-  stop: number;  // XXX inclusive or exclusive?
+  start: number;  // inclusive
+  stop: number;  // inclusive
 }