Skip to content

Commit

Permalink
Port TwoBit to jBinary; kill ReadableView
Browse files Browse the repository at this point in the history
  • Loading branch information
danvk committed Mar 14, 2015
1 parent f754b23 commit e8ace77
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 198 deletions.
3 changes: 1 addition & 2 deletions src/BigBed.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@ var Q = require('q'),
pako = require('pako'); // for gzip inflation


var ReadableView = require('./ReadableView'),
RemoteFile = require('./RemoteFile'),
var RemoteFile = require('./RemoteFile'),
Interval = require('./Interval'),
ContigInterval = require('./ContigInterval'),
utils = require('./utils.js'),
Expand Down
72 changes: 0 additions & 72 deletions src/ReadableView.js

This file was deleted.

73 changes: 21 additions & 52 deletions src/TwoBit.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@
'use strict';

var Q = require('q'),
_ = require('underscore');
_ = require('underscore'),
jBinary = require('jbinary');

var ReadableView = require('./ReadableView'),
RemoteFile = require('./RemoteFile');
var RemoteFile = require('./RemoteFile'),
twoBitTypes = require('./formats/twoBitTypes');

var BASE_PAIRS = [
'T', // 0=00
Expand Down Expand Up @@ -40,33 +41,22 @@ type TwoBitHeader = {
sequences: Array<FileIndexEntry>;
}

var TWO_BIT_MAGIC = 0x1A412743;


/**
* Parses a single SequenceRecord from the start of the ArrayBuffer.
* fileOffset is the position of this sequence within the 2bit file.
*/
function parseSequenceRecord(dataView: DataView, fileOffset: number): SequenceRecord {
var bytes = new ReadableView(dataView);
var dnaSize = bytes.readUint32(),
nBlockCount = bytes.readUint32(),
nBlockStarts = bytes.readUint32Array(nBlockCount),
nBlockSizes = bytes.readUint32Array(nBlockCount),
// The masks can be quite large (~2MB for chr1) and we mostly don't care
// about them. So we ignore them, but we do need to know their length.
maskBlockCount = bytes.readUint32();
// maskBlockCount maskBlockStarts = bytes.readUint32Array(maskBlockCount),
// maskBlockSizes = bytes.readUint32Array(maskBlockCount),
// reserved = bytes.readUint32();

var dnaOffset = bytes.tell() + 8 * maskBlockCount + 4;
function parseSequenceRecord(buffer: ArrayBuffer, fileOffset: number): SequenceRecord {
var jb = new jBinary(buffer, twoBitTypes.TYPE_SET);
var header = jb.read('SequenceRecord');

var dnaOffset = jb.tell() + 8 * header.maskBlockCount + 4;

return {
numBases: dnaSize,
unknownBlockStarts: nBlockStarts,
unknownBlockLengths: nBlockSizes,
numMaskBlocks: maskBlockCount,
numBases: header.dnaSize,
unknownBlockStarts: header.nBlockStarts,
unknownBlockLengths: header.nBlockSizes,
numMaskBlocks: header.maskBlockCount,
maskBlockStarts: [],
maskBlockLengths: [],
dnaOffsetFromHeader: dnaOffset,
Expand All @@ -75,35 +65,13 @@ function parseSequenceRecord(dataView: DataView, fileOffset: number): SequenceRe
}


/**
* Parses the 2bit file header.
*/
function parseHeader(dataView: DataView): TwoBitHeader {
var bytes = new ReadableView(dataView);
var magic = bytes.readUint32();
if (magic != TWO_BIT_MAGIC) {
throw 'Invalid magic';
}
var version = bytes.readUint32();
if (version != 0) {
throw 'Unknown version of 2bit';
}
var sequenceCount = bytes.readUint32(),
reserved = bytes.readUint32();

var sequences: Array<FileIndexEntry> = [];
for (var i = 0; i < sequenceCount; i++) {
var nameSize = bytes.readUint8();
var name = bytes.readAscii(nameSize);
var offset = bytes.readUint32();
sequences.push({name, offset});
}
// hg19 header is 1671 bytes to this point
function parseHeader(buffer: ArrayBuffer): TwoBitHeader {
var jb = new jBinary(buffer, twoBitTypes.TYPE_SET);
var header = jb.read('Header');

return {
sequenceCount,
reserved,
sequences
sequenceCount: header.sequenceCount,
sequences: header.sequences
};
}

Expand All @@ -115,6 +83,7 @@ function parseHeader(dataView: DataView): TwoBitHeader {
* modification.
*/
function unpackDNA(dataView: DataView, startBasePair: number, numBasePairs: number): Array<string> {
// TODO: use jBinary bitfield for this
var basePairs: Array<string> = [];
basePairs.length = dataView.byteLength * 4; // pre-allocate
var basePairIdx = -startBasePair;
Expand Down Expand Up @@ -167,7 +136,7 @@ class TwoBit {

// TODO: if 16k is insufficient, fetch the right amount.
this.remoteFile.getBytes(0, 16*1024).then(function(buffer) {
var header = parseHeader(new DataView(buffer));
var header = parseHeader(buffer);
deferredHeader.resolve(header);
}).done();
}
Expand Down Expand Up @@ -209,7 +178,7 @@ class TwoBit {

// TODO: if 4k is insufficient, fetch the right amount.
return this.remoteFile.getBytes(seq.offset, 4095).then(
buf => parseSequenceRecord(new DataView(buf), seq.offset));
buf => parseSequenceRecord(buf, seq.offset));
});
}
}
Expand Down
43 changes: 43 additions & 0 deletions src/formats/twoBitTypes.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/**
* TwoBit is a packed genome format.
* See http://genome.ucsc.edu/FAQ/FAQformat.html#format7
*/

'use strict';

var jBinary = require('jbinary');

// jBinary type set describing the on-disk structures of a 2bit file.
// Fields are listed in the order they appear in the file, so the order of
// keys within each structure is significant and must not be rearranged.
var TYPE_SET = {
// Decode multi-byte integers as little-endian.
'jBinary.littleEndian': true,

// The file header: signature, version, then an index of named sequences.
'Header': {
// Fixed 2bit signature and version number.
// NOTE(review): the trailing `true` is presumably jBinary's "strict" flag,
// making a mismatch throw rather than be silently accepted -- confirm
// against the jBinary 'const' documentation.
magic: ['const', 'uint32', 0x1A412743, true],
version: ['const', 'uint32', 0, true],

sequenceCount: 'uint32',
reserved: 'uint32',

// One SequenceHeader per sequence; the element count is taken from the
// sequenceCount field read above.
sequences: ['array', 'SequenceHeader', 'sequenceCount']
},

// A single index entry: a length-prefixed name plus a uint32 offset.
'SequenceHeader': {
nameSize: 'uint8',
// String whose length comes from the preceding nameSize field.
name: ['string', 'nameSize'],
// NOTE(review): presumably the byte position of this sequence's
// SequenceRecord within the file -- verify against the reader.
offset: 'uint32'
},

// The leading portion of a per-sequence record. Parsing deliberately stops
// after maskBlockCount; the mask arrays and the reserved word are not read.
'SequenceRecord': {
dnaSize: 'uint32',
nBlockCount: 'uint32',
// Two parallel arrays, each with nBlockCount entries.
nBlockStarts: ['array', 'uint32', 'nBlockCount'],
nBlockSizes: ['array', 'uint32', 'nBlockCount'],
// The masks can be quite large (~2MB for chr1) and we mostly don't care
// about them. So we ignore them, but we do need to know their length.
maskBlockCount: 'uint32',
// Left unparsed on purpose. These fields occupy 8 * maskBlockCount + 4 bytes
// (two uint32 arrays plus one uint32), which a reader must skip to get past
// the end of the record header.
// maskBlockStarts: ['array', 'uint32', 'maskBlockCount']
// maskBlockSizes: ['array', 'uint32', 'maskBlockCount']
// reserved: 'uint32'
}
};

module.exports = {TYPE_SET};
72 changes: 0 additions & 72 deletions test/ReadableView-test.js

This file was deleted.

0 comments on commit e8ace77

Please sign in to comment.