Skip to content

Commit

Permalink
Document data sources
Browse files Browse the repository at this point in the history
  • Loading branch information
danvk committed Mar 20, 2015
1 parent a8de3d2 commit b7d0d24
Show file tree
Hide file tree
Showing 6 changed files with 53 additions and 15 deletions.
5 changes: 1 addition & 4 deletions test/BigBed-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,7 @@ var ContigInterval = require('../src/ContigInterval');

describe('BigBed', function() {
function getTestBigBed() {
// This file was generated using UCSC tools:
// cd kent/src/utils/bedToBigBed/tests; make
// It is compressed, little endian, has autoSQL and two blocks.
return new BigBed('/test/data/itemRgb.bb');
return new BigBed('/test/data/itemRgb.bb'); // See test/data/README.md
}

it('should extract features in a range', function(done) {
Expand Down
6 changes: 1 addition & 5 deletions test/BigBedDataSource-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,7 @@ var BigBed = require('../src/BigBed'),

describe('BigBedDataSource', function() {
function getTestSource() {
// This file was created from Biodalliance's ensGene.bb via:
// bigBedToBed ensGene.bb ensGene.bed
// grep '^chr17\t' ensGene.bed > /tmp/ensGene17.bed
// bedToBigBed -type=bed12+2 /tmp/ensGene17.bed <(echo "chr17 78774742")
// test/data/ensembl.chr17.bb
// See test/data/README.md
return createBigBedDataSource(new BigBed('/test/data/ensembl.chr17.bb'));
}

Expand Down
6 changes: 1 addition & 5 deletions test/TwoBit-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,7 @@ var TwoBit = require('../src/TwoBit');

describe('TwoBit', function() {
function getTestTwoBit() {
// This file was generated using UCSC tools:
// twoBitToFa -seqList=./test/seqList.txt hg19.2bit /tmp/extract.fa
// perl -i -pe 's/:.*//' /tmp/extract.fa
// faToTwoBit /tmp/extract.fa test/data/test.2bit
return new TwoBit('/test/data/test.2bit');
return new TwoBit('/test/data/test.2bit'); // See test/data/README.md
}

it('should have the right contigs', function(done) {
Expand Down
2 changes: 1 addition & 1 deletion test/components-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ describe('Root component', function() {
var dataSource = createTwoBitDataSource(genome);

// This file contains just the TP53 gene, shifted so that it starts at the
// beginning of chr17 (to match test.2bit).
// beginning of chr17 (to match test.2bit). See test/data/README.md.
var ensembl = new BigBed('/test/data/tp53.shifted.bb');
var ensemblDataSource = createBigBedDataSource(ensembl);

Expand Down
49 changes: 49 additions & 0 deletions test/data/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
This directory contains many small data files used in testing.

This file documents how they were generated.


#### test.2bit

This is a small subset of the hg19 reference genome. It contains small swaths
of chr1 and chr22 and a larger swath of chr17. It was generated from hg19.2bit
using UCSC tools:

curl -O http://www.biodalliance.org/datasets/hg19.2bit
twoBitToFa -seqList=./test/data/seqList.txt hg19.2bit /tmp/extract.fa
perl -i -pe 's/:.*//' /tmp/extract.fa
faToTwoBit /tmp/extract.fa test/data/test.2bit


#### itemRgb.bb, itemRgb.bed

`itemRgb.bb` was generated from UCSC test data:

cd kent/src/utils/bedToBigBed/tests
make
cp output/itemRgb.bb $PILEUP/test/data/

It is compressed and little-endian, and it has autoSQL and two blocks.

`itemRgb.bed` is copied unmodified from `bedToBigBed/tests/input`.


#### ensembl.chr17.bb

This file is derived from `ensGene.bb`. It contains just the genes on chr17.

curl -O http://www.biodalliance.org/datasets/ensGene.bb
bigBedToBed ensGene.bb ensGene.bed
grep '^chr17\t' ensGene.bed > /tmp/ensGene17.bed
bedToBigBed -type=bed12+2 /tmp/ensGene17.bed <(echo "chr17 78774742") test/data/ensembl.chr17.bb

#### tp53.shifted.bb

This is a subset of `ensembl.chr17.bb` containing just the TP53 gene, with its
coordinates shifted to match those in `test.2bit`:

curl -O http://www.biodalliance.org/datasets/ensGene.bb
bigBedToBed ensGene.bb ensGene.bed
grep '^chr17\t' ensGene.bed | grep TP53 | perl -pe 's/(75\d{4,})/$1-7512444/ge' > /tmp/tp53.shifted.bed
bedToBigBed -type=bed12+2 /tmp/tp53.shifted.bed <(echo "chr17 78774742") test/data/tp53.shifted.bb

File renamed without changes.

0 comments on commit b7d0d24

Please sign in to comment.