Skip to content
Browse files

automatic parsing of gzipped data

  • Loading branch information...
1 parent 08c953a commit 5c6a4846b951e49e80a3c468c89a45cb0222ee2c @audy committed May 21, 2012
Showing with 29 additions and 1 deletion.
  1. +12 −0 lib/dna.rb
  2. +10 −0 readme.md
  3. BIN spec/data/test.fasta.gz
  4. +5 −0 spec/dna_spec.rb
  5. +2 −1 spec/spec_helper.rb
View
12 lib/dna.rb
@@ -1,3 +1,5 @@
+require 'zlib'
+
Dir.glob(File.join(File.dirname(__FILE__), 'parsers', '*.rb')).each { |f| require f }
##
@@ -25,6 +27,16 @@ def initialize(handle)
end
def detect_format
+
+ # is gzipped?
+ unless @handle.class == Array # for tests mostly...
+ begin
+ @handle = Zlib::GzipReader.new(@handle)
+ rescue
+ @handle.rewind
+ end
+ end
+
first_line = @handle.first
@handle.rewind if @handle.class == File
View
10 readme.md
@@ -72,4 +72,14 @@ File.open('sequences.qseq') do |handle|
records = Dna.new handle
puts records.first.inspect
end
+
+# Gzipped input is detected and decompressed automatically
+
+File.open('sequences.fasta.gz') do |handle|
+ records = Dna.new handle
+
+ records.each do |record|
+ puts record.length
+ end
+end
```
View
BIN spec/data/test.fasta.gz
Binary file not shown.
View
5 spec/dna_spec.rb
@@ -7,11 +7,16 @@
let(:fastq) { Dna.new @fastq_file }
let(:qseq) { Dna.new @qseq_file }
let(:empty) { Dna.new @empty_file }
+ let(:gzipped) { Dna.new @gzip_file }
it 'doesnt freak out on empty files' do
fasta.format == :empty
end
+ it 'can automatically parse gzipped files' do
+ gzipped.format.should == :fasta
+ end
+
it 'can auto-detect fasta format' do
fasta.format.should == :fasta
end
View
3 spec/spec_helper.rb
@@ -14,6 +14,7 @@
@fasta_file = fasta_file
@fastq_file = fastq_file
@qseq_file = qseq_file
- @empty_fiel = empty_file
+ @empty_file = empty_file
+ @gzip_file = File.open(File.join(path, 'data/test.fasta.gz'))
end
end

0 comments on commit 5c6a484

Please sign in to comment.
Something went wrong with that request. Please try again.