Skip to content

Commit

Permalink
write the parser
Browse files Browse the repository at this point in the history
  • Loading branch information
ggklf committed Mar 11, 2013
1 parent 59c07fa commit d9edf2a
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 56 deletions.
12 changes: 11 additions & 1 deletion lib/bio-cd-hit-report.rb
@@ -1 +1,11 @@
require_relative "bio-cd-hit-report/cd-hit-report"
require_relative 'bio-cd-hit-report/cd-hit-report'

#report = Bio::CdHitReport.new('bin/test.clstr')
##report.report_file = 'bin/test.clstr'
#report.parse.each do |cluster|
#puts cluster.id
##puts cluster.members
#puts cluster.rep_seq
#end

#puts report.parse.length
49 changes: 8 additions & 41 deletions lib/bio-cd-hit-report/cd-hit-report.rb
@@ -1,49 +1,16 @@
module Bio

require_relative 'cluster.rb'
require_relative 'cluster'
require_relative 'parser'

class CdHitReport

# Creates a report bound to one CD-HIT cluster file.
#
# The argument is only remembered in @file here; nothing is opened or read
# until one of the other methods needs the data.
#
# @param file the report location (presumably a path String - confirm with callers)
def initialize(file)
@file = file
end

# Walks over every cluster produced by cluster_objs, passing each one to the
# caller's block in order.
#
# @yield [cluster] one cluster object per iteration
# @return the result of calling +each+ on the cluster collection
def each_cluster(&block)
  clusters = cluster_objs
  clusters.each(&block)
end

# Reports how many clusters cluster_objs currently produces.
#
# @return [Integer] number of clusters
def total_clusters
  clusters = cluster_objs
  clusters.length
end

# Looks up a cluster by name and returns its member list.
#
# The name is compared as a String, so an Integer such as 3 matches a
# cluster named "3". When several clusters share the name, the last one
# wins (same as before).
#
# @param name [#to_s] cluster name to look for
# @return the +members+ value of the matching cluster, or nil when no
#   cluster carries that name (previously this crashed with NoMethodError
#   on nil)
def get_cluster(name)
  wanted = name.to_s
  match = cluster_objs.select { |cluster| cluster.name == wanted }.last
  match && match.members
end

def max_members
cluster_objs.map{|c|c.size}.max
# Builds a CdHitParser pointed at this report's file.
#
# Despite the name, nothing is read here: the method only creates the parser
# and assigns @file to its report_file attribute.
#
# @return [CdHitParser] a new parser on every call
def parse
  CdHitParser.new.tap { |parser| parser.report_file = @file }
end

# Finds the size of the smallest cluster.
#
# @return the minimum of +size+ over all clusters, or nil when there are none
def min_members
  sizes = cluster_objs.map { |cluster| cluster.size }
  sizes.min
end

private
# Turns the raw records from raw_data into Cluster objects.
#
# Each record is split on newlines and any piece that is exactly the
# ">Cluster " marker is thrown away. The first remaining piece becomes the
# cluster id and the rest become its data. Records that leave no id behind
# are dropped from the result.
#
# @return [Array<Cluster>] clusters whose id is not nil
def cluster_objs
  clusters = raw_data.map do |record|
    pieces = record.split("\n").reject { |piece| piece == ">Cluster " }
    header = pieces.shift
    Cluster.new(header, pieces)
  end
  clusters.reject { |obj| obj.id.nil? }
end


# Reads the whole report file into an array of records.
#
# Records are split on the current input record separator ($/), exactly as
# before. File.readlines opens and closes the file itself, so no file handle
# is left open the way the previous File.open(...).readlines call did.
#
# @return [Array<String>] the records of @file, separators included
# @raise [SystemCallError] when @file cannot be opened (e.g. Errno::ENOENT)
def raw_data
  File.readlines(@file)
end

end #class
end #module
end
end
30 changes: 16 additions & 14 deletions lib/bio-cd-hit-report/cluster.rb
@@ -1,28 +1,30 @@
class Cluster
attr_accessor :name, :data

class Cluster < Struct.new(:name,:data)
$/ = ">Cluster "

def id
name
# Builds a cluster from an options hash.
#
# Reads the :name and :data keys of +arg+ into @name and @data. A missing
# key (or calling with no argument at all) leaves the corresponding value nil.
#
# @param arg [Hash] expects :name and :data keys
def initialize(arg={})
@name = arg[:name]
@data = arg[:data]
end

def size
entries.size
# Extracts the cluster identifier from the header stored in +name+.
#
# The header looks like "Cluster 12". The previous pattern captured a single
# character after "Cluster ", so ids of two or more digits were truncated
# ("Cluster 12" gave "1"). The whole non-blank token is captured now.
#
# @return [String] the identifier, or "" when +name+ has no "Cluster <id>" part
def id
  name.scan(/Cluster\s(\S+)/).join
end

# Lists the cluster's entries as one comma-separated string.
#
# @return [String] the result of joining +entries+ with ','
def members
  names = entries
  names.join(',')
end

def get_seqs(file)
seqs = Bio::FlatFile.auto(file).map{ |f| f}
puts entries.map{|entry| seqs.select {|seq| seq.definition == entry }}
# Collects the names flagged with a trailing " *" in the cluster data.
#
# Every line of @data is scanned for ">NAME... *" and the NAME parts are
# gathered into one flat list.
#
# @return [Array<String>] matching names, empty when no line carries the marker
def representative
  rows = @data.split("\n")
  rows.flat_map { |row| row.scan(/>(.+)\.{3}\s\*/).flatten }
end
alias :rep_seq :representative

private
def entries
data.map {|entry| entry.scan(/>(.+)\.{3}/)}.flatten
# Counts the items returned by +entries+.
#
# @return [Integer] number of entries
def size
  listed = entries
  listed.length
end
alias :length :size

# Scans every line of @data for a ">NAME..." token.
#
# The result keeps one element per line, and each element is the raw
# String#scan output for that line (an array of one-item capture arrays,
# empty when the line has no match).
#
# @return [Array<Array<Array<String>>>] per-line scan results
def entries
  rows = @data.split("\n")
  rows.map { |row| row.scan(/>(.+)\.{3}/) }
end
end

19 changes: 19 additions & 0 deletions lib/bio-cd-hit-report/parser.rb
@@ -0,0 +1,19 @@
# Streams the clusters of a CD-HIT cluster report one at a time.
#
# A line starting with '>' opens a new cluster; its text (without the '>' and
# surrounding whitespace) becomes the cluster name. Every following line up to
# the next header is appended, unchanged, to that cluster's data.
#
# Set +report_file+ before iterating. Enumerable is mixed in, so +map+,
# +to_a+, +count+ and friends work on top of +each+.
class CdHitParser
  include Enumerable

  # Location of the report to read.
  attr_accessor :report_file

  # Yields one Cluster per header found in the report.
  #
  # @yield [Cluster] built with :name (header text) and :data (member lines)
  # @return [Enumerator] when called without a block
  # @return [self] otherwise
  # @raise [SystemCallError] when report_file cannot be opened
  def each
    return enum_for(:each) unless block_given?

    header = nil
    data = nil
    # Block form guarantees the handle is closed, even if the consumer raises.
    File.open(report_file) do |io|
      # Split on "\n" explicitly so a changed global $/ cannot break parsing.
      io.each_line("\n") do |line|
        if line.start_with?('>')
          yield Cluster.new(:name => header, :data => data) if header
          header = line[1..-1].strip
          data = ''.dup
        elsif header
          data << line
        end
        # Lines ahead of the first header belong to no cluster and are skipped
        # (they used to crash with NoMethodError on nil).
      end
    end
    # Flush the last cluster; an empty report yields nothing at all.
    yield Cluster.new(:name => header, :data => data) if header
    self
  end
end

0 comments on commit d9edf2a

Please sign in to comment.