Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
54 additions
and
56 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,11 @@ | ||
require_relative "bio-cd-hit-report/cd-hit-report" | ||
require_relative 'bio-cd-hit-report/cd-hit-report' | ||
|
||
#report = Bio::CdHitReport.new('bin/test.clstr') | ||
##report.report_file = 'bin/test.clstr' | ||
#report.parse.each do |cluster| | ||
#puts cluster.id | ||
##puts cluster.members | ||
#puts cluster.rep_seq | ||
#end | ||
|
||
#puts report.parse.length |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,49 +1,16 @@ | ||
module Bio | ||
|
||
require_relative 'cluster.rb' | ||
require_relative 'cluster' | ||
require_relative 'parser' | ||
|
||
class CdHitReport | ||
|
||
def initialize(file) | ||
@file = file | ||
end | ||
|
||
def each_cluster(&block) | ||
cluster_objs.each(&block) | ||
end | ||
|
||
def total_clusters | ||
cluster_objs.size | ||
end | ||
|
||
def get_cluster(name) | ||
cluster_objs.select{|cluster| cluster.name == name.to_s}.pop.members | ||
end | ||
|
||
def max_members | ||
cluster_objs.map{|c|c.size}.max | ||
def parse | ||
report = CdHitParser.new | ||
report.report_file = @file | ||
report | ||
end | ||
|
||
def min_members | ||
cluster_objs.map{|c| c.size}.min | ||
end | ||
|
||
private | ||
def cluster_objs | ||
d = raw_data.map do |line| | ||
cluster = line.split("\n").delete_if{|x| x == ">Cluster "} | ||
id = cluster.first | ||
cluster.shift | ||
#puts id.inspect | ||
Cluster.new(id,cluster) | ||
end | ||
d.delete_if {|obj| obj.id.nil?} | ||
end | ||
|
||
|
||
def raw_data | ||
File.open(@file).readlines | ||
end | ||
|
||
end #class | ||
end #module | ||
end | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,28 +1,30 @@ | ||
class Cluster | ||
attr_accessor :name, :data | ||
|
||
class Cluster < Struct.new(:name,:data) | ||
$/ = ">Cluster " | ||
|
||
def id | ||
name | ||
def initialize(arg={}) | ||
@name = arg[:name] | ||
@data = arg[:data] | ||
end | ||
|
||
def size | ||
entries.size | ||
def id | ||
name.scan(/Cluster\s(.)/).join | ||
end | ||
|
||
def members | ||
entries.join(',') | ||
end | ||
|
||
def get_seqs(file) | ||
seqs = Bio::FlatFile.auto(file).map{ |f| f} | ||
puts entries.map{|entry| seqs.select {|seq| seq.definition == entry }} | ||
def representative | ||
@data.split("\n").map{|line|line.scan(/>(.+)\.{3}\s\*/)}.flatten | ||
end | ||
alias :rep_seq :representative | ||
|
||
private | ||
def entries | ||
data.map {|entry| entry.scan(/>(.+)\.{3}/)}.flatten | ||
def size | ||
entries.size | ||
end | ||
alias :length :size | ||
|
||
def entries | ||
@data.split("\n").map{|line|line.scan(/>(.+)\.{3}/)} | ||
end | ||
end | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# Streams a CD-HIT cluster report (a ".clstr" file) as Cluster objects.
#
# The report is read line by line. Every line starting with '>' opens a new
# cluster; its text after the '>' (stripped) becomes the cluster name, and
# all following lines up to the next header are accumulated verbatim as the
# cluster data.
#
# Enumerable is mixed in, so map/select/to_a/count etc. are available.
class CdHitParser
  include Enumerable

  # Path of the report file that #each reads.
  attr_accessor :report_file

  # Yields one Cluster per header found in the report file.
  #
  # Each cluster is built as Cluster.new(:name => header, :data => body),
  # where body is the raw text of the lines following the header (an empty
  # string when a header has no lines under it).
  #
  # Lines that appear before the first header are ignored, and a file with
  # no header at all yields nothing.
  #
  # @yieldparam cluster [Cluster] the next cluster in file order
  # @return [Enumerator] when called without a block
  # @return [self] when called with a block
  def each
    return enum_for(:each) unless block_given?

    header = nil
    data = nil

    # Block form of File.open so the handle is closed even if the caller's
    # block raises or breaks out early.
    File.open(report_file) do |io|
      io.each_line do |line|
        if line.start_with?('>')
          # A new header terminates the cluster collected so far.
          yield Cluster.new(:name => header, :data => data) if header
          header = line[1..-1].strip
          data = ''.dup # unfrozen buffer, safe under frozen_string_literal
        elsif header
          data << line
        end
      end
    end

    # Flush the final cluster; skipped when the file contained no header.
    yield Cluster.new(:name => header, :data => data) if header
    self
  end
end