Skip to content

Commit

Permalink
Minor improvements to the coverage plot
Browse files Browse the repository at this point in the history
  • Loading branch information
Ricardo H. Ramirez authored and Ricardo H. Ramirez committed Jan 12, 2011
1 parent 6d5ce24 commit 2446329
Show file tree
Hide file tree
Showing 2 changed files with 175 additions and 187 deletions.
357 changes: 172 additions & 185 deletions lib/bio/db/sam.rb
Original file line number Diff line number Diff line change
Expand Up @@ -112,224 +112,211 @@ def load_reference()
end

def average_coverage(chromosome, qstart, len)
reference = fetch_reference(chromosome, qstart, qstart+len)
len = reference.length if len > reference.length

#reference = fetch_reference(chromosome, qstart,len)
# len = reference.length if len > reference.length


coverages = chromosome_coverage(chromosome, qstart, len)
total = 0
len.times{ |i| total= total + coverages[i] }
avg_cov = total.to_f / len
#LibC.free reference
avg_cov
end

def chromosome_coverage(chromosome, qstart, len)
# reference = fetch_reference(chromosome, qstart,len)
# len = reference.length if len > reference.length
#p qend.to_s + "-" + qstart.to_s + "framesize " + (qend - qstart).to_s
coverages = Array.new(len, 0)

avg_proc = Proc.new do |alignment|
last = qstart + len
first = qstart
last = alignment.calend if last > alignment.calend
first = alignment.pos if first < alignment.pos
chr_cov_proc = Proc.new do |alignment|
#last = qstart + len
#first = qstart
#last = alignment.calend if last > alignment.calend
#first = alignment.pos if first < alignment.pos
# p first
first.upto(last-1) { |i|
coverages[first-i] = 1 + coverages[first-i] }
last = alignment.calend - qstart
first = alignment.pos - qstart
if last < first
tmp = last
last = first
first = last
end

fetch_with_function(chromosome, qstart, qstart+len, avg_proc)
#p coverages

coverages = chromosome_coverage(chromosome, qstart, len)
total = 0
0.upto(len-1) { |i| total= total + coverages[i] }
avg_cov = total.to_f / len
#LibC.free reference
avg_cov
end

def chromosome_coverage(chromosome, qstart, len)
reference = fetch_reference(chromosome, qstart,len)
len = reference.length if len > reference.length
#p qend.to_s + "-" + qstart.to_s + "framesize " + (qend - qstart).to_s
coverages = Array.new(len, 0)

chr_cov_proc = Proc.new do |alignment|
#last = qstart + len
#first = qstart
#last = alignment.calend if last > alignment.calend
#first = alignment.pos if first < alignment.pos
# p first
last = alignment.calend - qstart
first = alignment.pos - qstart
if last < first
tmp = last
last = first
first = last
end

# STDERR.puts "#{first} #{last}\n"
first.upto(last-1) { |i|

coverages[i-1] = 1 + coverages[i-1] if i-1 < len && i > 0
}
end
first.upto(last-1) { |i|

fetch_with_function(chromosome, qstart, qstart+len, chr_cov_proc)
#p coverages
coverages
coverages[i-1] = 1 + coverages[i-1] if i-1 < len && i > 0
}
end

def fetch_reference(chromosome, qstart,qend)
load_reference if @fasta_index.nil? || @fasta_index.null?
query = query_string(chromosome, qstart,qend)
len = FFI::MemoryPointer.new :int
reference = Bio::DB::SAM::Tools.fai_fetch(@fasta_index, query, len)
raise SAMException.new(), "Unable to get sequence for reference: "+query if reference.nil?

reference
end
fetch_with_function(chromosome, qstart, qstart+len, chr_cov_proc)
#p coverages
coverages
end

def query_string(chromosome, qstart,qend)
query = chromosome + ":" + qstart.to_s + "-" + qend.to_s
query
end
def fetch_reference(chromosome, qstart,qend)
load_reference if @fasta_index.nil? || @fasta_index.null?
query = query_string(chromosome, qstart,qend)
len = FFI::MemoryPointer.new :int
reference = Bio::DB::SAM::Tools.fai_fetch(@fasta_index, query, len)
raise SAMException.new(), "Unable to get sequence for reference: "+query if reference.nil?

def fetch(chromosome, qstart, qend)
als = Array.new
fetchAlignment = Proc.new do |alignment|
als.push(alignment.clone)
0
end
fetch_with_function(chromosome, qstart, qend, fetchAlignment)
als
end

def fetch_with_function(chromosome, qstart, qend, function)
load_index if @sam_index.nil? || @sam_index.null?
chr = FFI::MemoryPointer.new :int
beg = FFI::MemoryPointer.new :int
last = FFI::MemoryPointer.new :int
query = query_string(chromosome, qstart,qend)
qpointer = FFI::MemoryPointer.from_string(query)
header = @sam_file[:header]
Bio::DB::SAM::Tools.bam_parse_region(header,qpointer, chr, beg, last)
raise SAMException.new(), "invalid query: " + query if(chr.read_int < 0)
count = 0;

fetchAlignment = Proc.new do |bam_alignment, data|
alignment = Alignment.new
alignment.set(bam_alignment, header)
function.call(alignment)
count = count + 1
0
end
Bio::DB::SAM::Tools.bam_fetch(@sam_file[:x][:bam], @sam_index,chr.read_int,beg.read_int, last.read_int, nil, fetchAlignment)
#LibC.free chr
#LibC.free beg
#LibC.free last
#LibC.free qpointer
count
end
reference
end

def query_string(chromosome, qstart,qend)
query = chromosome + ":" + qstart.to_s + "-" + qend.to_s
query
end

class Tag
attr_accessor :tag, :type, :value
def set(str)
v = str.split(":")
@tag = v[0]
@type = v[1]
@value = v[2]
def fetch(chromosome, qstart, qend)
als = Array.new
fetchAlignment = Proc.new do |alignment|
als.push(alignment.clone)
0
end
fetch_with_function(chromosome, qstart, qend, fetchAlignment)
als
end

def fetch_with_function(chromosome, qstart, qend, function)
load_index if @sam_index.nil? || @sam_index.null?
chr = FFI::MemoryPointer.new :int
beg = FFI::MemoryPointer.new :int
last = FFI::MemoryPointer.new :int
query = query_string(chromosome, qstart,qend)
qpointer = FFI::MemoryPointer.from_string(query)
header = @sam_file[:header]
Bio::DB::SAM::Tools.bam_parse_region(header,qpointer, chr, beg, last)
#raise SAMException.new(), "invalid query: " + query if(chr.read_int < 0)
count = 0;

fetchAlignment = Proc.new do |bam_alignment, data|
alignment = Alignment.new
alignment.set(bam_alignment, header)
function.call(alignment)
count = count + 1
0
end
Bio::DB::SAM::Tools.bam_fetch(@sam_file[:x][:bam], @sam_index,chr.read_int,beg.read_int, last.read_int, nil, fetchAlignment)
#LibC.free chr
#LibC.free beg
#LibC.free last
#LibC.free qpointer
count
end

class Alignment
end

def initialize
ObjectSpace.define_finalizer(self,
self.class.method(:finalize).to_proc)
end
def Alignment.finalize(object_id)
class Tag
attr_accessor :tag, :type, :value
def set(str)
v = str.split(":")
@tag = v[0]
@type = v[1]
@value = v[2]
end
end

# puts "Object #{object_id} dying at #{Time.new}"
# p "?" . object_id.al
# p object_id.al
LibC.free object_id.al
LibC.free object_id.sam
LibC.free object_id.calend
LibC.free object_id.qlen
class Alignment

LibC.free object_id.samstr
end
def initialize
ObjectSpace.define_finalizer(self,
self.class.method(:finalize).to_proc)
end
def Alignment.finalize(object_id)

#Attributes from the format
attr_accessor :qname, :flag, :rname,:pos,:mapq,:cigar, :mrnm, :mpos, :isize, :seq, :qual, :tags, :al, :samstr
#Attributes pulled with the C library
attr_accessor :calend, :qlen
#Attrobites frp, the flag field (see chapter 2.2.2 of the sam file documentation)
#query_strand and mate_strand are true if they are forward. It is the opposite to the definition in the BAM format for clarity.
#primary is the negation of is_negative from the BAM format
attr_accessor :is_paired, :is_mapped, :query_unmapped, :mate_unmapped, :query_strand, :mate_strand, :first_in_pair,:second_in_pair, :primary, :failed_quality, :is_duplicate

def set(bam_alignment, header)
#Create the FFI object
@al = Bio::DB::SAM::Tools::Bam1T.new(bam_alignment)

#set the raw data
tmp_str = Bio::DB::SAM::Tools.bam_format1(header,al)
#self.sam = tmp_str
#ObjectSpace.define_finalizer(self, proc {|id| puts "Finalizer one on #{id}" })
self.sam = String.new(tmp_str)
#LibC.free tmp_str
#Set values calculated by libbam
core = al[:core]
cigar = al[:data][core[:l_qname]]#define bam1_cigar(b) ((uint32_t*)((b)->data + (b)->core.l_qname))
@calend = Bio::DB::SAM::Tools.bam_calend(core,cigar)
@qlen = Bio::DB::SAM::Tools.bam_cigar2qlen(core,cigar)

#process the flags
@is_paired = @flag & 0x0001 > 0
@is_mapped = @flag & 0x0002 > 0
@query_unmapped = @flag & 0x0004 > 0
@mate_unmapped = @flag & 0x0008 > 0
@query_strand = !(@flag & 0x0010 > 0)
@mate_strand = !(@flag & 0x0020 > 0)
@first_in_pair = @flag & 0x0040 > 0
@second_in_pair = @flag & 0x0080 > 0
@primary = !(@flag & 0x0100 > 0)
@failed_quality = @flag & 0x0200 > 0
@is_duplicate = @flag & 0x0400 > 0

end
# puts "Object #{object_id} dying at #{Time.new}"
# p "?" . object_id.al
# p object_id.al
LibC.free object_id.al
LibC.free object_id.sam
LibC.free object_id.calend
LibC.free object_id.qlen

LibC.free object_id.samstr
end

def sam=(sam)
#p sam
s = sam.split("\t")
self.qname = s[0]
self.flag = s[1].to_i
self.rname = s[2]
self.pos = s[3].to_i
self.mapq = s[4].to_i
self.cigar = s[5]
self.mrnm = s[6]
self.mpos = s[7].to_i
self.isize = s[8].to_i
self.seq = s[9]
self.qual = s[10]
self.tags = {}
11.upto(s.size-1) {|n|
t = Tag.new
t.set(s[n])
tags[t.tag] = t
}
#Attributes from the format
attr_accessor :qname, :flag, :rname,:pos,:mapq,:cigar, :mrnm, :mpos, :isize, :seq, :qual, :tags, :al, :samstr
#Attributes pulled with the C library
attr_accessor :calend, :qlen
#Attrobites frp, the flag field (see chapter 2.2.2 of the sam file documentation)
#query_strand and mate_strand are true if they are forward. It is the opposite to the definition in the BAM format for clarity.
#primary is the negation of is_negative from the BAM format
attr_accessor :is_paired, :is_mapped, :query_unmapped, :mate_unmapped, :query_strand, :mate_strand, :first_in_pair,:second_in_pair, :primary, :failed_quality, :is_duplicate

def set(bam_alignment, header)
#Create the FFI object
@al = Bio::DB::SAM::Tools::Bam1T.new(bam_alignment)

#set the raw data
tmp_str = Bio::DB::SAM::Tools.bam_format1(header,al)
#self.sam = tmp_str
#ObjectSpace.define_finalizer(self, proc {|id| puts "Finalizer one on #{id}" })
self.sam = String.new(tmp_str)
#LibC.free tmp_str
#Set values calculated by libbam
core = al[:core]
cigar = al[:data][core[:l_qname]]#define bam1_cigar(b) ((uint32_t*)((b)->data + (b)->core.l_qname))
@calend = Bio::DB::SAM::Tools.bam_calend(core,cigar)
@qlen = Bio::DB::SAM::Tools.bam_cigar2qlen(core,cigar)

#process the flags
@is_paired = @flag & 0x0001 > 0
@is_mapped = @flag & 0x0002 > 0
@query_unmapped = @flag & 0x0004 > 0
@mate_unmapped = @flag & 0x0008 > 0
@query_strand = !(@flag & 0x0010 > 0)
@mate_strand = !(@flag & 0x0020 > 0)
@first_in_pair = @flag & 0x0040 > 0
@second_in_pair = @flag & 0x0080 > 0
@primary = !(@flag & 0x0100 > 0)
@failed_quality = @flag & 0x0200 > 0
@is_duplicate = @flag & 0x0400 > 0

end

#<QNAME> <FLAG> <RNAME> <POS> <MAPQ> <CIGAR> <MRNM> <MPOS> <ISIZE> <SEQ> <QUAL> \
#[<TAG>:<VTYPE>:<VALUE> [...]]

end
def sam=(sam)
#p sam
s = sam.split("\t")
self.qname = s[0]
self.flag = s[1].to_i
self.rname = s[2]
self.pos = s[3].to_i
self.mapq = s[4].to_i
self.cigar = s[5]
self.mrnm = s[6]
self.mpos = s[7].to_i
self.isize = s[8].to_i
self.seq = s[9]
self.qual = s[10]
self.tags = {}
11.upto(s.size-1) {|n|
t = Tag.new
t.set(s[n])
tags[t.tag] = t
}


#<QNAME> <FLAG> <RNAME> <POS> <MAPQ> <CIGAR> <MRNM> <MPOS> <ISIZE> <SEQ> <QUAL> \
#[<TAG>:<VTYPE>:<VALUE> [...]]

end

class SAMException < RuntimeError
#we can add further variables to give information of the excpetion
def initialize()
end

class SAMException < RuntimeError
#we can add further variables to give information of the excpetion
def initialize()

end
end
end
end
end


Loading

0 comments on commit 2446329

Please sign in to comment.