Skip to content

Commit

Permalink
Added descriptive stats for the tag_stats program
Browse files Browse the repository at this point in the history
  • Loading branch information
homonecloco committed Feb 26, 2018
1 parent 7d5bc0c commit 1f1eda2
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 4 deletions.
1 change: 1 addition & 0 deletions Gemfile
Expand Up @@ -5,6 +5,7 @@ source "http://rubygems.org"

gem "bio", ">= 1.5.1"
gem "bio-samtools", ">= 2.6.2"
gem "descriptive_statistics"
#gem "rake"

gem "systemu", ">=2.5.2"
Expand Down
40 changes: 36 additions & 4 deletions bin/tag_stats.rb
Expand Up @@ -6,7 +6,16 @@
require 'tmpdir'
require 'bio-samtools'
require 'bio'
require 'descriptive_statistics'

class Bio::DB::Tag
def set(str)
@tag = str[0..1]
@type = str[3]
@value = str[5..-1]
@value = @value.to_i if @type == "i"
end
end

$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
$: << File.expand_path('.')
Expand All @@ -16,21 +25,44 @@
opts[:tag] = "NH"
opts[:bam] = nil
opts[:out] = nil
opts[:ref] = nil

OptionParser.new do |o|
o.banner = "Usage: tag_stats.rb [options]"

o.on("-t", "--tag str", "CSV file with the gene triad names in the named columns 'A','B' and 'D' ") do |o|
o.on("-t", "--tag str", "The tag to extract (default NH)") do |o|
opts[:tag] = o
end

o.on("-b", "--bam FILE" , "FASTA file containing all the possible peptide sequences. ") do |o|
o.on("-b", "--bam FILE" , "BAM file with the alignments ") do |o|
opts[:bam] = o
end

o.on("-o", "--out_file CHAR", "Character used to split the sequence name. The name will be evarything before this token on the name of the sequences") do |o|
o.on("-o", "--out_file CHAR", "File to save the stats") do |o|
opts[:out_file] = o
end
end
o.on("-r", "--reference FILE", "Fasta file with the reference") do |o|
opts[:ref] = o
end
end.parse!

bam = Bio::DB::Sam.new(fasta: opts[:ref], bam: opts[:bam])
tag = opts[:tag]
puts bam.inspect

last_ref = ""
values = []
to_print = [:sum, :min, :max, :mean, :mode, :median, :q1, :q2, :q3]
bam.view do |aln |
if(last_ref != aln.rname)

desc_stats = values.descriptive_statistics
to_print.each { |e| puts "#{last_ref}\t#{e}\t#{desc_stats[e]}" } if(last_ref != "")

values.clear
last_ref = aln.rname
end

values << aln.tags[tag].value

end

0 comments on commit 1f1eda2

Please sign in to comment.