Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
Basic functionality added
  • Loading branch information
danmaclean committed Nov 21, 2012
1 parent 45e8488 commit 314c809
Show file tree
Hide file tree
Showing 6 changed files with 256 additions and 2 deletions.
32 changes: 32 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
GEM
remote: http://rubygems.org/
specs:
activesupport (3.2.9)
i18n (~> 0.6)
multi_json (~> 1.0)
bio (1.4.3)
git (1.2.5)
i18n (0.6.1)
jeweler (1.6.4)
bundler (~> 1.0)
git (>= 1.2.5)
rake
multi_json (1.3.7)
rake (10.0.2)
rcov (1.0.0)
shoulda (3.3.2)
shoulda-context (~> 1.0.1)
shoulda-matchers (~> 1.4.1)
shoulda-context (1.0.1)
shoulda-matchers (1.4.1)
activesupport (>= 3.0.0)

PLATFORMS
ruby

DEPENDENCIES
bio (>= 1.4.2)
bundler (~> 1.0.0)
jeweler (~> 1.6.4)
rcov
shoulda
4 changes: 2 additions & 2 deletions Rakefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ Jeweler::Tasks.new do |gem|
gem.name = "bio-synreport"
gem.homepage = "http://github.com/danmaclean/bioruby-synreport"
gem.license = "MIT"
gem.summary = %Q{TODO: one-line summary of your gem}
gem.description = %Q{TODO: longer description of your gem}
gem.summary = %Q{Reports whether a nucleotide change results in synonymous or non-synonymous mutations}
gem.description = %Q{Takes a GFF and genomic sequence file, constructs CDS and when given a position and alternative base will report whether this change is in a coding region and if it results in a synonymous or non-synonymous mutation.}
gem.email = "maclean.daniel@gmail.com"
gem.authors = ["Dan MacLean"]
# dependencies defined in Gemfile
Expand Down
60 changes: 60 additions & 0 deletions bio-synreport.gemspec
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Generated by jeweler
# DO NOT EDIT THIS FILE DIRECTLY
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
# -*- encoding: utf-8 -*-

Gem::Specification.new do |s|
s.name = "bio-synreport"
s.version = "0.0.0"

s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
s.authors = ["Dan MacLean"]
s.date = "2012-11-20"
s.description = "TODO: longer description of your gem"
s.email = "maclean.daniel@gmail.com"
s.extra_rdoc_files = [
"LICENSE.txt",
"README.rdoc"
]
s.files = [
".document",
"Gemfile",
"LICENSE.txt",
"README.rdoc",
"Rakefile",
"VERSION",
"lib/bio-synreport.rb",
"test/helper.rb",
"test/test_bio-synreport.rb"
]
s.homepage = "http://github.com/danmaclean/bioruby-synreport"
s.licenses = ["MIT"]
s.require_paths = ["lib"]
s.rubygems_version = "1.8.11"
s.summary = "TODO: one-line summary of your gem"

if s.respond_to? :specification_version then
s.specification_version = 3

if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
s.add_development_dependency(%q<shoulda>, [">= 0"])
s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
s.add_development_dependency(%q<rcov>, [">= 0"])
s.add_development_dependency(%q<bio>, [">= 1.4.2"])
else
s.add_dependency(%q<shoulda>, [">= 0"])
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
s.add_dependency(%q<rcov>, [">= 0"])
s.add_dependency(%q<bio>, [">= 1.4.2"])
end
else
s.add_dependency(%q<shoulda>, [">= 0"])
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
s.add_dependency(%q<rcov>, [">= 0"])
s.add_dependency(%q<bio>, [">= 1.4.2"])
end
end

17 changes: 17 additions & 0 deletions examples/test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
$LOAD_PATH.unshift(File.dirname(__FILE__))



require 'bio-synreport'
require 'pp'

db = Bio::Util::SynReport.new(:gff => ARGV[0], :fasta => ARGV[1], :verbose => true)
chr, pos, ref,alt = 'Chr2',7634495, 'a', 't'
pp db.mutation_info(chr,pos,alt)

chr, pos, ref,alt = 'Chr3',123456, 'a', 't'
pp db.mutation_info(chr,pos,alt)

chr, pos, ref,alt = 'Chr2',7626518, 'a', 't'
pp db.mutation_info(chr,pos,alt)
2 changes: 2 additions & 0 deletions lib/bio-synreport.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,5 @@
# and put your plugin's code there. It is bad practice to write other code
# directly into this file, because doing so causes confusion if this biogem
# was ever to get merged into the main bioruby tree.

require 'bio/utils/bio-synreport'
143 changes: 143 additions & 0 deletions lib/bio/utils/bio-synreport.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
require 'rubygems'
require 'pp'
require 'bio'

module Bio
class Util

class MrnaModel
attr_accessor :seqname, :gff_id, :strand, :cds, :sequences

def initialize(chr, id, strand, cds_arr, seq_arr)
@seqname, @gff_id, @strand, @cds, @sequences = chr, id, strand, cds_arr, seq_arr
end

def includes?(seq, point)
@cds.each {|start, stop| return true if @seqname == seq and point.to_i >= start and point.to_i <= stop}
false
end

def seq
@sequences.join
end

def substitution_info(chr,point,alt)
cds_start = @cds.first.first
running_total = 0
@cds.each do |start,stop|
if point.to_i >= start and point.to_i <= stop
offset = case @strand
when "+"
#offset =
(point.to_i - start) + running_total
when "-"
(stop - point.to_i) + running_total
end #offset = how far into cds SNP is
codon_number = offset / 3
position_in_codon = offset % 3
#pp [offset, codon_number, position_in_codon]
codon_array = []; Bio::Sequence::NA.new(self.seq).window_search(3,3) {|b| codon_array << b}
codon = codon_array[codon_number]
nt = codon[position_in_codon]
new_codon = codon.dup
new_codon[position_in_codon] = alt.downcase
#pp [codon, position_in_codon, nt, new_codon]
a = Bio::Sequence::NA.new(codon).translate.codes.first
b = Bio::Sequence::NA.new(new_codon).translate.codes.first
sub_type = a == b ? "SYN" : "NON_SYN"
return {:id => @gff_id,
:chr => @seqname,
:strand => @strand,
:position => point,
:original_codon => codon,
:original_residue => a || 'stop',
:mutant_codon => new_codon,
:mutant_residue =>b || 'stop',
:position_in_codon => position_in_codon + 1,
:substitution_type => sub_type
}
end
running_total += (stop - start)
running_total += 1 if @strand == '-' #how far we are into the cds
end
end

end#class end


class SynReport
#attr_accessor :cdshash, :cds_list, :mRNAhash, :seqhash

def initialize(opts)
@gene_array = []
@cdshash = Hash.new {|h,k| h[k] = Hash.new {|a,b| a[b] = [] } }
@mRNAhash = Hash.new {|h,k| h[k] = Hash.new {|a,b| a[b] = [] } }
File.open(opts[:gff], "r").each do |gffline|
record=Bio::GFF::GFF3::Record.new(gffline)
if(record.feature_type == 'gene')
@gene_array << [record.seqname, record.id]
elsif(record.feature_type == 'CDS' or record.feature_type == 'mRNA')
parents = record.get_attributes('Parent')
parents.each do |parent|
if record.feature_type == 'CDS'
@cdshash[record.seqname][parent] << record
else
@mRNAhash[record.seqname][parent] << record
end
end
end
end
$stderr.puts "Loaded GFF..." if opts[:verbose]
@seqhash = {}
Bio::FastaFormat.open(opts[:fasta]).each { |seq| @seqhash[seq.entry_id] = seq.to_seq }
$stderr.puts "Loaded Seq..." if opts[:verbose]

@models = Hash.new {|h,k| h[k] = [] }
$stderr.puts "Building models..." if opts[:verbose]
@gene_array.each do |gene|

mRNAs=@mRNAhash[gene.first][gene.last]
mRNAs.each do |mRNA|
next if @seqhash[gene.first].nil?
cdsa = []
seqs = []
cdsary=@cdshash[gene.first][mRNA.id]
cdsary.each {|c| cdsa << [c.start, c.end]}
cdsa.sort!
cdsa.reverse! if mRNA.strand == '-'

cdsa.each do |cds|

#cdsa << [cds.start, cds.end]
if mRNA.strand == '+'
seqs << Bio::Sequence::NA.new(@seqhash[mRNA.seqname].splicing("#{cds.first}..#{cds.last}") )
elsif mRNA.strand == "-"
seqs << Bio::Sequence::NA.new(@seqhash[mRNA.seqname].splicing("#{cds.first}..#{cds.last}") ).complement
end
end
@models[mRNA.seqname] << Bio::Util::MrnaModel.new(mRNA.seqname, mRNA.id, mRNA.strand, cdsa, seqs )
#pp @models[mRNA.seqname][-1].cds if mRNA.id == 'AT2G17530.1' or mRNA.id == 'AT2G17550.1'
end
end
$stderr.puts "Models built..." if opts[:verbose]
end#init end

def is_in_cds?(chr,point)
@self.mutation_info(chr,point) ? true : false
end

#returns mutation info if point in CDS, if not in CDS returns false
def mutation_info(chr,pos,alt)

@models[chr].each do |m|
if m.includes?(chr,pos)
return m.substitution_info(chr,pos,alt)
end
end
false
end


end#class end
end#class util end
end# module end

0 comments on commit 314c809

Please sign in to comment.