forked from wwood/bbbin
/
ununiqify_tree.rb
executable file
·44 lines (35 loc) · 1.52 KB
/
ununiqify_tree.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#!/usr/bin/env ruby
# Take a fasta file and a phylip file that has been uniqified (for instance with
# uniqify_phylip.rb). The only constraint is that the sequences must be in the
# same order in the fasta and the phylip files. The leaves of the tree (which
# currently carry the unintelligible uniqified phylip names) are then renamed
# to the fasta names, which are much more understandable.
require 'rubygems'
require 'bio'
# Exactly three arguments are required: the fasta file, the uniqified phylip
# file and the tree file. On misuse, print the usage line to stderr and exit
# with a non-zero status (Kernel#abort) so calling scripts can detect the error.
abort "Usage: ununiqify_tree.rb <fasta> <uniqued_phylip_file> <tree>" if ARGV.length != 3
# Read the fasta and the phylip files and build a hash that maps each phylip
# (uniqified) name to the fasta name found at the same position in its file.
fasta_seqs = Bio::FlatFile.open(ARGV[0]).entries
phylip_seqs = Bio::FlatFile.open(Bio::Phylip::PhylipFormat, ARGV[1]).entries[0].alignment.to_fastaformat_array
if fasta_seqs.length != phylip_seqs.size
  # Only a warning: processing continues with whatever pairs can be formed.
  $stderr.puts "Number of sequences in fasta and phylip files differ. Are you doing something wrong?"
end
phylip_to_fasta_name_hash = {}
fasta_seqs.zip(phylip_seqs) do |fasta_seq, phylip_seq|
  # zip pads with nil once the phylip entries run out; stop there instead of
  # calling #definition on nil when the fasta file is the longer of the two.
  break if phylip_seq.nil?

  phylip_to_fasta_name_hash[phylip_seq.definition] = fasta_seq.definition
end
# Rename every named leaf of the tree using the phylip -> fasta name hash,
# then print the resulting tree in Newick format on stdout. Leaves without a
# match keep their name and are reported on stderr.
tree = Bio::FlatFile.open(Bio::Newick, ARGV[2]).entries[0].tree
tree.leaves.each do |leaf|
  label = leaf.name
  next if label.nil?

  # First try the name exactly as it appears in the tree; failing that, retry
  # with every " /" (space followed by slash) replaced by "_".
  # NOTE(review): the fallback pattern only matches a space immediately
  # followed by a slash -- confirm that this is the intended substitution.
  replacement = phylip_to_fasta_name_hash[label] ||
                phylip_to_fasta_name_hash[label.gsub(/ \//, '_')]

  unless replacement
    $stderr.puts "Unexpected node name (left unchanged): '#{leaf}' '#{leaf.name}' #{leaf.class}"
    next
  end

  leaf.name = replacement
end
newick_text = tree.output(:newick)
puts newick_text