Skip to content
This repository
Browse code

New module and test for FastTree

  • Loading branch information...
commit 79d1e1c80a5eeeff0199fdc9e070214222cf166a 1 parent 206ebd9
bosborne authored December 04, 2011
434  lib/Bio/Tools/Run/Phylo/FastTree.pm
... ...
@@ -0,0 +1,434 @@
  1
+#
  2
+# BioPerl module for Bio::Tools::Run::Phylo::FastTree
  3
+#
  4
+# Please direct questions and support issues to <bioperl-l@bioperl.org>
  5
+#
  6
+# Copyright Brian Osborne
  7
+#
  8
+# You may distribute this module under the same terms as perl itself
  9
+#
  10
+# POD documentation - main docs before the code
  11
+
  12
+=head1 NAME
  13
+
  14
+Bio::Tools::Run::Phylo::FastTree
  15
+
  16
+=head1 SYNOPSIS
  17
+
  18
+  # Build a FastTree factory
  19
+  $factory = Bio::Tools::Run::Phylo::FastTree->new();
  20
+
  21
+=head1 DESCRIPTION
  22
+
  23
+
  24
+=head1 FEEDBACK
  25
+
  26
+=head2 Mailing Lists
  27
+
  28
+User feedback is an integral part of the evolution of this and other
  29
+Bioperl modules. Send your comments and suggestions preferably to one
  30
+of the Bioperl mailing lists.  Your participation is much appreciated.
  31
+
  32
+  bioperl-l@bioperl.org                  - General discussion
  33
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
  34
+
  35
+=head2 Support 
  36
+
  37
+Please direct usage questions or support issues to the mailing list:
  38
+
  39
+I<bioperl-l@bioperl.org>
  40
+
  41
+Do not contact the module maintainer directly. Many experienced experts 
  42
+at bioperl-l will be able look at the problem and quickly 
  43
+address it. Please include a thorough description of the problem 
  44
+with code and data examples if at all possible.
  45
+
  46
+=head2 Reporting Bugs
  47
+
  48
+Report bugs to the Bioperl bug tracking system to help us keep track
  49
+the bugs and their resolution.  Bug reports can be submitted via the web:
  50
+
  51
+ http://redmine.open-bio.org/projects/bioperl/
  52
+
  53
+=head1 AUTHOR -  Brian Osborne
  54
+
  55
+Email briano@bioteam.net
  56
+
  57
+=head1 APPENDIX
  58
+
  59
+The rest of the documentation details each of the object
  60
+methods. Internal methods are usually preceded with a _
  61
+
  62
+=cut
  63
+
  64
+package Bio::Tools::Run::Phylo::FastTree;
  65
+
  66
+use strict;
  67
+use Bio::Seq;
  68
+use Bio::SeqIO;
  69
+use Bio::TreeIO;
  70
+use Bio::AlignIO;
  71
+use Bio::Root::IO;
  72
+
  73
+use base qw(Bio::Root::Root Bio::Tools::Run::WrapperBase);
  74
+
  75
+our %DEFAULTS = ( 'AFORMAT' => 'phylip' );
  76
+our @FastTree_PARAMS = qw(log cat n intree intree1 constraints sprlength topm close 
  77
+    refresh constraintWeight);
  78
+our @FastTree_SWITCHES = qw(quiet nopr nt fastest slow nosupport pseudo gtr wag quote noml 
  79
+    nome mllen gamma spr nni sprlength mlnni mllen slownni nocat notoo 2nd no2nd nj bionj
  80
+);
  81
+our $PROGRAM_NAME = 'FastTree';
  82
+#our $PROGRAM_DIR = Bio::Root::IO->catfile($ENV{FastTreeDIR}) if $ENV{FastTreeDIR};
  83
+
  84
+=head2 new
  85
+
  86
+ Title   : new
  87
+ Usage   : my $treebuilder = Bio::Tools::Run::Phylo::FastTree->new();
  88
+ Function: Constructor
  89
+ Returns : Bio::Tools::Run::Phylo::FastTree
  90
+ Args    : -outfile_name => $outname
  91
+
  92
+=cut
  93
+
  94
+sub new {
  95
+    my ( $class, @args ) = @_;
  96
+    my $self = $class->SUPER::new(@args);
  97
+
  98
+    $self->aformat( $DEFAULTS{'AFORMAT'} );
  99
+
  100
+    $self->_set_from_args(
  101
+        \@args,
  102
+        -methods => [ @FastTree_PARAMS, @FastTree_SWITCHES ],
  103
+        -create  => 1
  104
+    );
  105
+
  106
+    my ($out) = $self->SUPER::_rearrange( [qw(OUTFILE_NAME)], @args );
  107
+
  108
+    $self->outfile_name( $out || '' );
  109
+
  110
+    $self;
  111
+}
  112
+
  113
+=head2 program_name
  114
+
  115
+ Title   : program_name
  116
+ Usage   : $factory->program_name()
  117
+ Function: holds the program name
  118
+ Returns:  string
  119
+ Args    : None
  120
+
  121
+=cut
  122
+
  123
+sub program_name {
  124
+    $PROGRAM_NAME;
  125
+}
  126
+
  127
+=head2 program_dir
  128
+
  129
+ Title   : program_dir
  130
+ Usage   : $factory->program_dir(@params)
  131
+ Function: returns the program directory
  132
+ Returns:  string
  133
+ Args    :
  134
+
  135
+=cut
  136
+
  137
+sub program_dir {
  138
+    undef;
  139
+}
  140
+
  141
+=head2 error_string
  142
+
  143
+ Title   : error_string
  144
+ Usage   : $obj->error_string($newval)
  145
+ Function: Where the output from the last analysus run is stored.
  146
+ Returns : value of error_string
  147
+ Args    : newvalue (optional)
  148
+
  149
+=cut
  150
+
  151
+sub error_string {
  152
+    my ( $self, $value ) = @_;
  153
+    
  154
+    $self->{'error_string'} = $value if ( defined $value );
  155
+    $self->{'error_string'};
  156
+}
  157
+
  158
+=head2  version
  159
+
  160
+ Title   : version
  161
+ Usage   : exit if $prog->version() < 1.8
  162
+ Function: Determine the version number of the program
  163
+ Example :
  164
+ Returns : float or undef
  165
+ Args    : none
  166
+
  167
+=cut
  168
+
  169
+sub version {
  170
+    my ($self) = @_;
  171
+    my $exe;
  172
+
  173
+    return undef unless $exe = $self->executable;
  174
+    my $string = `$exe 2>&1`;
  175
+
  176
+    $string =~ /FastTree\s+version\s+([\d\.]+)/;
  177
+    return $1 || undef;
  178
+}
  179
+
  180
+=head2 run
  181
+
  182
+ Title   : run
  183
+ Usage   : $factory->run($stockholm_file) OR
  184
+           $factory->run($align_object)
  185
+ Function: Runs FastTree to generate a tree 
  186
+ Returns : Bio::Tree::Tree object
  187
+ Args    : File name for your input alignment in stockholm format, OR
  188
+           Bio::Align::AlignI compliant object (eg. Bio::SimpleAlign).
  189
+
  190
+=cut
  191
+
  192
+sub run {
  193
+    my ($self, $in) = @_;
  194
+
  195
+    if (ref $in && $in->isa("Bio::Align::AlignI")) {
  196
+        $in = $self->_write_alignfile($in);
  197
+    }
  198
+    elsif (! -e $in) {
  199
+        $self->throw("When not supplying a Bio::Align::AlignI object, you must supply a readable filename");
  200
+    }
  201
+    
  202
+    $self->_run($in); 
  203
+}
  204
+
  205
+=head2 _run
  206
+
  207
+ Title   : _run
  208
+ Usage   : Internal function, not to be called directly
  209
+ Function: Runs the application
  210
+ Returns : Tree object
  211
+ Args    : Alignment file name
  212
+
  213
+=cut
  214
+
  215
+sub _run {
  216
+    my ( $self, $file ) = @_;
  217
+
  218
+    # If -nt is not set check the alphabet of the input
  219
+    $self->_alphabet($file) if ( ! $self->nt );
  220
+    $self->nt(1) if ( $self->_alphabet eq 'dna' );
  221
+
  222
+    my $exe       = $self->executable || return;
  223
+    my $param_str = $self->arguments . " " . $self->_setparams($file);
  224
+    my $command   = "$exe $param_str";
  225
+    $self->debug("FastTree command = $command");
  226
+
  227
+    my $status  = system($command);
  228
+    my $outfile = $self->outfile_name();
  229
+
  230
+    if ( !-e $outfile || -z $outfile ) {
  231
+        $self->warn("FastTree call had status of $status: $? [command $command]\n");
  232
+        return undef;
  233
+    }
  234
+
  235
+    my $treeio = Bio::TreeIO->new( -format => 'newick', -file => $outfile );
  236
+    my $tree = $treeio->next_tree;
  237
+
  238
+# if bootstraps were enabled, the bootstraps are the ids; convert to
  239
+# bootstrap and no id
  240
+# if ($self->boot) {
  241
+#     my @nodes = $tree->get_nodes;
  242
+#     my %non_internal = map { $_ => 1 } ($tree->get_leaf_nodes, $tree->get_root_node);
  243
+#     foreach my $node (@nodes) {
  244
+#         next if exists $non_internal{$node};
  245
+#         $node->bootstrap && next; # protect ourselves incase the parser improves
  246
+#         $node->bootstrap($node->id);
  247
+#         $node->id('');
  248
+#     }
  249
+# }
  250
+
  251
+    $tree;
  252
+}
  253
+
  254
+=head2 _write_alignfile
  255
+
  256
+ Title   : _write_alignfile
  257
+ Usage   : Internal function, not to be called directly
  258
+ Function: Create an alignment file
  259
+ Returns : filename
  260
+ Args    : Bio::Align::AlignI
  261
+
  262
+=cut
  263
+
  264
+sub _write_alignfile {
  265
+    my ( $self, $align ) = @_;
  266
+
  267
+    my ( $tfh, $tempfile ) = $self->io->tempfile( -dir => $self->tempdir );
  268
+
  269
+    my $out = Bio::AlignIO->new(
  270
+        -file   => ">$tempfile",
  271
+        -format => 'fasta'
  272
+    );
  273
+    $out->write_aln($align);
  274
+    $out->close();
  275
+    close($tfh);
  276
+
  277
+    $tempfile;
  278
+}
  279
+    
  280
+=head2 aformat
  281
+
  282
+ Title   : aformat
  283
+ Usage   : my $treeformat = $self->aformat();
  284
+ Function: Get/Set tree format
  285
+ Returns : string
  286
+ Args    : string
  287
+
  288
+=cut
  289
+
  290
+sub aformat {
  291
+    my $self = shift;
  292
+    $self->{'_aformat'} = shift if @_;
  293
+    $self->{'_aformat'};
  294
+}
  295
+
  296
+=head2 _alphabet
  297
+
  298
+ Title   : _alphabet
  299
+ Usage   : my $alphabet = $self->_alphabet;
  300
+ Function: Get the alphabet of the input
  301
+ Returns : 'dna' or 'protein'
  302
+ Args    : 
  303
+
  304
+=cut
  305
+
  306
+sub _alphabet {
  307
+    my ($self,$file) = @_;
  308
+
  309
+    return $self->{_alphabet} if $self->{_alphabet};
  310
+
  311
+    my $in = Bio::AlignIO->new(-file => $file);
  312
+    my $aln = $in->next_aln;
  313
+    # arbitrary, the first one
  314
+    my $seq = $aln->get_seq_by_pos(1);
  315
+    $self->{_alphabet} = $seq->alphabet;
  316
+
  317
+    $self->{'_alphabet'};
  318
+}
  319
+
  320
+=head2  _setparams
  321
+
  322
+ Title   :  _setparams
  323
+ Usage   :  Internal function, not to be called directly    
  324
+ Function:  Create parameter inputs for FastTree program
  325
+ Example :
  326
+ Returns : parameter string to be passed to FastTree
  327
+ Args    : name of calling object
  328
+
  329
+=cut
  330
+
  331
+sub _setparams {
  332
+    my ($self,$infile) = @_;
  333
+    my ( $attr, $value, $param_string );
  334
+    $param_string = '';
  335
+    my $laststr;
  336
+
  337
+    for $attr (@FastTree_PARAMS) {
  338
+        $value = $self->$attr();
  339
+        next unless ( defined $value );
  340
+        my $attr_key = lc $attr;
  341
+        $attr_key = ' -' . $attr_key;
  342
+        $param_string .= $attr_key . ' ' . $value;
  343
+    }
  344
+    for $attr (@FastTree_SWITCHES) {
  345
+        $value = $self->$attr();
  346
+        next unless ($value);
  347
+        my $attr_key = lc $attr;
  348
+        $attr_key = ' -' . $attr_key;
  349
+        $param_string .= $attr_key;
  350
+    }
  351
+
  352
+    # Set default output file if no explicit output file has been given
  353
+    if ( ! $self->outfile_name ) {
  354
+        my ( $tfh, $outfile ) = $self->io->tempfile( -dir => $self->tempdir() );
  355
+        close($tfh);
  356
+        undef $tfh;
  357
+        $self->outfile_name($outfile);
  358
+    }
  359
+    $param_string .= " $infile > " . $self->outfile_name;
  360
+
  361
+    $param_string .= ' 2> /dev/null' if ( $self->quiet() || $self->verbose < 0 );
  362
+
  363
+    $param_string;
  364
+}
  365
+
  366
+=head1 Bio::Tools::Run::BaseWrapper methods
  367
+
  368
+=cut
  369
+
  370
+=head2 no_param_checks
  371
+
  372
+ Title   : no_param_checks
  373
+ Usage   : $obj->no_param_checks($newval)
  374
+ Function: Boolean flag as to whether or not we should
  375
+           trust the sanity checks for parameter values  
  376
+ Returns : value of no_param_checks
  377
+ Args    : newvalue (optional)
  378
+
  379
+=cut
  380
+
  381
+=head2 save_tempfiles
  382
+
  383
+ Title   : save_tempfiles
  384
+ Usage   : $obj->save_tempfiles($newval)
  385
+ Function: 
  386
+ Returns : value of save_tempfiles
  387
+ Args    : newvalue (optional)
  388
+
  389
+=cut
  390
+
  391
+=head2 outfile_name
  392
+
  393
+ Title   : outfile_name
  394
+ Usage   : my $outfile = $FastTree->outfile_name();
  395
+ Function: Get/Set the name of the output file for this run
  396
+           (if you wanted to do something special)
  397
+ Returns : string
  398
+ Args    : [optional] string to set value to
  399
+
  400
+=cut
  401
+
  402
+=head2 tempdir
  403
+
  404
+ Title   : tempdir
  405
+ Usage   : my $tmpdir = $self->tempdir();
  406
+ Function: Retrieve a temporary directory name (which is created)
  407
+ Returns : string which is the name of the temporary directory
  408
+ Args    : none
  409
+
  410
+=cut
  411
+
  412
+=head2 cleanup
  413
+
  414
+ Title   : cleanup
  415
+ Usage   : $FastTree->cleanup();
  416
+ Function: Will cleanup the tempdir directory
  417
+ Returns : none
  418
+ Args    : none
  419
+
  420
+=cut
  421
+
  422
+=head2 io
  423
+
  424
+ Title   : io
  425
+ Usage   : $obj->io($newval)
  426
+ Function:  Gets a L<Bio::Root::IO> object
  427
+ Returns : L<Bio::Root::IO>
  428
+ Args    : none
  429
+
  430
+=cut
  431
+
  432
+1;    # Needed to keep compiler happy
  433
+
  434
+__END__
52  t/FastTree.t
... ...
@@ -0,0 +1,52 @@
  1
+# This is -*-Perl-*- code
  2
+## Bioperl Test Harness Script for Modules
  3
+# Before `./Build install' is performed this script should be runnable with
  4
+# `./Build test --test_files test.t'. After `./Build install' it should work as `perl test.t'
  5
+
  6
+use strict;
  7
+
  8
+BEGIN {
  9
+    use Bio::Root::Test;
  10
+    test_begin(
  11
+        -tests => 8,
  12
+    );
  13
+    use_ok('Bio::Root::IO');
  14
+    use_ok('Bio::Tools::Run::Phylo::FastTree');
  15
+    use_ok('Bio::AlignIO');
  16
+}
  17
+
  18
+ok my $ft = Bio::Tools::Run::Phylo::FastTree->new( -quiet => 1 );
  19
+
  20
+SKIP: {
  21
+    test_skip(
  22
+        -requires_executable => $ft,
  23
+        -tests               => 2
  24
+    );
  25
+
  26
+    # The input could be a SimpleAlign object
  27
+    my $alignio = Bio::AlignIO->new(
  28
+        -format => 'fasta',
  29
+        -file   => test_input_file('219877.cdna.fasta')
  30
+    );
  31
+    my $alnobj = $alignio->next_aln;
  32
+
  33
+    my $tree = $ft->run($alnobj);
  34
+    ok defined($tree);
  35
+    my @nodes = $tree->get_nodes;
  36
+    is($#nodes,3);
  37
+
  38
+    # The input could be an alignment file (fasta or phylip interleaved)
  39
+    my $alignfile = test_input_file("sample_dataset_1_aligned.fa");
  40
+    $tree = $ft->run($alignfile);
  41
+    ok defined($tree);
  42
+
  43
+    # Input is protein sequence alignment
  44
+    $alignio = Bio::AlignIO->new(
  45
+        -format => 'msf',
  46
+        -file   => test_input_file('cysprot.msf')
  47
+    );
  48
+    $alnobj = $alignio->next_aln;
  49
+
  50
+    my $ptree = $ft->run($alnobj);
  51
+    ok defined($ptree);
  52
+}
3,564  t/data/sample_dataset_1_aligned.fa
3564 additions, 0 deletions not shown

0 notes on commit 79d1e1c

Please sign in to comment.
Something went wrong with that request. Please try again.