Permalink
Browse files

some scripts for running protdist and neighbor, for easy use in the b…

…sub world

svn path=/bioperl-run/trunk/; revision=13397
  • Loading branch information...
1 parent 577f62a commit bf39c4bc174d7cdbd4e1f5a14a14def21c0ad456 @hyphaltip hyphaltip committed Mar 24, 2003
Showing with 144 additions and 0 deletions.
  1. +58 −0 scripts/run_neighbor.PLS
  2. +86 −0 scripts/run_protdist.PLS
View
@@ -0,0 +1,58 @@
+#!/usr/bin/perl -w
+use strict;
+
+=head1 NAME
+
+run_neighbor - run Phylip's 'neighbor' program through Bioperl
+
+=head1 SYNOPSIS
+
+run_neighbor [-i inputfile] [-o outfilename]
+
+=head1 DESCRIPTION
+
+Provide an matrix file to run neighbor on. File should be named of
+the forma file.matrix or file.protdist (the ending doesn't matter, but
+the program expects a file in the form of (\S+)\.(\S+).
+
+This will run the application 'neighbor' to build a
+Neighbor-Joining tree from a protein distance matrix.
+
+=head1 AUTHOR
+
+Jason Stajich, jason-AT-open-bio-DOT-org
+
+=cut
+
+use Bio::TreeIO;
+use Bio::Tools::Run::Phylo::Phylip::Neighbor;
+use Getopt::Long;
+
+my @params = ('type'=>'NJ', 'quiet' => 1);
+
+my $matrixfile;
+my $out;
+GetOptions(
+ 'i|input:s' => \$matrixfile,
+ 'o|out:s' => \$out,
+ 'h|help' => sub { exec('perldoc',$0); exit(0) }
+ );
+($matrixfile) ||= shift @ARGV;
+
+my ($stem,$ext) = ($matrixfile =~ /(\S+)\.(\S+)$/) ;
+$stem ||= $matrixfile;
+
+my $outfh;
+if( $out ) {
+ open($outfh, ">$out") || die($!);
+} else {
+ open($outfh, ">$stem.tree") || die($!);
+}
+
+my $tree_factory = Bio::Tools::Run::Phylo::Phylip::Neighbor->new(@params);
+my (@trees) = $tree_factory->create_tree($matrixfile);
+
+my $outtree = new Bio::TreeIO(-fh => $outfh);
+foreach my $tree ( @trees ){
+ $outtree->write_tree($tree);
+}
View
@@ -0,0 +1,86 @@
+#!/usr/bin/perl -w
+use strict;
+#!/usr/bin/perl -w
+use strict;
+
+=head1 NAME
+
+run_neighbor - run Phylip's 'protdist' program through Bioperl
+
+=head1 SYNOPSIS
+
+run_protdist [-i inputfile] [-o outfilename]
+
+=head1 DESCRIPTION
+
+Provide an alignment file to run protdist on. File should be named
+either .aln or .phy. This is required so that we can determine if we
+need to convert a clustalw alignment into phylip. You are welcome to
+extend the script to work on other MSA formats which bioperl supports.
+This is intended to be used in very simple manual pipelines.
+
+The input file should be named in the form of file.phy or file.aln
+the program expects a file in the form of (\S+)\.(\S+).
+
+This will run the application 'protdist' using the 'KIMURA' formula to
+build a a protein distance matrix. Those with phylip3.6 will want to
+make some changes if they want to use JTT. I'm happy to help add this
+in as a cmd-line argument if it is requested.
+
+=head1 AUTHOR
+
+Jason Stajich, jason-AT-open-bio-DOT-org
+
+=cut
+
+use Bio::AlignIO;
+use Bio::Tools::Run::Phylo::Phylip::ProtDist;
+use Getopt::Long;
+
+my @params = ( 'MODEL' => 'KIMURA',
+ 'quiet' => 1,
+ );
+
+my ($out,$file);
+
+GetOptions(
+ 'o|out:s' => \$out,
+ 'i|in:s' => \$file,
+ 'h|help' => sub { exec('perldoc',$0); exit(0) }
+ );
+
+($file) ||= shift @ARGV;
+
+my ($stem,$ext) = ($file =~ /(\S+)\.(\S+)$/);
+$stem ||= $file;
+
+my $outfh;
+if( $out ) {
+ open($outfh, ">$out") || die($!);
+} else {
+ open($outfh, ">$stem.matrix") || die($!);
+}
+
+if( $ext eq 'aln' ) {
+ my $inaln = new Bio::AlignIO(-file => $file,
+ -format => 'clustalw');
+ $file = "$stem.phy";
+
+ my $outreformat = new Bio::AlignIO(-file => ">$file",
+ -interleaved => 1,
+ -format => 'phylip');
+ while( my $aln = $inaln->next_aln ) {
+ $outreformat->write_aln($aln);
+ }
+ $outreformat->close();
+ $outreformat = undef;
+ $inaln = undef;
+}
+
+my $factory = new Bio::Tools::Run::Phylo::Phylip::ProtDist(@params);
+my (@matrix) = $factory->create_distance_matrix($file);
+
+foreach my $mat ( @matrix ) {
+ print $outfh $mat->print_matrix;
+}
+

0 comments on commit bf39c4b

Please sign in to comment.