Permalink
Browse files

examples/align/clustalw.pl: remove file, now part of Bio-Tools-Run-Al…

…ignment-Clustalw distribution
  • Loading branch information...
1 parent 930ccb8 commit 2783085753eda29e0e77f096acfa30ee1c862c0e @carandraug carandraug committed Sep 13, 2017
Showing with 0 additions and 252 deletions.
  1. +0 −252 examples/align/clustalw.pl
View
@@ -1,252 +0,0 @@
-#!/usr/bin/perl
-# PROGRAM : clustalw.pl
-# PURPOSE : Demonstrate possible uses of Bio::Tools::Run::Alignment::Clustalw.pm
-# AUTHOR : Peter Schattner schattner@alum.mit.edu
-# CREATED : Oct 06 2000
-#
-# INSTALLATION
-#
-# You will need to have installed clustalw and to ensure that Clustalw.pm can find it.
-# This can be done in different ways (bash syntax):
-# export PATH=$PATH:/home/peter/clustalw1.8
-# or
-# define an environmental variable CLUSTALDIR:
-# export CLUSTALDIR=/home/peter/clustalw1.8
-# or
-# include a definition of an environmental variable CLUSTALDIR in every
-# script that will use Clustal.pm.
-# BEGIN {$ENV{CLUSTALDIR} = '/home/peter/clustalw1.8/'; }
-#
-# We are going to demonstrate 3 possible applications of Clustalw.pm:
-# 1. Test effect of varying clustalw alignment parameter(s) on resulting alignment
-# 2. Test effect of changing the order that sequences are added to the alignment
-# on the resulting alignment
-# 3. Test effect of incorporating an "anchor point" in the alignment process
-#
-# Before we can do any tests, we need to set up the environment, create the factory
-# and read in the unaligned sequences.
-#
-
-#BEGIN {
-# $ENV{CLUSTALDIR} = '/home/peter/clustalw1.8/';
-#}
-
-use Getopt::Long;
-use Bio::Tools::Run::Alignment::Clustalw;
-use Bio::SimpleAlign;
-use Bio::AlignIO;
-use Bio::SeqIO;
-use strict;
-
-# set some default values
-my $infile = 't/data/cysprot1a.fa';
-my @params = ('quiet' => 1 );
-my $do_only = '123'; # string listing examples to be executed. Default is to
- # execute all tests (ie 1,2 and 3)
-my $param = 'ktuple'; # parameter to be varied in example 1
-my $startvalue = 1; # initial value for parameter $param
-my $stopvalue = 3; # final value for parameter $param
-my $regex = 'W[AT]F'; # regular expression for 'anchoring' alignment in example 3
-my $extension = 30; # distance regexp anchor should be extended in each direction
- # for local alignment in example 3
-my $helpflag = 0; # Flag to show usage info.
-
-# get user options
-my @argv = @ARGV; # copy ARGV before GetOptions() massacres it.
-
-&GetOptions("h!" => \$helpflag, "help!" => \$helpflag,
- "in=s" => \$infile,
- "param=s" => \$param,
- "do=s" => \$do_only,
- "start=i" => \$startvalue,
- "stop=i" => \$stopvalue,
- "ext=i" => \$extension,
- "regex=s" => \$regex,) ;
-
-if ($helpflag) { &clustalw_usage(); exit 0;}
-
-# create factory & set user-specified global clustalw parameters
-foreach my $argv (@argv) {
- unless ($argv =~ /^(.*)=>(.*)$/) { next;}
- push (@params, $1 => $2);
-}
-my $factory = Bio::Tools::Run::Alignment::Clustalw->new(@params);
-
-
-# put unaligned sequences in a Bio::Seq array
-my $str = Bio::SeqIO->new(-file=> $infile, '-format' => 'Fasta');
-my ($paramvalue, $aln, $subaln, @consensus, $seq_num, $string, $strout, $id);
-my @seq_array =();
-while ( my $seq = $str->next_seq() ) { push (@seq_array, $seq) ;}
-
-# Do each example that has digit present in variable $do_only
-$_= $do_only;
-/1/ && &vary_params();
-/2/ && &vary_align_order();
-/3/ && &anchored_align();
-
-## End of "main"
-
-#################################################
-# vary_params(): Example demonstrating varying of clustalw parameter
-#
-
-sub vary_params {
-
- print "Beginning parameter-varying example... \n";
-
- # Now we'll create several alignments, 1 for each value of the selected
- # parameter. We also compute a simple consensus string for each alignment.
- # (In the default case, we vary the "ktuple" parameter, creating 3
- # alignments using ktuple values from 1 to 3.)
-
- my $index =0;
- for ($paramvalue = $startvalue; $paramvalue < ($stopvalue + 1); $paramvalue++) {
- $factory->$param($paramvalue); # set parameter value
- print "Performing alignment with $param = $paramvalue \n";
- $aln = $factory->align(\@seq_array);
- $string = $aln->consensus_string(); # Get consensus of alignment
- # convert '?' to 'X' at non-consensus positions
- $string =~ s/\?/X/g;
- $consensus[$index] = Bio::Seq->new(-id=>"$param=$paramvalue",-seq=>$string);
- $index++;
- }
- # Compare consensus strings for alignments with different $param values by
- # making an alignment of the different consensus strings
- # $factory->ktuple(1); # set ktuple parameter
- print "Performing alignment of $param consensus sequences \n";
- $aln = $factory->align(\@consensus);
- $strout = Bio::AlignIO->newFh('-format' => 'msf');
- print $strout $aln;
-
- return 1;
-}
-
-
-#################################################
-# vary_align_order():
-#
-# For our second example, we'll test the effect of changing the order
-# that sequences are added to the alignment
-
-sub vary_align_order {
-
- print "\nBeginning alignment-order-changing example... \n";
-
- @consensus = (); # clear array
- for ($seq_num = 0; $seq_num < scalar(@seq_array); $seq_num++) {
- my $obj_out = shift @seq_array; # remove one Seq object from array and save
- $id = $obj_out->display_id;
- # align remaining sequences
- print "Performing alignment with sequence $id left out \n";
- $subaln = $factory->align(\@seq_array);
- # add left-out sequence to subalignment
- $aln = $factory->profile_align($subaln,$obj_out);
- $string = $aln->consensus_string(); # Get consensus of alignment
- # convert '?' to 'X' for non-consensus positions
- $string =~ s/\?/X/g;
- $consensus[$seq_num] = Bio::Seq->new(-id=>"$id left out",-seq=>$string);
- push @seq_array, $obj_out; # return Seq object for next (sub) alignment
- }
-
- # Compare consensus strings for alignments created in different orders
- # $factory->ktuple(1); # set ktuple parameter
- print "\nPerforming alignment of consensus sequences for different reorderings \n";
- print "Each consensus is labeled by the sequence which was omitted in the initial alignment\n";
- $aln = $factory->align(\@consensus);
- $strout = Bio::AlignIO->newFh('-format' => 'msf');
- print $strout $aln;
-
- return 1;
-}
-
-#################################################
-# anchored_align()
-#
-# For our last example, we'll test a way to perform a local alignment by
-# "anchoring" the alignment to a regular expression. This is similar
-# to the approach taken in the recent dbclustal program.
-# In principle, we could write a script to search for a good regular expression
-# to use. Instead, here we'll simply choose one manually after looking at the
-# previous alignments.
-
-sub anchored_align {
-
- my @local_array = ();
- my @seqs_not_matched = ();
-
- print "\n Beginning anchored-alignment example... \n";
-
- for ($seq_num = 0; $seq_num < scalar(@seq_array); $seq_num++) {
- my $seqobj = $seq_array[$seq_num];
- my $seq = $seqobj->seq();
- my $id = $seqobj->id();
- # if $regex is not found in the sequence, save sequence id name and set
- # array value =0 for later
- unless ($seq =~/$regex/) {
- $local_array[$seq_num] = 0;
- push (@seqs_not_matched, $id) ;
- next;
- }
- # find positions of start and of subsequence to be aligned
- my $match_start_pos = length($`);
- my $match_stop_pos = length($`) + length($&);
- my $start = ($match_start_pos - $extension) > 1 ?
- ($match_start_pos - $extension) +1 : 1;
- my $stop = ($match_stop_pos + $extension) < length($seq) ?
- ($match_stop_pos + $extension) : length($seq);
- my $string = $seqobj->subseq($start, $stop);
-
- $local_array[$seq_num] = Bio::Seq->new(-id=>$id, -seq=>$string);
- }
- @local_array = grep $_ , @local_array; # remove array entries with no match
-
- # Perform alignment on the local segments of the sequences which match "anchor"
- $aln = $factory->align(\@local_array);
- my $consensus = $aln->consensus_string(); # Get consensus of local alignment
-
- if (scalar(@seqs_not_matched) ) {
- print " Sequences not matching $regex : @seqs_not_matched \n"
- } else {
- print " All sequences match $regex : @seqs_not_matched \n"
- }
- print "Consensus sequence of local alignment: $consensus \n";
-
- return 1;
-}
-
-#----------------
-sub clustalw_usage {
-#----------------
-
-#-----------------------
-# Prints usage information for general parameters.
-
- print STDERR <<"QQ_PARAMS_QQ";
-
- Command-line accessible script variables and commands:
- -------------------------------
- -h : Display this usage info and exit.
- -in <str> : File containing input sequences in fasta format (default = $infile) .
- -do <str> : String listing examples to be executed. Default is to execute
- all tests (ie default = '123')
- -param <str> : Parameter to be varied in example 1. Any clustalw parameter
- which takes inteer values can be varied (default = 'ktuple')
- -start <int> : Initial value for varying parameter in example 1 (default = 1)
- -stop <int> : Final value for varying parameter (default = 3)
- -regex <str> : Regular expression for 'anchoring' alignment in example 3
- (default = $regex)
- -ext <int> : Distance regexp anchor should be extended in each direction
- for local alignment in example 3 (default = 30)
-
-In addition, any valid Clustalw parameter can be set using the syntax
-"parameter=>value" as in "ktuple=>3"
-
-So a typical command lines might be:
- > clustalw.pl -param=pairgap -start=2 -stop=3 -do=1 "ktuple=>3"
-or
- > clustalw.pl -ext=10 -regex='W[AST]F' -do=23 -in='t/cysprot1a.fa'
-
-QQ_PARAMS_QQ
-
-}

0 comments on commit 2783085

Please sign in to comment.