Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

This commit was manufactured by cvs2svn to create tag 'release-1_01'.

svn path=/bioperl-live/tags/release-1_01/; revision=3782
  • Loading branch information...
commit 6765e43069b702973f976043890e72033fdb14cf 1 parent 28cd266
nobody authored
Showing with 0 additions and 38,400 deletions.
  1. +0 −59 AUTHORS
  2. +0 −69 BUGS
  3. +0 −823 Bio/Align/AlignI.pm
  4. +0 −110 Bio/Align/StatisticsI.pm
  5. +0 −472 Bio/AlignIO.pm
  6. +0 −176 Bio/AlignIO/bl2seq.pm
  7. +0 −256 Bio/AlignIO/clustalw.pm
  8. +0 −239 Bio/AlignIO/emboss.pm
  9. +0 −190 Bio/AlignIO/fasta.pm
  10. +0 −155 Bio/AlignIO/mase.pm
  11. +0 −230 Bio/AlignIO/mega.pm
  12. +0 −215 Bio/AlignIO/meme.pm
  13. +0 −241 Bio/AlignIO/msf.pm
  14. +0 −324 Bio/AlignIO/nexus.pm
  15. +0 −150 Bio/AlignIO/pfam.pm
  16. +0 −300 Bio/AlignIO/phylip.pm
  17. +0 −137 Bio/AlignIO/prodom.pm
  18. +0 −163 Bio/AlignIO/psi.pm
  19. +0 −168 Bio/AlignIO/selex.pm
  20. +0 −172 Bio/AlignIO/stockholm.pm
  21. +0 −194 Bio/AnalysisParserI.pm
  22. +0 −229 Bio/AnalysisResultI.pm
  23. +0 −454 Bio/Annotation.pm
  24. +0 −466 Bio/Annotation/Collection.pm
  25. +0 −149 Bio/Annotation/Comment.pm
  26. +0 −230 Bio/Annotation/DBLink.pm
  27. +0 −447 Bio/Annotation/Reference.pm
  28. +0 −162 Bio/Annotation/SimpleValue.pm
  29. +0 −172 Bio/Annotation/TypeManager.pm
  30. +0 −182 Bio/AnnotationCollectionI.pm
  31. +0 −158 Bio/AnnotationI.pm
  32. +0 −372 Bio/Biblio.pm
  33. +0 −137 Bio/Biblio/Article.pm
  34. +0 −199 Bio/Biblio/BiblioBase.pm
  35. +0 −143 Bio/Biblio/Book.pm
  36. +0 −132 Bio/Biblio/BookArticle.pm
  37. +0 −373 Bio/Biblio/IO.pm
  38. +0 −542 Bio/Biblio/IO/medline2ref.pm
  39. +0 −744 Bio/Biblio/IO/medlinexml.pm
  40. +0 −144 Bio/Biblio/IO/pubmed2ref.pm
  41. +0 −309 Bio/Biblio/IO/pubmedxml.pm
  42. +0 −131 Bio/Biblio/Journal.pm
  43. +0 −138 Bio/Biblio/JournalArticle.pm
  44. +0 −205 Bio/Biblio/MedlineArticle.pm
  45. +0 −131 Bio/Biblio/MedlineBook.pm
  46. +0 −143 Bio/Biblio/MedlineBookArticle.pm
  47. +0 −137 Bio/Biblio/MedlineJournal.pm
  48. +0 −144 Bio/Biblio/MedlineJournalArticle.pm
  49. +0 −129 Bio/Biblio/Organisation.pm
  50. +0 −135 Bio/Biblio/Patent.pm
  51. +0 −146 Bio/Biblio/Person.pm
  52. +0 −133 Bio/Biblio/Proceeding.pm
  53. +0 −126 Bio/Biblio/Provider.pm
  54. +0 −149 Bio/Biblio/PubmedArticle.pm
  55. +0 −138 Bio/Biblio/PubmedBookArticle.pm
  56. +0 −151 Bio/Biblio/PubmedJournalArticle.pm
  57. +0 −251 Bio/Biblio/Ref.pm
  58. +0 −128 Bio/Biblio/Service.pm
  59. +0 −128 Bio/Biblio/TechReport.pm
  60. +0 −127 Bio/Biblio/Thesis.pm
  61. +0 −135 Bio/Biblio/WebResource.pm
  62. +0 −209 Bio/DB/Ace.pm
  63. +0 −310 Bio/DB/Biblio/biofetch.pm
  64. +0 −530 Bio/DB/Biblio/soap.pm
  65. +0 −493 Bio/DB/BiblioI.pm
  66. +0 −487 Bio/DB/BioFetch.pm
  67. +0 −354 Bio/DB/DBFetch.pm
  68. +0 −197 Bio/DB/EMBL.pm
  69. +0 −162 Bio/DB/Failover.pm
  70. +0 −1,069 Bio/DB/Fasta.pm
  71. +0 −321 Bio/DB/FileCache.pm
  72. +0 −526 Bio/DB/Flat.pm
  73. +0 −453 Bio/DB/Flat/BDB.pm
  74. +0 −42 Bio/DB/Flat/BDB/embl.pm
  75. +0 −31 Bio/DB/Flat/BDB/fasta.pm
  76. +0 −1,682 Bio/DB/Flat/OBDAIndex.pm
  77. +0 −315 Bio/DB/GDB.pm
  78. +0 −2,930 Bio/DB/GFF.pm
  79. +0 −1,464 Bio/DB/GFF/Adaptor/dbi.pm
  80. +0 −268 Bio/DB/GFF/Adaptor/dbi/caching_handle.pm
  81. +0 −38 Bio/DB/GFF/Adaptor/dbi/iterator.pm
  82. +0 −1,404 Bio/DB/GFF/Adaptor/dbi/mysql.pm
  83. +0 −442 Bio/DB/GFF/Adaptor/dbi/mysqlopt.pm
  84. +0 −583 Bio/DB/GFF/Adaptor/memory.pm
  85. +0 −51 Bio/DB/GFF/Adaptor/memory_iterator.pm
  86. +0 −580 Bio/DB/GFF/Aggregator.pm
  87. +0 −126 Bio/DB/GFF/Aggregator/alignment.pm
  88. +0 −158 Bio/DB/GFF/Aggregator/clone.pm
  89. +0 −46 Bio/DB/GFF/Aggregator/none.pm
  90. +0 −113 Bio/DB/GFF/Aggregator/transcript.pm
  91. +0 −105 Bio/DB/GFF/Aggregator/wormbase_gene.pm
  92. +0 −155 Bio/DB/GFF/Featname.pm
  93. +0 −982 Bio/DB/GFF/Feature.pm
  94. +0 −92 Bio/DB/GFF/Homol.pm
  95. +0 −1,041 Bio/DB/GFF/RelSegment.pm
  96. +0 −612 Bio/DB/GFF/Segment.pm
  97. +0 −185 Bio/DB/GFF/Typename.pm
  98. +0 −37 Bio/DB/GFF/Util/Binning.pm
  99. +0 −52 Bio/DB/GFF/Util/Rearrange.pm
  100. +0 −266 Bio/DB/GenBank.pm
  101. +0 −240 Bio/DB/GenPept.pm
  102. +0 −258 Bio/DB/InMemoryCache.pm
  103. +0 −19 Bio/DB/MANIFEST
  104. +0 −13 Bio/DB/Makefile.PL
  105. +0 −501 Bio/DB/NCBIHelper.pm
  106. +0 −125 Bio/DB/RandomAccessI.pm
  107. +0 −141 Bio/DB/RefSeq.pm
  108. +0 −211 Bio/DB/Registry.pm
  109. +0 −179 Bio/DB/SeqI.pm
  110. +0 −447 Bio/DB/SwissProt.pm
  111. +0 −247 Bio/DB/Universal.pm
  112. +0 −231 Bio/DB/UpdateableSeqI.pm
  113. +0 −541 Bio/DB/WebDBSeqI.pm
  114. +0 −184 Bio/DB/XEMBL.pm
  115. +0 −107 Bio/DB/XEMBLService.pm
  116. +0 −106 Bio/DBLinkContainerI.pm
  117. +0 −254 Bio/DasI.pm
  118. +0 −114 Bio/DasSegmentI.pm
  119. +0 −89 Bio/Event/EventGeneratorI.pm
  120. +0 −215 Bio/Event/EventHandlerI.pm
  121. +0 −85 Bio/Factory/ApplicationFactoryI.pm
  122. +0 −253 Bio/Factory/BlastHitFactory.pm
  123. +0 −113 Bio/Factory/BlastResultFactory.pm
  124. +0 −186 Bio/Factory/DriverFactory.pm
  125. +0 −328 Bio/Factory/EMBOSS.pm
  126. +0 −93 Bio/Factory/HitFactoryI.pm
  127. +0 −111 Bio/Factory/MapFactoryI.pm
  128. +0 −93 Bio/Factory/ResultFactoryI.pm
  129. +0 −167 Bio/Factory/SeqAnalysisParserFactory.pm
  130. +0 −113 Bio/Factory/SeqAnalysisParserFactoryI.pm
  131. +0 −115 Bio/Factory/TreeFactoryI.pm
  132. +0 −118 Bio/Graphics.pm
Sorry, we could not display the entire diff because too many files (810) changed.
View
59 AUTHORS
@@ -1,59 +0,0 @@
-# -*- text -*-
-
-PRIMARY AUTHORS AND MAJOR CONTRIBUTORS TO BIOPERL
-
-Releases co-ordinated and submitted by bioperl core devs.
-
-* Ewan Birney <birney at ebi.ac.uk>
-* Chris Dagdigian <dag at sonsorol.org>
-* Hilmar Lapp <hilmarl at yahoo.com>
-* Heikki Lehvaslaiho <heikki at ebi.ac.uk>
-* Jason Stajich <jason at bioperl.org>
-* Lincoln Stein <stein at cshl.org>
-
-Major Contributors
-(Feel free to add descriptions of which modules you are responsible
-for if you see fit)
-
-Kris Boulez <Kris.Boulez at pandora.be>
-David Block <dblock at gnf.org>
-Brad Chapman <chapmanb at arches.uga.edu>
-Steve Chervitz <sac at bioperl.org>
-Michele Clamp <michele at sanger.ac.uk>
-Tony Cox <avc at sanger.ac.uk>
-James Cuff <james at sanger.ac.uk>
-Andrew Dalke <dalke at acm.org>
-Allen Day <allenday at ucla.edu>
-James Diggans <JDiggans at genelogic.com>
-Arne Elofsson <arne at sbc.su.se>
-Mark Fiers <M.W.E.J.Fiers at plant.wag-ur.nl>
-Georg Fuellen <fullen at bioperl.org>
-James Gilbert <jgrg at sanger.ac.uk>
-Ed Green <ed at compbio.berkeley.edu>
-Roger Hall <roger at iosea.com>
-Ian Holmes <ihn at fruitfly.org>
-Joseph Insana <insana at ebi.ac.uk> - Bio::LiveSeq
-Nicolas Joly <njoly at pasteur.fr>
-Ian Korf <ikorf at sapiens.wustl.edu>
-Arek Kasprzyk <arek at ebi.ac.uk>
-Aaron Mackey <amackey at virginia.edu>
-Brad Marshall <bradmars at yahoo.com>
-Chad Matsalla <bioinformatics1 at dieselwurks.com>
-Chris Mungall <cjm at fruitfly.bdgp.berkeley.edu>
-Brian Osborne <b_i_osborne at hotmail.com>
-Matthew Pocock <mrp at sanger.ac.uk>
-Lorenz Pollack <lorenz at ist.org> -- BPlite porting
-Todd Richmond <todd at andrew2.stanford.edu>
-Peter Schattner <schattner at alum.mit.edu>
-Martin Senger <senger at ebi.ac.uk> -- Biblio
-Arne Stabenau <stabenau at ebi.ac.uk>
-Elia Stupka <elia at ebi.ac.uk>
-Gert Thijs <gert.thijs at esat.kuleuven.ac.be>
-Charles Tilford <tilfordc at bms.com>
-Paul-Christophe Varoutas
-Andrew G. Walsh <paeruginosa at hotmail.com>
-Kai Wang <tumorimmunology at yahoo.com>
-Mark Wilkinson <mwilkinson at gene.pbi.nrc.ca>
-Helge Weissig <helgew at sdsc.edu>
-Alex Zelensky <alex_zelensky at mac.com> - Bioperl-DB
-
View
69 BUGS
@@ -1,69 +0,0 @@
-# $Id: BUGS,v 1.3 2001-11-19 17:34:08 jason Exp $
-
-Known Bugs
-
-Bioperl 0.9.0
-=============
- * Bio::Tools::Blast continues to cause problems for some people. As
- it is not actively maintained there are a slew of reported bugs for
- it that have not been fixed.
-
- * Bio::Tools::Run::Alignment::TCoffee - t_coffee binary does not get
- all parameters it needs when aligning two DNA sequences
- (jitterbug #966).
-
- * Bio::Tools::Run::ClustalW and t/ClustalW will report errors for
- clustalw versions 1.8x due to a bug in clustalw.
-
- * Bio::DB::GenBank continues to have intermittent errors. Bio::DB::GDB
- is also unreliable at times and one can safely ignore errors from
- these during a make test.
- Bio::DB::GenBank is unable to download whole contig files as well
- as NCBI ref seqs like NT_* numbers unless the -format flag is
- passed in and specified as 'fasta' in the constructor.
- get_Stream_by_batch() also has intermittent errors which are being
- tracked down.
-
-Bioperl 0.7.2
-=============
-
- * NCBI has changed some of the cgi scripts for retrieving sequences
- online which has resulted in some of the DB methods not working
- consistently. We are addressing these in the 0.9.x and 1.0 series
- of releases. We recommend using the Bio::DB::EMBL object that is
- part of the later releases.
-
- Additionally RefSeq Contigs are not properly downloaded, please see
- the bioperl list archives for information about potential
- workarounds and ongoing development effort to address these.
-
-Bioperl 0.7.1
-=============
- * Bio::Tools::BPlite does not parse and set frame properly for
- tblastx reports (Jitterbug bug # 978).
-
- * Bio::Tools::BPlite interface needs to be updated to fix parsing
- more than one bl2seq report (Jitterbug bug #940), this has been
- fixed on the main code trunk and will be part of the next major
- bioperl release.
-
- * If File::Temp is not installed, tempdirs are not cleaned up
- properly. This is fixed on main code trunk with the introduction
- of rmtree method in Bio::Root::IO, however, it is best to install
- File::Temp when running 0.7 branch code.
-
- * Bio::Tools::Blast does not allow users to run blast, instead use
- Bio::Tools::Run::StandAloneBlast to run local blasts. To submit
- jobs to a remote blast server like NCBI a module
- Bio::Tools::Run::RemoteBlast has been written but is part of the
- main trunk code and must be obtained through CVS until the next
- major bioperl release.
-
-Bioperl 0.7
-===========
- * Bio::Tools::BPlite doc error: the
- synopsis lists the code as
- my $parser = new BPlite(\*FH);
- should be
- my $parser = new Bio::Tools::BPlite(\*FH);
-
View
823 Bio/Align/AlignI.pm
@@ -1,823 +0,0 @@
-# $Id$
-#
-# BioPerl module for Bio::Align::AlignI
-#
-# Cared for by Jason Stajich <jason@bioperl.org>
-#
-# Copyright Jason Stajich
-#
-# You may distribute this module under the same terms as perl itself
-
-# POD documentation - main docs before the code
-
-=head1 NAME
-
-Bio::Align::AlignI - An interface for describing sequence alignments.
-
-=head1 SYNOPSIS
-
- # get a Bio::Align::AlignI somehow - typically using Bio::AlignIO system
- # some descriptors
- print $aln->length, "\n";
- print $aln->no_residues, "\n";
- print $aln->is_flush, "\n";
- print $aln->no_sequences, "\n";
- print $aln->percentage_identity, "\n";
- print $aln->consensus_string(50), "\n";
-
- # find the position in the alignment for a sequence location
- $pos = $aln->column_from_residue_number('1433_LYCES', 14); # = 6;
-
- # extract sequences and check values for the alignment column $pos
- foreach $seq ($aln->each_seq) {
- $res = $seq->subseq($pos, $pos);
- $count{$res}++;
- }
- foreach $res (keys %count) {
- printf "Res: %s Count: %2d\n", $res, $count{$res};
- }
-
-=head1 DESCRIPTION
-
-This interface describes the basis for alignment objects.
-
-=head1 FEEDBACK
-
-=head2 Mailing Lists
-
-User feedback is an integral part of the evolution of this and other
-Bioperl modules. Send your comments and suggestions preferably to
-the Bioperl mailing list. Your participation is much appreciated.
-
- bioperl-l@bioperl.org - General discussion
- http://bioperl.org/MailList.shtml - About the mailing lists
-
-=head2 Reporting Bugs
-
-Report bugs to the Bioperl bug tracking system to help us keep track
-of the bugs and their resolution. Bug reports can be submitted via
-email or the web:
-
- bioperl-bugs@bioperl.org
- http://bioperl.org/bioperl-bugs/
-
-=head1 AUTHOR - Jason Stajich
-
-Email jason@bioperl.org
-
-=head1 CONTRIBUTORS
-
-Ewan Birney, birney@ebi.ac.uk
-Heikki Lehvaslaiho, heikki@ebi.ac.uk
-
-=head1 APPENDIX
-
-The rest of the documentation details each of the object methods.
-Internal methods are usually preceded with a _
-
-=cut
-
-
-# Let the code begin...
-
-
-package Bio::Align::AlignI;
-use vars qw(@ISA);
-use strict;
-
-use Bio::Root::RootI;
-
-@ISA = qw(Bio::Root::RootI);
-
-=head1 Modifier methods
-
-These methods modify the MSE by adding, removing or shuffling complete
-sequences.
-
-=head2 add_seq
-
- Title : add_seq
- Usage : $myalign->add_seq($newseq);
- Function : Adds another sequence to the alignment. *Does not* align
- it - just adds it to the hashes.
- Returns : nothing
- Argument : a Bio::LocatableSeq object
- order (optional)
-
-See L<Bio::LocatableSeq> for more information.
-
-=cut
-
-sub add_seq {
- my ($self) = @_;
- $self->throw_not_implemented();
-}
-
-=head2 remove_seq
-
- Title : remove_seq
- Usage : $aln->remove_seq($seq);
- Function : Removes a single sequence from an alignment
- Returns :
- Argument : a Bio::LocatableSeq object
-
-=cut
-
-sub remove_seq {
- my ($self) = @_;
- $self->throw_not_implemented();
-}
-
-=head2 purge
-
- Title : purge
- Usage : $aln->purge(0.7);
- Function:
-
- Removes sequences above whatever %id.
-
- This function will grind on large alignments. Beware!
- (perhaps not ideally implemented)
-
- Example :
- Returns : An array of the removed sequences
- Argument:
-
-
-=cut
-
-sub purge {
- my ($self) = @_;
- $self->throw_not_implemented();
-}
-
-=head2 sort_alphabetically
-
- Title : sort_alphabetically
- Usage : $ali->sort_alphabetically
- Function :
-
- Changes the order of the alignment to alphabetical on name
- followed by numerical by number.
-
- Returns :
- Argument :
-
-=cut
-
-sub sort_alphabetically {
- my ($self) = @_;
- $self->throw_not_implemented();
-}
-
-=head1 Sequence selection methods
-
-Methods returning one or more sequences objects.
-
-=head2 each_seq
-
- Title : each_seq
- Usage : foreach $seq ( $align->each_seq() )
- Function : Gets an array of Seq objects from the alignment
- Returns : an array
- Argument :
-
-=cut
-
-sub each_seq {
- my ($self) = @_;
- $self->throw_not_implemented();
-}
-
-=head2 each_alphabetically
-
- Title : each_alphabetically
- Usage : foreach $seq ( $ali->each_alphabetically() )
- Function :
-
- Returns an array of sequence object sorted alphabetically
- by name and then by start point.
- Does not change the order of the alignment
-
- Returns :
- Argument :
-
-=cut
-
-sub each_alphabetically {
- my($self) = @_;
- $self->throw_not_implemented();
-}
-
-=head2 each_seq_with_id
-
- Title : each_seq_with_id
- Usage : foreach $seq ( $align->each_seq_with_id() )
- Function :
-
- Gets an array of Seq objects from the
- alignment, the contents being those sequences
- with the given name (there may be more than one)
-
- Returns : an array
- Argument : a seq name
-
-=cut
-
-sub each_seq_with_id {
- my ($self) = @_;
- $self->throw_not_implemented();
-}
-
-=head2 get_seq_by_pos
-
- Title : get_seq_by_pos
- Usage : $seq = $aln->get_seq_by_pos(3) # third sequence from the alignment
- Function :
-
- Gets a sequence based on its position in the alignment.
- Numbering starts from 1. Sequence positions larger than
- no_sequences() will throw an error.
-
- Returns : a Bio::LocatableSeq object
- Argument : positive integer for the sequence position
-
-=cut
-
-sub get_seq_by_pos {
- my ($self) = @_;
- $self->throw_not_implemented();
-}
-
-=head1 Create new alignments
-
-The result of these methods are horizontal or vertical subsets of the
-current MSE.
-
-=head2 select
-
- Title : select
- Usage : $aln2 = $aln->select(1, 3) # three first sequences
- Function :
-
- Creates a new alignment from a continuous subset of
- sequences. Numbering starts from 1. Sequence positions
- larger than no_sequences() will throw an error.
-
- Returns : a Bio::SimpleAlign object
- Argument : positive integer for the first sequence
- positive integer for the last sequence to include (optional)
-
-=cut
-
-sub select {
- my ($self) = @_;
- $self->throw_not_implemented();
-}
-
-
-=head2 select_noncont
-
- Title : select_noncont
- Usage : $aln2 = $aln->select_noncont(1, 3) # first and 3rd sequences
- Function :
-
- Creates a new alignment from a subset of
- sequences. Numbering starts from 1. Sequence positions
- larger than no_sequences() will throw an error.
-
- Returns : a Bio::SimpleAlign object
- Args : array of integers for the sequences
-
-=cut
-
-sub select_noncont {
- my ($self) = @_;
- $self->throw_not_implemented();
-}
-
-=head2 slice
-
- Title : slice
- Usage : $aln2 = $aln->slice(20, 30)
- Function :
-
- Creates a slice from the alignment inclusive of start and
- end columns. Sequences with no residues in the slice are
- excluded from the new alignment and a warning is printed.
- Slice beyond the length of the sequence does not do
- padding.
-
- Returns : a Bio::SimpleAlign object
- Argument : positive integer for start column
- positive integer for end column
-
-=cut
-
-sub slice {
- my ($self) = @_;
- $self->throw_not_implemented();
-}
-
-=head1 Change sequences within the MSE
-
-These methods affect characters in all sequences without changing the
-alignment.
-
-
-=head2 map_chars
-
- Title : map_chars
- Usage : $ali->map_chars('\.','-')
- Function :
-
- Does a s/$arg1/$arg2/ on the sequences. Useful for gap
- characters
-
- Notice that the from (arg1) is interpreted as a regex,
- so be careful about quoting meta characters (eg
- $ali->map_chars('.','-') won't do what you want)
-
- Returns :
- Argument : 'from' regexp
- 'to' string
-
-=cut
-
-sub map_chars {
- my ($self) = @_;
- $self->throw_not_implemented();
-}
-
-=head2 uppercase
-
- Title : uppercase()
- Usage : $ali->uppercase()
- Function : Sets all the sequences to uppercase
- Returns :
- Argument :
-
-=cut
-
-sub uppercase {
- my ($self) = @_;
- $self->throw_not_implemented();
-}
-
-=head2 match_line
-
- Title : match_line()
- Usage : $align->match_line()
- Function : Generates a match line - much like consensus string
- except that a line indicating the '*' for a match.
- Argument : (optional) Match line characters ('*' by default)
- (optional) Strong match char (':' by default)
- (optional) Weak match char ('.' by default)
-
-=cut
-
-sub match_line {
- my ($self) = @_;
- $self->throw_not_implemented();
-}
-
-=head2 match
-
- Title : match()
- Usage : $ali->match()
- Function :
-
- Goes through all columns and changes residues that are
- identical to residue in first sequence to match '.'
- character. Sets L<match_char>.
-
- USE WITH CARE: Most MSE formats do not support match
- characters in sequences, so this is mostly for output
- only. NEXUS format (L<Bio::AlignIO::nexus>) can handle
- it.
-
- Returns : 1
- Argument : a match character, optional, defaults to '.'
-
-=cut
-
-sub match {
- my ($self) = @_;
- $self->throw_not_implemented();
-}
-
-=head2 unmatch
-
- Title : unmatch()
- Usage : $ali->unmatch()
- Function :
-
- Undoes the effect of method L<match>. Unsets L<match_char>.
-
- Returns : 1
- Argument : a match character, optional, defaults to '.'
-
-=cut
-
-sub unmatch {
- my ($self) = @_;
- $self->throw_not_implemented();
-}
-
-
-=head1 MSE attributes
-
-Methods for setting and reading the MSE attributes.
-
-Note that the methods defining character semantics depend on the user
-to set them sensibly. They are needed only by certain input/output
-methods. Unset them by setting to an empty string ('').
-
-=head2 id
-
- Title : id
- Usage : $myalign->id("Ig")
- Function : Gets/sets the id field of the alignment
- Returns : An id string
- Argument : An id string (optional)
-
-=cut
-
-sub id {
- my ($self) = @_;
- $self->throw_not_implemented();
-}
-
-=head2 missing_char
-
- Title : missing_char
- Usage : $myalign->missing_char("?")
- Function : Gets/sets the missing_char attribute of the alignment
- It is generally recommended to set it to 'n' or 'N'
- for nucleotides and to 'X' for protein.
- Returns : A missing_char string,
- Argument : A missing_char string (optional)
-
-=cut
-
-sub missing_char {
- my ($self) = @_;
- $self->throw_not_implemented();
-}
-
-=head2 match_char
-
- Title : match_char
- Usage : $myalign->match_char('.')
- Function : Gets/sets the match_char attribute of the alignment
- Returns : A match_char string,
- Argument : A match_char string (optional)
-
-=cut
-
-sub match_char {
- my ($self) = @_;
- $self->throw_not_implemented();
-}
-
-=head2 gap_char
-
- Title : gap_char
- Usage : $myalign->gap_char('-')
- Function : Gets/sets the gap_char attribute of the alignment
- Returns : A gap_char string, defaults to '-'
- Argument : A gap_char string (optional)
-
-=cut
-
-sub gap_char {
- my ($self) = @_;
- $self->throw_not_implemented();
-}
-
-=head2 symbol_chars
-
- Title : symbol_chars
- Usage : my @symbolchars = $aln->symbol_chars;
- Function: Returns all the seen symbols (other than gaps)
- Returns : array of characters that are the seen symbols
- Argument: boolean to include the gap/missing/match characters
-
-=cut
-
-sub symbol_chars{
- my ($self) = @_;
- $self->throw_not_implemented();
-}
-
-=head1 Alignment descriptors
-
-These read only methods describe the MSE in various ways.
-
-
-=head2 consensus_string
-
- Title : consensus_string
- Usage : $str = $ali->consensus_string($threshold_percent)
- Function : Makes a strict consensus
- Returns :
- Argument : Optional threshold ranging from 0 to 100.
- The consensus residue has to appear at least threshold %
- of the sequences at a given location, otherwise a '?'
- character will be placed at that location.
- (Default value = 0%)
-
-=cut
-
-sub consensus_string {
- my ($self) = @_;
- $self->throw_not_implemented();
-}
-
-=head2 consensus_iupac
-
- Title : consensus_iupac
- Usage : $str = $ali->consensus_iupac()
- Function :
-
- Makes a consensus using IUPAC ambiguity codes from DNA
- and RNA. The output is in upper case except when gaps in
- a column force output to be in lower case.
-
- Note that if your alignment sequences contain a lot of
- IUPAC ambiguity codes you often have to manually set
- alphabet. L<Bio::PrimarySeq::_guess_type> thinks they
- indicate a protein sequence.
-
- Returns : consensus string
- Argument : none
- Throws : on protein sequences
-
-=cut
-
-sub consensus_iupac {
- my ($self) = @_;
- $self->throw_not_implemented();
-}
-
-=head2 is_flush
-
- Title : is_flush
- Usage : if( $ali->is_flush() )
- :
- :
- Function : Tells you whether the alignment
- : is flush, ie all of the same length
- :
- :
- Returns : 1 or 0
- Argument :
-
-=cut
-
-sub is_flush {
- my ($self) = @_;
- $self->throw_not_implemented();
-}
-
-=head2 length
-
- Title : length()
- Usage : $len = $ali->length()
- Function : Returns the maximum length of the alignment.
- To be sure the alignment is a block, use is_flush
- Returns :
- Argument :
-
-=cut
-
-sub length {
- my ($self) = @_;
- $self->throw_not_implemented();
-}
-
-=head2 maxdisplayname_length
-
- Title : maxdisplayname_length
- Usage : $ali->maxdisplayname_length()
- Function :
-
- Gets the maximum length of the displayname in the
- alignment. Used in writing out various MSE formats.
-
- Returns : integer
- Argument :
-
-=cut
-
-sub maxname_length {
- my ($self) = @_;
- $self->throw_not_implemented();
-}
-
-=head2 no_residues
-
- Title : no_residues
- Usage : $no = $ali->no_residues
- Function : number of residues in total in the alignment
- Returns : integer
- Argument :
-
-=cut
-
-sub no_residues {
- my ($self) = @_;
- $self->throw_not_implemented();
-}
-
-=head2 no_sequences
-
- Title : no_sequences
- Usage : $depth = $ali->no_sequences
- Function : number of sequences in the sequence alignment
- Returns : integer
- Argument : None
-
-=cut
-
-sub no_sequences {
- my ($self) = @_;
- $self->throw_not_implemented();
-}
-
-=head2 percentage_identity
-
- Title : percentage_identity
- Usage : $id = $align->percentage_identity
- Function: The function calculates the percentage identity of the alignment
- Returns : The percentage identity of the alignment (as defined by the
- implementation)
- Argument: None
-
-=cut
-
-sub percentage_identity{
- my ($self) = @_;
- $self->throw_not_implemeneted();
-}
-
-=head2 overall_percentage_identity
-
- Title : overall_percentage_identity
- Usage : $id = $align->overall_percentage_identity
- Function: The function calculates the percentage identity of
- the conserved columns
- Returns : The percentage identity of the conserved columns
- Args : None
-
-=cut
-
-sub overall_percentage_identity{
- my ($self) = @_;
- $self->throw_not_implemented();
-}
-
-
-=head2 average_percentage_identity
-
- Title : average_percentage_identity
- Usage : $id = $align->average_percentage_identity
- Function: The function uses a fast method to calculate the average
- percentage identity of the alignment
- Returns : The average percentage identity of the alignment
- Args : None
-
-=cut
-
-sub average_percentage_identity{
- my ($self) = @_;
- $self->throw_not_implemented();
-}
-
-=head1 Alignment positions
-
-Methods to map a sequence position into an alignment column and back.
-column_from_residue_number() does the former. The latter is really a
-property of the sequence object and can be done using
-L<Bio::LocatableSeq::location_from_column>:
-
- # select somehow a sequence from the alignment, e.g.
- my $seq = $aln->get_seq_by_pos(1);
- #$loc is undef or Bio::LocationI object
- my $loc = $seq->location_from_column(5);
-
-
-=head2 column_from_residue_number
-
- Title : column_from_residue_number
- Usage : $col = $ali->column_from_residue_number( $seqname, $resnumber)
- Function:
-
- This function gives the position in the alignment
- (i.e. column number) of the given residue number in the
- sequence with the given name. For example, for the
- alignment
-
- Seq1/91-97 AC..DEF.GH
- Seq2/24-30 ACGG.RTY..
- Seq3/43-51 AC.DDEFGHI
-
- column_from_residue_number( "Seq1", 94 ) returns 5.
- column_from_residue_number( "Seq2", 25 ) returns 2.
- column_from_residue_number( "Seq3", 50 ) returns 9.
-
- An exception is thrown if the residue number would lie
- outside the length of the alignment
- (e.g. column_from_residue_number( "Seq2", 22 )).
-
- Note: If the parent sequence is represented by more than
- one alignment sequence and the residue number is present in
- them, this method finds only the first one.
-
- Returns : A column number for the position in the alignment of the
- given residue in the given sequence (1 = first column)
- Args : A sequence id/name (not a name/start-end)
- A residue number in the whole sequence (not just that
- segment of it in the alignment)
-
-=cut
-
-sub column_from_residue_number {
- my ($self) = @_;
- $self->throw_not_implemented();
-}
-
-=head1 Sequence names
-
-Methods to manipulate the display name. The default name based on the
-sequence id and subsequence positions can be overridden in various
-ways.
-
-=head2 displayname
-
- Title : displayname
- Usage : $myalign->displayname("Ig", "IgA")
- Function : Gets/sets the display name of a sequence in the alignment
- :
- Returns : A display name string
- Argument : name of the sequence
- displayname of the sequence (optional)
-
-=cut
-
-sub displayname {
- my ($self) = @_;
- $self->throw_not_implemented();
-}
-
-=head2 set_displayname_count
-
- Title : set_displayname_count
- Usage : $ali->set_displayname_count
- Function :
-
- Sets the names to be name_# where # is the number of
- times this name has been used.
-
- Returns : None
- Argument : None
-
-=cut
-
-sub set_displayname_count {
- my ($self) = @_;
- $self->throw_not_implemented();
-}
-
-=head2 set_displayname_flat
-
- Title : set_displayname_flat
- Usage : $ali->set_displayname_flat()
- Function : Makes all the sequences be displayed as just their name,
- not name/start-end
- Returns : 1
- Argument : None
-
-=cut
-
-sub set_displayname_flat {
- my ($self) = @_;
- $self->throw_not_implemented();
-}
-
-=head2 set_displayname_normal
-
- Title : set_displayname_normal
- Usage : $ali->set_displayname_normal()
- Function : Makes all the sequences be displayed as name/start-end
- Returns : None
- Argument : None
-
-=cut
-
-sub set_displayname_normal {
- my ($self) = @_;
- $self->throw_not_implemented();
-}
-
-1;
View
110 Bio/Align/StatisticsI.pm
@@ -1,110 +0,0 @@
-# $Id$
-#
-# BioPerl module for Bio::Align::StatisticsI
-#
-# Cared for by Jason Stajich <jason@bioperl.org>
-#
-# Copyright Jason Stajich
-#
-# You may distribute this module under the same terms as perl itself
-
-# POD documentation - main docs before the code
-
-=head1 NAME
-
-Bio::Align::StatisticsI - Calculate some statistics for an alignment
-
-=head1 SYNOPSIS
-
-Give standard usage here
-
-=head1 DESCRIPTION
-
-Describe the interface here
-
-=head1 FEEDBACK
-
-=head2 Mailing Lists
-
-User feedback is an integral part of the evolution of this and other
-Bioperl modules. Send your comments and suggestions preferably to
-the Bioperl mailing list. Your participation is much appreciated.
-
- bioperl-l@bioperl.org - General discussion
- http://bioperl.org/MailList.shtml - About the mailing lists
-
-=head2 Reporting Bugs
-
-Report bugs to the Bioperl bug tracking system to help us keep track
-of the bugs and their resolution. Bug reports can be submitted via
-email or the web:
-
- bioperl-bugs@bioperl.org
- http://bioperl.org/bioperl-bugs/
-
-=head1 AUTHOR - Jason Stajich
-
-Email jason@bioperl.org
-
-Describe contact details here
-
-=head1 CONTRIBUTORS
-
-Additional contributors names and emails here
-
-=head1 APPENDIX
-
-The rest of the documentation details each of the object methods.
-Internal methods are usually preceded with a _
-
-=cut
-
-
-# Let the code begin...
-
-
-package Bio::Align::StatisticsI;
-use strict;
-use vars qw(@ISA);
-
-use Bio::Root::RootI;
-
-@ISA = qw(Bio::Root::RootI);
-
-
-
-=head2 distance
-
- Title : distance
- Usage : my $distance_mat = $stats->distance(-align => $aln,
- -method => $method);
- Function: Calculates a distance matrix for all pairwise distances of
- sequences in an alignment.
- Returns : Array ref
- Args : -align => Bio::Align::AlignI object
- -method => String specifying specific distance method
- (implementing class may assume a default)
-=cut
-
-sub distance{
- my ($self,@args) = @_;
- $self->throw_not_implemented();
-}
-
-=head2 available_distance_methods
-
- Title : available_distance_methods
- Usage : my @methods = $stats->available_distance_methods();
- Function: Enumerates the possible distance methods
- Returns : Array of strings
- Args : none
-
-
-=cut
-
-sub available_distance_methods{
- my ($self,@args) = @_;
- $self->throw_not_implemented();
-}
-
-1;
View
472 Bio/AlignIO.pm
@@ -1,472 +0,0 @@
-# $Id$
-#
-# BioPerl module for Bio::AlignIO
-#
-# based on the Bio::SeqIO module
-# by Ewan Birney <birney@sanger.ac.uk>
-# and Lincoln Stein <lstein@cshl.org>
-#
-# Copyright Peter Schattner
-#
-# You may distribute this module under the same terms as perl itself
-#
-# _history
-# October 18, 1999 SeqIO largely rewritten by Lincoln Stein
-# September, 2000 AlignIO written by Peter Schattner
-
-# POD documentation - main docs before the code
-
-=head1 NAME
-
-Bio::AlignIO - Handler for AlignIO Formats
-
-=head1 SYNOPSIS
-
- use Bio::AlignIO;
-
- $inputfilename = "testaln.fasta";
- $in = Bio::AlignIO->new(-file => $inputfilename , '-format' => 'fasta');
- $out = Bio::AlignIO->new(-file => ">out.aln.pfam" , '-format' => 'pfam');
- # note: we quote -format to keep older perl's from complaining.
-
- while ( my $aln = $in->next_aln() ) {
- $out->write_aln($aln);
- }
-
-or
-
- use Bio::AlignIO;
-
- $inputfilename = "testaln.fasta";
- $in = Bio::AlignIO->newFh(-file => $inputfilename , '-format' => 'fasta');
- $out = Bio::AlignIO->newFh('-format' => 'pfam');
-
- # World's shortest Fasta<->pfam format converter:
- print $out $_ while <$in>;
-
-=head1 DESCRIPTION
-
-Bio::AlignIO is a handler module for the formats in the AlignIO set
-(eg, Bio::AlignIO::fasta). It is the officially sanctioned way of
-getting at the alignment objects, which most people should use. The
-resulting alignment is a Bio::Align::AlignI compliant object. See
-L<Bio::Align::AlignI> for more information.
-
-The idea is that you request a stream object for a particular format.
-All the stream objects have a notion of an internal file that is read
-from or written to. A particular AlignIO object instance is configured
-for either input or output. A specific example of a stream object is
-the Bio::AlignIO::fasta object.
-
-Each stream object has functions
-
- $stream->next_aln();
-
-and
-
- $stream->write_aln($aln);
-
-also
-
- $stream->type() # returns 'INPUT' or 'OUTPUT'
-
-As an added bonus, you can recover a filehandle that is tied to the
-AlignIO object, allowing you to use the standard E<lt>E<gt> and print
-operations to read and write sequence objects:
-
- use Bio::AlignIO;
-
- $stream = Bio::AlignIO->newFh(-format => 'Fasta'); # read from standard input
-
- while ( $aln = <$stream> ) {
- # do something with $aln
- }
-
-and
-
- print $stream $aln; # when stream is in output mode
-
-This makes the simplest ever reformatter
-
- #!/usr/local/bin/perl
-
- $format1 = shift;
- $format2 = shift || die "Usage: reformat format1 format2 < input > output";
-
- use Bio::AlignIO;
-
- $in = Bio::AlignIO->newFh(-format => $format1 );
- $out = Bio::AlignIO->newFh(-format => $format2 );
- #note: you might want to quote -format to keep older perl's from complaining.
-
- print $out $_ while <$in>;
-
-AlignIO.pm is patterned on the module SeqIO.pm and shares most of the
-SeqIO.pm features. One significant difference currently is that
-AlignIO.pm usually handles IO for only a single alignment at a time
-(SeqIO.pm handles IO for multiple sequences in a single stream.) The
-principal reason for this is that whereas simultaneously handling
-multiple sequences is a common requirement, simultaneous handling of
-multiple alignments is not. The only current exception is format
-"bl2seq" which parses results of the Blast bl2seq program and which
-may produce several alignment pairs. This set of alignment pairs can
-be read using multiple calls to next_aln.
-
-Capability for IO for more than one multiple alignment - other than
-for bl2seq format -(which may be of use for certain applications such
-as IO for Pfam libraries) may be included in the future. For this
-reason we keep the name "next_aln()" for the alignment input routine,
-even though in most cases only one alignment is read (or written) at a
-time and the name "read_aln()" might be more appropriate.
-
-=head1 CONSTRUCTORS
-
-=head2 Bio::AlignIO-E<gt>new()
-
- $seqIO = Bio::AlignIO->new(-file => 'filename', -format=>$format);
- $seqIO = Bio::AlignIO->new(-fh => \*FILEHANDLE, -format=>$format);
- $seqIO = Bio::AlignIO->new(-format => $format);
-
-The new() class method constructs a new Bio::AlignIO object. The
-returned object can be used to retrieve or print BioAlign
-objects. new() accepts the following parameters:
-
-=over 4
-
-=item -file
-
-A file path to be opened for reading or writing. The usual Perl
-conventions apply:
-
- 'file' # open file for reading
- '>file' # open file for writing
- '>>file' # open file for appending
- '+<file' # open file read/write
- 'command |' # open a pipe from the command
- '| command' # open a pipe to the command
-
-=item -fh
-
-You may provide new() with a previously-opened filehandle. For
-example, to read from STDIN:
-
- $seqIO = Bio::AlignIO->new(-fh => \*STDIN);
-
-Note that you must pass filehandles as references to globs.
-
-If neither a filehandle nor a filename is specified, then the module
-will read from the @ARGV array or STDIN, using the familiar E<lt>E<gt>
-semantics.
-
-=item -format
-
-Specify the format of the file. Supported formats include:
-
- fasta FASTA format
- pfam pfam format
- selex selex (hmmer) format
- stockholm stockholm format
- prodom prodom (protein domain) format
- clustalw clustalw (.aln) format
- msf msf (GCG) format
- mase mase (seaview) format
- bl2seq Bl2seq Blast output
- nexus Swofford et al NEXUS format
- pfam Pfam sequence alignment format
- phylip Felsenstein's PHYLIP format
-
-Currently only those formats which were implemented in SimpleAlign.pm
-have been incorporated in AlignIO.pm. Specifically, mase, stockholm
-and prodom have only been implemented for input.
-
-If no format is specified and a filename is given, then the module
-will attempt to deduce it from the filename. If this is unsuccessful,
-Fasta format is assumed.
-
-The format name is case insensitive. 'FASTA', 'Fasta' and 'fasta' are
-all supported.
-
-=back
-
-=head2 Bio::AlignIO-E<gt>newFh()
-
- $fh = Bio::AlignIO->newFh(-fh => \*FILEHANDLE, -format=>$format);
- $fh = Bio::AlignIO->newFh(-format => $format);
- # etc.
-
-This constructor behaves like new(), but returns a tied filehandle
-rather than a Bio::AlignIO object. You can read sequences from this
-object using the familiar E<lt>E<gt> operator, and write to it using print().
-The usual array and $_ semantics work. For example, you can read all
-sequence objects into an array like this:
-
- @sequences = <$fh>;
-
-Other operations, such as read(), sysread(), write(), close(), and printf()
-are not supported.
-
-=head1 OBJECT METHODS
-
-See below for more detailed summaries. The main methods are:
-
-=head2 $alignment = $AlignIO-E<gt>next_aln()
-
-Fetch an alignment from a formatted file.
-
-=head2 $AlignIO-E<gt>write_aln($aln)
-
-Write the specified alignment to a file.
-
-=head2 TIEHANDLE(), READLINE(), PRINT()
-
-These provide the tie interface. See L<perltie> for more details.
-
-=head1 FEEDBACK
-
-=head2 Mailing Lists
-
-User feedback is an integral part of the evolution of this and other
-Bioperl modules. Send your comments and suggestions preferably to one
-of the Bioperl mailing lists. Your participation is much appreciated.
-
- bioperl-l@bioperl.org - General discussion
- http://bio.perl.org/MailList.html - About the mailing lists
-
-=head2 Reporting Bugs
-
-Report bugs to the Bioperl bug tracking system to help us keep track
-of the bugs and their resolution. Bug reports can be submitted via
-email or the web:
-
- bioperl-bugs@bio.perl.org
- http://bio.perl.org/bioperl-bugs/
-
-=head1 AUTHOR - Peter Schattner
-
-Email: schattner@alum.mit.edu
-
-=head1 CONTRIBUTORS
-
-Jason Stajich, jason@bioperl.org
-
-=head1 APPENDIX
-
-The rest of the documentation details each of the object
-methods. Internal methods are usually preceded with a _
-
-=cut
-
-# 'Let the code begin...
-
-package Bio::AlignIO;
-
-use strict;
-use vars qw(@ISA);
-
-use Bio::Root::Root;
-use Bio::Seq;
-use Bio::LocatableSeq;
-use Bio::SimpleAlign;
-use Bio::Root::IO;
-@ISA = qw(Bio::Root::Root Bio::Root::IO);
-
-=head2 new
-
- Title : new
- Usage : $stream = Bio::AlignIO->new(-file => $filename, -format => 'Format')
- Function: Returns a new seqstream
- Returns : A Bio::AlignIO::Handler initialised with the appropriate format
- Args : -file => $filename
- -format => format
- -fh => filehandle to attach to
-
-=cut
-
-# Constructor / factory.
-#
-# Called on a format subclass (any class name matching
-# Bio::AlignIO::<something>) it builds the object through the parent
-# constructor and runs _initialize() on it.  Called on Bio::AlignIO
-# itself it works out the format name, loads the matching
-# Bio::AlignIO::<format> module and re-dispatches to that class.
-#
-# Args    : -file, -format, -fh (key case is ignored when looking up
-#           -format and -file)
-# Returns : the new stream object, or undef if the format module could
-#           not be loaded
-sub new {
-    my ($caller, @args) = @_;
-    my $class = ref($caller) || $caller;
-
-    # or do we want to call SUPER on an object if $caller is an
-    # object?
-    if( $class =~ /Bio::AlignIO::(\S+)/ ) {
-        # Already a concrete format class: construct and initialise.
-        my ($self) = $class->SUPER::new(@args);
-        $self->_initialize(@args);
-        return $self;
-    }
-
-    # Generic entry point: add lower-cased copies of every key so that
-    # the -format / -file lookups below ignore the caller's capitalization.
-    my %param   = @args;
-    my @lc_keys = map { lc $_ } keys %param;
-    @param{ @lc_keys } = values %param;
-
-    # Explicit format first, then a guess from the file name (or the
-    # first command line argument), then the fasta default.
-    my $format = $param{'-format'};
-    $format ||= $class->_guess_format( $param{'-file'} || $ARGV[0] );
-    $format ||= 'fasta';
-    $format = lc $format;    # normalize capitalization to lower case
-
-    unless( _load_format_module($format) ) {
-        return undef;
-    }
-    return "Bio::AlignIO::$format"->new(@args);
-}
-
-
-=head2 newFh
-
- Title : newFh
- Usage : $fh = Bio::AlignIO->newFh(-file=>$filename,-format=>'Format')
- Function: does a new() followed by an fh()
- Example : $fh = Bio::AlignIO->newFh(-file=>$filename,-format=>'Format')
- $sequence = <$fh>; # read a sequence object
- print $fh $sequence; # write a sequence object
- Returns : filehandle tied to the Bio::AlignIO::Fh class
- Args :
-
-=cut
-
-# Convenience constructor: builds a stream with new() and returns the
-# tied filehandle produced by its fh() method.  Returns nothing when
-# new() does not yield an object.
-sub newFh {
-    my ($class, @ctor_args) = @_;
-    my $stream = $class->new(@ctor_args);
-    return unless $stream;
-    return $stream->fh;
-}
-
-=head2 fh
-
- Title : fh
- Usage : $obj->fh
- Function:
- Example : $fh = $obj->fh; # make a tied filehandle
- $sequence = <$fh>; # read a sequence object
- print $fh $sequence; # write a sequence object
- Returns : filehandle tied to the Bio::AlignIO::Fh class
- Args :
-
-=cut
-
-
-# Creates a fresh anonymous glob and ties it to this stream's class,
-# passing the stream object to TIEHANDLE.  Reading from or printing to
-# the returned handle is then routed through READLINE / PRINT.
-#
-# Returns : glob reference tied to the class of $self
-sub fh {
-    my $self = shift;
-    my $class = ref($self) || $self;
-
-    # This file never loads Symbol itself, so make sure it is available
-    # here instead of relying on some other module having pulled it in.
-    # The explicit parentheses keep the call valid under "strict subs"
-    # even if Symbol was not loaded at compile time.
-    require Symbol;
-    my $s = Symbol::gensym();
-    tie $$s,$class,$self;
-    return $s;
-}
-
-# _initialize is where the heavy stuff will happen when new is called
-
-# Object set-up hook invoked by new(): forwards all constructor
-# arguments to _initialize_io() and reports success with a true value.
-sub _initialize {
-    my $self = shift;
-    $self->_initialize_io(@_);
-    return 1;
-}
-
-=head2 _load_format_module
-
- Title : _load_format_module
- Usage : *INTERNAL AlignIO stuff*
- Function: Loads up (like use) a module at run time on demand
- Example :
- Returns :
- Args :
-
-=cut
-
-# Loads Bio/AlignIO/<format>.pm at run time.
-#
-# Args    : format name (plain function call, not a method)
-# Returns : 1 if the module is loaded, nothing (after printing a
-#           diagnostic to STDERR) otherwise
-sub _load_format_module {
-    my ($format) = @_;
-
-    # The name is spliced into a path handed to require(), so accept
-    # only identifier-like names; anything else (for instance a value
-    # containing "../") must never reach the file system lookup.
-    unless( defined $format && $format =~ /\A\w+\z/ ) {
-        my $shown = defined $format ? $format : '';
-        print STDERR "Bio::AlignIO: '$shown' is not a valid format name\n";
-        return;
-    }
-
-    my $load = "Bio/AlignIO/$format.pm";
-
-    # require() already consults %INC, so a module that is loaded is
-    # not compiled again; the eval's own value tells us whether it
-    # succeeded.
-    my $loaded = eval {
-        require $load;
-        1;
-    };
-    unless( $loaded ) {
-        print STDERR <<END;
-$load: $format cannot be found
-Exception $@
-For more information about the AlignIO system please see the AlignIO docs.
-This includes ways of checking for formats at compile time, not run time
-END
-        return;
-    }
-    return 1;
-}
-
-=head2 next_aln
-
- Title : next_aln
- Usage : $aln = $stream->next_aln
- Function: reads the next $aln object from the stream
- Returns : a Bio::Align::AlignI compliant object
- Args :
-
-=cut
-
-# Placeholder on the generic class: reading is only possible through a
-# format-specific subclass, so this version always throws.
-sub next_aln {
-    my $self = shift;
-    return $self->throw("Sorry, you cannot read from a generic Bio::AlignIO object.");
-}
-
-=head2 write_aln
-
- Title : write_aln
- Usage : $stream->write_aln($aln)
- Function: writes the $aln object into the stream
- Returns : 1 for success and 0 for error
- Args : Bio::Align::AlignI object
-
-=cut
-
-# Placeholder on the generic class: writing is only possible through a
-# format-specific subclass, so this version always throws.
-sub write_aln {
-    my $self = shift;
-    return $self->throw("Sorry, you cannot write to a generic Bio::AlignIO object.");
-}
-
-=head2 _guess_format
-
- Title : _guess_format
- Usage : $obj->_guess_format($filename)
- Function:
- Example :
- Returns : guessed format of filename (lower case)
- Args :
-
-=cut
-
-# Guesses the alignment format from a file name extension.
-#
-# Args    : file name (may be undefined or empty)
-# Returns : lower-case format name, or nothing when no file name was
-#           given or the extension is not recognised
-#
-# The name is kept in a lexical variable: assigning it to the global $_
-# would overwrite the caller's $_ (and, inside a foreach/map, the list
-# element it is aliased to).
-sub _guess_format {
-    my ($class, $filename) = @_;
-    return unless $filename;
-    return 'fasta'  if $filename =~ /\.(fasta|fast|seq|fa|fsa|nt|aa)$/i;
-    return 'msf'    if $filename =~ /\.(msf|pileup)$/i;
-    return 'pfam'   if $filename =~ /\.(pfam|pfm)$/i;
-    return 'selex'  if $filename =~ /\.(selex|slx|selx|slex|sx)$/i;
-    return 'phylip' if $filename =~ /\.(phylip|phlp|phyl|phy|ph)$/i;
-    return 'nexus'  if $filename =~ /\.(nexus|nex)$/i;
-    return 'mega'   if $filename =~ /\.(meg|mega)$/i;
-    return;    # unknown extension: let the caller pick a default
-}
-
-# Destructor: calls close() on the object that is going away.
-sub DESTROY {
-    my ($stream) = @_;
-    return $stream->close();
-}
-
-# tie() hook: wraps the stream object passed to tie() in a small hash,
-# stored under the 'alignio' key, and blesses it into the given class.
-sub TIEHANDLE {
-    my ($class, $alignio) = @_;
-    my %wrapper = ( 'alignio' => $alignio );
-    return bless \%wrapper, $class;
-}
-
-# tie() hook behind <$fh>.  In scalar context it returns the next
-# alignment from the wrapped stream; in list context it keeps calling
-# next_aln() until a false value comes back and returns everything
-# collected so far.
-sub READLINE {
-    my ($tied) = @_;
-    unless( wantarray ) {
-        return $tied->{'alignio'}->next_aln();
-    }
-    my @alignments;
-    while( my $aln = $tied->{'alignio'}->next_aln() ) {
-        push @alignments, $aln;
-    }
-    return @alignments;
-}
-
-# tie() hook behind "print $fh ...": hands every printed item to the
-# wrapped stream's write_aln() and returns its result.
-sub PRINT {
-    my ($tied, @alignments) = @_;
-    return $tied->{'alignio'}->write_aln(@alignments);
-}
-
-1;
View
176 Bio/AlignIO/bl2seq.pm
@@ -1,176 +0,0 @@
-# $Id$
-#
-# BioPerl module for Bio::AlignIO::bl2seq
-
-# based on the Bio::SeqIO modules
-# by Ewan Birney <birney@sanger.ac.uk>
-# and Lincoln Stein <lstein@cshl.org>
-#
-# the Bio::Tools::BPlite modules by
-# Ian Korf (ikorf@sapiens.wustl.edu, http://sapiens.wustl.edu/~ikorf),
-# Lorenz Pollak (lorenz@ist.org, bioperl port)
-#
-# and the SimpleAlign.pm module of Ewan Birney
-#
-# Copyright Peter Schattner
-#
-# You may distribute this module under the same terms as perl itself
-# _history
-# September 5, 2000
-# POD documentation - main docs before the code
-
-=head1 NAME
-
-Bio::AlignIO::bl2seq - bl2seq sequence input/output stream
-
-=head1 SYNOPSIS
-
-Do not use this module directly. Use it via the Bio::AlignIO class, as in:
-
- use Bio::AlignIO;
-
- $in = Bio::AlignIO->new(-file => "inputfilename" , '-format' => 'bl2seq');
- $aln = $in->next_aln();
-
-
-=head1 DESCRIPTION
-
-This object can create Bio::SimpleAlign sequence alignment objects (of
-2 sequences) from bl2seq BLAST reports.
-
-A nice feature of this module is that- in combination with
-StandAloneBlast.pm or remote blasting - it can be used to align 2
-sequences and make a SimpleAlign object from them which can then be
-manipulated using any SimpleAlign.pm methods, eg:
-
- #Get 2 sequences
- $str = Bio::SeqIO->new(-file=>'t/amino.fa' , '-format' => 'Fasta', );
- my $seq3 = $str->next_seq();
- my $seq4 = $str->next_seq();
-
- # Run bl2seq on them
- $factory = Bio::Tools::StandAloneBlast->new('program' => 'blastp',
- 'outfile' => 'bl2seq.out');
- my $bl2seq_report = $factory->bl2seq($seq3, $seq4);
-
- # Use AlignIO.pm to create a SimpleAlign object from the bl2seq report
- $str = Bio::AlignIO->new(-file=> 'bl2seq.out','-format' => 'bl2seq');
- $aln = $str->next_aln();
-
-=head1 FEEDBACK
-
-=head2 Mailing Lists
-
-User feedback is an integral part of the evolution of this and other
-Bioperl modules. Send your comments and suggestions preferably to one
-of the Bioperl mailing lists. Your participation is much appreciated.
-
- bioperl-l@bioperl.org - General discussion
- http://bio.perl.org/MailList.html - About the mailing lists
-
-=head2 Reporting Bugs
-
-Report bugs to the Bioperl bug tracking system to help us keep track
-of the bugs and their resolution. Bug reports can be submitted via
-email or the web:
-
- bioperl-bugs@bio.perl.org
- http://bio.perl.org/bioperl-bugs/
-
-=head1 AUTHOR - Peter Schattner
-
-Email: schattner@alum.mit.edu
-
-
-=head1 APPENDIX
-
-The rest of the documentation details each of the object
-methods. Internal methods are usually preceded with a _
-
-=cut
-
-# Let the code begin...
-
-package Bio::AlignIO::bl2seq;
-use vars qw(@ISA);
-use strict;
-# Object preamble - inherits from Bio::AlignIO
-
-use Bio::AlignIO;
-use Bio::Tools::BPbl2seq;
-
-@ISA = qw(Bio::AlignIO);
-
-=head2 next_aln
-
- Title : next_aln
- Usage : $aln = $stream->next_aln()
- Function: returns the next alignment in the stream.
- Returns : SimpleAlign object - returns 0 on end of file
- or on error
- Args : NONE
-
-=cut
-
-# Builds a two-sequence alignment (query and subject) from the next
-# HSP of a bl2seq report.
-#
-# The Bio::Tools::BPbl2seq parser is created on first use and cached in
-# $self->{'bl2seqobj'} so that repeated calls walk through the report.
-#
-# Returns : Bio::SimpleAlign object, or 0 when no further HSP is
-#           available or a required field is missing
-sub next_aln {
-    my $self = shift;
-
-    $self->{'bl2seqobj'} ||= Bio::Tools::BPbl2seq->new(-fh => $self->_fh);
-    my $bl2seqobj = $self->{'bl2seqobj'};
-
-    # Guard against the parser handing back nothing (presumably what
-    # happens once the report is exhausted): calling methods on a
-    # non-object would die instead of giving the documented 0.
-    my $hsp = $bl2seqobj->next_feature;
-    return 0 unless ref $hsp;
-
-    my $aln = Bio::SimpleAlign->new(-source => 'bl2seq');
-
-    # Query half of the pair.
-    my $seqchar = $hsp->querySeq;
-    my $start   = $hsp->query->start;
-    my $end     = $hsp->query->end;
-    my $seqname = 'Query-sequence'; # Query name not present in bl2seq report
-
-    unless( $seqchar && $start && $end ) {
-        return 0;
-    }
-
-    my $seq = Bio::LocatableSeq->new('-seq'   => $seqchar,
-                                     '-id'    => $seqname,
-                                     '-start' => $start,
-                                     '-end'   => $end,
-                                     );
-    $aln->add_seq($seq);
-
-    # Subject (hit) half of the pair.
-    $seqchar = $hsp->sbjctSeq;
-    $start   = $hsp->hit->start;
-    $end     = $hsp->hit->end;
-    $seqname = $bl2seqobj->sbjctName;
-
-    unless( $seqchar && $start && $end && $seqname ) {
-        return 0;
-    }
-
-    $seq = Bio::LocatableSeq->new('-seq'   => $seqchar,
-                                  '-id'    => $seqname,
-                                  '-start' => $start,
-                                  '-end'   => $end,
-                                  );
-    $aln->add_seq($seq);
-
-    return $aln;
-}
-
-
-=head2 write_aln
-
- Title : write_aln
- Usage : $stream->write_aln(@aln)
- Function: writes the $aln object into the stream in bl2seq format
- Returns : 1 for success and 0 for error
- Args : Bio::SimpleAlign object
-
-
-=cut
-
-# Writing bl2seq reports is not supported; this method always throws.
-# The message used to end in a literal "/n" (a typo for the newline
-# escape), which leaked into the error text.
-sub write_aln {
-    my ($self,@aln) = @_;
-
-    $self->throw("Sorry: writing bl2seq output is not available! \n");
-}
-
-1;
View
256 Bio/AlignIO/clustalw.pm
@@ -1,256 +0,0 @@
-# $Id$
-#
-# BioPerl module for Bio::AlignIO::clustalw
-
-# based on the Bio::SeqIO modules
-# by Ewan Birney <birney@sanger.ac.uk>
-# and Lincoln Stein <lstein@cshl.org>
-#
-# and the SimpleAlign.pm module of Ewan Birney
-#
-# Copyright Peter Schattner
-#
-# You may distribute this module under the same terms as perl itself
-# _history
-# September 5, 2000
-# POD documentation - main docs before the code
-
-=head1 NAME
-
-Bio::AlignIO::clustalw - clustalw sequence input/output stream
-
-=head1 SYNOPSIS
-
-Do not use this module directly. Use it via the Bio::AlignIO class.
-
-=head1 DESCRIPTION
-
-This object can transform Bio::Align::AlignI objects to and from clustalw flat
-file databases.
-
-=head1 FEEDBACK
-
-=head2 Mailing Lists
-
-User feedback is an integral part of the evolution of this and other
-Bioperl modules. Send your comments and suggestions preferably to one
-of the Bioperl mailing lists. Your participation is much appreciated.
-
- bioperl-l@bioperl.org - General discussion
- http://bio.perl.org/MailList.html - About the mailing lists
-
-
-=head2 Reporting Bugs
-
-Report bugs to the Bioperl bug tracking system to help us keep track
-of the bugs and their resolution. Bug reports can be submitted via
-email or the web:
-
- bioperl-bugs@bio.perl.org
- http://bio.perl.org/bioperl-bugs/
-
-=head1 AUTHORS - Peter Schattner
-
-Email: schattner@alum.mit.edu
-
-
-=head1 APPENDIX
-
-The rest of the documentation details each of the object
-methods. Internal methods are usually preceded with a _
-
-=cut
-
-# Let the code begin...
-
-package Bio::AlignIO::clustalw;
-use vars qw(@ISA $LINELENGTH);
-use strict;
-
-use Bio::AlignIO;
-use Bio::LocatableSeq;
-use Bio::SimpleAlign; # to be Bio::Align::Simple
-
-# Number of alignment columns per output block: write_aln() cuts the
-# sequences and the match line into slices of this width.
-BEGIN {
- $LINELENGTH = 60;
-}
-
-@ISA = qw(Bio::AlignIO);
-
-
-=head2 new
-
- Title : new
- Usage : $alignio = new Bio::AlignIO(-format => 'clustalw',
- -file => 'filename');
- Function: returns a new Bio::AlignIO object to handle clustalw files
- Returns : Bio::AlignIO::clustalw object
- Args : -verbose => verbosity setting (-1,0,1,2)
- -file => name of file to read in or with ">" - writeout
- -fh => alternative to -file param - provide a filehandle
- to read from/write to
- -format => type of Alignment Format to process
- -percentages => (clustalw only) display a percentage of identity
- in each line of the alignment.
-
-=cut
-
-# Set-up hook: runs the parent class initialisation, then picks the
-# -percentages argument out of the constructor arguments and stores it
-# through the percentages() accessor when it was supplied.
-sub _initialize {
-    my ($self, @args) = @_;
-    $self->SUPER::_initialize(@args);
-    my ($show_percentages) = $self->_rearrange(['PERCENTAGES'], @args);
-    return defined($show_percentages) && $self->percentages($show_percentages);
-}
-
-=head2 next_aln
-
- Title : next_aln
- Usage : $aln = $stream->next_aln()
- Function: returns the next alignment in the stream
- Returns : Bio::Align::AlignI object
- Args : NONE
-
-=cut
-
-# Reads a CLUSTAL-style alignment from the stream.
-#
-# The first line is consumed as the header (a warning is issued when it
-# does not contain "CLUSTAL").  Every following line that looks like
-# "name sequence" or "name/start-end sequence" is appended to the
-# sequence collected for that name; any other non-blank line is
-# remembered in $self->{_lastline} and skipped.  Sequences are added to
-# the alignment in order of first appearance.
-#
-# Returns : Bio::SimpleAlign object, or undef when no sequence with a
-#           positive end coordinate was read
-#
-# Lines are held in a lexical variable so the caller's $_ is left
-# untouched.
-sub next_aln {
-    my ($self) = @_;
-
-    my $first_line = $self->_readline;
-    if( defined $first_line && $first_line !~ /CLUSTAL/ ) {
-        $self->warn("trying to parse a file which does not start with a CLUSTAL header");
-    }
-    my %alignments;
-    my $aln = Bio::SimpleAlign->new(-source => 'clustalw');
-    my $order = 0;
-    my %order;
-    $self->{_lastline} = '';
-    while( defined (my $line = $self->_readline) ) {
-        next if ( $line =~ /^\s+$/ );
-
-        my ($seqname, $aln_line) = ('', '');
-        if( $line =~ /^\s*(\S+)\s*\/\s*(\d+)-(\d+)\s+(\S+)\s*$/ ) {
-            # clustal 1.4 format
-            ($seqname,$aln_line) = ("$1:$2-$3",$4);
-        } elsif( $line =~ /^(\S+)\s+([A-Z\-]+)\s*$/ ) {
-            ($seqname,$aln_line) = ($1,$2);
-        } else {
-            $self->{_lastline} = $line;
-            next;
-        }
-
-        # Remember the order in which names first appear.
-        if( !exists $order{$seqname} ) {
-            $order{$seqname} = $order++;
-        }
-
-        $alignments{$seqname} .= $aln_line;
-    }
-    my ($sname,$start,$end);
-    foreach my $name ( sort { $order{$a} <=> $order{$b} } keys %alignments ) {
-        if( $name =~ /(\S+):(\d+)-(\d+)/ ) {
-            ($sname,$start,$end) = ($1,$2,$3);
-        } else {
-            # No coordinates in the name: start at 1 and count the
-            # letters (gap characters excluded) to get the end.
-            ($sname, $start) = ($name,1);
-            my $str = $alignments{$name};
-            $str =~ s/[^A-Za-z]//g;
-            $end = length($str);
-        }
-        my $seq = Bio::LocatableSeq->new('-seq'   => $alignments{$name},
-                                         '-id'    => $sname,
-                                         '-start' => $start,
-                                         '-end'   => $end);
-        $aln->add_seq($seq);
-    }
-    # $end still holds the value from the last sequence; undefined or
-    # non-positive means nothing usable was read.
-    undef $aln if( !defined $end || $end <= 0);
-    return $aln;
-}
-
-=head2 write_aln
-
- Title : write_aln
- Usage : $stream->write_aln(@aln)
- Function: writes the clustalw-format object (.aln) into the stream
- Returns : 1 for success and 0 for error
- Args : Bio::Align::AlignI object
-
-
-=cut
-
-# Writes one or more alignments in CLUSTAL W (.aln) layout.
-#
-# For each alignment a header line is printed, followed by blocks of
-# $LINELENGTH columns: one line per sequence (name padded to the
-# longest display name, minimum 22 characters) and the matching slice
-# of the match line.  When the percentages flag is set, each match line
-# is followed by the share of '*' columns in that slice.
-#
-# Args    : list of Bio::Align::AlignI objects (anything else is
-#           skipped with a warning)
-# Returns : 1 on success, nothing as soon as a print fails
-sub write_aln {
-    my ($self,@aln) = @_;
-    foreach my $aln (@aln) {
-        if( ! $aln || ! $aln->isa('Bio::Align::AlignI') ) {
-            $self->warn("Must provide a Bio::Align::AlignI object when calling write_aln");
-            next;
-        }
-        my $matchline = $aln->match_line;
-
-        $self->_print("CLUSTAL W(1.81) multiple sequence alignment\n\n\n") or return;
-
-        my $length = $aln->length();
-        my $count  = 0;
-        my @seq    = $aln->each_seq();
-
-        # Width of the name column.
-        my $max = 22;
-        foreach my $seq ( @seq ) {
-            my $namelen = length($aln->displayname($seq->get_nse()));
-            $max = $namelen if( $namelen > $max );
-        }
-
-        while( $count < $length ) {
-            foreach my $seq ( @seq ) {
-                # Take care not to read past the end of sequences that
-                # are shorter than the alignment; that would trigger
-                # substr() warnings.
-                my $seqchars  = $seq->seq();
-                my $substring = "";
-                if( length($seqchars) >= ($count + $LINELENGTH) ) {
-                    $substring = substr($seqchars,$count,$LINELENGTH);
-                } elsif( length($seqchars) >= $count ) {
-                    $substring = substr($seqchars,$count);
-                }
-
-                $self->_print (sprintf("%-".$max."s %s\n",
-                                       $aln->displayname($seq->get_nse()),
-                                       $substring)) or return;
-            }
-
-            # A match line shorter than the alignment yields an empty
-            # or undefined slice here.
-            my $linesubstr = ( length($matchline) > $count )
-                ? substr($matchline, $count, $LINELENGTH)
-                : '';
-            my $percentages = '';
-            # Skip the percentage for an empty slice: there is nothing
-            # to divide by.
-            if( $self->percentages && length($linesubstr) ) {
-                my $matches = ($linesubstr =~ tr/\*//);
-                $percentages = sprintf("\t%d%%", 100 * ($matches / length($linesubstr)));
-            }
-            $self->_print (sprintf("%-".$max."s %s%s\n", '', $linesubstr,
-                                   $percentages)) or return;
-            $self->_print ("\n\n") or return;
-            $count += $LINELENGTH;
-        }
-    }
-    return 1;
-}
-
-=head2 percentages
-
- Title : percentages
- Usage : $obj->percentages($newval)
- Function: Set the percentages flag - whether or not to show percentages in
- each output line
- Returns : value of percentages
- Args : newvalue (optional)
-
-
-=cut
-
-# Combined getter/setter for the "show percentages" flag kept in
-# $self->{'_percentages'}.  A defined argument replaces the stored
-# value; the (possibly updated) value is always returned.
-sub percentages {
-    my $self = shift;
-    my ($new_value) = @_;
-    $self->{'_percentages'} = $new_value if defined $new_value;
-    return $self->{'_percentages'};
-}
-
-1;
View
239 Bio/AlignIO/emboss.pm
@@ -1,239 +0,0 @@
-# $Id$
-#
-# BioPerl module for Bio::AlignIO::emboss
-#
-# Cared for by Jason Stajich <jason@bioperl.org>
-#
-# Copyright Jason Stajich
-#
-# You may distribute this module under the same terms as perl itself
-
-# POD documentation - main docs before the code
-
-=head1 NAME
-
-Bio::AlignIO::emboss - Parse EMBOSS alignment output (from applications water and needle)
-
-=head1 SYNOPSIS
-
- # do not use the object directly
- use Bio::AlignIO;
- # read in an alignment from the EMBOSS program water
- my $in = new Bio::AlignIO(-format => 'emboss',
- -file => 'seq.water');
- while( my $aln = $in->next_aln ) {
- # do something with the alignment
- }
-
-=head1 DESCRIPTION
-
-This object handles parsing pairwise sequence alignments from the
-EMBOSS suite. Writing alignments in EMBOSS format is not currently
-supported.
-
-=head1 FEEDBACK
-
-=head2 Mailing Lists
-
-User feedback is an integral part of the evolution of this and other
-Bioperl modules. Send your comments and suggestions preferably to
-the Bioperl mailing list. Your participation is much appreciated.
-
- bioperl-l@bioperl.org - General discussion
- http://bioperl.org/MailList.shtml - About the mailing lists
-
-=head2 Reporting Bugs
-
-Report bugs to the Bioperl bug tracking system to help us keep track
-of the bugs and their resolution. Bug reports can be submitted via
-email or the web:
-
- bioperl-bugs@bioperl.org
- http://bioperl.org/bioperl-bugs/
-
-=head1 AUTHOR - Jason Stajich
-
-Email jason@bioperl.org
-
-Describe contact details here
-
-=head1 CONTRIBUTORS
-
-Additional contributors names and emails here
-
-=head1 APPENDIX
-
-The rest of the documentation details each of the object methods.
-Internal methods are usually preceded with a _
-
-=cut
-
-
-# Let the code begin...
-
-
-package Bio::AlignIO::emboss;
-use vars qw(@ISA $EMBOSSTitleLen $EMBOSSLineLen);
-use strict;
-
-use Bio::AlignIO;
-use Bio::LocatableSeq;
-
-@ISA = qw(Bio::AlignIO );
-
-# $EMBOSSTitleLen: next_aln() truncates sequence names taken from the
-# "# 1:" / "# 2:" header lines to this many characters.
-# $EMBOSSLineLen is not referenced in the code visible here; presumably
-# the width of an EMBOSS alignment line - TODO confirm.
-BEGIN {
- $EMBOSSTitleLen = 13;
- $EMBOSSLineLen = 50;
-}
-
-# Set-up hook: runs the parent class initialisation and clears the
-# remembered program type ('_type'), which next_aln() fills in once it
-# has seen a report header.
-sub _initialize {
-    my ($self, @args) = @_;
-    $self->SUPER::_initialize(@args);
-    return $self->{'_type'} = undef;
-}
-
-=head2 next_aln
-
- Title : next_aln
- Usage : $aln = $stream->next_aln()
- Function: returns the next alignment in the stream.
- Returns : Bio::SimpleAlign object - returns undef on end of file
- or on error
- Args : NONE
-
-=cut
-
-# Parses the next pairwise alignment from an EMBOSS water/needle
-# report.
-#
-# Header lines supply the program type, the two sequence names and the
-# score.  Alignment data arrives in groups of three lines (sequence 1,
-# match line, sequence 2); each group is recognised by a line that
-# starts with the name of sequence 1.  A "#====" / "#----" separator
-# seen after alignment data ends the current alignment.  The program
-# type is remembered in $self->{'_type'} for later alignments in the
-# same stream.
-#
-# Returns : Bio::SimpleAlign object holding the two sequences, or undef
-#           when no alignment data was found
-sub next_aln {
-    my ($self) = @_;
-    my $seenbegin = 0;
-    my %data = ( 'seq1' => {
-                     'start'=> undef,
-                     'end'=> undef,
-                     'name' => '',
-                     'data' => '' },
-                 'seq2' => {
-                     'start'=> undef,
-                     'end'=> undef,
-                     'name' => '',
-                     'data' => '' },
-                 'align' => '',
-                 'type' => $self->{'_type'}, # to restore type from
-                                             # previous aln if possible
-                 );
-    my %names;
-    while( defined($_ = $self->_readline) ) {
-        next if( /^\#?\s+$/ || /^\#+\s*$/ );
-        if( /^\#(\=|\-)+\s*$/) {
-            last if( $seenbegin);
-        } elsif( /(Local|Global):\s*(\S+)\s+vs\s+(\S+)/ ||
-                 /^\#\s+Program:\s+(\S+)/ )
-        {
-            my ($name1,$name2) = ($2,$3);
-            if( ! defined $name1 ) { # Handle EMBOSS 2.2.X
-                $data{'type'} = $1;
-                $name1 = $name2 = '';
-            } else {
-                $data{'type'} = $1 eq 'Local' ? 'water' : 'needle';
-            }
-            $data{'seq1'}->{'name'} = $name1;
-            $data{'seq2'}->{'name'} = $name2;
-
-            $self->{'_type'} = $data{'type'};
-
-        } elsif( /Score:\s+(\S+)/ ) {
-            $data{'score'} = $1;
-        } elsif( /^\#\s+(1|2):\s+(\S+)/ && ! $data{"seq$1"}->{'name'} ) {
-            my $nm = $2;
-            $nm = substr($nm,0,$EMBOSSTitleLen); # emboss has a max seq name length
-            if( $names{$nm} ) {
-                $nm .= "-". $names{$nm};
-            }
-            $names{$nm}++;
-            $data{"seq$1"}->{'name'} = $nm;
-        } elsif( $data{'seq1'}->{'name'} &&
-                 index($_, $data{'seq1'}->{'name'}) == 0 ) {
-            # The line starts with the name of sequence 1.  This is a
-            # literal prefix comparison: sequence ids routinely contain
-            # regex metacharacters ('|', '.', '+'), so the name must
-            # not be interpolated into a pattern unescaped.
-            my $count = 0;
-            $seenbegin = 1;
-            # Consume one group: line 0 = seq1, 1 = match line, 2 = seq2.
-            while( defined ($_) ) {
-                if($count == 0 || $count == 2 ) {
-                    my @l = split;
-                    my ($seq,$start,$align,$end);
-                    if( $count == 2 && $data{'seq2'}->{'name'} eq '' ) {
-                        # second sequence line carries no name
-                        ($start,$align,$end) = @l;
-                    } elsif( @l == 3 ) {
-                        # name and coordinates but no residues
-                        $align = '';
-                        ($seq,$start,$end) = @l
-                    } else {
-                        ($seq,$start,$align,$end) = @l;
-                    }
-                    my $seqname = sprintf("seq%d", ($count == 0) ? '1' : '2');
-
-                    $data{$seqname}->{'data'} .= $align;
-                    $data{$seqname}->{'start'} ||= $start;
-                    $data{$seqname}->{'end'} = $end;
-                } else {
-                    s/^\s+//;
-                    s/\s+$//;
-                    $data{'align'} .= $_;
-                }
-                last if( $count++ == 2);
-                $_ = $self->_readline();
-            }
-            if( $data{'type'} eq 'needle' ) {
-                # which ever one is shorter we want to bring it up to
-                # length. Man this stinks.
-                my ($s1,$s2) = sort { length($a->{'data'}) <=>
-                                          length($b->{'data'}) }
-                    ($data{'seq1'}, $data{'seq2'});
-                if(length($s1->{'data'}) != length($s2->{'data'}) ) {
-
-                    if( $s1->{'start'} <= 1) { # could be 0?
-                        # pad the shorter sequence on the left
-                        $s1->{'data'} = '-' x ( length($s2->{'data'})
-                                                - length($s1->{'data'})) .
-                                                $s1->{'data'};
-                    } else {
-                        # pad the shorter sequence on the right
-                        $s1->{'data'} .= '-' x ( length($s2->{'data'})
-                                                 - length($s1->{'data'}));
-                    }
-                }
-            }
-        }
-    }
-    return undef unless $seenbegin;
-    my $aln = Bio::SimpleAlign->new(-verbose => $self->verbose(),
-                                    -source => "EMBOSS-".$data{'type'});
-
-    foreach my $seqname ( qw(seq1 seq2) ) {
-        return undef unless ( defined $data{$seqname} );
-        # fall back to "seq1"/"seq2" when the report gave no name
-        $data{$seqname}->{'name'} ||= $seqname;
-        my $seq = Bio::LocatableSeq->new('-seq' => $data{$seqname}->{'data'},
-                                         '-id' => $data{$seqname}->{'name'},
-                                         '-start'=> $data{$seqname}->{'start'},
-                                         '-end' => $data{$seqname}->{'end'},
-                                         );
-        $aln->add_seq($seq);
-    }
-    return $aln;
-}
-
-
-=head2 write_aln
-
- Title : write_aln
- Usage : $stream->write_aln(@aln)
- Function: writes the $aln object into the stream in emboss format
- Returns : 1 for success and 0 for error
- Args : Bio::Align::AlignI object
-
-
-=cut
-
-# Writing EMBOSS-format alignments is not supported; this method
-# always throws.
-sub write_aln {
-    my $self = shift;
-    return $self->throw("Sorry: writing emboss output is not currently available! \n");
-}
-
-1;
View
190 Bio/AlignIO/fasta.pm
@@ -1,190 +0,0 @@
-# $Id$
-#
-# BioPerl module for Bio::AlignIO::fasta
-
-# based on the Bio::SeqIO::fasta module
-# by Ewan Birney <birney@sanger.ac.uk>
-# and Lincoln Stein <lstein@cshl.org>
-#
-# and the SimpleAlign.pm module of Ewan Birney
-#
-# Copyright Peter Schattner
-#
-# You may distribute this module under the same terms as perl itself
-# _history
-# September 5, 2000
-# POD documentation - main docs before the code
-
-=head1 NAME
-
-Bio::AlignIO::fasta - fasta sequence input/output stream
-
-=head1 SYNOPSIS
-
-Do not use this module directly. Use it via the Bio::AlignIO class.
-
-=head1 DESCRIPTION
-
-This object can transform Bio::SimpleAlign objects to and from fasta flat
-file databases.
-
-=head1 FEEDBACK
-
-=head2 Reporting Bugs
-
-Report bugs to the Bioperl bug tracking system to help us keep track
-of the bugs and their resolution. Bug reports can be submitted via
-email or the web:
-
- bioperl-bugs@bio.perl.org
- http://bio.perl.org/bioperl-bugs/
-
-=head1 AUTHORS - Peter Schattner
-
-Email: schattner@alum.mit.edu
-
-
-=head1 APPENDIX
-
-The rest of the documentation details each of the object
-methods. Internal methods are usually preceded with a _
-
-=cut
-
-# Let the code begin...
-
-package Bio::AlignIO::fasta;
-use vars qw(@ISA);
-use strict;
-
-use Bio::AlignIO;
-use Bio::SimpleAlign;
-
-@ISA = qw(Bio::AlignIO);
-
-
-=head2 next_aln
-
- Title : next_aln
- Usage : $aln = $stream->next_aln()
- Function: returns the next alignment in the stream.
- Returns : Bio::Align::AlignI object - returns 0 on end of file
- or on error
- Args : NONE
-
-=cut
-
-sub next_aln {
- my $self = shift;
- my $entry;
- my ($start,$end,$name,$seqname,$seq,$seqchar,$tempname,%align);
- my $aln = Bio::SimpleAlign->new();
-
- while(defined ($entry = $self->_readline)) {
- if($entry =~ /^>(\S+)/ ) {
- $tempname = $1;
- if( defined $name ) {
- # put away last name and sequence
-
- if( $name =~ /(\S+)\/(\d+)-(\d+)/ ) {
- $seqname = $1;
- $start = $2;
- $end = $3;
- } else {
- $seqname=$name;
- $start = 1;
- $end = length($seqchar); #ps 9/6/00