Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Removing the deprecated module Bio.Fasta

  • Loading branch information...
commit e9a7cd62044fc4585bd0ea5c52029f778c8fcac9 1 parent 8dc3666
Michiel de Hoon authored
View
85 Bio/Fasta/FastaAlign.py
@@ -1,85 +0,0 @@
-"""
-Code to deal with alignments written in Fasta format (DEPRECATED).
-
-This module is considered obsolete and has been deprecated. It will be
-removed in a future release of Biopython. Please use Bio.AlignIO instead
-for reading and writing alignments in FASTA format.
-
-This mostly just uses the regular Fasta parsing stuff written by Jeff
-to deal with all of the input and output formats.
-
-functions:
-o parse_file()
-
-classes:
-FastaAlignment"""
-# standard library
-import os
-
-# biopython
-from Bio.Align.Generic import Alignment
-from Bio import Alphabet
-from Bio.Alphabet import IUPAC
-from Bio import Fasta
-
-def parse_file(file_name, type = 'DNA'):
- """Parse the given file into a FastaAlignment object.
-
- Arguments:
- o file_name - The location of the file to parse.
- o type - The type of information contained in the file.
- """
- if type.upper() == 'DNA':
- alphabet = IUPAC.ambiguous_dna
- elif type.upper() == 'RNA':
- alphabet = IUPAC.ambiguous_rna
- elif type.upper() == 'PROTEIN':
- alphabet = IUPAC.protein
- else:
- raise ValueError("Invalid type %s passed. Need DNA, RNA or PROTEIN"
- % type)
-
- # create a new alignment object
- fasta_align = FastaAlignment(Alphabet.Gapped(alphabet))
-
- # now parse the file and fill up the alignment object
- align_file = open(file_name, 'r')
-
- parser = Fasta.RecordParser()
- iterator = Fasta.Iterator(align_file, parser)
-
- cur_align = iterator.next()
- while cur_align:
- fasta_align.add_sequence(cur_align.title, cur_align.sequence)
-
- cur_align = iterator.next()
-
- return fasta_align
-
-class FastaAlignment(Alignment):
- """Work with the Fasta Alignment format.
-
- The fasta alignment format is basically the same as the regular ol'
- Fasta format we know and love, except the sequences have gaps
- (represented by -'s).
- """
- def __init__(self, alphabet = Alphabet.Gapped(IUPAC.ambiguous_dna)):
- Alignment.__init__(self, alphabet)
-
- def __str__(self):
- """Print out a fasta version of the alignment info."""
- return_string = ''
- for item in self._records:
- new_f_record = Fasta.Record()
- new_f_record.title = item.description
- new_f_record.sequence = item.seq.data
-
- return_string = return_string + str(new_f_record) + os.linesep + os.linesep
-
- # have a extra newline, so strip two off and add one before returning
- return return_string.rstrip() + os.linesep
-
-
-
-
-
View
205 Bio/Fasta/__init__.py
@@ -1,205 +0,0 @@
-"""Utilities for working with FASTA-formatted sequences (DEPRECATED).
-
-Classes:
-Record Holds FASTA sequence data.
-Iterator Iterates over sequence data in a FASTA file.
-RecordParser Parses FASTA sequence data into a Record object.
-SequenceParser Parses FASTA sequence data into a SeqRecord object.
-
-For a long time this module was the most commonly used and best documented
-FASTA parser in Biopython. However, we now recommend using Bio.SeqIO instead.
-After being declared obsolete, Bio.Fasta has now been officially deprecated
-(with a warning message when imported) and will be removed in a future
-release.
-
-If you are already using Bio.Fasta with the SequenceParser to get SeqRecord
-objects, then you should be able to switch to the more recent Bio.SeqIO module
-very easily as that too uses SeqRecord objects. For example,
-
-from Bio import Fasta
-handle = open("example.fas")
-for seq_record in Fasta.Iterator(handle, Fasta.SequenceParser()):
- print seq_record.description
- print seq_record.seq
-handle.close()
-
-Using Bio.SeqIO instead this becomes:
-
-from Bio import SeqIO
-handle = open("example.fas")
-for seq_record in SeqIO.parse(handle, "fasta"):
- print seq_record.description
- print seq_record.seq
-handle.close()
-
-Converting an existing code which uses the RecordParser is a little more
-complicated as the Bio.Fasta.Record object differs from the SeqRecord.
-
-from Bio import Fasta
-handle = open("example.fas")
-for record in Fasta.Iterator(handle, Fasta.RecordParser()):
- #record is a Bio.Fasta.Record object
- print record.title #The full title line as a string
- print record.sequence #The sequence as a string
-handle.close()
-
-Using Bio.SeqIO instead this becomes:
-
-from Bio import SeqIO
-handle = open("example.fas")
-for seq_record in SeqIO.parse(handle, "fasta"):
- print seq_record.description #The full title line as a string
- print str(seq_record.seq) #The sequence as a string
-handle.close()
-
-Very old code may have used Bio.Fasta.index_file and Dictionary, which were
-deprecated in Biopython 1.44 and removed in Biopython 1.46. These allowed
-indexing of a FASTA file and access to the records with a dictionary like
-interface. Currently using Bio.SeqIO.to_dict to create an in memory dictionary
-of SeqRecord objects is the best replacement, but for very large files
-additional indexing support for Bio.SeqIO is being considered.
-"""
-from Bio import Seq
-from Bio import SeqRecord
-from Bio import Alphabet
-
-import warnings
-warnings.warn('Bio.Fasta is deprecated. Please use the "fasta" support in '
- 'Bio.SeqIO (or Bio.AlignIO) instead.', DeprecationWarning)
-
-class Record:
- """Holds information from a FASTA record.
-
- Members:
- title Title line ('>' character not included).
- sequence The sequence.
-
- """
- def __init__(self, colwidth=60):
- """__init__(self, colwidth=60)
-
- Create a new Record. colwidth specifies the number of residues
- to put on each line when generating FASTA format.
-
- """
- self.title = ''
- self.sequence = ''
- self._colwidth = colwidth
-
- def __str__(self):
- s = []
- s.append('>%s' % self.title)
- i = 0
- while i < len(self.sequence):
- s.append(self.sequence[i:i+self._colwidth])
- i = i + self._colwidth
- #Was having a problem getting the tests to pass on windows...
- #return os.linesep.join(s)
- return "\n".join(s)
-
-class Iterator:
- """Returns one record at a time from a FASTA file.
- """
- def __init__(self, handle, parser = None, debug = 0):
- """Initialize a new iterator.
- """
- self.handle = handle
- self._parser = parser
- self._debug = debug
-
- #Skip any text before the first record (e.g. blank lines)
- while True:
- line = handle.readline()
- if not line or line[0] == ">":
- break
- if debug : print "Skipping: " + line
- self._lookahead = line
-
- def __iter__(self):
- return iter(self.next, None)
-
- def next(self):
- """Return the next record in the file"""
- line = self._lookahead
- if not line:
- return None
- assert line[0]==">", line
- lines = [line.rstrip()]
- line = self.handle.readline()
- while line:
- if line[0] == ">": break
- if line[0] == "#":
- if self._debug:
- print "Ignoring comment line"
- pass
- else:
- lines.append(line.rstrip())
- line = self.handle.readline()
- self._lookahead = line
- if self._debug:
- print "Debug: '%s'" % "".join(lines)
- if self._parser is None:
- return "\n".join(lines)
- else:
- return self._parser.parse_string("\n".join(lines))
-
-class RecordParser:
- """Parses FASTA sequence data into a Fasta.Record object.
- """
- def __init__(self, debug = 0):
- pass
-
- def parse_string(self, text):
- text = text.replace("\r\n","\n") #Crude way of dealing with \r\n
- assert text[0] == ">", text
- text = text.split("\n>",1)[0] # Only do the first record if more than one
- title, sequence = text.split("\n", 1)
- title = title[1:]
- rec = Record()
- rec.title = title
- rec.sequence = sequence.replace("\n","")
- return rec
-
- def parse(self, handle):
- return self.parse_string(handle.read())
-
-class SequenceParser:
- """Parses FASTA sequence data into a SeqRecord object.
- """
- def __init__(self, alphabet = Alphabet.generic_alphabet, title2ids = None,
- debug = 0):
- """Initialize a Scanner and Sequence Consumer.
-
- Arguments:
- o alphabet - The alphabet of the sequences to be parsed. If not
- passed, this will be set as generic_alphabet.
- o title2ids - A function that, when given the title of the FASTA
- file (without the beginning >), will return the id, name and
- description (in that order) for the record. If this is not given,
- then the entire title line will be used as the description.
- """
- self.alphabet = alphabet
- self.title2ids = title2ids
-
- def parse_string(self, text):
- text = text.replace("\r\n","\n") #Crude way of dealing with \r\n
- assert text[0] == ">", text
- text = text.split("\n>",1)[0] # Only do the first record if more than one
- title, sequence = text.split("\n", 1)
- title = title[1:]
-
- seq = Seq.Seq(sequence.replace("\n",""), self.alphabet)
- rec = SeqRecord.SeqRecord(seq)
-
- if self.title2ids:
- seq_id, name, descr = self.title2ids(title)
- rec.id = seq_id
- rec.name = name
- rec.description = descr
- else:
- rec.description = title
-
- return rec
-
- def parse(self, handle):
- return self.parse_string(handle.read())
View
5 DEPRECATED
@@ -179,8 +179,9 @@ which is suitable even for very large files.
Bio.Fasta (including Bio.Fasta.FastaAlign)
==========================================
-Declared obsolete in Release 1.48, and deprecated in Release 1.51.
-Please use the "fasta" support in Bio.SeqIO or Bio.AlignIO instead.
+Declared obsolete in Release 1.48, deprecated in Release 1.51, and removed
+in Release 1.55 final. Please use the "fasta" support in Bio.SeqIO or
+Bio.AlignIO instead.
Bio.Align.FormatConvert
=======================
View
2  Tests/test_BioSQL_SeqIO.py
@@ -52,7 +52,7 @@
("fasta", False, 'Fasta/loveliesbleeding.pro', 1),
("fasta", False, 'Fasta/rose.pro', 1),
("fasta", False, 'Fasta/rosemary.pro', 1),
-#Following examples are also used in test_Fasta.py
+#Following examples are also used in test_SeqIO.py
("fasta", False, 'Fasta/f001', 1), #Protein
("fasta", False, 'Fasta/f002', 3), #DNA
#("fasta", False, 'Fasta/f003', 2), #Protein with comments
View
158 Tests/test_Fasta.py
@@ -1,158 +0,0 @@
-# Copyright 1999 by Jeffrey Chang. All rights reserved.
-# This code is part of the Biopython distribution and governed by its
-# license. Please see the LICENSE file that should have been included
-# as part of this package.
-
-import os
-import sys
-import unittest
-from types import *
-
-import warnings
-warnings.filterwarnings("ignore", category=DeprecationWarning)
-from Bio import Fasta
-warnings.resetwarnings()
-
-from Bio import SeqRecord
-from Bio import Seq
-from Bio import Alphabet
-from Bio.Alphabet import IUPAC
-
-
-class RecordTest(unittest.TestCase):
- def test_record_basic(self):
- """Basic test on Record
- """
- def pbool(b):
- if b:
- return 1
- return 0
-
- r = Fasta.Record()
- if sys.version_info[0] == 3:
- assert pbool(type(r.title) is str)
- assert pbool(type(r.sequence) is str)
- else:
- assert pbool(type(r.title) is StringType) # StringType
- assert pbool(type(r.sequence) is StringType) # StringType
-
-class ParserTest(unittest.TestCase):
- def setUp(self):
- files = ["f001", "f002"]
- self.handles = []
- for filename in files:
- self.handles.append(open(os.path.join("Fasta", filename)))
-
- self.lengths = {0 : (96, 79),
- 1 : (100, 633)}
-
- def tearDown(self):
- for handle in self.handles:
- handle.close()
-
- def test_record_parser(self):
- """Basic operation of the Record Parser.
- """
- parser = Fasta.RecordParser()
- for index in range(len(self.handles)):
- handle = self.handles[index]
- rec = parser.parse(handle)
- assert isinstance(rec, Fasta.Record)
- assert len(rec.title) == self.lengths[index][0]
- assert len(rec.sequence) == self.lengths[index][1]
-
- def test_sequence_parser(self):
- """Basic operation of the Sequence Parser.
- """
- parser = Fasta.SequenceParser()
- for index in range(len(self.handles)):
- handle = self.handles[index]
- rec = parser.parse(handle)
- assert isinstance(rec, SeqRecord.SeqRecord)
- assert isinstance(rec.seq, Seq.Seq)
- assert rec.seq.alphabet == Alphabet.generic_alphabet
- assert len(rec.seq) == self.lengths[index][1]
- assert len(rec.description) == self.lengths[index][0]
-
- def test_sequence_alphabet(self):
- """Setting the alphabet for the Sequence Parser.
- """
- parser = Fasta.SequenceParser(alphabet =
- IUPAC.unambiguous_dna)
- rec = parser.parse(self.handles[0])
- assert rec.seq.alphabet == IUPAC.unambiguous_dna
-
- def test_sequence_title_convert(self):
- """Test title conversion for the Sequence Parser.
- """
- def test_title2ids(title):
- return "id", "name", "description"
- parser = Fasta.SequenceParser(title2ids = test_title2ids)
- rec = parser.parse(self.handles[0])
- assert rec.id == "id"
- assert rec.name == "name"
- assert rec.description == "description"
-
-class IteratorTest(unittest.TestCase):
- def setUp(self):
- self.test_handle = open(os.path.join('Fasta', 'f002'))
-
- def tearDown(self):
- self.test_handle.close()
-
- def test_basic_iterator(self):
- """Ensure the Fasta iterator works returning text.
- """
- i = Fasta.Iterator(self.test_handle)
- rec_info = {0 : ">gi|1348912|gb|G26680|G26680",
- 1 : ">gi|1348917|gb|G26685|G26685",
- 2 : ">gi|1592936|gb|G29385|G29385"}
- for rec_num in range(3):
- rec = i.next()
- lines = rec.split("\n")
- title_part = lines[0].split()
- assert title_part[0] == rec_info[rec_num]
-
- # make sure we keep getting None when the iterator is done
- assert i.next() is None
- assert i.next() is None
-
- def test_new_iterator(self):
- """Ensure the Fasta iterator works like a Python 2.2 iterator.
- """
- n = 0
- iterator = Fasta.Iterator(self.test_handle)
- for rec in iter(iterator):
- n += 1
- assert n == 3
-
- def test_record_iterator(self):
- """Test the iterator with a Record Parser.
- """
- parser = Fasta.RecordParser()
- iterator = Fasta.Iterator(self.test_handle, parser)
- for rec in iter(iterator):
- assert isinstance(rec, Fasta.Record)
-
- def test_sequence_iterator(self):
- """Test the iterator with a Sequence Parser.
- """
- parser = Fasta.SequenceParser()
- iterator = Fasta.Iterator(self.test_handle, parser)
- for rec in iter(iterator):
- assert isinstance(rec, SeqRecord.SeqRecord)
-
- def test_parsing_comments(self):
- """Parse FASTA files with # style comment lines in them.
- """
- handle = open(os.path.join("Fasta", "f003"))
- iterator = Fasta.Iterator(handle, Fasta.RecordParser())
- num_recs = 0
- for rec in iter(iterator):
- num_recs += 1
- assert num_recs == 2
-
-
-if __name__ == "__main__":
- runner = unittest.TextTestRunner(verbosity = 2)
- unittest.main(testRunner=runner)
View
2  Tests/test_SeqIO.py
@@ -77,7 +77,7 @@ def send_warnings_to_stdout(message, category, filename, lineno,
("fasta", False, 'Fasta/loveliesbleeding.pro', 1),
("fasta", False, 'Fasta/rose.pro', 1),
("fasta", False, 'Fasta/rosemary.pro', 1),
-#Following examples are also used in test_Fasta.py
+#Following examples are also used in test_BioSQL_SeqIO.py
("fasta", False, 'Fasta/f001', 1), #Protein
("fasta", False, 'Fasta/f002', 3), #DNA
#("fasta", False, 'Fasta/f003', 2), #Protein with comments
View
1  setup.py
@@ -225,7 +225,6 @@ def is_Numpy_installed():
'Bio.Entrez',
'Bio.Enzyme',
'Bio.ExPASy',
- 'Bio.Fasta',
'Bio.FSSP',
'Bio.GA',
'Bio.GA.Crossover',
Please sign in to comment.
Something went wrong with that request. Please try again.