Merge branch 'master' into alignio-maf

biopython · Apr 3, 2015 · 02120d1 · 02120d1
2 parents 39cf674 + b31c789
commit 02120d1
Show file tree

Hide file tree

Showing 21 changed files with 6,417 additions and 127 deletions.
diff --git a/Bio/PDB/Chain.py b/Bio/PDB/Chain.py
@@ -129,6 +129,6 @@ def get_residues(self):
             yield r
 
     def get_atoms(self):
-        for r in self:
+        for r in self.get_residues():
             for a in r:
                 yield a
diff --git a/Bio/PDB/MMCIF2Dict.py b/Bio/PDB/MMCIF2Dict.py
@@ -21,6 +21,8 @@ def __init__(self, filename):
             tokens = self._tokenize(handle)
             token = next(tokens)
             self[token[0:5]] = token[5:]
+            i = 0
+            n = 0
             for token in tokens:
                 if token == "loop_":
                     loop_flag = True
@@ -47,6 +49,8 @@ def __init__(self, filename):
                     self[key] = token
                     key = None
 
+    # Private methods
+
     def _tokenize(self, handle):
         for line in handle:
             if line.startswith("#"):

diff --git a/Bio/PDB/MMCIFParser.py b/Bio/PDB/MMCIFParser.py
@@ -10,21 +10,53 @@
 from string import ascii_letters
 
 import numpy
+import warnings
 
 from Bio._py3k import range
 
 from Bio.PDB.MMCIF2Dict import MMCIF2Dict
 from Bio.PDB.StructureBuilder import StructureBuilder
 from Bio.PDB.PDBExceptions import PDBConstructionException
+from Bio.PDB.PDBExceptions import PDBConstructionWarning
 
 
 class MMCIFParser(object):
+    """Parse an mmCIF file and return a Structure object."""
+
+    def __init__(self, structure_builder=None, QUIET=False):
+        """Create an MMCIFParser object.
+        The mmCIF parser calls a number of standard methods in an aggregated
+        StructureBuilder object. Normally this object is instantiated by the
+        MMCIFParser object itself, but if the user provides his/her own
+        StructureBuilder object, the latter is used instead.
+        Arguments:
+         - structure_builder - an optional user implemented StructureBuilder object.
+         - QUIET - Evaluated as a Boolean. If true, warnings issued in constructing
+           the SMCRA data will be suppressed. If false (DEFAULT), they will be shown.
+           These warnings might be indicative of problems in the mmCIF file!
+        """
+        if structure_builder is not None:
+            self._structure_builder = structure_builder
+        else:
+            self._structure_builder = StructureBuilder()
+        # self.header = None
+        # self.trailer = None
+        self.line_counter = 0
+        self.build_structure = None
+        self.QUIET = bool(QUIET)
+
+    # Public methods
+
     def get_structure(self, structure_id, filename):
-        self._mmcif_dict = MMCIF2Dict(filename)
-        self._structure_builder = StructureBuilder()
-        self._build_structure(structure_id)
+        with warnings.catch_warnings():
+            if self.QUIET:
+                warnings.filterwarnings("ignore", category=PDBConstructionWarning)
+            self._mmcif_dict = MMCIF2Dict(filename)  # parse inside the filter scope
+            self._build_structure(structure_id)  # so QUIET covers construction warnings
         return self._structure_builder.get_structure()
 
+    # Private methods
+
     def _build_structure(self, structure_id):
         mmcif_dict = self._mmcif_dict
         atom_id_list = mmcif_dict["_atom_site.label_atom_id"]
@@ -39,6 +71,7 @@ def _build_structure(self, structure_id):
         y_list = [float(x) for x in mmcif_dict["_atom_site.Cartn_y"]]
         z_list = [float(x) for x in mmcif_dict["_atom_site.Cartn_z"]]
         alt_list = mmcif_dict["_atom_site.label_alt_id"]
+        icode_list = mmcif_dict["_atom_site.pdbx_PDB_ins_code"]
         b_factor_list = mmcif_dict["_atom_site.B_iso_or_equiv"]
         occupancy_list = mmcif_dict["_atom_site.occupancy"]
         fieldname_list = mmcif_dict["_atom_site.group_PDB"]
@@ -75,9 +108,15 @@ def _build_structure(self, structure_id):
         structure_builder.init_seg(" ")
         # Historically, Biopython PDB parser uses model_id to mean array index
         # so serial_id means the Model ID specified in the file
-        current_model_id = 0
+        current_model_id = -1
         current_serial_id = 0
         for i in range(0, len(atom_id_list)):
+
+            # Set the line counter per atom record only, unlike the global
+            # line counter found in PDBParser(). The value is the row index of
+            # the atom and should match the '_atom_site.id' index in the mmCIF.
+            structure_builder.set_line_counter(i)
+
             x = x_list[i]
             y = y_list[i]
             z = z_list[i]
@@ -87,6 +126,9 @@ def _build_structure(self, structure_id):
             if altloc == ".":
                 altloc = " "
             resseq = seq_id_list[i]
+            icode = icode_list[i]
+            if icode == "?":
+                icode = " "
             name = atom_id_list[i]
             # occupancy & B factor
             try:
@@ -108,23 +150,23 @@ def _build_structure(self, structure_id):
                 if current_serial_id != serial_id:
                     # if serial changes, update it and start new model
                     current_serial_id = serial_id
-                    structure_builder.init_model(current_model_id, current_serial_id)
                     current_model_id += 1
+                    structure_builder.init_model(current_model_id, current_serial_id)
+                    current_chain_id = None
+                    current_residue_id = None
             else:
                 # no explicit model column; initialize single model
                 structure_builder.init_model(current_model_id)
+
             if current_chain_id != chainid:
                 current_chain_id = chainid
                 structure_builder.init_chain(current_chain_id)
+
+            if current_residue_id != resseq:
                 current_residue_id = resseq
-                icode, int_resseq = self._get_icode(resseq)
-                structure_builder.init_residue(resname, hetatm_flag, int_resseq,
-                    icode)
-            elif current_residue_id != resseq:
-                current_residue_id = resseq
-                icode, int_resseq = self._get_icode(resseq)
-                structure_builder.init_residue(resname, hetatm_flag, int_resseq,
-                    icode)
+                int_resseq = int(resseq)
+                structure_builder.init_residue(resname, hetatm_flag, int_resseq, icode)
+
             coord = numpy.array((x, y, z), 'f')
             element = element_list[i] if element_list else None
             structure_builder.init_atom(name, coord, tempfactor, occupancy, altloc,
@@ -152,18 +194,6 @@ def _build_structure(self, structure_id):
         except:
             pass    # no cell found, so just ignore
 
-    def _get_icode(self, resseq):
-        """Tries to return the icode. In MMCIF files this is just part of
-        resseq! In PDB files, it's a separate field."""
-        last_resseq_char = resseq[-1]
-        if last_resseq_char in ascii_letters:
-            icode = last_resseq_char
-            int_resseq = int(resseq[0:-1])
-        else:
-            icode = " "
-            int_resseq = int(resseq)
-        return icode, int_resseq
-
 
 if __name__ == "__main__":
     import sys

diff --git a/Bio/PDB/Model.py b/Bio/PDB/Model.py
@@ -58,8 +58,12 @@ def __repr__(self):
 
     # Public
 
-    def get_residues(self):
+    def get_chains(self):
         for c in self:
+            yield c
+
+    def get_residues(self):
+        for c in self.get_chains():
             for r in c:
                 yield r
 

diff --git a/Bio/PDB/Residue.py b/Bio/PDB/Residue.py
@@ -112,6 +112,10 @@ def get_unpacked_list(self):
     def get_segid(self):
         return self.segid
 
+    def get_atom(self):
+        for a in self:
+            yield a
+
 
 class DisorderedResidue(DisorderedEntityWrapper):
     """

diff --git a/Bio/PDB/Structure.py b/Bio/PDB/Structure.py
@@ -37,8 +37,12 @@ def _sort(self, m1, m2):
 
     # Public
 
-    def get_chains(self):
+    def get_models(self):
         for m in self:
+            yield m
+
+    def get_chains(self):
+        for m in self.get_models():
             for c in m:
                 yield c
 

diff --git a/Bio/PDB/StructureBuilder.py b/Bio/PDB/StructureBuilder.py
@@ -156,9 +156,8 @@ def init_residue(self, resname, field, resseq, icode):
                     disordered_residue.disordered_add(new_residue)
                     self.residue = disordered_residue
                     return
-        residue = Residue(res_id, resname, self.segid)
-        self.chain.add(residue)
-        self.residue = residue
+        self.residue = Residue(res_id, resname, self.segid)
+        self.chain.add(self.residue)
 
     def init_atom(self, name, coord, b_factor, occupancy, altloc, fullname,
                   serial_number=None, element=None):
@@ -196,15 +195,15 @@ def init_atom(self, name, coord, b_factor, occupancy, altloc, fullname,
                                   % (duplicate_fullname, fullname,
                                      self.line_counter),
                                   PDBConstructionWarning)
-        atom = self.atom = Atom(name, coord, b_factor, occupancy, altloc,
-                                fullname, serial_number, element)
+        self.atom = Atom(name, coord, b_factor, occupancy, altloc,
+                         fullname, serial_number, element)
         if altloc != " ":
             # The atom is disordered
             if residue.has_id(name):
                 # Residue already contains this atom
                 duplicate_atom = residue[name]
                 if duplicate_atom.is_disordered() == 2:
-                    duplicate_atom.disordered_add(atom)
+                    duplicate_atom.disordered_add(self.atom)
                 else:
                     # This is an error in the PDB file:
                     # a disordered atom is found with a blank altloc
@@ -214,7 +213,7 @@ def init_atom(self, name, coord, b_factor, occupancy, altloc, fullname,
                     residue.detach_child(name)
                     disordered_atom = DisorderedAtom(name)
                     residue.add(disordered_atom)
-                    disordered_atom.disordered_add(atom)
+                    disordered_atom.disordered_add(self.atom)
                     disordered_atom.disordered_add(duplicate_atom)
                     residue.flag_disordered()
                     warnings.warn("WARNING: disordered atom found "
@@ -228,11 +227,11 @@ def init_atom(self, name, coord, b_factor, occupancy, altloc, fullname,
                 residue.add(disordered_atom)
                 # Add the real atom to the disordered atom, and the
                 # disordered atom to the residue
-                disordered_atom.disordered_add(atom)
+                disordered_atom.disordered_add(self.atom)
                 residue.flag_disordered()
         else:
             # The atom is not disordered
-            residue.add(atom)
+            residue.add(self.atom)
 
     def set_anisou(self, anisou_array):
         "Set anisotropic B factor of current Atom."

diff --git a/Bio/PopGen/SimCoal/Controller.py b/Bio/PopGen/SimCoal/Controller.py
@@ -171,11 +171,11 @@ def __init__(self, fastsimcoal_dir=None, cmd='fastsimcoal', **kwargs):
 
 
 class FastSimCoalController(object):
-    def __init__(self, fastsimcoal_dir=None, bin_name="fsc251"):
+    def __init__(self, fastsimcoal_dir=None, bin_name="fsc252"):
         """Initializes the controller.
 
         fastsimcoal_dir is the directory where fastsimcoal is.
-        By default the binary should be called fsc251.
+        By default the binary should be called fsc252.
         bin_name specifies a different name for the binary.
 
         The initializer checks for existence and executability of binaries

diff --git a/Bio/SeqFeature.py b/Bio/SeqFeature.py
@@ -1,6 +1,6 @@
 # Copyright 2000-2003 Jeff Chang.
 # Copyright 2001-2008 Brad Chapman.
-# Copyright 2005-2012 by Peter Cock.
+# Copyright 2005-2015 by Peter Cock.
 # Copyright 2006-2009 Michiel de Hoon.
 # All rights reserved.
 # This code is part of the Biopython distribution and governed by its
@@ -34,10 +34,9 @@
 Specify locations of a feature on a Sequence
 --------------------------------------------
 
-This aims to handle, in Ewan's words, 'the dreaded fuzziness issue' in
-much the same way as Biocorba. This has the advantages of allowing us
-to handle fuzzy stuff in case anyone needs it, and also be compatible
-with Biocorba.
+This aims to handle, in Ewan Birney's words, 'the dreaded fuzziness issue'.
+This has the advantages of allowing us to handle fuzzy stuff in case anyone
+needs it, and also be compatible with BioPerl etc and BioSQL.
 
 classes: