From 5c5725cc6485d6c34a40d3369e19e2b2494591de Mon Sep 17 00:00:00 2001
From: nisse <nisse>
Date: Fri, 14 May 2004 09:52:12 +0000
Subject: [PATCH] *** empty log message ***

---
 Bio/PDB/Atom.py           |   3 +
 Bio/PDB/FragmentMapper.py | 304 ++++++++++++++++++++++++++++++++++++++
 Bio/PDB/HSExposure.py     |   6 +-
 Bio/PDB/Polypeptide.py    |  41 ++++-
 Bio/PDB/__init__.py       |   3 +
 5 files changed, 347 insertions(+), 10 deletions(-)
 create mode 100644 Bio/PDB/FragmentMapper.py
diff --git a/Bio/PDB/Atom.py b/Bio/PDB/Atom.py
index 855c9c752f2..3a6271c7c75 100644
--- a/Bio/PDB/Atom.py
+++ b/Bio/PDB/Atom.py
@@ -71,6 +71,9 @@ def __sub__(self, other):
         
         Example:
             >>> distance=atom1-atom2
+
+        @param other: the other atom
+        @type other: L{Atom}
         """
         diff=self.coord-other.coord
         return sqrt(sum(diff*diff))
diff --git a/Bio/PDB/FragmentMapper.py b/Bio/PDB/FragmentMapper.py
new file mode 100644
index 00000000000..02e67b8434d
--- /dev/null
+++ b/Bio/PDB/FragmentMapper.py
@@ -0,0 +1,304 @@
+from Numeric import array, Float0, zeros
+import os
+
+from Bio.SVDSuperimposer import SVDSuperimposer
+from Bio.PDB import *
+
+__doc__="""
+Classify protein backbone structure according to Kolodny et al's fragment
+libraries. It can be regarded as a form of objective secondary structure
+classification. Only fragments of length 5 or 7 are supported (ie. there is a
+'central' residue).
+
+Full reference:
+
+Kolodny R, Koehl P, Guibas L, Levitt M.
+Small libraries of protein fragments model native protein structures accurately.
+J Mol Biol. 2002 323(2):297-307.
+
+The definition files of the fragments can be obtained from:
+
+http://csb.stanford.edu/rachel/fragments/
+
+You need these files to use this module.
+
+The following example uses the library with 10 fragments of length 5.
+The library files can be found in directory 'fragment_data'.
+
+    >>> model=structure[0]
+    >>> fm=FragmentMapper(lsize=10, flength=5, dir="fragment_data")
+    >>> fm.map(model)
+    >>> fragment=fm[residue]
+"""
+
+
+# fragment file (lib_SIZE_z_LENGTH.txt)
+# SIZE=number of fragments
+# LENGTH=length of fragment (4,5,6,7)
+_FRAGMENT_FILE="lib_%s_z_%s.txt"
+
+
+def _read_fragments(size, length, dir="."):
+    """
+    Read a fragment spec file (available from 
+    U{http://csb.stanford.edu/rachel/fragments/} 
+    and return a list of Fragment objects.
+
+    @param size: number of fragments in the library
+    @type size: int
+
+    @param length: length of the fragments
+    @type length: int
+
+    @param dir: directory where the fragment files can be found
+    @type dir: string
+    """
+    filename=(dir+"/"+_FRAGMENT_FILE) % (size, length)
+    fp=open(filename, "r")
+    flist=[]
+    fid=0
+    for l in fp.readlines():
+                # skip comment and blank lines
+        if l[0]=="*" or l[0]=="\n":
+            continue
+        sl=l.split()
+        if sl[1]=="------":
+            # Start of fragment definition
+            f=Fragment(length, fid)
+            flist.append(f)
+            fid+=1
+            continue
+        # Add CA coord to Fragment
+        coord=array(map(float, sl[0:3]))
+        f.add_residue("XXX", coord)
+    fp.close()
+    return flist
+
+
+class Fragment:
+    """
+    Represent a polypeptide C-alpha fragment.
+    """
+    def __init__(self, length, fid):
+        """
+        @param length: length of the fragment
+        @type length: int
+
+        @param fid: id for the fragment
+        @type fid: int
+        """
+        # nr of residues in fragment
+        self.length=length
+        # nr of residues added
+        self.counter=0
+        self.resname_list=[]
+        # CA coordinate matrix
+        self.coords_ca=zeros((length, 3), "d")
+        self.fid=fid
+
+    def get_id(self):
+        """
+        @return: id for the fragment
+        @rtype: int
+        """
+        return self.fid
+
+    def get_coords(self):
+        """
+        @return: the CA coords in the fragment
+        @rtype: Numpy (Nx3) array
+        """
+        return self.coords_ca
+
+    def add_residue(self, resname, ca_coord):
+        """
+        @param resname: residue name (eg. GLY).
+        @type resname: string
+
+        @param ca_coord: the c-alpha coorinates of the residues
+        @type ca_coord: Numpy array with length 3
+        """
+        if self.counter>=self.length:
+            raise PDBException, "Fragment boundary exceeded."
+        self.resname_list.append(resname)
+        self.coords_ca[self.counter]=ca_coord
+        self.counter=self.counter+1
+
+    def __len__(self):
+        """
+        @return: length of fragment
+        @rtype: int
+        """
+        return self.length
+
+    def __sub__(self, other):
+        """
+        Return rmsd between two fragments.
+
+        Example:
+            >>> rmsd=fragment1-fragment2
+
+        @return: rmsd between fragments
+        @rtype: float
+        """
+        sup=SVDSuperimposer()
+        sup.set(self.coords_ca, other.coords_ca)
+        sup.run()
+        return sup.get_rms()
+
+    def __repr__(self):
+        """
+        Returns <Fragment length=L id=ID> where L=length of fragment
+        and ID the identifier (rank in the library).
+        """
+        return "<Fragment length=%i id=%i>" % (self.length, self.fid)
+
+
+def _make_fragment_list(pp, length):
+    """
+    Dice up a peptide in fragments of length "length".
+
+    @param pp: a list of residues (part of one peptide)
+    @type pp: [L{Residue}, L{Residue}, ...]
+
+    @param length: fragment length
+    @type length: int
+    """
+    frag_list=[]
+    for i in range(0, len(pp)-length+1):
+        f=Fragment(length, -1)
+        for j in range(0, length):
+            residue=pp[i+j]
+            resname=residue.get_resname()
+            if residue.has_id("CA"):
+                ca=residue["CA"]
+            else:
+                raise "CHAINBREAK"
+            if ca.is_disordered():
+                raise "CHAINBREAK"
+            ca_coord=ca.get_coord()
+            f.add_residue(resname, ca_coord)
+        frag_list.append(f)
+    return frag_list
+
+
+def _map_fragment_list(flist, reflist):
+    """
+    Map all frgaments in flist to the closest
+    (in RMSD) fragment in reflist.
+
+    Returns a list of reflist indices.
+
+    @param flist: list of protein fragments
+    @type flist: [L{Fragment}, L{Fragment}, ...]
+
+    @param reflist: list of reference (ie. library) fragments
+    @type reflist: [L{Fragment}, L{Fragment}, ...]
+    """
+    mapped=[]
+    for f in flist:
+        rank=[]
+        for i in range(0, len(reflist)):
+            rf=reflist[i]
+            rms=f-rf
+            rank.append((rms, rf))
+        rank.sort()
+        fragment=rank[0][1]
+        mapped.append(fragment)
+    return mapped
+
+
+class FragmentMapper:
+    """
+    Map polypeptides in a model to lists of representative fragments.
+    """
+    def __init__(self, lsize, flength=5, fdir="."):
+        """
+        @param lsize: number of fragments in the library
+        @type lsize: int
+
+        @param flength: length of fragments in the library
+        @type flength: int
+
+        @param fdir: directory where teh definition files are
+        @type fdir: string
+        """
+        if flength==5:
+            self.edge=2
+        elif flength==7:
+            self.edge=3
+        else:
+            raise PDBException, "Fragment length should be 5 or 7."
+        self.flength=flength
+        self.lsize=lsize
+        self.reflist=_read_fragments(lsize, flength, fdir)
+
+    def map(self, model):
+        """
+        @param model: the model that will be mapped
+        @type model: L{Model}
+        """
+        ppb=PPBuilder()
+        ppl=ppb.build_peptides(model)
+        fd={}
+        for pp in ppl:
+            try:
+                # make fragments
+                flist=_make_fragment_list(pp, self.flength)
+                # classify fragments
+                mflist=_map_fragment_list(flist, self.reflist)
+                for i in range(0, len(pp)):
+                    res=pp[i]
+                    if i<self.edge:
+                        # start residues
+                        continue
+                    elif i>=(len(pp)-self.edge):
+                        # end residues
+                        continue
+                    else:
+                        # fragment
+                        index=i-self.edge
+                        assert(index>=0)
+                        fd[res]=mflist[index]
+            except "CHAINBREAK":
+                # Funny polypeptide - skip
+                pass
+        self.fd=fd
+
+    def has_key(self, res):
+        """
+        @type res: L{Residue}
+        """
+        return self.fd.has_key(r)
+
+    def __getitem__(self, res):
+        """
+        @type res: L{Residue}
+
+        @return: fragment classification
+        @rtype: L{Fragment}
+        """
+        return self.fd[r]
+
+
+if __name__=="__main__":
+
+    import sys
+
+    p=PDBParser()
+    s=p.get_structure("X", sys.argv[1])
+
+    fm=FragmentMapper(10, 5, "levitt_data")
+
+    m=s[0]
+
+    fm.map(m)
+
+    for r in Selection.unfold_entities(m, "R"):
+
+        print r,
+        if fm.has_key(r):
+            print fm[r]
+        else:
+            print
+
diff --git a/Bio/PDB/HSExposure.py b/Bio/PDB/HSExposure.py
index 09be61e1c1d..efb5c64d94b 100644
--- a/Bio/PDB/HSExposure.py
+++ b/Bio/PDB/HSExposure.py
@@ -15,7 +15,7 @@ class HSExposure:
     # unit vector along z
     # CA-CB vectors or the CA-CA-CA approximation will
     # be rotated onto this unit vector
-    unit_z=Vector(0.0, 0.0, 1.0)
+    _unit_z=Vector(0.0, 0.0, 1.0)
 
     def __init__(self, OFFSET=0.0):
         # List of CA-CB direction calculated from CA-CA-CA
@@ -127,7 +127,7 @@ def _get_rotran_list_from_cb(self, residue_list):
                 # CB-CA vector
                 cb_ca_v=cb_v-ca_v
                 # Rotate CB-CA vector to unit vector along Z
-                rotation=rotmat(cb_ca_v, self.unit_z)
+                rotation=rotmat(cb_ca_v, self._unit_z)
                 translation=ca.get_coord()
                 rotran_list.append((translation, rotation, ca, residue))
         return rotran_list
@@ -168,7 +168,7 @@ def _get_rotran_list_from_ca(self, model):
                 # pseudo cb centred at origin
                 cb_v=self._get_cb_from_ca(ca_v1, ca_v2, ca_v3)
                 # rotate cb to unit vector along z
-                rotation=rotmat(cb_v, self.unit_z)
+                rotation=rotmat(cb_v, self._unit_z)
                 translation=ca2.get_coord()
                 rotran_list.append((translation, rotation, ca2, r))
                 # Calculate angle between pseudo-CB-CA and CA-CB
diff --git a/Bio/PDB/Polypeptide.py b/Bio/PDB/Polypeptide.py
index 1fa99b9b091..5db56a824f9 100644
--- a/Bio/PDB/Polypeptide.py
+++ b/Bio/PDB/Polypeptide.py
@@ -7,13 +7,22 @@
 from Bio.PDB.PDBExceptions import PDBException
 from Bio.PDB.Residue import Residue, DisorderedResidue
 
-__doc__="Polypeptide related classes (construction and representation)."
+__doc__="""
+Polypeptide related classes (construction and representation).
+
+Example:
+
+    >>> ppb=PPBuilder()
+    >>> for pp in ppb.build_peptides(structure):
+    >>>     print pp.get_sequence()
+"""
 
 def is_aa(residue):
     """
     Return 1 if residue object/string is an amino acid.
 
-    residue --- a residue object OR a three letter amino acid code
+    @param residue: a L{Residue} object OR a three letter amino acid code
+    @type residue: L{Residue} or string
     """
     if not type(residue)==StringType:
         residue=residue.get_resname()
@@ -23,11 +32,12 @@ def is_aa(residue):
 
 class Polypeptide(list):
     """
-    A polypeptide is simply a list of Residue objects.
+    A polypeptide is simply a list of L{Residue} objects.
     """
     def get_ca_list(self):
         """
-        Get the list of CA atoms.
+        @return: the list of C-alpha atoms
+        @rtype: [L{Atom}, L{Atom}, ...]
         """
         ca_list=[]
         for res in self:
@@ -36,7 +46,12 @@ def get_ca_list(self):
         return ca_list
 
     def get_sequence(self):
-        "Return the AA sequence."
+        """
+        Return the AA sequence.
+
+        @return: polypeptide sequence 
+        @rtype: L{Seq}
+        """
         s=""
         for res in self:
             resname=res.get_resname()
@@ -49,6 +64,10 @@ def get_sequence(self):
         return seq
 
     def __repr__(self):
+        """
+        Return <Polypeptide start=START end=END>, where START
+        and END are sequence identifiers of the outer residues.
+        """
         start=self[0].get_id()[1]
         end=self[-1].get_id()[1]
         s="<Polypeptide start=%s end=%s>" % (start, end)
@@ -62,6 +81,10 @@ class _PPBuilder:
     subclass.
     """
     def __init__(self, radius):
+        """
+        @param radius: distance
+        @type radius: float
+        """
         self.radius=radius
 
     def _accept(self, residue):
@@ -75,8 +98,12 @@ def _accept(self, residue):
     def build_peptides(self, entity, aa_only=1):
         """
         Build and return a list of Polypeptide objects.
-        model_id --- only this model is examined
-        aa_only --- if 1, the residue name needs to be standard AA
+
+        @param entity: polypeptides are searched for in this object
+        @type entity: L{Structure}, L{Model} or L{Chain}
+
+        @param aa_only: if 1, the residue needs to be a standard AA
+        @type aa_only: int
         """
         is_connected=self._is_connected
         accept=self._accept
diff --git a/Bio/PDB/__init__.py b/Bio/PDB/__init__.py
index 42f9714b993..98ea7ae3d22 100644
--- a/Bio/PDB/__init__.py
+++ b/Bio/PDB/__init__.py
@@ -50,6 +50,9 @@
 # My new solvent exposure method - article coming up
 from HSExposure import HSExposure
 
+# Kolodny et al.'s backbone libraries
+from FragmentMapper import FragmentMapper
+
 # Fast atom neighbor search
 # Depends on KDTree C++ module
 try: