*** empty log message ***

biopython · May 14, 2004 · 5c5725c · 5c5725c
1 parent 64b31ad
commit 5c5725c
Show file tree

Hide file tree

Showing 5 changed files with 347 additions and 10 deletions.
diff --git a/Bio/PDB/Atom.py b/Bio/PDB/Atom.py
@@ -71,6 +71,9 @@ def __sub__(self, other):
         
         Example:
             >>> distance=atom1-atom2
+
+        @param other: the other atom
+        @type other: L{Atom}
         """
         diff=self.coord-other.coord
         return sqrt(sum(diff*diff))

diff --git a/Bio/PDB/FragmentMapper.py b/Bio/PDB/FragmentMapper.py
@@ -0,0 +1,304 @@
+from Numeric import array, Float0, zeros
+import os
+
+from Bio.SVDSuperimposer import SVDSuperimposer
+from Bio.PDB import *
+
+__doc__="""
+Classify protein backbone structure according to Kolodny et al's fragment
+libraries. It can be regarded as a form of objective secondary structure
+classification. Only fragments of length 5 or 7 are supported (ie. there is a
+'central' residue).
+
+Full reference:
+
+Kolodny R, Koehl P, Guibas L, Levitt M.
+Small libraries of protein fragments model native protein structures accurately.
+J Mol Biol. 2002 323(2):297-307.
+
+The definition files of the fragments can be obtained from:
+
+http://csb.stanford.edu/rachel/fragments/
+
+You need these files to use this module.
+
+The following example uses the library with 10 fragments of length 5.
+The library files can be found in directory 'fragment_data'.
+
+    >>> model=structure[0]
+    >>> fm=FragmentMapper(lsize=10, flength=5, dir="fragment_data")
+    >>> fm.map(model)
+    >>> fragment=fm[residue]
+"""
+
+
+# fragment file (lib_SIZE_z_LENGTH.txt)
+# SIZE=number of fragments
+# LENGTH=length of fragment (4,5,6,7)
+_FRAGMENT_FILE="lib_%s_z_%s.txt"
+
+
+def _read_fragments(size, length, dir="."):
+    """
+    Read a fragment spec file (available from 
+    U{http://csb.stanford.edu/rachel/fragments/} 
+    and return a list of Fragment objects.
+
+    @param size: number of fragments in the library
+    @type size: int
+
+    @param length: length of the fragments
+    @type length: int
+
+    @param dir: directory where the fragment files can be found
+    @type dir: string
+    """
+    filename=(dir+"/"+_FRAGMENT_FILE) % (size, length)
+    fp=open(filename, "r")
+    flist=[]
+    fid=0
+    for l in fp.readlines():
+                # skip comment and blank lines
+        if l[0]=="*" or l[0]=="\n":
+            continue
+        sl=l.split()
+        if sl[1]=="------":
+            # Start of fragment definition
+            f=Fragment(length, fid)
+            flist.append(f)
+            fid+=1
+            continue
+        # Add CA coord to Fragment
+        coord=array(map(float, sl[0:3]))
+        f.add_residue("XXX", coord)
+    fp.close()
+    return flist
+
+
+class Fragment:
+    """
+    Represent a polypeptide C-alpha fragment.
+    """
+    def __init__(self, length, fid):
+        """
+        @param length: length of the fragment
+        @type length: int
+
+        @param fid: id for the fragment
+        @type fid: int
+        """
+        # nr of residues in fragment
+        self.length=length
+        # nr of residues added
+        self.counter=0
+        self.resname_list=[]
+        # CA coordinate matrix
+        self.coords_ca=zeros((length, 3), "d")
+        self.fid=fid
+
+    def get_id(self):
+        """
+        @return: id for the fragment
+        @rtype: int
+        """
+        return self.fid
+
+    def get_coords(self):
+        """
+        @return: the CA coords in the fragment
+        @rtype: Numpy (Nx3) array
+        """
+        return self.coords_ca
+
+    def add_residue(self, resname, ca_coord):
+        """
+        @param resname: residue name (eg. GLY).
+        @type resname: string
+
+        @param ca_coord: the c-alpha coorinates of the residues
+        @type ca_coord: Numpy array with length 3
+        """
+        if self.counter>=self.length:
+            raise PDBException, "Fragment boundary exceeded."
+        self.resname_list.append(resname)
+        self.coords_ca[self.counter]=ca_coord
+        self.counter=self.counter+1
+
+    def __len__(self):
+        """
+        @return: length of fragment
+        @rtype: int
+        """
+        return self.length
+
+    def __sub__(self, other):
+        """
+        Return rmsd between two fragments.
+
+        Example:
+            >>> rmsd=fragment1-fragment2
+
+        @return: rmsd between fragments
+        @rtype: float
+        """
+        sup=SVDSuperimposer()
+        sup.set(self.coords_ca, other.coords_ca)
+        sup.run()
+        return sup.get_rms()
+
+    def __repr__(self):
+        """
+        Returns <Fragment length=L id=ID> where L=length of fragment
+        and ID the identifier (rank in the library).
+        """
+        return "<Fragment length=%i id=%i>" % (self.length, self.fid)
+
+
+def _make_fragment_list(pp, length):
+    """
+    Dice up a peptide in fragments of length "length".
+
+    @param pp: a list of residues (part of one peptide)
+    @type pp: [L{Residue}, L{Residue}, ...]
+
+    @param length: fragment length
+    @type length: int
+    """
+    frag_list=[]
+    for i in range(0, len(pp)-length+1):
+        f=Fragment(length, -1)
+        for j in range(0, length):
+            residue=pp[i+j]
+            resname=residue.get_resname()
+            if residue.has_id("CA"):
+                ca=residue["CA"]
+            else:
+                raise "CHAINBREAK"
+            if ca.is_disordered():
+                raise "CHAINBREAK"
+            ca_coord=ca.get_coord()
+            f.add_residue(resname, ca_coord)
+        frag_list.append(f)
+    return frag_list
+
+
+def _map_fragment_list(flist, reflist):
+    """
+    Map all frgaments in flist to the closest
+    (in RMSD) fragment in reflist.
+
+    Returns a list of reflist indices.
+
+    @param flist: list of protein fragments
+    @type flist: [L{Fragment}, L{Fragment}, ...]
+
+    @param reflist: list of reference (ie. library) fragments
+    @type reflist: [L{Fragment}, L{Fragment}, ...]
+    """
+    mapped=[]
+    for f in flist:
+        rank=[]
+        for i in range(0, len(reflist)):
+            rf=reflist[i]
+            rms=f-rf
+            rank.append((rms, rf))
+        rank.sort()
+        fragment=rank[0][1]
+        mapped.append(fragment)
+    return mapped
+
+
+class FragmentMapper:
+    """
+    Map polypeptides in a model to lists of representative fragments.
+    """
+    def __init__(self, lsize, flength=5, fdir="."):
+        """
+        @param lsize: number of fragments in the library
+        @type lsize: int
+
+        @param flength: length of fragments in the library
+        @type flength: int
+
+        @param fdir: directory where teh definition files are
+        @type fdir: string
+        """
+        if flength==5:
+            self.edge=2
+        elif flength==7:
+            self.edge=3
+        else:
+            raise PDBException, "Fragment length should be 5 or 7."
+        self.flength=flength
+        self.lsize=lsize
+        self.reflist=_read_fragments(lsize, flength, fdir)
+
+    def map(self, model):
+        """
+        @param model: the model that will be mapped
+        @type model: L{Model}
+        """
+        ppb=PPBuilder()
+        ppl=ppb.build_peptides(model)
+        fd={}
+        for pp in ppl:
+            try:
+                # make fragments
+                flist=_make_fragment_list(pp, self.flength)
+                # classify fragments
+                mflist=_map_fragment_list(flist, self.reflist)
+                for i in range(0, len(pp)):
+                    res=pp[i]
+                    if i<self.edge:
+                        # start residues
+                        continue
+                    elif i>=(len(pp)-self.edge):
+                        # end residues
+                        continue
+                    else:
+                        # fragment
+                        index=i-self.edge
+                        assert(index>=0)
+                        fd[res]=mflist[index]
+            except "CHAINBREAK":
+                # Funny polypeptide - skip
+                pass
+        self.fd=fd
+
+    def has_key(self, res):
+        """
+        @type res: L{Residue}
+        """
+        return self.fd.has_key(r)
+
+    def __getitem__(self, res):
+        """
+        @type res: L{Residue}
+
+        @return: fragment classification
+        @rtype: L{Fragment}
+        """
+        return self.fd[r]
+
+
+if __name__=="__main__":
+
+    import sys
+
+    p=PDBParser()
+    s=p.get_structure("X", sys.argv[1])
+
+    fm=FragmentMapper(10, 5, "levitt_data")
+
+    m=s[0]
+
+    fm.map(m)
+
+    for r in Selection.unfold_entities(m, "R"):
+
+        print r,
+        if fm.has_key(r):
+            print fm[r]
+        else:
+            print
+
diff --git a/Bio/PDB/HSExposure.py b/Bio/PDB/HSExposure.py
@@ -15,7 +15,7 @@ class HSExposure:
     # unit vector along z
     # CA-CB vectors or the CA-CA-CA approximation will
     # be rotated onto this unit vector
-    unit_z=Vector(0.0, 0.0, 1.0)
+    _unit_z=Vector(0.0, 0.0, 1.0)
 
     def __init__(self, OFFSET=0.0):
         # List of CA-CB direction calculated from CA-CA-CA
@@ -127,7 +127,7 @@ def _get_rotran_list_from_cb(self, residue_list):
                 # CB-CA vector
                 cb_ca_v=cb_v-ca_v
                 # Rotate CB-CA vector to unit vector along Z
-                rotation=rotmat(cb_ca_v, self.unit_z)
+                rotation=rotmat(cb_ca_v, self._unit_z)
                 translation=ca.get_coord()
                 rotran_list.append((translation, rotation, ca, residue))
         return rotran_list
@@ -168,7 +168,7 @@ def _get_rotran_list_from_ca(self, model):
                 # pseudo cb centred at origin
                 cb_v=self._get_cb_from_ca(ca_v1, ca_v2, ca_v3)
                 # rotate cb to unit vector along z
-                rotation=rotmat(cb_v, self.unit_z)
+                rotation=rotmat(cb_v, self._unit_z)
                 translation=ca2.get_coord()
                 rotran_list.append((translation, rotation, ca2, r))
                 # Calculate angle between pseudo-CB-CA and CA-CB