From 5c5725cc6485d6c34a40d3369e19e2b2494591de Mon Sep 17 00:00:00 2001 From: nisse Date: Fri, 14 May 2004 09:52:12 +0000 Subject: [PATCH] *** empty log message *** --- Bio/PDB/Atom.py | 3 + Bio/PDB/FragmentMapper.py | 304 ++++++++++++++++++++++++++++++++++++++ Bio/PDB/HSExposure.py | 6 +- Bio/PDB/Polypeptide.py | 41 ++++- Bio/PDB/__init__.py | 3 + 5 files changed, 347 insertions(+), 10 deletions(-) create mode 100644 Bio/PDB/FragmentMapper.py diff --git a/Bio/PDB/Atom.py b/Bio/PDB/Atom.py index 855c9c752f2..3a6271c7c75 100644 --- a/Bio/PDB/Atom.py +++ b/Bio/PDB/Atom.py @@ -71,6 +71,9 @@ def __sub__(self, other): Example: >>> distance=atom1-atom2 + + @param other: the other atom + @type other: L{Atom} """ diff=self.coord-other.coord return sqrt(sum(diff*diff)) diff --git a/Bio/PDB/FragmentMapper.py b/Bio/PDB/FragmentMapper.py new file mode 100644 index 00000000000..02e67b8434d --- /dev/null +++ b/Bio/PDB/FragmentMapper.py @@ -0,0 +1,304 @@ +from Numeric import array, Float0, zeros +import os + +from Bio.SVDSuperimposer import SVDSuperimposer +from Bio.PDB import * + +__doc__=""" +Classify protein backbone structure according to Kolodny et al's fragment +libraries. It can be regarded as a form of objective secondary structure +classification. Only fragments of length 5 or 7 are supported (ie. there is a +'central' residue). + +Full reference: + +Kolodny R, Koehl P, Guibas L, Levitt M. +Small libraries of protein fragments model native protein structures accurately. +J Mol Biol. 2002 323(2):297-307. + +The definition files of the fragments can be obtained from: + +http://csb.stanford.edu/rachel/fragments/ + +You need these files to use this module. + +The following example uses the library with 10 fragments of length 5. +The library files can be found in directory 'fragment_data'. + + >>> model=structure[0] + >>> fm=FragmentMapper(lsize=10, flength=5, dir="fragment_data") + >>> fm.map(model) + >>> fragment=fm[residue] +""" + + +# fragment file (lib_SIZE_z_LENGTH.txt) +# SIZE=number of fragments +# LENGTH=length of fragment (4,5,6,7) +_FRAGMENT_FILE="lib_%s_z_%s.txt" + + +def _read_fragments(size, length, dir="."): + """ + Read a fragment spec file (available from + U{http://csb.stanford.edu/rachel/fragments/} + and return a list of Fragment objects. + + @param size: number of fragments in the library + @type size: int + + @param length: length of the fragments + @type length: int + + @param dir: directory where the fragment files can be found + @type dir: string + """ + filename=(dir+"/"+_FRAGMENT_FILE) % (size, length) + fp=open(filename, "r") + flist=[] + fid=0 + for l in fp.readlines(): + # skip comment and blank lines + if l[0]=="*" or l[0]=="\n": + continue + sl=l.split() + if sl[1]=="------": + # Start of fragment definition + f=Fragment(length, fid) + flist.append(f) + fid+=1 + continue + # Add CA coord to Fragment + coord=array(map(float, sl[0:3])) + f.add_residue("XXX", coord) + fp.close() + return flist + + +class Fragment: + """ + Represent a polypeptide C-alpha fragment. + """ + def __init__(self, length, fid): + """ + @param length: length of the fragment + @type length: int + + @param fid: id for the fragment + @type fid: int + """ + # nr of residues in fragment + self.length=length + # nr of residues added + self.counter=0 + self.resname_list=[] + # CA coordinate matrix + self.coords_ca=zeros((length, 3), "d") + self.fid=fid + + def get_id(self): + """ + @return: id for the fragment + @rtype: int + """ + return self.fid + + def get_coords(self): + """ + @return: the CA coords in the fragment + @rtype: Numpy (Nx3) array + """ + return self.coords_ca + + def add_residue(self, resname, ca_coord): + """ + @param resname: residue name (eg. GLY). + @type resname: string + + @param ca_coord: the c-alpha coorinates of the residues + @type ca_coord: Numpy array with length 3 + """ + if self.counter>=self.length: + raise PDBException, "Fragment boundary exceeded." + self.resname_list.append(resname) + self.coords_ca[self.counter]=ca_coord + self.counter=self.counter+1 + + def __len__(self): + """ + @return: length of fragment + @rtype: int + """ + return self.length + + def __sub__(self, other): + """ + Return rmsd between two fragments. + + Example: + >>> rmsd=fragment1-fragment2 + + @return: rmsd between fragments + @rtype: float + """ + sup=SVDSuperimposer() + sup.set(self.coords_ca, other.coords_ca) + sup.run() + return sup.get_rms() + + def __repr__(self): + """ + Returns where L=length of fragment + and ID the identifier (rank in the library). + """ + return "" % (self.length, self.fid) + + +def _make_fragment_list(pp, length): + """ + Dice up a peptide in fragments of length "length". + + @param pp: a list of residues (part of one peptide) + @type pp: [L{Residue}, L{Residue}, ...] + + @param length: fragment length + @type length: int + """ + frag_list=[] + for i in range(0, len(pp)-length+1): + f=Fragment(length, -1) + for j in range(0, length): + residue=pp[i+j] + resname=residue.get_resname() + if residue.has_id("CA"): + ca=residue["CA"] + else: + raise "CHAINBREAK" + if ca.is_disordered(): + raise "CHAINBREAK" + ca_coord=ca.get_coord() + f.add_residue(resname, ca_coord) + frag_list.append(f) + return frag_list + + +def _map_fragment_list(flist, reflist): + """ + Map all frgaments in flist to the closest + (in RMSD) fragment in reflist. + + Returns a list of reflist indices. + + @param flist: list of protein fragments + @type flist: [L{Fragment}, L{Fragment}, ...] + + @param reflist: list of reference (ie. library) fragments + @type reflist: [L{Fragment}, L{Fragment}, ...] + """ + mapped=[] + for f in flist: + rank=[] + for i in range(0, len(reflist)): + rf=reflist[i] + rms=f-rf + rank.append((rms, rf)) + rank.sort() + fragment=rank[0][1] + mapped.append(fragment) + return mapped + + +class FragmentMapper: + """ + Map polypeptides in a model to lists of representative fragments. + """ + def __init__(self, lsize, flength=5, fdir="."): + """ + @param lsize: number of fragments in the library + @type lsize: int + + @param flength: length of fragments in the library + @type flength: int + + @param fdir: directory where teh definition files are + @type fdir: string + """ + if flength==5: + self.edge=2 + elif flength==7: + self.edge=3 + else: + raise PDBException, "Fragment length should be 5 or 7." + self.flength=flength + self.lsize=lsize + self.reflist=_read_fragments(lsize, flength, fdir) + + def map(self, model): + """ + @param model: the model that will be mapped + @type model: L{Model} + """ + ppb=PPBuilder() + ppl=ppb.build_peptides(model) + fd={} + for pp in ppl: + try: + # make fragments + flist=_make_fragment_list(pp, self.flength) + # classify fragments + mflist=_map_fragment_list(flist, self.reflist) + for i in range(0, len(pp)): + res=pp[i] + if i=(len(pp)-self.edge): + # end residues + continue + else: + # fragment + index=i-self.edge + assert(index>=0) + fd[res]=mflist[index] + except "CHAINBREAK": + # Funny polypeptide - skip + pass + self.fd=fd + + def has_key(self, res): + """ + @type res: L{Residue} + """ + return self.fd.has_key(r) + + def __getitem__(self, res): + """ + @type res: L{Residue} + + @return: fragment classification + @rtype: L{Fragment} + """ + return self.fd[r] + + +if __name__=="__main__": + + import sys + + p=PDBParser() + s=p.get_structure("X", sys.argv[1]) + + fm=FragmentMapper(10, 5, "levitt_data") + + m=s[0] + + fm.map(m) + + for r in Selection.unfold_entities(m, "R"): + + print r, + if fm.has_key(r): + print fm[r] + else: + print + diff --git a/Bio/PDB/HSExposure.py b/Bio/PDB/HSExposure.py index 09be61e1c1d..efb5c64d94b 100644 --- a/Bio/PDB/HSExposure.py +++ b/Bio/PDB/HSExposure.py @@ -15,7 +15,7 @@ class HSExposure: # unit vector along z # CA-CB vectors or the CA-CA-CA approximation will # be rotated onto this unit vector - unit_z=Vector(0.0, 0.0, 1.0) + _unit_z=Vector(0.0, 0.0, 1.0) def __init__(self, OFFSET=0.0): # List of CA-CB direction calculated from CA-CA-CA @@ -127,7 +127,7 @@ def _get_rotran_list_from_cb(self, residue_list): # CB-CA vector cb_ca_v=cb_v-ca_v # Rotate CB-CA vector to unit vector along Z - rotation=rotmat(cb_ca_v, self.unit_z) + rotation=rotmat(cb_ca_v, self._unit_z) translation=ca.get_coord() rotran_list.append((translation, rotation, ca, residue)) return rotran_list @@ -168,7 +168,7 @@ def _get_rotran_list_from_ca(self, model): # pseudo cb centred at origin cb_v=self._get_cb_from_ca(ca_v1, ca_v2, ca_v3) # rotate cb to unit vector along z - rotation=rotmat(cb_v, self.unit_z) + rotation=rotmat(cb_v, self._unit_z) translation=ca2.get_coord() rotran_list.append((translation, rotation, ca2, r)) # Calculate angle between pseudo-CB-CA and CA-CB diff --git a/Bio/PDB/Polypeptide.py b/Bio/PDB/Polypeptide.py index 1fa99b9b091..5db56a824f9 100644 --- a/Bio/PDB/Polypeptide.py +++ b/Bio/PDB/Polypeptide.py @@ -7,13 +7,22 @@ from Bio.PDB.PDBExceptions import PDBException from Bio.PDB.Residue import Residue, DisorderedResidue -__doc__="Polypeptide related classes (construction and representation)." +__doc__=""" +Polypeptide related classes (construction and representation). + +Example: + + >>> ppb=PPBuilder() + >>> for pp in ppb.build_peptides(structure): + >>> print pp.get_sequence() +""" def is_aa(residue): """ Return 1 if residue object/string is an amino acid. - residue --- a residue object OR a three letter amino acid code + @param residue: a L{Residue} object OR a three letter amino acid code + @type residue: L{Residue} or string """ if not type(residue)==StringType: residue=residue.get_resname() @@ -23,11 +32,12 @@ def is_aa(residue): class Polypeptide(list): """ - A polypeptide is simply a list of Residue objects. + A polypeptide is simply a list of L{Residue} objects. """ def get_ca_list(self): """ - Get the list of CA atoms. + @return: the list of C-alpha atoms + @rtype: [L{Atom}, L{Atom}, ...] """ ca_list=[] for res in self: @@ -36,7 +46,12 @@ def get_ca_list(self): return ca_list def get_sequence(self): - "Return the AA sequence." + """ + Return the AA sequence. + + @return: polypeptide sequence + @rtype: L{Seq} + """ s="" for res in self: resname=res.get_resname() @@ -49,6 +64,10 @@ def get_sequence(self): return seq def __repr__(self): + """ + Return , where START + and END are sequence identifiers of the outer residues. + """ start=self[0].get_id()[1] end=self[-1].get_id()[1] s="" % (start, end) @@ -62,6 +81,10 @@ class _PPBuilder: subclass. """ def __init__(self, radius): + """ + @param radius: distance + @type radius: float + """ self.radius=radius def _accept(self, residue): @@ -75,8 +98,12 @@ def _accept(self, residue): def build_peptides(self, entity, aa_only=1): """ Build and return a list of Polypeptide objects. - model_id --- only this model is examined - aa_only --- if 1, the residue name needs to be standard AA + + @param entity: polypeptides are searched for in this object + @type entity: L{Structure}, L{Model} or L{Chain} + + @param aa_only: if 1, the residue needs to be a standard AA + @type aa_only: int """ is_connected=self._is_connected accept=self._accept diff --git a/Bio/PDB/__init__.py b/Bio/PDB/__init__.py index 42f9714b993..98ea7ae3d22 100644 --- a/Bio/PDB/__init__.py +++ b/Bio/PDB/__init__.py @@ -50,6 +50,9 @@ # My new solvent exposure method - article coming up from HSExposure import HSExposure +# Kolodny et al.'s backbone libraries +from FragmentMapper import FragmentMapper + # Fast atom neighbor search # Depends on KDTree C++ module try: