Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
nisse
committed
May 14, 2004
1 parent
64b31ad
commit 5c5725c
Showing
5 changed files
with
347 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,304 @@ | ||
from Numeric import array, Float0, zeros | ||
import os | ||
|
||
from Bio.SVDSuperimposer import SVDSuperimposer | ||
from Bio.PDB import * | ||
|
||
__doc__=""" | ||
Classify protein backbone structure according to Kolodny et al's fragment | ||
libraries. It can be regarded as a form of objective secondary structure | ||
classification. Only fragments of length 5 or 7 are supported (ie. there is a | ||
'central' residue). | ||
Full reference: | ||
Kolodny R, Koehl P, Guibas L, Levitt M. | ||
Small libraries of protein fragments model native protein structures accurately. | ||
J Mol Biol. 2002 323(2):297-307. | ||
The definition files of the fragments can be obtained from: | ||
http://csb.stanford.edu/rachel/fragments/ | ||
You need these files to use this module. | ||
The following example uses the library with 10 fragments of length 5. | ||
The library files can be found in directory 'fragment_data'. | ||
>>> model=structure[0] | ||
>>> fm=FragmentMapper(lsize=10, flength=5, dir="fragment_data") | ||
>>> fm.map(model) | ||
>>> fragment=fm[residue] | ||
""" | ||
|
||
|
||
# fragment file (lib_SIZE_z_LENGTH.txt) | ||
# SIZE=number of fragments | ||
# LENGTH=length of fragment (4,5,6,7) | ||
_FRAGMENT_FILE="lib_%s_z_%s.txt" | ||
|
||
|
||
def _read_fragments(size, length, dir="."): | ||
""" | ||
Read a fragment spec file (available from | ||
U{http://csb.stanford.edu/rachel/fragments/} | ||
and return a list of Fragment objects. | ||
@param size: number of fragments in the library | ||
@type size: int | ||
@param length: length of the fragments | ||
@type length: int | ||
@param dir: directory where the fragment files can be found | ||
@type dir: string | ||
""" | ||
filename=(dir+"/"+_FRAGMENT_FILE) % (size, length) | ||
fp=open(filename, "r") | ||
flist=[] | ||
fid=0 | ||
for l in fp.readlines(): | ||
# skip comment and blank lines | ||
if l[0]=="*" or l[0]=="\n": | ||
continue | ||
sl=l.split() | ||
if sl[1]=="------": | ||
# Start of fragment definition | ||
f=Fragment(length, fid) | ||
flist.append(f) | ||
fid+=1 | ||
continue | ||
# Add CA coord to Fragment | ||
coord=array(map(float, sl[0:3])) | ||
f.add_residue("XXX", coord) | ||
fp.close() | ||
return flist | ||
|
||
|
||
class Fragment: | ||
""" | ||
Represent a polypeptide C-alpha fragment. | ||
""" | ||
def __init__(self, length, fid): | ||
""" | ||
@param length: length of the fragment | ||
@type length: int | ||
@param fid: id for the fragment | ||
@type fid: int | ||
""" | ||
# nr of residues in fragment | ||
self.length=length | ||
# nr of residues added | ||
self.counter=0 | ||
self.resname_list=[] | ||
# CA coordinate matrix | ||
self.coords_ca=zeros((length, 3), "d") | ||
self.fid=fid | ||
|
||
def get_id(self): | ||
""" | ||
@return: id for the fragment | ||
@rtype: int | ||
""" | ||
return self.fid | ||
|
||
def get_coords(self): | ||
""" | ||
@return: the CA coords in the fragment | ||
@rtype: Numpy (Nx3) array | ||
""" | ||
return self.coords_ca | ||
|
||
def add_residue(self, resname, ca_coord): | ||
""" | ||
@param resname: residue name (eg. GLY). | ||
@type resname: string | ||
@param ca_coord: the c-alpha coorinates of the residues | ||
@type ca_coord: Numpy array with length 3 | ||
""" | ||
if self.counter>=self.length: | ||
raise PDBException, "Fragment boundary exceeded." | ||
self.resname_list.append(resname) | ||
self.coords_ca[self.counter]=ca_coord | ||
self.counter=self.counter+1 | ||
|
||
def __len__(self): | ||
""" | ||
@return: length of fragment | ||
@rtype: int | ||
""" | ||
return self.length | ||
|
||
def __sub__(self, other): | ||
""" | ||
Return rmsd between two fragments. | ||
Example: | ||
>>> rmsd=fragment1-fragment2 | ||
@return: rmsd between fragments | ||
@rtype: float | ||
""" | ||
sup=SVDSuperimposer() | ||
sup.set(self.coords_ca, other.coords_ca) | ||
sup.run() | ||
return sup.get_rms() | ||
|
||
def __repr__(self): | ||
""" | ||
Returns <Fragment length=L id=ID> where L=length of fragment | ||
and ID the identifier (rank in the library). | ||
""" | ||
return "<Fragment length=%i id=%i>" % (self.length, self.fid) | ||
|
||
|
||
def _make_fragment_list(pp, length): | ||
""" | ||
Dice up a peptide in fragments of length "length". | ||
@param pp: a list of residues (part of one peptide) | ||
@type pp: [L{Residue}, L{Residue}, ...] | ||
@param length: fragment length | ||
@type length: int | ||
""" | ||
frag_list=[] | ||
for i in range(0, len(pp)-length+1): | ||
f=Fragment(length, -1) | ||
for j in range(0, length): | ||
residue=pp[i+j] | ||
resname=residue.get_resname() | ||
if residue.has_id("CA"): | ||
ca=residue["CA"] | ||
else: | ||
raise "CHAINBREAK" | ||
if ca.is_disordered(): | ||
raise "CHAINBREAK" | ||
ca_coord=ca.get_coord() | ||
f.add_residue(resname, ca_coord) | ||
frag_list.append(f) | ||
return frag_list | ||
|
||
|
||
def _map_fragment_list(flist, reflist): | ||
""" | ||
Map all frgaments in flist to the closest | ||
(in RMSD) fragment in reflist. | ||
Returns a list of reflist indices. | ||
@param flist: list of protein fragments | ||
@type flist: [L{Fragment}, L{Fragment}, ...] | ||
@param reflist: list of reference (ie. library) fragments | ||
@type reflist: [L{Fragment}, L{Fragment}, ...] | ||
""" | ||
mapped=[] | ||
for f in flist: | ||
rank=[] | ||
for i in range(0, len(reflist)): | ||
rf=reflist[i] | ||
rms=f-rf | ||
rank.append((rms, rf)) | ||
rank.sort() | ||
fragment=rank[0][1] | ||
mapped.append(fragment) | ||
return mapped | ||
|
||
|
||
class FragmentMapper: | ||
""" | ||
Map polypeptides in a model to lists of representative fragments. | ||
""" | ||
def __init__(self, lsize, flength=5, fdir="."): | ||
""" | ||
@param lsize: number of fragments in the library | ||
@type lsize: int | ||
@param flength: length of fragments in the library | ||
@type flength: int | ||
@param fdir: directory where teh definition files are | ||
@type fdir: string | ||
""" | ||
if flength==5: | ||
self.edge=2 | ||
elif flength==7: | ||
self.edge=3 | ||
else: | ||
raise PDBException, "Fragment length should be 5 or 7." | ||
self.flength=flength | ||
self.lsize=lsize | ||
self.reflist=_read_fragments(lsize, flength, fdir) | ||
|
||
def map(self, model): | ||
""" | ||
@param model: the model that will be mapped | ||
@type model: L{Model} | ||
""" | ||
ppb=PPBuilder() | ||
ppl=ppb.build_peptides(model) | ||
fd={} | ||
for pp in ppl: | ||
try: | ||
# make fragments | ||
flist=_make_fragment_list(pp, self.flength) | ||
# classify fragments | ||
mflist=_map_fragment_list(flist, self.reflist) | ||
for i in range(0, len(pp)): | ||
res=pp[i] | ||
if i<self.edge: | ||
# start residues | ||
continue | ||
elif i>=(len(pp)-self.edge): | ||
# end residues | ||
continue | ||
else: | ||
# fragment | ||
index=i-self.edge | ||
assert(index>=0) | ||
fd[res]=mflist[index] | ||
except "CHAINBREAK": | ||
# Funny polypeptide - skip | ||
pass | ||
self.fd=fd | ||
|
||
def has_key(self, res): | ||
""" | ||
@type res: L{Residue} | ||
""" | ||
return self.fd.has_key(r) | ||
|
||
def __getitem__(self, res): | ||
""" | ||
@type res: L{Residue} | ||
@return: fragment classification | ||
@rtype: L{Fragment} | ||
""" | ||
return self.fd[r] | ||
|
||
|
||
if __name__=="__main__": | ||
|
||
import sys | ||
|
||
p=PDBParser() | ||
s=p.get_structure("X", sys.argv[1]) | ||
|
||
fm=FragmentMapper(10, 5, "levitt_data") | ||
|
||
m=s[0] | ||
|
||
fm.map(m) | ||
|
||
for r in Selection.unfold_entities(m, "R"): | ||
|
||
print r, | ||
if fm.has_key(r): | ||
print fm[r] | ||
else: | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.