Skip to content


Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Fetching contributors…

Cannot retrieve contributors at this time

executable file 162 lines (124 sloc) 4.926 kb
#!/usr/bin/env python
# Copyright 2001 by Gavin E. Crooks. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
import getopt
import sys
import urllib
from Bio.SCOP import *
def usage():
    """Print the command-line help text for scop_pdb to standard output."""
    # The parenthesised single-argument form prints exactly the same text
    # under the Python 2 print statement and the Python 3 print function.
    # NOTE(review): the "[]" placeholders and the sentence ending in
    # "retrieved directly from" look like places where URLs were lost from
    # the help text -- restore them from the upstream distribution.
    print(
"""Extract a SCOP domain's ATOM and HETATOM records from the relevant PDB file.
For example: astral-rapid-access-1.55.raf dir.cla.scop.txt_1.55 d3hbib_
A result file, d3hbib_.ent, will be generated in the working directory.
The required RAF file can be found at [],
and the SCOP CLA file at [].
Note: Errors will occur if the PDB file has been altered since the creation
of the SCOP CLA and ASTRAL RAF files.
Usage: scop_pdb [-h] [-i file] [-o file] [-p pdb_url_prefix]
raf_url cla_url [sid] [sid] [sid] ...
-h -- Print this help message.
-i file -- Input file name. Each line should start with an sid (Scop domain
identifier). Blank lines, and lines starting with '#' are
ignored. If file is '-' then data is read from stdin. If not
given then sids are taken from the command line.
-o file -- Output file name. If '-' then data is written to stdout. If not
given then data is written to files named sid+'.ent'.
-p pdb_url-- A URL for PDB files. The token '%s' will be replaced with the
4 character PDB ID. If the pdb_url is not given then the latest
PDB file is retrieved directly from
raf_url -- The URL or filename of an ASTRAL Rapid Access File sequence map.
See []
cla_url -- The URL or filename of a SCOP parsable CLA file.
See []
sid -- A SCOP domain identifier. e.g. d3hbib_
""")
# URL template used by open_pdb() when no pdb_url is supplied; the '%s' token
# is replaced with the 4 character PDB ID.
# NOTE(review): the previous value was an empty string, which makes
# `default_pdb_url % pdbid` raise TypeError for every ID. This template points
# at the RCSB file download service -- confirm it is the intended source.
default_pdb_url = "https://files.rcsb.org/download/%s.pdb"
#default_pdb_url = "file://usr/local/db/pdb/data/010331/snapshot/all/pdb%s.ent"
def open_pdb(pdbid, pdb_url=None):
    """Fetch the PDB file for *pdbid* and return it as an open file object.

    pdbid   -- value substituted into the URL template with the % operator.
    pdb_url -- URL template; when None the module-level default_pdb_url is
               used instead.

    The URL is retrieved with urllib.urlretrieve and the resulting local
    file is opened for reading; the caller is responsible for closing it.
    """
    template = default_pdb_url if pdb_url is None else pdb_url
    local_path, _headers = urllib.urlretrieve(template % pdbid)
    return open(local_path)
def main():
    """Command-line entry point: write out the PDB records of SCOP domains.

    Options and positional arguments are read from sys.argv (see usage()
    for the syntax).  The first two positional arguments are the RAF and CLA
    locations; any remaining ones are sids unless -i/--input is given.

    For each sid the CLA record is looked up, the matching sequence map is
    taken from the RAF index, the PDB file is fetched with open_pdb() and the
    sequence map's getAtoms() writes to the selected output.

    Exits with status 2 on a bad option or when fewer than two positional
    arguments are given, and with status 0 after printing help.  IOError,
    KeyError and RuntimeError raised while processing one domain are reported
    on stderr and processing continues with the next sid.
    """
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "hp:o:i:",
            ["help", "usage", "pdb=", "output=", "input="])
    except getopt.GetoptError:
        # print help information and exit:
        usage()
        sys.exit(2)

    input_name = None
    output = None
    pdb_url = None

    for o, a in opts:
        if o in ("-h", "--help", "--usage"):
            usage()
            sys.exit()
        elif o in ("-o", "--output"):
            output = a
        elif o in ("-i", "--input"):
            input_name = a
        elif o in ("-p", "--pdb"):
            pdb_url = a

    if len(args) < 2:
        sys.stderr.write(
            "Not enough arguments. Try --help for more details.\n")
        sys.exit(2)

    raf_url = args[0]
    cla_url = args[1]

    # Both indexes are built from local copies of the given URLs/filenames.
    (raf_filename, headers) = urllib.urlretrieve(raf_url)
    seqMapIndex = Raf.SeqMapIndex(raf_filename)

    (cla_filename, headers) = urllib.urlretrieve(cla_url)
    claIndex = Cla.Index(cla_filename)

    # File objects are iterated directly; under Python 2 this yields the same
    # lines as the deprecated xreadlines().
    in_handle = None
    if input_name is None:
        sids = args[2:]
    elif input_name == '-':
        sids = sys.stdin
    else:
        in_handle = open(input_name)
        sids = in_handle

    # A named -o file is opened once and shared by every domain; reopening
    # it with "w+" per domain would truncate the earlier domains' records.
    shared_out = None
    try:
        for sid in sids:
            # Skip blank lines (including a bare newline read from a file)
            # and comment lines, as promised by the help text.
            if not sid.strip() or sid[0:1] == '#':
                continue
            domain_id = sid[0:7]
            pdbid = domain_id[1:5]
            s = pdbid[0:1]
            if s == '0' or s == 's':
                sys.stderr.write(
                    "No coordinates for domain " + domain_id + "\n")
                continue

            if output is None:
                out_handle = open(domain_id + ".ent", "w+")
            elif output == '-':
                out_handle = sys.stdout
            else:
                if shared_out is None:
                    shared_out = open(output, "w+")
                out_handle = shared_out

            try:
                claRec = claIndex[domain_id]
                residues = claRec.residues
                seqMap = seqMapIndex.getSeqMap(residues)
                # The PDB ID recorded in the CLA entry is the one fetched.
                pdbid = residues.pdbid

                f = open_pdb(pdbid, pdb_url)
                try:
                    seqMap.getAtoms(f, out_handle)
                finally:
                    f.close()
            except (IOError, KeyError, RuntimeError) as e:
                # Same text the print statement produced, including the
                # double space after "domain".
                sys.stderr.write(
                    "I cannot do SCOP domain  %s : %s\n" % (domain_id, e))
            finally:
                # Only per-domain files are closed here; stdout and the
                # shared -o file must stay open for the following domains.
                if output is None:
                    out_handle.close()
    finally:
        if shared_out is not None:
            shared_out.close()
        if in_handle is not None:
            in_handle.close()
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
Jump to Line
Something went wrong with that request. Please try again.