Skip to content
Fetching contributors…
Cannot retrieve contributors at this time
executable file 163 lines (125 sloc) 4.82 KB
#!/usr/bin/env python
# Copyright 2001 by Gavin E. Crooks. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
import getopt
import sys
import types
import urllib
from Bio.SCOP import *
def usage():
    """Print the command-line help text for scop_pdb to standard output."""
    # NOTE(review): the '[]' placeholders and the sentence ending in
    # "retrieved directly from" appear to have lost their URLs; the text is
    # kept as-is here -- restore the links from the upstream distribution.
    # A parenthesised single argument prints identically on Python 2 and 3.
    print(
"""Extract a SCOP domain's ATOM and HETATOM records from the relevant PDB file.
For example: astral-rapid-access-1.55.raf dir.cla.scop.txt_1.55 d3hbib_
A result file, d3hbib_.ent, will be generated in the working directory.
The required RAF file can be found at [],
and the SCOP CLA file at [].
Note: Errors will occur if the PDB file has been altered since the creation
of the SCOP CLA and ASTRAL RAF files.
Usage: scop_pdb [-h] [-i file] [-o file] [-p pdb_url_prefix]
raf_url cla_url [sid] [sid] [sid] ...
-h -- Print this help message.
-i file -- Input file name. Each line should start with an sid (Scop domain
identifier). Blank lines, and lines starting with '#' are
ignored. If file is '-' then data is read from stdin. If not
given then sids are taken from the command line.
-o file -- Output file name. If '-' then data is written to stdout. If not
given then data is written to files named sid+'.ent'.
-p pdb_url-- A URL for PDB files. The token '%s' will be replaced with the
4 character PDB ID. If the pdb_url is not given then the latest
PDB file is retrieved directly from
raf_url -- The URL or filename of an ASTRAL Rapid Access File sequence map.
See []
cla_url -- The URL or filename of a SCOP parsable CLA file.
See []
sid -- A SCOP domain identifier. e.g. d3hbib_
""")
# URL template used when no -p/--pdb option is given. It must contain exactly
# one '%s', which open_pdb() replaces with the 4 character PDB ID; an empty
# template makes the `pdb_url % pdbid` substitution raise TypeError.
default_pdb_url = "https://files.rcsb.org/download/%s.pdb"
# To read from a local PDB mirror instead, use a file URL template such as
# "file:///usr/local/db/pdb/data/010331/snapshot/all/pdb%s.ent".
def open_pdb(pdbid, pdb_url=None):
    """Retrieve the PDB file for *pdbid* and return it opened for reading.

    pdbid   -- PDB identifier substituted into the URL template.
    pdb_url -- URL template containing one '%s' placeholder. When None, the
               module-level default_pdb_url is used.

    Returns a file object opened (default text mode) on the local file name
    reported by urlretrieve. Errors from the substitution, the retrieval or
    the open call propagate to the caller.
    """
    # urlretrieve lives in urllib on Python 2 and in urllib.request on
    # Python 3; resolve it here so the function works on either.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve

    if pdb_url is None:
        pdb_url = default_pdb_url
    local_name, _headers = urlretrieve(pdb_url % pdbid)
    return open(local_name)
def _local_copy(location):
    """Return a local file name for *location*, downloading it if it is a URL."""
    import os

    # An existing path is used directly: Python 3's urlretrieve rejects bare
    # file names, which the usage text explicitly allows for raf_url/cla_url.
    if os.path.exists(location):
        return location
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve
    return urlretrieve(location)[0]


def _write_domain(sid, cla_index, seq_map_index, pdb_url, out_handle):
    """Write the atom records of SCOP domain *sid* to *out_handle*."""
    residues = cla_index[sid].residues
    seq_map = seq_map_index.getSeqMap(residues)
    pdb_handle = open_pdb(residues.pdbid, pdb_url)
    try:
        seq_map.getAtoms(pdb_handle, out_handle)
    finally:
        # The retrieved PDB file is only needed for this one domain.
        pdb_handle.close()


def main():
    """Command-line entry point: extract atom records for SCOP domains.

    Parses sys.argv (options -h/-i/-o/-p, then raf_url, cla_url and optional
    sids), builds the RAF sequence-map index and the CLA index, and writes
    each requested domain either to '<sid>.ent', to the -o file, or to stdout
    when -o is '-'.

    Exits with status 2 on a bad option or when fewer than two positional
    arguments are given, and with status 0 after printing help. A domain that
    fails with IOError, KeyError or RuntimeError is reported on stderr and
    skipped; processing continues with the next sid.
    """
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "hp:o:i:",
            ["help", "usage", "pdb=", "output=", "input="])
    except getopt.GetoptError:
        # Print help information and exit.
        usage()
        sys.exit(2)

    input_name = None
    output_name = None
    pdb_url = None
    for opt, value in opts:
        if opt in ("-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif opt in ("-o", "--output"):
            output_name = value
        elif opt in ("-i", "--input"):
            input_name = value
        elif opt in ("-p", "--pdb"):
            pdb_url = value

    if len(args) < 2:
        sys.stderr.write(
            "Not enough arguments. Try --help for more details.\n")
        # Without both index locations args[0]/args[1] below cannot exist.
        sys.exit(2)

    seq_map_index = Raf.SeqMapIndex(_local_copy(args[0]))
    cla_index = Cla.Index(_local_copy(args[1]))

    in_handle = None
    if input_name is None:
        lines = args[2:]
    elif input_name == "-":
        lines = sys.stdin
    else:
        in_handle = open(input_name)
        lines = in_handle

    shared_out = None
    try:
        # A single -o destination is opened once, before the loop; reopening
        # it with "w+" for every sid would truncate the previous domains.
        if output_name == "-":
            shared_out = sys.stdout
        elif output_name is not None:
            shared_out = open(output_name, "w+")

        for line in lines:
            # Lines read from a file keep their newline, so strip before
            # testing for blank lines and '#' comments.
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            sid = line[0:7]
            # The character after the leading 'd' starts the PDB ID; '0' and
            # 's' are reported as domains without coordinates.
            if sid[1:2] in ("0", "s"):
                sys.stderr.write("No coordinates for domain " + sid + "\n")
                continue

            if shared_out is None:
                out_handle = open(sid + ".ent", "w+")
            else:
                out_handle = shared_out
            try:
                _write_domain(sid, cla_index, seq_map_index, pdb_url,
                              out_handle)
            except (IOError, KeyError, RuntimeError) as err:
                # Same text the original comma-separated print produced.
                sys.stderr.write(
                    "I cannot do SCOP domain  %s : %s\n" % (sid, err))
            finally:
                if out_handle is not shared_out:
                    out_handle.close()
    finally:
        if in_handle is not None:
            in_handle.close()
        # Never close the interpreter's own stdout.
        if shared_out is not None and shared_out is not sys.stdout:
            shared_out.close()
if __name__ == "__main__":
    # Run the command-line tool only when executed as a script, not on import.
    main()
Jump to Line
Something went wrong with that request. Please try again.