/
Dice.py
82 lines (62 loc) · 2.16 KB
/
Dice.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk)
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""Code for chopping up (dicing) a structure.
This module is used internally by the Bio.PDB.extract() function.
"""
import re
import warnings
from Bio.PDB.PDBIO import PDBIO
from Bio import BiopythonWarning
_hydrogen = re.compile("[123 ]*H.*")
class ChainSelector(object):
"""Only accepts residues with right chainid, between start and end.
Remove hydrogens, waters and ligands. Only use model 0 by default.
"""
def __init__(self, chain_id, start, end, model_id=0):
"""Initialize the class."""
self.chain_id = chain_id
self.start = start
self.end = end
self.model_id = model_id
def accept_model(self, model):
# model - only keep model 0
if model.get_id() == self.model_id:
return 1
return 0
def accept_chain(self, chain):
if chain.get_id() == self.chain_id:
return 1
return 0
def accept_residue(self, residue):
# residue - between start and end
hetatm_flag, resseq, icode = residue.get_id()
if hetatm_flag != " ":
# skip HETATMS
return 0
if icode != " ":
warnings.warn("WARNING: Icode %s at position %s"
% (icode, resseq), BiopythonWarning)
if self.start <= resseq <= self.end:
return 1
return 0
def accept_atom(self, atom):
# atoms - get rid of hydrogens
name = atom.get_id()
if _hydrogen.match(name):
return 0
else:
return 1
def extract(structure, chain_id, start, end, filename):
"""Write out selected portion to filename."""
sel = ChainSelector(chain_id, start, end)
io = PDBIO()
io.set_structure(structure)
io.save(filename, sel)
if __name__ == "__main__":
from Bio.PDB.PDBParser import PDBParser
import sys
p = PDBParser()
s = p.get_structure("scr", sys.argv[1])
extract(s, " ", 1, 100, "out.pdb")