/
coordinate_box_utils.py
378 lines (322 loc) · 13 KB
/
coordinate_box_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
"""This module adds utilities for coordinate boxes"""
from typing import List, Sequence, Tuple
import numpy as np
from scipy.spatial import ConvexHull
class CoordinateBox(object):
"""A coordinate box that represents a block in space.
Molecular complexes are typically represented with atoms as
coordinate points. Each complex is naturally associated with a
number of different box regions. For example, the bounding box is a
box that contains all atoms in the molecular complex. A binding
pocket box is a box that focuses in on a binding region of a protein
to a ligand. A interface box is the region in which two proteins
have a bulk interaction.
The `CoordinateBox` class is designed to represent such regions of
space. It consists of the coordinates of the box, and the collection
of atoms that live in this box alongside their coordinates.
"""
def __init__(self, x_range: Tuple[float, float],
y_range: Tuple[float, float], z_range: Tuple[float, float]):
"""Initialize this box.
Parameters
----------
x_range: Tuple[float, float]
A tuple of `(x_min, x_max)` with max and min x-coordinates.
y_range: Tuple[float, float]
A tuple of `(y_min, y_max)` with max and min y-coordinates.
z_range: Tuple[float, float]
A tuple of `(z_min, z_max)` with max and min z-coordinates.
Raises
------
`ValueError` if this interval is malformed
"""
if not isinstance(x_range, tuple) or not len(x_range) == 2:
raise ValueError("x_range must be a tuple of length 2")
else:
x_min, x_max = x_range
if not x_min <= x_max:
raise ValueError("x minimum must be <= x maximum")
if not isinstance(y_range, tuple) or not len(y_range) == 2:
raise ValueError("y_range must be a tuple of length 2")
else:
y_min, y_max = y_range
if not y_min <= y_max:
raise ValueError("y minimum must be <= y maximum")
if not isinstance(z_range, tuple) or not len(z_range) == 2:
raise ValueError("z_range must be a tuple of length 2")
else:
z_min, z_max = z_range
if not z_min <= z_max:
raise ValueError("z minimum must be <= z maximum")
self.x_range = x_range
self.y_range = y_range
self.z_range = z_range
def __repr__(self):
"""Create a string representation of this box"""
x_str = str(self.x_range)
y_str = str(self.y_range)
z_str = str(self.z_range)
return "Box[x_bounds=%s, y_bounds=%s, z_bounds=%s]" % (x_str, y_str,
z_str)
def __str__(self):
"""Create a string representation of this box."""
return self.__repr__()
def __contains__(self, point: Sequence[float]) -> bool:
"""Check whether a point is in this box.
Parameters
----------
point: Sequence[float]
3-tuple or list of length 3 or np.ndarray of shape `(3,)`.
The `(x, y, z)` coordinates of a point in space.
Returns
-------
bool
`True` if `other` is contained in this box.
"""
(x_min, x_max) = self.x_range
(y_min, y_max) = self.y_range
(z_min, z_max) = self.z_range
x_cont = (x_min <= point[0] and point[0] <= x_max)
y_cont = (y_min <= point[1] and point[1] <= y_max)
z_cont = (z_min <= point[2] and point[2] <= z_max)
return x_cont and y_cont and z_cont
# FIXME: Argument 1 of "__eq__" is incompatible with supertype "object"
def __eq__(self, other: "CoordinateBox") -> bool: # type: ignore
"""Compare two boxes to see if they're equal.
Parameters
----------
other: CoordinateBox
Compare this coordinate box to the other one.
Returns
-------
bool
That's `True` if all bounds match.
Raises
------
`ValueError` if attempting to compare to something that isn't a
`CoordinateBox`.
"""
if not isinstance(other, CoordinateBox):
raise ValueError("Can only compare to another box.")
return (self.x_range == other.x_range and
self.y_range == other.y_range and self.z_range == other.z_range)
def __hash__(self) -> int:
"""Implement hashing function for this box.
Uses the default `hash` on `self.x_range, self.y_range,
self.z_range`.
Returns
-------
int
Unique integer
"""
return hash((self.x_range, self.y_range, self.z_range))
def center(self) -> Tuple[float, float, float]:
"""Computes the center of this box.
Returns
-------
Tuple[float, float, float]
`(x, y, z)` the coordinates of the center of the box.
Examples
--------
>>> box = CoordinateBox((0, 1), (0, 1), (0, 1))
>>> box.center()
(0.5, 0.5, 0.5)
"""
x_min, x_max = self.x_range
y_min, y_max = self.y_range
z_min, z_max = self.z_range
return (x_min + (x_max - x_min) / 2, y_min + (y_max - y_min) / 2,
z_min + (z_max - z_min) / 2)
def volume(self) -> float:
"""Computes and returns the volume of this box.
Returns
-------
float
The volume of this box. Can be 0 if box is empty
Examples
--------
>>> box = CoordinateBox((0, 1), (0, 1), (0, 1))
>>> box.volume()
1
"""
x_min, x_max = self.x_range
y_min, y_max = self.y_range
z_min, z_max = self.z_range
return (x_max - x_min) * (y_max - y_min) * (z_max - z_min)
def contains(self, other: "CoordinateBox") -> bool:
"""Test whether this box contains another.
This method checks whether `other` is contained in this box.
Parameters
----------
other: CoordinateBox
The box to check is contained in this box.
Returns
-------
bool
`True` if `other` is contained in this box.
Raises
------
`ValueError` if `not isinstance(other, CoordinateBox)`.
"""
if not isinstance(other, CoordinateBox):
raise ValueError("other must be a CoordinateBox")
other_x_min, other_x_max = other.x_range
other_y_min, other_y_max = other.y_range
other_z_min, other_z_max = other.z_range
self_x_min, self_x_max = self.x_range
self_y_min, self_y_max = self.y_range
self_z_min, self_z_max = self.z_range
return (self_x_min <= other_x_min and other_x_max <= self_x_max and
self_y_min <= other_y_min and other_y_max <= self_y_max and
self_z_min <= other_z_min and other_z_max <= self_z_max)
def intersect_interval(interval1: Tuple[float, float],
interval2: Tuple[float, float]) -> Tuple[float, float]:
"""Computes the intersection of two intervals.
Parameters
----------
interval1: Tuple[float, float]
Should be `(x1_min, x1_max)`
interval2: Tuple[float, float]
Should be `(x2_min, x2_max)`
Returns
-------
x_intersect: Tuple[float, float]
Should be the intersection. If the intersection is empty returns
`(0, 0)` to represent the empty set. Otherwise is `(max(x1_min,
x2_min), min(x1_max, x2_max))`.
"""
x1_min, x1_max = interval1
x2_min, x2_max = interval2
if x1_max < x2_min:
# If interval1 < interval2 entirely
return (0, 0)
elif x2_max < x1_min:
# If interval2 < interval1 entirely
return (0, 0)
x_min = max(x1_min, x2_min)
x_max = min(x1_max, x2_max)
return (x_min, x_max)
def intersection(box1: CoordinateBox, box2: CoordinateBox) -> CoordinateBox:
"""Computes the intersection box of provided boxes.
Parameters
----------
box1: CoordinateBox
First `CoordinateBox`
box2: CoordinateBox
Another `CoordinateBox` to intersect first one with.
Returns
-------
CoordinateBox
A `CoordinateBox` containing the intersection. If the intersection is empty,
returns the box with 0 bounds.
"""
x_intersection = intersect_interval(box1.x_range, box2.x_range)
y_intersection = intersect_interval(box1.y_range, box2.y_range)
z_intersection = intersect_interval(box1.z_range, box2.z_range)
return CoordinateBox(x_intersection, y_intersection, z_intersection)
def union(box1: CoordinateBox, box2: CoordinateBox) -> CoordinateBox:
"""Merges provided boxes to find the smallest union box.
This method merges the two provided boxes.
Parameters
----------
box1: CoordinateBox
First box to merge in
box2: CoordinateBox
Second box to merge into this box
Returns
-------
CoordinateBox
Smallest `CoordinateBox` that contains both `box1` and `box2`
"""
x_min = min(box1.x_range[0], box2.x_range[0])
y_min = min(box1.y_range[0], box2.y_range[0])
z_min = min(box1.z_range[0], box2.z_range[0])
x_max = max(box1.x_range[1], box2.x_range[1])
y_max = max(box1.y_range[1], box2.y_range[1])
z_max = max(box1.z_range[1], box2.z_range[1])
return CoordinateBox((x_min, x_max), (y_min, y_max), (z_min, z_max))
def merge_overlapping_boxes(boxes: List[CoordinateBox],
threshold: float = 0.8) -> List[CoordinateBox]:
"""Merge boxes which have an overlap greater than threshold.
Parameters
----------
boxes: list[CoordinateBox]
A list of `CoordinateBox` objects.
threshold: float, default 0.8
The volume fraction of the boxes that must overlap for them to be
merged together.
Returns
-------
List[CoordinateBox]
List[CoordinateBox] of merged boxes. This list will have length less
than or equal to the length of `boxes`.
"""
outputs: List[CoordinateBox] = []
for box in boxes:
for other in boxes:
if box == other:
continue
intersect_box = intersection(box, other)
if (intersect_box.volume() >= threshold * box.volume() or
intersect_box.volume() >= threshold * other.volume()):
box = union(box, other)
unique_box = True
for output in outputs:
if output.contains(box):
unique_box = False
if unique_box:
outputs.append(box)
return outputs
def get_face_boxes(coords: np.ndarray, pad: float = 5.0) -> List[CoordinateBox]:
"""For each face of the convex hull, compute a coordinate box around it.
The convex hull of a macromolecule will have a series of triangular
faces. For each such triangular face, we construct a bounding box
around this triangle. Think of this box as attempting to capture
some binding interaction region whose exterior is controlled by the
box. Note that this box will likely be a crude approximation, but
the advantage of this technique is that it only uses simple geometry
to provide some basic biological insight into the molecule at hand.
The `pad` parameter is used to control the amount of padding around
the face to be used for the coordinate box.
Parameters
----------
coords: np.ndarray
A numpy array of shape `(N, 3)`. The coordinates of a molecule.
pad: float, optional (default 5.0)
The number of angstroms to pad.
Returns
-------
boxes: List[CoordinateBox]
List of `CoordinateBox`
Examples
--------
>>> coords = np.array([[0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]])
>>> boxes = get_face_boxes(coords, pad=5)
"""
hull = ConvexHull(coords)
boxes = []
# Each triangle in the simplices is a set of 3 atoms from
# coordinates which forms the vertices of an exterior triangle on
# the convex hull of the macromolecule.
for triangle in hull.simplices:
# Points is the set of atom coordinates that make up this
# triangular face on the convex hull
points = np.array(
[coords[triangle[0]], coords[triangle[1]], coords[triangle[2]]])
# Let's extract x/y/z coords for this face
x_coords = points[:, 0]
y_coords = points[:, 1]
z_coords = points[:, 2]
# Let's compute min/max points
x_min, x_max = np.amin(x_coords), np.amax(x_coords)
x_min, x_max = int(np.floor(x_min)) - pad, int(np.ceil(x_max)) + pad
x_bounds = (x_min, x_max)
y_min, y_max = np.amin(y_coords), np.amax(y_coords)
y_min, y_max = int(np.floor(y_min)) - pad, int(np.ceil(y_max)) + pad
y_bounds = (y_min, y_max)
z_min, z_max = np.amin(z_coords), np.amax(z_coords)
z_min, z_max = int(np.floor(z_min)) - pad, int(np.ceil(z_max)) + pad
z_bounds = (z_min, z_max)
box = CoordinateBox(x_bounds, y_bounds, z_bounds)
boxes.append(box)
return boxes