-
Notifications
You must be signed in to change notification settings - Fork 5
/
ball_tree.pyx
129 lines (106 loc) · 5.15 KB
/
ball_tree.pyx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!python
#cython: boundscheck=False
#cython: wraparound=False
#cython: cdivision=True
__all__ = ['BallTree']
DOC_DICT = {'BinaryTree':'BallTree'}
VALID_METRICS = ['EuclideanDistance', 'SEuclideanDistance',
'ManhattanDistance', 'ChebyshevDistance',
'MinkowskiDistance', 'WMinkowskiDistance',
'MahalanobisDistance', 'HammingDistance',
'CanberraDistance', 'BrayCurtisDistance',
'JaccardDistance', 'MatchingDistance',
'DiceDistance', 'KulsinskiDistance',
'RogerStanimotoDistance', 'RussellRaoDistance',
'SokalMichenerDistance', 'SokalSneathDistance']
include "binary_tree.pxi"
BallTree = BinaryTree
#----------------------------------------------------------------------
# These functions make the BinaryTree a BallTree
#
cdef void allocate_data(BinaryTree bt, ITYPE_t n_nodes, ITYPE_t n_features):
bt.node_bounds = np.zeros((1, n_nodes, n_features), dtype=DTYPE)
cdef void init_node(BinaryTree bt, ITYPE_t i_node,
ITYPE_t idx_start, ITYPE_t idx_end):
cdef ITYPE_t n_features = bt.data.shape[1]
cdef ITYPE_t n_points = idx_end - idx_start
cdef ITYPE_t i, j
cdef DTYPE_t radius
cdef DTYPE_t *this_pt
cdef ITYPE_t* idx_array = &bt.idx_array[0]
cdef DTYPE_t* data = &bt.data[0, 0]
cdef DTYPE_t* centroid = &bt.node_bounds[0, i_node, 0]
# determine Node centroid
for j in range(n_features):
centroid[j] = 0
for i in range(idx_start, idx_end):
this_pt = data + n_features * idx_array[i]
for j from 0 <= j < n_features:
centroid[j] += this_pt[j]
for j in range(n_features):
centroid[j] /= n_points
# determine Node radius
radius = 0
for i in range(idx_start, idx_end):
radius = fmax(radius,
bt.rdist(centroid,
data + n_features * idx_array[i],
n_features))
bt.node_data[i_node].radius = bt.dm.rdist_to_dist(radius)
bt.node_data[i_node].idx_start = idx_start
bt.node_data[i_node].idx_end = idx_end
cdef inline DTYPE_t min_dist(BinaryTree bt, ITYPE_t i_node, DTYPE_t* pt):
cdef DTYPE_t dist_pt = bt.dist(pt, &bt.node_bounds[0, i_node, 0],
bt.data.shape[1])
return fmax(0, dist_pt - bt.node_data[i_node].radius)
cdef inline DTYPE_t max_dist(BinaryTree bt, ITYPE_t i_node, DTYPE_t* pt):
cdef DTYPE_t dist_pt = bt.dist(pt, &bt.node_bounds[0, i_node, 0],
bt.data.shape[1])
return dist_pt + bt.node_data[i_node].radius
cdef inline void min_max_dist(BinaryTree bt, ITYPE_t i_node, DTYPE_t* pt,
DTYPE_t* min_dist, DTYPE_t* max_dist):
cdef DTYPE_t dist_pt = bt.dist(pt, &bt.node_bounds[0, i_node, 0],
bt.data.shape[1])
cdef DTYPE_t rad = bt.node_data[i_node].radius
min_dist[0] = fmax(0, dist_pt - rad)
max_dist[0] = dist_pt + rad
cdef inline DTYPE_t min_rdist(BinaryTree bt, ITYPE_t i_node, DTYPE_t* pt):
if bt.euclidean:
return euclidean_dist_to_rdist(min_dist(bt, i_node, pt))
else:
return bt.dm.dist_to_rdist(min_dist(bt, i_node, pt))
cdef inline DTYPE_t max_rdist(BinaryTree bt, ITYPE_t i_node, DTYPE_t* pt):
if bt.euclidean:
return euclidean_dist_to_rdist(max_dist(bt, i_node, pt))
else:
return bt.dm.dist_to_rdist(max_dist(bt, i_node, pt))
cdef inline DTYPE_t min_dist_dual(BinaryTree bt1, ITYPE_t i_node1,
BinaryTree bt2, ITYPE_t i_node2):
cdef DTYPE_t dist_pt = bt1.dist(&bt2.node_bounds[0, i_node2, 0],
&bt1.node_bounds[0, i_node1, 0],
bt1.data.shape[1])
return fmax(0, (dist_pt - bt1.node_data[i_node1].radius
- bt2.node_data[i_node2].radius))
cdef inline DTYPE_t max_dist_dual(BinaryTree bt1, ITYPE_t i_node1,
BinaryTree bt2, ITYPE_t i_node2):
cdef DTYPE_t dist_pt = bt1.dist(&bt2.node_bounds[0, i_node2, 0],
&bt1.node_bounds[0, i_node1, 0],
bt1.data.shape[1])
return (dist_pt + bt1.node_data[i_node1].radius
+ bt2.node_data[i_node2].radius)
cdef inline DTYPE_t min_rdist_dual(BinaryTree bt1, ITYPE_t i_node1,
BinaryTree bt2, ITYPE_t i_node2):
if bt1.euclidean:
return euclidean_dist_to_rdist(min_dist_dual(bt1, i_node1,
bt2, i_node2))
else:
return bt1.dm.dist_to_rdist(min_dist_dual(bt1, i_node1,
bt2, i_node2))
cdef inline DTYPE_t max_rdist_dual(BinaryTree bt1, ITYPE_t i_node1,
BinaryTree bt2, ITYPE_t i_node2):
if bt1.euclidean:
return euclidean_dist_to_rdist(max_dist_dual(bt1, i_node1,
bt2, i_node2))
else:
return bt1.dm.dist_to_rdist(max_dist_dual(bt1, i_node1,
bt2, i_node2))