-
Notifications
You must be signed in to change notification settings - Fork 5
/
kd_tree.pyx
142 lines (106 loc) · 4.73 KB
/
kd_tree.pyx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#!python
#cython: boundscheck=False
#cython: wraparound=False
#cython: cdivision=True
__all__ = ['KDTree']
DOC_DICT = {'BinaryTree':'KDTree'}
VALID_METRICS = ['EuclideanDistance', 'ManhattanDistance', 'MinkowskiDistance']
include "binary_tree.pxi"
KDTree = BinaryTree
cdef void allocate_data(BinaryTree bt, ITYPE_t n_nodes, ITYPE_t n_features):
bt.node_bounds = np.zeros((2, n_nodes, n_features), dtype=DTYPE)
cdef void init_node(BinaryTree bt, ITYPE_t i_node,
ITYPE_t idx_start, ITYPE_t idx_end):
cdef ITYPE_t n_features = bt.data.shape[1]
cdef ITYPE_t i, j, idx_i
# determine Node bounds
for j in range(n_features):
bt.node_bounds[0, i_node, j] = INF
bt.node_bounds[1, i_node, j] = -INF
for i in range(idx_start, idx_end):
idx_i = bt.idx_array[i]
for j in range(n_features):
bt.node_bounds[0, i_node, j] =\
fmin(bt.node_bounds[0, i_node, j],
bt.data[idx_i, j])
bt.node_bounds[1, i_node, j] =\
fmax(bt.node_bounds[1, i_node, j],
bt.data[idx_i, j])
bt.node_data[i_node].idx_start = idx_start
bt.node_data[i_node].idx_end = idx_end
cdef DTYPE_t min_rdist(BinaryTree bt, ITYPE_t i_node, DTYPE_t* pt):
cdef ITYPE_t n_features = bt.data.shape[1]
cdef DTYPE_t d, d_lo, d_hi, rdist=0.0
cdef ITYPE_t j
# here we'll use the fact that x + abs(x) = 2 * max(x, 0)
for j in range(n_features):
d_lo = bt.node_bounds[0, i_node, j] - pt[j]
d_hi = pt[j] - bt.node_bounds[1, i_node, j]
d = (d_lo + fabs(d_lo)) + (d_hi + fabs(d_hi))
rdist += pow(0.5 * d, bt.dm.p)
return rdist
cdef DTYPE_t min_dist(BinaryTree bt, ITYPE_t i_node, DTYPE_t* pt):
return pow(min_rdist(bt, i_node, pt), 1. / bt.dm.p)
cdef DTYPE_t max_rdist(BinaryTree bt, ITYPE_t i_node, DTYPE_t* pt):
cdef ITYPE_t n_features = bt.data.shape[1]
cdef DTYPE_t d, d_lo, d_hi, rdist=0.0
cdef ITYPE_t j
for j in range(n_features):
d_lo = fabs(pt[j] - bt.node_bounds[0, i_node, j])
d_hi = fabs(pt[j] - bt.node_bounds[1, i_node, j])
rdist += pow(fmax(d_lo, d_hi), bt.dm.p)
return rdist
cdef DTYPE_t max_dist(BinaryTree bt, ITYPE_t i_node, DTYPE_t* pt):
return pow(max_rdist(bt, i_node, pt), 1. / bt.dm.p)
cdef inline void min_max_dist(BinaryTree bt, ITYPE_t i_node, DTYPE_t* pt,
DTYPE_t* min_dist, DTYPE_t* max_dist):
cdef ITYPE_t n_features = bt.data.shape[1]
cdef DTYPE_t d, d_lo, d_hi
cdef ITYPE_t j
min_dist[0] = 0.0
max_dist[0] = 0.0
# as above, use the fact that x + abs(x) = 2 * max(x, 0)
for j in range(n_features):
d_lo = bt.node_bounds[0, i_node, j] - pt[j]
d_hi = pt[j] - bt.node_bounds[1, i_node, j]
d = (d_lo + fabs(d_lo)) + (d_hi + fabs(d_hi))
min_dist[0] += pow(0.5 * d, bt.dm.p)
max_dist[0] += pow(fmax(fabs(d_lo), fabs(d_hi)), bt.dm.p)
min_dist[0] = pow(min_dist[0], 1. / bt.dm.p)
max_dist[0] = pow(max_dist[0], 1. / bt.dm.p)
cdef inline DTYPE_t min_rdist_dual(BinaryTree bt, ITYPE_t i_node1,
BinaryTree bt2, ITYPE_t i_node2):
cdef ITYPE_t n_features = bt.data.shape[1]
cdef DTYPE_t d, d1, d2, rdist=0.0
cdef DTYPE_t zero = 0.0
cdef ITYPE_t j
# here we'll use the fact that x + abs(x) = 2 * max(x, 0)
for j in range(n_features):
d1 = (bt.node_bounds[0, i_node1, j]
- bt2.node_bounds[1, i_node2, j])
d2 = (bt2.node_bounds[0, i_node2, j]
- bt.node_bounds[1, i_node1, j])
d = (d1 + fabs(d1)) + (d2 + fabs(d2))
rdist += pow(0.5 * d, bt.dm.p)
return rdist
cdef inline DTYPE_t min_dist_dual(BinaryTree bt1, ITYPE_t i_node1,
BinaryTree bt2, ITYPE_t i_node2):
return bt1.dm.rdist_to_dist(min_rdist_dual(bt1, i_node1,
bt2, i_node2))
cdef inline DTYPE_t max_rdist_dual(BinaryTree bt1, ITYPE_t i_node1,
BinaryTree bt2, ITYPE_t i_node2):
cdef ITYPE_t n_features = bt1.data.shape[1]
cdef DTYPE_t d, d1, d2, rdist=0.0
cdef DTYPE_t zero = 0.0
cdef ITYPE_t j
for j in range(n_features):
d1 = fabs(bt1.node_bounds[0, i_node1, j]
- bt2.node_bounds[1, i_node2, j])
d2 = fabs(bt1.node_bounds[1, i_node1, j]
- bt2.node_bounds[0, i_node2, j])
rdist += pow(fmax(d1, d2), bt1.dm.p)
return rdist
cdef inline DTYPE_t max_dist_dual(BinaryTree bt1, ITYPE_t i_node1,
BinaryTree bt2, ITYPE_t i_node2):
return bt1.dm.rdist_to_dist(max_rdist_dual(bt1, i_node1,
bt2, i_node2))