forked from scikit-learn/scikit-learn
-
Notifications
You must be signed in to change notification settings - Fork 0
/
_hierarchical.pyx
135 lines (117 loc) · 3.73 KB
/
_hierarchical.pyx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import numpy as np
cimport numpy as np
cimport cython
ctypedef np.float64_t DOUBLE
ctypedef np.int_t INT
ctypedef np.int8_t INT8
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
def compute_ward_dist(np.ndarray[DOUBLE, ndim=1] m_1,\
np.ndarray[DOUBLE, ndim=2] m_2,\
np.ndarray[INT, ndim=1] coord_row,
np.ndarray[INT, ndim=1] coord_col,\
np.ndarray[DOUBLE, ndim=1] res):
cdef int size_max = coord_row.shape[0]
cdef int n_features = m_2.shape[1]
cdef int i, j, row, col
cdef DOUBLE pa, n
for i in range(size_max):
row = coord_row[i]
col = coord_col[i]
n = (m_1[row] * m_1[col]) / (m_1[row] + m_1[col])
pa = 0.
for j in range(n_features):
pa += (m_2[row, j] / m_1[row] - m_2[col, j] / m_1[col])**2
res[i] = pa * n
return res
def _hc_get_descendent(int node, children, int n_leaves):
"""
Function returning all the descendent leaves of a set of nodes in the tree.
Parameters
----------
node : int
The node for which we want the descendents.
children : list of pairs. Length of n_nodes
List of the children of each nodes.
This is not defined for leaves.
n_leaves : int
Number of leaves.
Return
------
descendent : list of int
"""
ind = [node]
if node < n_leaves:
return ind
descendent = []
# It is actually faster to do the accounting of the number of
# elements is the list ourselves: len is a lengthy operation on a
# chained list
cdef int n_indices = 1
cdef int i
while n_indices:
i = ind.pop()
if i < n_leaves:
descendent.append(i)
n_indices -= 1
else:
ind.extend(children[i - n_leaves])
n_indices += 1
return descendent
@cython.boundscheck(False)
@cython.wraparound(False)
def hc_get_heads(np.ndarray[INT, ndim=1] parents, copy=True):
""" Return the heads of the forest, as defined by parents
Parameters
===========
parents: array of integers
The parent structure defining the forest (ensemble of trees)
copy: boolean
If copy is False, the input 'parents' array is modified inplace
Returns
=======
heads: array of integers of same shape as parents
The indices in the 'parents' of the tree heads
"""
cdef unsigned int parent, node0, node, size
if copy:
parents = np.copy(parents)
size = parents.size
for node0 in range(size):
# Start from the top of the tree and go down
node0 = size - node0 - 1
node = node0
parent = parents[node]
while parent != node:
parents[node0] = parent
node = parent
parent = parents[node]
return parents
@cython.boundscheck(False)
@cython.wraparound(False)
def _get_parents(nodes, heads, np.ndarray[INT, ndim=1] parents,
np.ndarray[INT8, ndim=1] not_visited):
""" Return the heads of the given nodes, as defined by parents
Modifies in-place 'heads' and 'not_visited'
Parameters
===========
nodes: list of integers
The nodes to start from
heads: list of integers
A list to hold the results (modified inplace)
parents: array of integers
The parent structure defining the tree
not_visited:
The tree nodes to consider (modified inplace)
"""
cdef unsigned int parent, node
for node in nodes:
parent = parents[node]
while parent != node:
node = parent
parent = parents[node]
if not_visited[node]:
not_visited[node] = 0
heads.append(node)
return heads