/
initialize.py
75 lines (58 loc) · 2.46 KB
/
initialize.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
"Implements get_initial_parameters"
from numpy import ones, hstack, zeros, tile, argmin
from numpy.linalg import lstsq, matrix_rank
from numpy.random import permutation as randperm
# pylint: disable=too-many-locals
def get_initial_parameters(x, y, K):
"""
Initializes max-affine fit to data (y, x)
ensures that initialization has at least K+1 points per partition (i.e.
per affine function)
Arguments:
----------
x: Independent variable data
2D column vector [nPoints x nDims]
y: Dependent variable data
2D column vector [nPoints x 1]
Returns:
--------
ba: Initial b and a parameters
2D array [(dimx+1) x k]
"""
npt, dimx = x.shape
if K*(dimx + 1) > npt:
raise ValueError("Not enough data points")
X = hstack((ones((npt, 1)), x))
b = zeros((dimx + 1, K))
randinds = randperm(npt)[0:K] # Choose K unique indices
# partition based on distances
sqdists = zeros((npt, K))
for k in range(K):
sqdists[:, k] = ((x - tile(x[randinds[k], :], (npt, 1)))**2).sum(1)
# index to closest k for each data pt
mindistind = argmin(sqdists, axis=1)
# loop through each partition, making local fits
# note we expand partitions that result in singular least squares problems
# why this way? some points will be shared by multiple partitions, but
# resulting max-affine fit will tend to be good. (as opposed to solving
# least-norm version)
for k in range(K):
inds = mindistind == k
# before fitting, check rank and increase partition size if necessary
# (this does create overlaps)
if matrix_rank(X[inds, :]) < dimx + 1:
sortdistind = sqdists[:, k].argsort()
i = sum(inds) # number of points in partition
if i < dimx + 1:
# obviously, at least need dimx+1 points. fill these in before
# checking any ranks
inds[sortdistind[i + 1 : dimx + 1]] = 1 # TODO: check index
i = dimx + 1 # TODO: check index
# now add points until rank condition satisfied
while matrix_rank(X[inds, :]) < dimx + 1:
i = i + 1
inds[sortdistind[i]] = 1
# now create the local fit
b[:, k] = lstsq(X[inds.nonzero()], y[inds.nonzero()], rcond=-1)[0][:, 0]
# Rank condition specified to default for python upgrades
return b