/
kliep.js
132 lines (114 loc) · 3.32 KB
/
kliep.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import Matrix from '../util/matrix.js'
/**
* Kullback-Leibler importance estimation procedure
*/
export default class KLIEP {
// https://github.com/hoxo-m/densratio
/**
* @param {number[]} sigma Sigmas of normal distribution
* @param {number} fold Number of folds
* @param {number} kernelNum Number of kernels
*/
constructor(sigma, fold, kernelNum) {
this._sigma_cand = sigma
this._fold = fold
this._kernelNum = kernelNum
}
_kernel_gaussian(x, c, s) {
const k = []
for (let i = 0; i < c.rows; i++) {
const ki = []
for (let j = 0; j < x.rows; j++) {
const r = Matrix.sub(c.row(i), x.row(j))
ki.push(Math.exp(-r.reduce((ss, v) => ss + v ** 2, 0) / (2 * s ** 2)))
}
k.push(ki)
}
return Matrix.fromArray(k)
}
_optimize_alpha(a, b) {
const c = Matrix.div(b, b.norm() ** 2)
let alpha = Matrix.ones(a.rows, 1)
alpha.add(Matrix.mult(c, 1 - b.tDot(alpha).toScaler()))
alpha.map(v => (v < 0 ? 0 : v))
alpha.div(b.tDot(alpha).toScaler())
let score = a.tDot(alpha)
score.map(Math.log)
score = score.mean()
for (let k = 3; k >= -3; k--) {
const epsilon = 10 ** k
const epsa = Matrix.mult(a, epsilon)
for (let i = 0; i < 100; i++) {
const alpha0 = epsa.dot(Matrix.div(1, a.tDot(alpha)))
alpha0.add(alpha)
alpha0.add(Matrix.mult(c, 1 - b.tDot(alpha0).toScaler()))
alpha0.map(v => (v < 0 ? 0 : v))
alpha0.div(b.tDot(alpha0).toScaler())
let newScore = a.tDot(alpha0)
newScore.map(Math.log)
newScore = newScore.mean()
if (newScore <= score) {
break
}
alpha = alpha0
score = newScore
}
}
return alpha
}
/**
* Fit model.
*
* @param {Array<Array<number>>} x1 Numerator data
* @param {Array<Array<number>>} x2 Denominator data
*/
fit(x1, x2) {
x1 = Matrix.fromArray(x1)
x2 = Matrix.fromArray(x2)
const n1 = x1.rows
const kn = Math.min(this._kernelNum, n1)
const centers = (this._centers = x1.sample(kn)[0])
this._sigma = this._sigma_cand[0]
if (this._sigma_cand.length > 1) {
let best_score = -Infinity
const cvCls = Array.from({ length: n1 }, (_, i) => i % this._fold)
for (const sgm of this._sigma_cand) {
const phi1 = this._kernel_gaussian(x1, centers, sgm)
const phi2 = this._kernel_gaussian(x2, centers, sgm)
const phi2mean = phi2.mean(1)
for (let i = cvCls.length - 1; i > 0; i--) {
let r = Math.floor(Math.random() * (i + 1))
;[cvCls[i], cvCls[r]] = [cvCls[r], cvCls[i]]
}
let totalScore = 0
for (let i = 0; i < this._fold; i++) {
const idx = cvCls.map(c => c === i)
const nidx = cvCls.map(c => c !== i)
const alpha = this._optimize_alpha(phi1.col(idx), phi2mean)
const score = phi1.col(nidx).tDot(alpha)
score.map(Math.log)
totalScore += score.mean()
}
totalScore /= this._fold
if (totalScore > best_score) {
best_score = totalScore
this._sigma = sgm
}
}
}
const phi1 = this._kernel_gaussian(x1, centers, this._sigma)
const phi2 = this._kernel_gaussian(x2, centers, this._sigma)
this._kw = this._optimize_alpha(phi1, phi2.mean(1))
}
/**
* Returns estimated values.
*
* @param {Array<Array<number>>} x Sample data
* @returns {number[]} Predicted values
*/
predict(x) {
x = Matrix.fromArray(x)
const phi = this._kernel_gaussian(x, this._centers, this._sigma)
return phi.tDot(this._kw).value
}
}