/
hartigan_wong_kmeans.js
110 lines (105 loc) · 2.44 KB
/
hartigan_wong_kmeans.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
/**
 * Hartigan-Wong k-Means algorithm
 *
 * Unlike Lloyd's batch update, every call to `fit` sweeps the training points
 * one at a time and immediately moves a point to another cluster whenever
 * that move lowers the total within-cluster sum of squares.
 */
export default class HartiganWongKMeans {
	// A K-Means Clustering Algorithm
	// Hartigan's K-means versus Lloyd's K-means: is it time for a change?.
	// https://www.ijcai.org/Proceedings/13/Papers/249.pdf
	// https://en.wikipedia.org/wiki/K-means_clustering
	/**
	 * @param {number} k Number of clusters
	 */
	constructor(k) {
		this._k = k
		// Centroids, one per cluster (null until `init` is called)
		this._c = null
		// Number of training points currently assigned to each cluster
		this._n = null
		// Euclidean distance between two points
		this._d = (a, b) => Math.sqrt(a.reduce((s, v, i) => s + (v - b[i]) ** 2, 0))
	}

	/**
	 * Centroids
	 *
	 * @type {Array<Array<number>>}
	 */
	get centroids() {
		return this._c
	}

	/**
	 * Initialize this model.
	 *
	 * The first `k` samples each seed their own cluster; every remaining sample
	 * is assigned to a uniformly random cluster. Centroids are the means of the
	 * resulting groups. When fewer than `k` samples are given, only as many
	 * centroids as samples are created.
	 *
	 * @param {Array<Array<number>>} datas Training data
	 */
	init(datas) {
		this._x = datas
		const n = this._x.length
		this._p = []
		this._c = []
		this._n = Array(this._k).fill(0)
		for (let i = 0; i < n; i++) {
			const x = this._x[i]
			if (this._c.length < this._k) {
				// Seed a new cluster with a copy so the caller's row is never mutated
				this._p[i] = this._c.length
				this._c.push(x.concat())
			} else {
				// Accumulate the coordinate sum; it is turned into a mean below
				const p = Math.floor(Math.random() * this._k)
				this._p[i] = p
				const sum = this._c[p]
				for (let j = 0; j < x.length; j++) {
					sum[j] += x[j]
				}
			}
			this._n[this._p[i]]++
		}
		// Only normalise centroids that exist: with fewer samples than `k`
		// the trailing clusters were never created.
		for (let k = 0; k < this._c.length; k++) {
			const count = this._n[k]
			this._c[k] = this._c[k].map(v => v / count)
		}
	}

	/**
	 * Fit model.
	 *
	 * Performs one sweep over the training data. A point is moved to the cluster
	 * giving the largest positive reduction of the objective, and both affected
	 * centroids are updated incrementally right away.
	 */
	fit() {
		const n = this._x.length
		const clusterCount = this._c.length
		for (let i = 0; i < n; i++) {
			const p = this._p[i]
			const np = this._n[p]
			// A point that is alone in its cluster must stay: removing it would
			// empty the cluster and divide by zero (n / (n - 1) and the centroid
			// update), producing Infinity/NaN centroids.
			if (np <= 1) continue
			const x = this._x[i]
			// Decrease of the objective obtained by removing x from its cluster
			const d = (np / (np - 1)) * this._d(x, this._c[p]) ** 2
			let max_d = 0
			let max_k = -1
			for (let k = 0; k < clusterCount; k++) {
				if (p === k) continue
				const nk = this._n[k]
				// Net gain: removal decrease minus the increase from adding x to cluster k
				const delta = d - (nk / (nk + 1)) * this._d(x, this._c[k]) ** 2
				if (delta > max_d) {
					max_d = delta
					max_k = k
				}
			}
			if (max_k >= 0) {
				const nk = this._n[max_k]
				// Incremental mean updates for the source and destination clusters
				this._c[p] = this._c[p].map((v, j) => (np * v - x[j]) / (np - 1))
				this._n[p]--
				this._c[max_k] = this._c[max_k].map((v, j) => (nk * v + x[j]) / (nk + 1))
				this._n[max_k]++
				this._p[i] = max_k
			}
		}
	}

	/**
	 * Returns predicted categories.
	 *
	 * Each sample gets the index of its nearest centroid (Euclidean distance),
	 * or -1 when no centroid is closer than Infinity (e.g. no centroids exist).
	 *
	 * @param {Array<Array<number>>} datas Sample data
	 * @returns {number[]} Predicted values
	 */
	predict(datas) {
		const p = []
		for (let i = 0; i < datas.length; i++) {
			let min_d = Infinity
			p[i] = -1
			for (let k = 0; k < this._c.length; k++) {
				const d = this._d(datas[i], this._c[k])
				if (d < min_d) {
					min_d = d
					p[i] = k
				}
			}
		}
		return p
	}
}