From c7b2f7502e840f09560aed98864f083577dbf471 Mon Sep 17 00:00:00 2001 From: ishii-norimi Date: Thu, 29 Dec 2022 11:55:38 +0900 Subject: [PATCH] Add STING --- README.md | 2 +- js/model_selector.js | 2 +- js/view/sting.js | 13 ++- lib/model/sting.js | 168 +++++++++++++++++++++++++++------- tests/gui/view/sting.test.js | 40 ++++++++ tests/lib/model/sting.test.js | 25 +++++ 6 files changed, 212 insertions(+), 38 deletions(-) create mode 100644 tests/gui/view/sting.test.js create mode 100644 tests/lib/model/sting.test.js diff --git a/README.md b/README.md index 41f737d4a..b3a426f60 100644 --- a/README.md +++ b/README.md @@ -121,7 +121,7 @@ for (let i = 0; i < n; i++) { | task | model | | ---- | ----- | -| clustering | (Soft / Kernel / Genetic / Weighted / Bisecting) k-means, k-means++, k-medois, k-medians, x-means, G-means, LBG, ISODATA, Fuzzy c-means, Possibilistic c-means, k-harmonic means, MacQueen, Hartigan-Wong, Elkan, Hamelry, Drake, Yinyang, Agglomerative (complete linkage, single linkage, group average, Ward's, centroid, weighted average, median), DIANA, Monothetic, Mutual kNN, Mean shift, DBSCAN, OPTICS, DTSCAN, HDBSCAN, DENCLUE, DBCLASD, BRIDGE, CLUES, PAM, CLARA, CLARANS, BIRCH, CURE, ROCK, C2P, PLSA, Latent dirichlet allocation, GMM, VBGMM, Affinity propagation, Spectral clustering, Mountain, (Growing) SOM, GTM, (Growing) Neural gas, Growing cell structures, LVQ, ART, SVC, CAST, CHAMELEON, COLL, CLIQUE, PROCLUS, ORCLUS, FINDIT, DOC, FastDOC, DiSH, LMCLUS, NMF, Autoencoder | +| clustering | (Soft / Kernel / Genetic / Weighted / Bisecting) k-means, k-means++, k-medois, k-medians, x-means, G-means, LBG, ISODATA, Fuzzy c-means, Possibilistic c-means, k-harmonic means, MacQueen, Hartigan-Wong, Elkan, Hamelry, Drake, Yinyang, Agglomerative (complete linkage, single linkage, group average, Ward's, centroid, weighted average, median), DIANA, Monothetic, Mutual kNN, Mean shift, DBSCAN, OPTICS, DTSCAN, HDBSCAN, DENCLUE, DBCLASD, BRIDGE, CLUES, PAM, CLARA, CLARANS, BIRCH, CURE, ROCK, C2P, STING, PLSA, Latent dirichlet allocation, GMM, VBGMM, Affinity propagation, Spectral clustering, Mountain, (Growing) SOM, GTM, (Growing) Neural gas, Growing cell structures, LVQ, ART, SVC, CAST, CHAMELEON, COLL, CLIQUE, PROCLUS, ORCLUS, FINDIT, DOC, FastDOC, DiSH, LMCLUS, NMF, Autoencoder | | classification | (Fisher's) Linear discriminant, Quadratic discriminant, Mixture discriminant, Least squares, (Multiclass / Kernel) Ridge, (Complement / Negation / Universal-set / Selective) Naive Bayes (gaussian), AODE, (Fuzzy / Weighted) k-nearest neighbor, Radius neighbor, Nearest centroid, ENN, ENaN, NNBCA, ADAMENN, DANN, IKNN, Decision tree, Random forest, Extra trees, GBDT, XGBoost, ALMA, (Aggressive) ROMMA, (Bounded) Online gradient descent, (Budgeted online) Passive aggressive, RLS, (Selective-sampling) Second order perceptron, AROW, NAROW, Confidence weighted, CELLIP, IELLIP, Normal herd, Stoptron, (Kernelized) Pegasos, MIRA, Forgetron, Projectron, Projectron++, Banditron, Ballseptron, (Multiclass) BSGD, ILK, SILK, (Multinomial) Logistic regression, (Multinomial) Probit, SVM, Gaussian process, HMM, CRF, Bayesian Network, LVQ, (Average / Multiclass / Voted / Kernelized / Selective-sampling / Margin / Shifting / Budget / Tighter / Tightest) Perceptron, PAUM, RBP, ADALINE, MADALINE, MLP, ELM, LMNN | | semi-supervised classification | k-nearest neighbor, Radius neighbor, Label propagation, Label spreading, k-means, GMM, S3VM, Ladder network | | regression | Least squares, Ridge, Lasso, Elastic net, RLS, Bayesian linear, Poisson, Least absolute deviations, Huber, Tukey, Least trimmed squares, Least median squares, Lp norm linear, SMA, Deming, Segmented, LOWESS, LOESS, spline, Naive Bayes, Gaussian process, Principal components, Partial least squares, Projection pursuit, Quantile regression, k-nearest neighbor, Radius neighbor, IDW, Nadaraya Watson, Priestley Chao, Gasser Muller, RBF Network, RVM, Decision tree, Random forest, Extra trees, GBDT, XGBoost, SVR, MARS, MLP, ELM, GMR, Isotonic, Ramer Douglas Peucker, Theil-Sen, Passing-Bablok, Repeated median | diff --git a/js/model_selector.js b/js/model_selector.js index 3a5281996..2ecf558c2 100644 --- a/js/model_selector.js +++ b/js/model_selector.js @@ -146,7 +146,7 @@ const AIMethods = [ '': [ { value: 'mutual_knn', title: 'Mutual kNN' }, { value: 'art', title: 'Adaptive resonance theory' }, - //{ value: "sting", title: "STING" }, + { value: 'sting', title: 'STING' }, { value: 'svc', title: 'Support vector clustering' }, { value: 'affinity_propagation', title: 'Affinity Propagation' }, { value: 'cast', title: 'CAST' }, diff --git a/js/view/sting.js b/js/view/sting.js index afb986f60..f06cbdb68 100644 --- a/js/view/sting.js +++ b/js/view/sting.js @@ -10,13 +10,16 @@ export default function (platform) { } const controller = new Controller(platform) const fitModel = () => { - const model = new STING() + const model = new STING(c.value) model.fit(platform.trainInput) - //const pred = model.predict(platform.trainInput); - //platform.trainResult = pred.map(v => v + 1) - //clusters.value = new Set(pred).size + const pred = model.predict(platform.trainInput) + platform.trainResult = pred.map(v => v + 1) + clusters.value = new Set(pred.filter(v => v >= 0)).size + const tilePred = model.predict(platform.testInput(4)) + platform.testResult(tilePred.map(v => (v < 0 ? -1 : v + 1))) } - const stepButton = controller.input.button('Fit').on('click', fitModel) + const c = controller.input.number({ label: 'c', min: 0, max: 10000, value: 500 }) + controller.input.button('Fit').on('click', fitModel) const clusters = controller.text({ label: ' Clusters: ' }) } diff --git a/lib/model/sting.js b/lib/model/sting.js index 43ef199e6..6db79ddbe 100644 --- a/lib/model/sting.js +++ b/lib/model/sting.js @@ -1,12 +1,16 @@ /** * STatistical INformation Grid-based method - * @deprecated Not implemented */ export default class STING { // https://en.wikipedia.org/wiki/Cluster_analysis // "STING : A Statistical Information Grid Approach to Spatial Data Mining" - constructor() { + /** + * @param {number} c specified density + */ + constructor(c) { + this._c = c this._cells = null + this._t = 0.05 } /** @@ -24,14 +28,14 @@ export default class STING { ranges: ranges, children: [], } - let stack = [this._cells] + let layer = [this._cells] const spl_size = 2 ** dim - const average_number = 20 + const average_number = 5 const max_depth = Math.log(n / average_number) / Math.log(spl_size) - const cells = [stack] + const cells = [layer] for (let a = 0; a < max_depth; a++) { const new_stack = [] - for (const c of stack) { + for (const c of layer) { const rng = c.ranges for (let i = 0; i < spl_size; i++) { let p = i @@ -53,53 +57,141 @@ export default class STING { c.children.push(t) } } - stack = new_stack - cells.push(stack) + layer = new_stack + cells.push(layer) + } + + let bottomSpace = 1 + for (let d = 0; d < dim; d++) { + const range = layer[0].ranges[d] + bottomSpace *= range[1] - range[0] } - for (let i = 0; i < stack.length; i++) { - const c = stack[i] + for (let i = 0; i < layer.length; i++) { + const c = layer[i] const d = x.filter(v => { return c.ranges.every((r, i) => r[0] <= v[i] && (r[1] === maxs[i] ? v[i] <= r[1] : v[i] < r[1])) }) - const n = (c.n = d.length) - const m = Array(dim).fill(0) - const min = (c.min = Array(dim).fill(Infinity)) - const max = (c.max = Array(dim).fill(-Infinity)) - for (let j = 0; j < n; j++) { + c.n = d.length + c.min = Array(dim).fill(Infinity) + c.max = Array(dim).fill(-Infinity) + const sum = Array(dim).fill(0) + for (let j = 0; j < c.n; j++) { for (let k = 0; k < dim; k++) { - m[k] += d[j][k] - min[k] = Math.min(min[k], d[j][k]) - max[k] = Math.max(max[k], d[j][k]) + sum[k] += d[j][k] + c.min[k] = Math.min(c.min[k], d[j][k]) + c.max[k] = Math.max(c.max[k], d[j][k]) } } - c.m = m.map(v => (n > 0 ? v / n : 0)) + c.m = sum.map(v => (c.n > 0 ? v / c.n : 0)) const s = Array(dim).fill(0) - for (let j = 0; j < n; j++) { + for (let j = 0; j < c.n; j++) { for (let k = 0; k < dim; k++) { - s[k] += (d[j][k] - m[k]) ** 2 + s[k] += (d[j][k] - c.m[k]) ** 2 } } - c.s = s.map(v => (n > 0 ? Math.sqrt(v / n) : 0)) + c.s = s.map(v => (c.n > 0 ? Math.sqrt(v / c.n) : 0)) + c.dist = Array(dim).fill('normal') + + c.area = bottomSpace } for (let k = cells.length - 2; k >= 0; k--) { for (let i = 0; i < cells[k].length; i++) { - let n = 0 - const m = Array(dim).fill(0) + let nki = 0 + let aki = 0 + const sum = Array(dim).fill(0) const min = (cells[k][i].min = Array(dim).fill(Infinity)) const max = (cells[k][i].max = Array(dim).fill(-Infinity)) const s = Array(dim).fill(0) - for (const ccell of cells[k + 1].slice(i * spl_size, (i + 1) * spl_size)) { - n += ccell.n + const ccells = cells[k + 1].slice(i * spl_size, (i + 1) * spl_size) + for (const ccell of ccells) { + nki += ccell.n + aki += ccell.area for (let p = 0; p < dim; p++) { - m[p] += ccell.m[p] * ccell.n + sum[p] += ccell.m[p] * ccell.n min[p] = Math.min(min[p], ccell.min[p]) max[p] = Math.max(max[p], ccell.max[p]) s[p] += (ccell.s[p] ** 2 + ccell.m[p] ** 2) * ccell.n } } - cells[k][i].n = n - cells[k][i].m = m.map(v => (n > 0 ? v / n : 0)) - cells[k][i].s = s.map((v, p) => (n > 0 ? Math.sqrt(v / n - m[p] ** 2) : 0)) + cells[k][i].n = nki + cells[k][i].m = sum.map(v => (nki > 0 ? v / nki : 0)) + cells[k][i].s = s.map((v, p) => (nki > 0 ? Math.sqrt(v / nki - (sum[p] / nki) ** 2) : 0)) + const eps = 0.1 + cells[k][i].dist = Array(dim).fill('normal') + cells[k][i].area = aki + for (let d = 0; d < dim; d++) { + let confl = 0 + let dist = 'normal' + for (const ccell of ccells) { + let mdiff = 0 + let sdiff = 0 + if (cells[k][i].m[d] !== 0) { + mdiff += Math.abs((cells[k][i].m[d] - ccell.m[d]) / cells[k][i].m[d]) + } else if (ccell.m[d] !== 0) { + mdiff += Math.abs((cells[k][i].m[d] - ccell.m[d]) / ccell.m[d]) + } + if (cells[k][i].s[d] !== 0) { + sdiff += Math.abs((cells[k][i].s[d] - ccell.s[d]) / cells[k][i].s[d]) + } else if (ccell.s[d] !== 0) { + sdiff += Math.abs((cells[k][i].s[d] - ccell.s[d]) / ccell.s[d]) + } + if (dist !== ccell.dist && mdiff < eps && sdiff < eps) { + confl += ccell.n + } else if (mdiff >= eps || sdiff >= eps) { + confl = nki + } + } + if (nki > 0 && confl / nki > this._t) { + dist = 'none' + } + cells[k][i].dist[d] = dist + } + } + } + + let relevantCells = [this._cells] + for (let k = 1; k < cells.length; k++) { + const childRelevantCells = [] + for (let i = 0; i < relevantCells.length; i++) { + for (const child of relevantCells[i].children) { + if (child.n < child.area * this._c) { + continue + } + childRelevantCells.push(child) + } + } + relevantCells = childRelevantCells + } + + this._clusters = [] + const stack = [] + while (true) { + if (stack.length === 0) { + if (relevantCells.length === 0) { + break + } + stack.push(relevantCells.pop()) + this._clusters.push([]) + } + const curcell = stack.pop() + this._clusters[this._clusters.length - 1].push(curcell) + + for (let k = relevantCells.length - 1; k >= 0; k--) { + const c = relevantCells[k] + let adjointCnt = 0 + for (let d = 0; d < dim && adjointCnt < 2; d++) { + if (curcell.ranges[d][0] === c.ranges[d][0] && curcell.ranges[d][1] === c.ranges[d][1]) { + continue + } else if (curcell.ranges[d][0] === c.ranges[d][1] || curcell.ranges[d][1] === c.ranges[d][0]) { + adjointCnt++ + } else { + adjointCnt = Infinity + } + } + if (adjointCnt === 1) { + stack.push(c) + relevantCells.splice(k, 1) + } } } } @@ -109,5 +201,19 @@ export default class STING { * @param {Array>} datas Sample data * @returns {number[]} Predicted values */ - predict(datas) {} + predict(datas) { + const p = [] + for (let i = 0; i < datas.length; i++) { + p[i] = -1 + for (let k = 0; k < this._clusters.length && p[i] < 0; k++) { + for (const cell of this._clusters[k]) { + if (datas[i].every((v, d) => cell.ranges[d][0] <= v && v <= cell.ranges[d][1])) { + p[i] = k + break + } + } + } + } + return p + } } diff --git a/tests/gui/view/sting.test.js b/tests/gui/view/sting.test.js new file mode 100644 index 000000000..277483245 --- /dev/null +++ b/tests/gui/view/sting.test.js @@ -0,0 +1,40 @@ +import { getPage } from '../helper/browser' + +describe('clustering', () => { + /** @type {Awaited>} */ + let page + beforeEach(async () => { + page = await getPage() + const taskSelectBox = page.locator('#ml_selector dl:first-child dd:nth-child(5) select') + await taskSelectBox.selectOption('CT') + const modelSelectBox = page.locator('#ml_selector .model_selection #mlDisp') + await modelSelectBox.selectOption('sting') + }) + + afterEach(async () => { + await page?.close() + }) + + test('initialize', async () => { + const methodMenu = page.locator('#ml_selector #method_menu') + const buttons = methodMenu.locator('.buttons') + + const c = buttons.locator('input:nth-of-type(1)') + await expect(c.inputValue()).resolves.toBe('500') + }) + + test('learn', async () => { + const methodMenu = page.locator('#ml_selector #method_menu') + const buttons = methodMenu.locator('.buttons') + + const clusters = buttons.locator('span:last-child') + await expect(clusters.textContent()).resolves.toBe('') + + const fitButton = buttons.locator('input[value=Fit]') + await fitButton.dispatchEvent('click') + + const svg = page.locator('#plot-area svg') + await expect(svg.locator('.datas circle').count()).resolves.toBe(300) + await expect(clusters.textContent()).resolves.toMatch(/^[0-9]+$/) + }) +}) diff --git a/tests/lib/model/sting.test.js b/tests/lib/model/sting.test.js new file mode 100644 index 000000000..6a1515c8c --- /dev/null +++ b/tests/lib/model/sting.test.js @@ -0,0 +1,25 @@ +import Matrix from '../../../lib/util/matrix.js' +import STING from '../../../lib/model/sting.js' + +import { randIndex } from '../../../lib/evaluate/clustering.js' + +test('clustering', () => { + const model = new STING(1) + const n = 50 + const x0 = Matrix.concat( + Matrix.concat(Matrix.randn(n, 2, 0, 0.1), Matrix.randn(n, 2, 5, 0.1)), + Matrix.randn(n, 2, [0, 5], 0.1) + ) + const x = x0.toArray() + + model.fit(x) + const y = model.predict(x) + expect(y).toHaveLength(x.length) + + const t = [] + for (let i = 0; i < x.length; i++) { + t[i] = Math.floor(i / n) + } + const ri = randIndex(y, t) + expect(ri).toBeGreaterThan(0.9) +})