Skip to content

Commit

Permalink
init
Browse files Browse the repository at this point in the history
  • Loading branch information
goldensunliu committed Aug 31, 2018
0 parents commit 309a006
Show file tree
Hide file tree
Showing 9 changed files with 3,952 additions and 0 deletions.
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
*.iml
*.ipr
*.iws
node_modules
dist/
package-lock.json
5 changes: 5 additions & 0 deletions .npmignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
*.iml
*.ipr
*.iws
!dist/
webpack.config.js
22 changes: 22 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"name": "major-colors",
"version": "0.1.0",
"description": "color segmentation using kmeans+++ for clustering and CIEDE2000 algorithm for color distance",
"main": "dist/lib.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1",
"webpack": "webpack"
},
"author": "Sitian Liu <goldensunliu@gmail.com> (https://www.sitianliu.com/)",
"license": "MIT",
"devDependencies": {
"@babel/core": "^7.0.0",
"@babel/plugin-proposal-object-rest-spread": "^7.0.0",
"@babel/preset-env": "^7.0.0",
"babel-loader": "^8.0.0",
"babel-plugin-transform-class-properties": "^6.24.1",
"fast-memoize": "^2.5.1",
"webpack": "^4.17.1",
"webpack-cli": "^3.1.0"
}
}
262 changes: 262 additions & 0 deletions src/cluster.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,262 @@
/**
* given a set of n weights,
* choose a value in the range 0..n-1
* at random using weights as a distribution.
*
* @param {*} weights
* @param normalizationWeight
*/
function weightedRandomIndex(weights, normalizationWeight) {
const n = weights.length;
if(typeof normalizationWeight !== 'number') {
normalizationWeight = 0.0;
for(let i = 0; i < n; i += 1) {
normalizationWeight += weights[i];
}
}


const r = Math.random(); // uniformly random number 0..1 (a probability)
let cumulativeWeight = 0.0;
for(let i = 0; i < n; i += 1) {
//
// use the uniform probability to search
// within the normalized weighting (we divide by totalWeight to normalize).
// once we hit the probability, we have found our index.
//
cumulativeWeight += weights[i] / normalizationWeight;
if(cumulativeWeight > r) {
return i;
}
}

throw Error("algorithmic failure choosing weighted random index");
}

/**
* determine if two consecutive set of clusters are converged;
* the clusters are converged if the cluster assignments are the same.
*
* @param {*} model - object with observations, centroids, assignments
* @param {*} newModel - object with observations, centroids, assignments
*/
function assignmentsConverged(model, newModel) {
function arraysEqual(a, b) {
if (a === b) return true;
if (a === undefined || b === undefined) return false;
if (a === null || b === null) return false;
if (a.length !== b.length) return false;

// If you don't care about the order of the elements inside
// the array, you should sort both arrays here.

for (let i = 0; i < a.length; ++i) {
if (a[i] !== b[i]) return false;
}
return true;
}

return arraysEqual(model.assignments, newModel.assignments);
}

function distanceSquared(p, q) {
const d = p.length; // dimension of vectors

if(d !== q.length) throw Error("p and q vectors must be the same length");

let sum = 0;
for(let i = 0; i < d; i += 1) {
sum += (p[i] - q[i])**2
}
return sum;
}

function distance(p, q) {
return Math.sqrt(distanceSquared(p, q));
}

/**
* Calculate the centroid for the given observations.
* This takes the average of all observations (at each dimension).
* This average vector is the centroid for those observations.
*
* @return [number] centroid for given observations (vector of same dimension as observations)
* @param observations
*/
function calculateCentroid(observations) {
const n = observations.length; // number of observations
const d = observations[0].length; // dimension of vectors

// create zero vector of same dimension as observation
let centroid = [];
for(let i = 0; i < d; i += 1) {
centroid.push(0.0);
}

//
// sum all observations at each dimension
//
for(let i = 0; i < n; i += 1) {
//
// add the observation to the sum vector, element by element
// to prepare to calculate the average at each dimension.
//
for(let j = 0; j < d; j += 1) {
centroid[j] += observations[i][j];
}
}

//
// divide each dimension by the number of observations
// to create the average vector.
//
for(let j = 0; j < d; j += 1) {
centroid[j] /= n;
}

return centroid;
}

export default class Cluster {
constructor(options) {
this.distanceFn = options.distanceFn || distance;
this.convergedFn = options.convergedFn || assignmentsConverged;
this.maximumIterations = options.maximumIterations || 200;
this.debug = options.debug || false;
}

findClosestCentroid = (centroids, observation) => {
const k = centroids.length; // number of clusters/centroids

let centroid = 0;
let minDistance = this.distanceFn(centroids[0], observation);
for(let i = 1; i < k; i += 1) {
const dist = this.distanceFn(centroids[i], observation);
if(dist < minDistance) {
centroid = i;
minDistance = dist;
}
}
return centroid;
};

assignClusters = (centroids, observations) => {
const n = observations.length; // number of observations

const assignments = [];
for(let i = 0; i < n; i += 1) {
assignments.push(this.findClosestCentroid(centroids, observations[i]));
}

return assignments; // centroid index for each observation
};

kmeansStep = (centroids, observations) => {
const k = centroids.length; // number of clusters/centroids

// assign each observation to the nearest centroid to create clusters
const assignments = this.assignClusters(centroids, observations); // array of cluster indices that correspond observations

// calculate a new centroid for each cluster given the observations in the cluster
const newCentroids = [];
for(let i = 0; i < k; i += 1) {
// get the observations for this cluster/centroid
const clusteredObservations = observations.filter((v, j) => assignments[j] === i);

// calculate a new centroid for the observations
newCentroids.push(calculateCentroid(clusteredObservations));
}
return {'observations': observations, 'centroids': newCentroids, 'assignments': assignments }
};

clusterModel = (model) => {
const start = new Date();
// calculate new centroids and cluster assignments
let newModel = this.kmeansStep(model.centroids, model.observations);

// continue until centroids do not change (within given delta)
let i = 0;
while((i < this.maximumIterations) && !this.convergedFn(model, newModel)) {
model = newModel; // new model is our model now
// calculate new centroids and cluster assignments
newModel = this.kmeansStep(model.centroids, model.observations);
i += 1;
}
const finish = new Date();
return {'model': newModel, 'iterations': i, 'durationMs': (finish.getTime() - start.getTime())};
};

cluster = (observations, k) => {
const model = this.init(observations, k);
return this.clusterModel(model)
};

init = (observations, k) => {

const n = observations.length;
const distanceToCloseCentroid = []; // distance D(x) to closest centroid for each observation
const centroids = []; // indices of observations that are chosen as centroids

//
// keep list of all observations' indices so
// we can remove centroids as they are created
// so they can't be chosen twice
//
const index = [];
for(let i = 0; i < n; i += 1) {
index[i] = i;
}

//
// 1. Choose one center uniformly at random from among the data points.
//
let centroidIndex = Math.floor(Math.random() * n);
centroids.push(centroidIndex);

for(let c = 1; c < k; c += 1) {
index.slice(centroids[c - 1], 1); // remove previous centroid from further consideration
distanceToCloseCentroid[centroids[c - 1]] = 0; // this effectively removes it from the probability distribution

//
// 2. For each data point x, compute D(x), the distance between x and
// the nearest center that has already been chosen.
//
// NOTE: we used the distance squared (L2 norm)
//
let totalWeight = 0.0;
for(let i = 0; i < index.length; i += 1) {
//
// if this is the first time through, the distance is undefined, so just set it.
// Otherwise, choose the minimum of the prior closest and this new centroid
//
const distanceToCentroid = this.distanceFn(observations[index[i]], observations[centroids[c - 1]]);
distanceToCloseCentroid[index[i]] =
(typeof distanceToCloseCentroid[index[i]] === 'number')
? Math.min(distanceToCloseCentroid[index[i]], distanceToCentroid)
: distanceToCentroid;
totalWeight += distanceToCloseCentroid[index[i]];
}

//
// 3. Choose one new data point at random as a new center,
// using a weighted probability distribution where a point x is chosen with probability proportional to D(x)^2.
//
centroidIndex = index[weightedRandomIndex(distanceToCloseCentroid, totalWeight)];
centroids.push(centroidIndex);

// 4. Repeat Steps 2 and 3 until k centers have been chosen.
}

//
// 5. Now that the initial centers have been chosen, proceed using
// standard k-means clustering. Return the model so that
// kmeans can continue.
//
return {
observations,
centroids: centroids.map(x => observations[x]), // map centroid index to centroid value
assignments: observations.map((x, i) => i % centroids.length) // distribute among centroids
}
}
}
Loading

0 comments on commit 309a006

Please sign in to comment.