Permalink
Browse files

a first implementation of density-based clustering.

  • Loading branch information...
Giuseppe Burtini
Giuseppe Burtini committed Dec 30, 2011
1 parent c0a6729 commit e1ec0fbdec3947aa3f85f766c0a93a08bcad1d92
Showing with 63 additions and 2 deletions.
  1. +3 −2 lib/parametric/regression/gradient_descent.php
  2. +60 −0 lib/unsupervised/dbscan.php
@@ -24,8 +24,8 @@ public function train() {
if($distance <= $newDistance)
{
$badIterationsCount++;
- // we could autoresolve learning rate here by setting it equal to L.R./2
-
+ // line search and backtrack for an appropriate learning rate here.
+
if($badIterationsCount > $this->badIterationsThreshold)
throw new BadIterationsException("Distance is increasing on iterations. You probably want to set a lower learning rate.");
} else {
@@ -95,6 +95,7 @@ protected function distance() {
return $result;
}
+ // computes gradients by passing in with_regard_to_index.
protected function distanceDerivative($with_regard_to_index) {
$data_count = count($this->ys);
$result = 0;
@@ -0,0 +1,60 @@
+<?php
+/*
+ * Density-Based Clustering
+ *
+ * Domenica Arlia, Massimo Coppola. "Experiments in Parallel Clustering with DBSCAN". Euro-Par 2001: Parallel Processing: 7th International Euro-Par Conference Manchester, UK August 28–31, 2001, Proceedings. Springer Berlin.
+ * Hans-Peter Kriegel, Peer Kröger, Jörg Sander, Arthur Zimek (2011). "Density-based Clustering". WIREs Data Mining and Knowledge Discovery 1 (3): 231–240. doi:10.1002/widm.30.
+ */
+
+/**
+ * Clusters $data with a first-pass DBSCAN (density-based) procedure.
+ *
+ * Every point that has not been visited yet is marked visited and its
+ * epsilon-neighbourhood is collected. When that neighbourhood holds at least
+ * $minimumPoints entries, the point and its neighbourhood are handed to
+ * _ll_expand_cluster() and the result is stored as one cluster.
+ *
+ * @param array $data          points to cluster, keyed by index
+ * @param mixed $e             neighbourhood radius, forwarded to _ll_points_in_region()
+ * @param int   $minimumPoints minimum neighbourhood size required to seed a cluster
+ * @return array one entry per seeded cluster, each being whatever
+ *               _ll_expand_cluster() returned for that seed
+ */
+function ll_dbscan($data, $e, $minimumPoints=10) {
+    $clusters = array();
+    $visited = array();
+
+    foreach($data as $index=>$datum) {
+        // strict comparison: a loose in_array() can match unrelated keys
+        // (e.g. a string key against the integer key 0 on older PHP versions).
+        if(in_array($index, $visited, true))
+            continue;
+
+        $visited[] = $index;
+
+        $regionPoints = _ll_points_in_region(array($index=>$datum), $data, $e);
+        if(count($regionPoints) >= $minimumPoints) {
+            // _ll_expand_cluster() already declares its last parameter as &$visited,
+            // so the variable is shared by reference without a call-time "&"
+            // (call-time pass-by-reference is a fatal error on PHP 5.4+).
+            $clusters[] = _ll_expand_cluster(array($index=>$datum), $regionPoints, $e, $minimumPoints, $visited);
+        }
+    }
+
+    // the original fell off the end here, so callers always received null.
+    return $clusters;
+}
+
+/**
+ * Collects every entry of $data that lies strictly closer than $epsilon to $point.
+ *
+ * Distances are obtained from ll_euclidian_distance($point, $datum); the keys of
+ * $data are preserved in the result.
+ *
+ * NOTE(review): the callers in this file pass $point wrapped as
+ * array($index => $datum) while each candidate is passed bare -- confirm that
+ * ll_euclidian_distance() really expects that combination.
+ *
+ * @param mixed $point   reference point, forwarded unchanged to ll_euclidian_distance()
+ * @param array $data    candidate points, keyed by index
+ * @param mixed $epsilon exclusive distance threshold
+ * @return array the entries of $data (same keys) whose distance is below $epsilon
+ */
+function _ll_points_in_region($point, $data, $epsilon) {
+    $neighbours = array();
+
+    foreach($data as $key=>$candidate) {
+        $separation = ll_euclidian_distance($point, $candidate);
+
+        // skip anything that is not strictly inside the epsilon radius.
+        if(!($separation < $epsilon))
+            continue;
+
+        $neighbours[$key] = $candidate;
+    }
+
+    return $neighbours;
+}
+
+/**
+ * Grows a cluster from a seed point and the points of its neighbourhood.
+ *
+ * The cluster starts with $point. Each entry of $data that has not been
+ * visited is marked visited (the mark is written into the caller's $visited)
+ * and its own neighbourhood is looked up within $data; if that neighbourhood
+ * is dense enough it is merged into the cluster. Every entry of $data is then
+ * appended to the cluster wrapped as array($index => $datum).
+ *
+ * NOTE(review): neighbourhoods are searched only inside $data (the seed's
+ * region), and entries are appended without checking whether they already
+ * belong to a cluster -- see the inline comment; both look unfinished.
+ *
+ * @param array $point         seed point, as passed by the caller
+ * @param array $data          points of the seed's neighbourhood, keyed by index
+ * @param mixed $epsilon       neighbourhood radius, forwarded to _ll_points_in_region()
+ * @param int   $minimumPoints minimum neighbourhood size for a point to extend the cluster
+ * @param array $visited       list of already visited indices, updated in place
+ * @return array the assembled cluster
+ */
+function _ll_expand_cluster($point, $data, $epsilon, $minimumPoints, &$visited) {
+    // explicit initialisation instead of relying on $cluster[] creating the array.
+    $cluster = array($point);
+
+    foreach($data as $index=>$datum) {
+        // strict comparison: a loose in_array() can match unrelated keys
+        // (e.g. a string key against the integer key 0 on older PHP versions).
+        if(!in_array($index, $visited, true)) {
+            $visited[] = $index;
+            $regionPoints = _ll_points_in_region(array($index=>$datum), $data, $epsilon);
+
+            // ">=" matches the density test ll_dbscan() uses to seed a cluster.
+            if(count($regionPoints) >= $minimumPoints) {
+                $cluster = _ll_join_clusters($regionPoints, $cluster);
+            }
+        }
+
+        // supposed to check if it belongs to any clusters here.
+        // only add the point if it isn't clustered yet.
+        $cluster[] = array($index=>$datum);
+    }
+
+    // the original never returned the cluster, so ll_dbscan() stored null.
+    return $cluster;
+}
+
+/**
+ * Joins two clusters into a single array using array_merge().
+ *
+ * @param array $one first cluster; its entries come first in the result
+ * @param array $two second cluster; its entries follow those of $one
+ * @return array the merged cluster
+ */
+function _ll_join_clusters($one, $two) {
+    $joined = array_merge($one, $two);
+
+    return $joined;
+}
+
+
+?>

0 comments on commit e1ec0fb

Please sign in to comment.