In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import savemat, loadmat
from scipy.spatial import distance

class DBSCAN:
    """Brute-force DBSCAN over a sequence of points.

    A point is treated as a *core* point when ``region_query`` returns at
    least ``min_pts`` other points.  The point itself is never counted, so
    ``min_pts`` here is one less than the equivalent scikit-learn
    ``min_samples`` (which includes the point itself).

    Results are exposed as two parallel lists filled in visit order:
    ``visited[i]`` is a point and ``clusterID[i]`` is its label, where
    ``0 .. cluster_count - 1`` are cluster ids and ``-1`` marks noise.

    Parameters
    ----------
    eps : float
        Maximum Euclidean distance for two points to be neighbours.
    min_pts : int
        Minimum number of neighbours (excluding the point itself) for a
        point to be a core point.
    """

    def __init__(self, eps, min_pts):
        self.eps = eps
        self.min_pts = min_pts
        self.cluster_count = 0
        self.visited = []
        self.clusterID = []

    def clustering(self, data):
        """Cluster ``data`` and fill ``visited``, ``clusterID`` and ``cluster_count``.

        Any state from a previous call is discarded first.

        Parameters
        ----------
        data : iterable of points
            Each point is a coordinate sequence accepted by
            ``scipy.spatial.distance.euclidean`` (e.g. a row of a 2-D array).
        """
        self.init_params()
        self.data = data

        for point in data:
            if self.visited_or_not(point, self.visited):
                continue
            self.visited.append(point)
            neighbour_pts = self.region_query(point)
            if len(neighbour_pts) < self.min_pts:
                # Provisionally noise; expand_cluster may later promote it
                # to a border point of some cluster.
                self.clusterID.append(-1)
            else:
                self.expand_cluster(self.cluster_count, point, neighbour_pts)
                self.cluster_count += 1

    def expand_cluster(self, cluster, point, neighbour_pts):
        """Grow cluster ``cluster`` outwards from the core point ``point``.

        The caller must already have appended ``point`` to ``visited``; this
        method appends its label, keeping the two lists aligned.

        Parameters
        ----------
        cluster : int
            Label to assign to every point reached.
        point : point
            The seed core point (not otherwise read here).
        neighbour_pts : list
            Neighbours of ``point``; extended in place with further
            density-reachable points while it is being iterated.
        """
        self.clusterID.append(cluster)
        # The list grows while we iterate over it; the for-loop picks up the
        # appended entries, so this acts as the work queue.
        for p in neighbour_pts:
            seen_at = self._visited_index(p)
            if seen_at is not None:
                # Already handled.  A point previously written off as noise
                # is reachable from this cluster, so it becomes a border point.
                if self.clusterID[seen_at] == -1:
                    self.clusterID[seen_at] = cluster
                continue

            self.visited.append(p)
            self.clusterID.append(cluster)
            p_neighbours = self.region_query(p)
            # Only core points propagate the cluster; border points join it
            # but do not pull in their own neighbours.
            if len(p_neighbours) >= self.min_pts:
                for n in p_neighbours:
                    if not self.visited_or_not(n, neighbour_pts):
                        neighbour_pts.append(n)

    def region_query(self, point):
        """Return every point of ``self.data`` within ``eps`` of ``point``.

        Points equal to ``point`` in all coordinates are excluded, so the
        query point (and any exact duplicate of it) is never its own neighbour.
        """
        result = []
        for d_point in self.data:
            if np.array_equal(d_point, point):
                continue
            if distance.euclidean(d_point, point) <= self.eps:
                result.append(d_point)
        return result

    def visited_or_not(self, point, target_set):
        """Return True when ``point`` equals, in every coordinate, an element of ``target_set``."""
        return any(np.array_equal(d_point, point) for d_point in target_set)

    def init_params(self):
        """Reset the per-run state (cluster counter, labels, visited points)."""
        self.cluster_count = 0
        self.clusterID = []
        self.visited = []

    def _visited_index(self, point):
        """Return the position of ``point`` in ``self.visited``, or None if absent."""
        for index, seen in enumerate(self.visited):
            if np.array_equal(seen, point):
                return index
        return None
        
# Load the sample data; the array is stored under the key 'Points' in the .mat file.
Points = loadmat('DBSCAN-Points.mat')
X = Points['Points']

# Cluster with the hand-written DBSCAN class (eps=0.12, min_pts=3).
dbc = DBSCAN(0.12, 3)
dbc.clustering(X)

# Build one label per row of X.  The array is sized from X (not from
# dbc.clusterID) so it always matches the scatter plot below, and any row that
# is not found in dbc.visited keeps the noise label -1.
labels = np.full(len(X), -1, dtype=int)
for i, point in enumerate(X):
    for index, d_point in enumerate(dbc.visited):
        if np.all(d_point == point):
            labels[i] = dbc.clusterID[index]

fig = plt.figure()
plt.scatter(X[:,0], X[:,1], 24, c=labels)
plt.show()

# Reference result from scikit-learn with the same parameters.
# NOTE: this import rebinds the name DBSCAN from the class defined above to
# scikit-learn's estimator for the rest of the session.
from sklearn.cluster import DBSCAN
db = DBSCAN(eps=0.12, min_samples=3).fit(X)

fig = plt.figure()
plt.scatter(X[:,0], X[:,1], 24, c=db.labels_)
plt.show()

In [None]:
# Write the figure currently bound to `fig` to 'DBSCAN.jpg' in the working directory.
# NOTE(review): when the cells run top to bottom, `fig` is the most recently
# created figure (the scikit-learn comparison plot), not the plot of the
# hand-written DBSCAN result - confirm this is the one meant to be saved.
fig.savefig('DBSCAN.jpg')

In [None]:
#!/usr/bin/python
from math import sqrt, pow
import scipy.io
import numpy as np
# Read the .mat file and turn the array stored under 'Points' into a plain
# Python list of rows, which the functions below iterate over as a global.
data = scipy.io.loadmat('DBSCAN-Points.mat')
points = list(data['Points'])
# Alternative inputs kept for manual experiments:
# points = np.concatenate((data['Points'],data['Points']), axis = 0)
# print points
# Small hand-made dataset (the one used in the debug trace at the end of this cell):
# points = [(0,0), (1, 0), (1,1),  (2,2), (3,1),(3,0), (0,1), (3,2), (6,3)]
# Global list of points that have already been assigned to some group;
# appended to by getNeighbors() and by the main loop.
visited = []
def distance(p1, p2):
    """Return the Euclidean distance between two points in the plane.

    Only the first two coordinates (index 0 and 1) of each argument are read.

    Note: defining this function rebinds the name `distance`, which earlier
    in this file refers to the imported `scipy.spatial.distance` module.
    """
    dx = p1[0] - p2[0]
    dy = p1[1] - p2[1]
    squared = pow(dx, 2) + pow(dy, 2)
    return sqrt(squared)

def isVisited(point, points):
    """Return True when `point` is already contained in `points`.

    Two points match only if they are equal in every coordinate; a point that
    merely shares one coordinate with an element of `points` does not count.

    point  -- coordinate sequence (tuple, list or numpy row)
    points -- iterable of coordinate sequences to search (this parameter
              shadows the module-level `points` list inside the function)
    """
    return any(np.array_equal(p, point) for p in points)
def getNeighbors(point, esp):
    """Collect `point` plus every unvisited point chained to it within `esp`.

    Starting at `point`, the module-level `points` list is scanned; each entry
    that is not yet in the module-level `visited` list and lies within `esp`
    of the point currently being expanded is marked visited and then expanded
    in the same way.

    point -- starting point; the caller is expected to have added it to
             `visited` already (this function does not do so)
    esp   -- distance threshold (presumably a misspelling of "eps"; the name
             is kept so existing callers keep working)

    Returns a list beginning with `point`, followed by the discovered points
    in depth-first discovery order.  Side effect: every discovered point is
    appended to the global `visited`.
    """
    result = [point]
    # Explicit stack of (center, iterator over `points`) frames.  This walks
    # the points in the same order as a recursive expansion would, but cannot
    # hit Python's recursion limit on long chains of points.
    pending = [(point, iter(points))]
    while pending:
        center, candidates = pending[-1]
        for p in candidates:
            if not isVisited(p, visited) and distance(p, center) <= esp:
                visited.append(p)
                result.append(p)
                # Expand p first; the scan around `center` resumes afterwards
                # from where its iterator stopped.
                pending.append((p, iter(points)))
                break
        else:
            # Iterator exhausted: nothing left to discover around `center`.
            pending.pop()
    return result
        

cluster = []   # one list of member points per accepted group
noise = []     # points belonging to groups that were too small
e = 0.15       # distance threshold handed to getNeighbors
minPts = 3     # a group must contain more than this many points to be a cluster

for point in points:
    if isVisited(point, visited):
        continue
    print(len(visited))  # progress: number of points handled so far
    visited.append(point)
    # `neighbors` starts with `point` itself, followed by everything chained to it.
    neighbors = getNeighbors(point, e)
    if len(neighbors) <= minPts:
        # Record the whole undersized group, not just its seed: the other
        # members are already marked visited and would otherwise be lost.
        noise.extend(neighbors)
    else:
        cluster.append(list(neighbors))

# print(...) with a single argument behaves the same on Python 2 and Python 3.
print("--------- result ---------")
print("number of cluster: " + str(len(cluster)))
print(cluster)
print(noise)

# TODO 1: fix the following bug
# TODO 2: plot graph from result
# TODO 3: line 40: a numpy ndarray does not have pop()

# remark: (3,1) is skipped  
# (3, 2)
# [(0, 0), (1, 0), (1, 1), (2, 2), (3, 1), (3, 0), (0, 1)]
# distance - (0, 0), (3, 2) - 3.60555127546
# distance - (1, 0), (3, 2) - 2.82842712475
# distance - (1, 1), (3, 2) - 2.2360679775
# distance - (2, 2), (3, 2) - 1.0
# [(0, 0), (1, 0), (1, 1), (3, 1), (3, 0), (0, 1)]
# distance - (0, 0), (2, 2) - 2.82842712475
# distance - (1, 0), (2, 2) - 2.2360679775
# distance - (1, 1), (2, 2) - 1.41421356237
# distance - (3, 1), (2, 2) - 1.41421356237
# distance - (3, 0), (2, 2) - 2.2360679775
# distance - (0, 1), (2, 2) - 2.2360679775
# distance - (3, 0), (3, 2) - 2.0
# distance - (0, 1), (3, 2) - 3.16227766017
# neighbors
# [(3, 2), (2, 2)]




In [None]:
# One label per row of X, derived only from `cluster` and `noise`.
# Start from "noise" (-1) so no label from an earlier clustering run leaks in
# and rows that appear in neither list still get a defined value.
labels = np.full(len(X), -1, dtype=int)

# Rows that belong to cluster i get label i (a later cluster wins on ties).
for i in range(len(cluster)):
    for index, d_point in enumerate(X):
        for p in cluster[i]:
            if np.all(d_point == p):
                labels[index] = i

# Noise always overrides a cluster label, and is applied even when there are
# no clusters at all.
for index, d_point in enumerate(X):
    for p in noise:
        if np.all(d_point == p):
            labels[index] = -1


In [None]:
# Scatter plot of the first two columns of X, coloured by `labels`
# (marker size 24), displayed with plt.show() and then written to disk.
fig = plt.figure()
plt.scatter(X[:, 0], X[:, 1], s=24, c=labels)
plt.show()
fig.savefig('./Vanessa/DBSCAN-0.15-3.jpg')

In [None]:
from sklearn.cluster import DBSCAN

# Reference clustering from scikit-learn with eps=0.15 and min_samples=3.
db = DBSCAN(eps=0.15, min_samples=3)
db.fit(X)

# Same scatter plot as for the hand-written result, coloured by sklearn's labels.
fig = plt.figure()
plt.scatter(X[:, 0], X[:, 1], s=24, c=db.labels_)
plt.show()