### Python Implementation

In [1]:
import numpy as np
from sklearn.cluster import DBSCAN
from sklearn import metrics
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler

In [10]:
# Synthetic data: 750 two-dimensional samples scattered around three blob
# centres. The fixed random_state makes the draw repeatable.
centers = [[1, 1], [-1, -1], [1, -1]]
x, labels_true = make_blobs(
    n_samples=750,
    centers=centers,
    cluster_std=0.4,
    random_state=0,
)

In [11]:
# Standardize each feature column (zero mean, unit variance) so the distance
# threshold passed to DBSCAN below is applied on a comparable scale per feature.
# NOTE(review): this rebinds `x`; the unscaled samples are not kept under another name.
x = StandardScaler().fit_transform(x)
x[:5]  # preview the first five scaled samples

array([[ 0.49426097,  1.45106697],
       [-1.42808099, -0.83706377],
       [ 0.33855918,  1.03875871],
       [ 0.11900101, -1.05397553],
       [ 1.1224246 ,  1.77493654]])

In [30]:
# Fit DBSCAN on the standardized samples.
#   eps         - maximum distance for two samples to be treated as neighbours
#   min_samples - samples required in a neighbourhood (the point itself included)
#                 for that point to be a core point
db = DBSCAN(
    eps=0.25,
    min_samples=4,
).fit(x)

In [31]:
# `labels_` is an attribute set by fit(): one cluster id per input sample,
# in the same order as the rows of x. Samples DBSCAN treats as noise get -1.
labels = db.labels_
print(labels)

[ 0  1  0  1  0  1  1  1  0  0  1  1  1  1  1  0 -1  1  1  1  1  1  1  1
  1  1  1  0  0  1  0  1  1  0  1  0  1  0  0  1  1  1  1  1  1  1  0  1
  0  1  1  1  1  1  1  1  1  0  1  1  1  1  1  1  1  0  1  1  0  0  0  1
  0  0  1  1  0  1  0  1  1  1  0  0  0  0  1  1  1  1  1  0  1  0  1 -1
  1  1  0  0  1  1  1  0  1  1  1  1  0  0  0  1  1  1  1  0  0  1  0  1
  1  1  0  0  1  1  1  0  0  1  0  1  1  1  0 -1  1  1  0  1  0  1  0  0
  1  1  1  1  1  0  1  1  1  1  1  1  0  1  0  1  0  1  0  1  1 -1  1  1
  1  1  0  1  1  1  1  1  1  1  1  0  1  1  0  0  1  0  0  1  0  1  0  1
  1  1  1  0  0  1  1  1  1  1  1  1  1  0  1  0  1  1  0  1  1  1  0  0
  1  1  1  1  1  1  1  1  1  1  0  0  1  0  0  0  1  0  1  1  1  1  1  1
  0  1  1  1  1  1  1  1  0  0  1  1  1  0  1  0  1  0  1  1  1  1  1  1
  1  0  0  1  1  0  0  1  1 -1  1  1  1  1  1  1  0  1 -1  0  1  1  1  0
  1  1  0  0  1  0  1  0  1  1  0  0  0  1  1  1  1  1  0  1  1  0  0  1
  1  1  1  1  1  0  1  1  1  1  0  0  1  1  1  0  1

In [32]:
# Number of distinct label values (the noise label -1, when present, is included).
len(np.unique(labels))

3

In [33]:
# 1 when at least one sample carries the noise label -1, otherwise 0.
int(-1 in labels)

1

In [34]:
# DBSCAN marks noise with the label -1, which is not a real cluster:
# drop it from the distinct-label count and tally the noise samples separately.
n_noise = int(np.count_nonzero(labels == -1))
n_clusters = len(set(labels)) - int(-1 in labels)

In [35]:
# Report the clustering summary: clusters found (noise excluded) and how many
# samples DBSCAN left unassigned as noise.
print('Estimated numbers of clusters: %d' % n_clusters)
# n_noise counts the samples labelled -1, so the message must say "noise points";
# the previous wording ("number of points") read as the total sample count.
print('Estimated number of noise points: %d' % n_noise)

Estimated numbers of clusters: 2
Estimated number of points: 11


### Finding counts in each cluster

In [36]:
# Group sizes: the first array lists the label values (including -1 for noise),
# the second gives how many samples carry each of them.
np.unique(labels, return_counts=True)

(array([-1,  0,  1], dtype=int64), array([ 11, 247, 492], dtype=int64))

### Selecting only the noise points (outliers) for further action

In [38]:
# Keep only the rows of x whose DBSCAN label is -1 (noise), with all columns.
noises = x[labels == -1, :]
print(noises)

[[-1.4302079  -1.82380068]
 [ 1.22425496  0.50161091]
 [-1.53199232 -1.74146844]
 [-0.46307453 -1.26531795]
 [ 1.62198217  0.05020132]
 [-0.18357785 -0.12543358]
 [ 0.90491286 -1.62473398]
 [-0.43185167 -1.10093485]
 [-0.3164503  -1.51218921]
 [-2.27447427 -1.08752048]
 [-0.37447929 -1.13624183]]
