<a href="https://colab.research.google.com/github/elinabhasin/DIY-Deep-Learning/blob/main/DBSCAN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [138]:
#DBSCAN : Density Based Spatial CLustering of Applications with Noise

In [139]:
import pandas as pd
import math
import numpy as np
from sklearn.datasets import load_iris

In [140]:
columns = [item.strip('(cm) ') for item in load_iris().feature_names]

In [141]:
iris_dataset = load_iris()

In [142]:
data = pd.DataFrame(iris_dataset.data,columns=columns)

In [143]:
data

Unnamed: 0,sepal length,sepal width,petal length,petal width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [144]:
target = iris_dataset.target

In [145]:
print(target)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]


In [146]:
#Brute Force

In [147]:
epsilon = 0.9
minPts = 30

In [148]:
def calc_dist_matrix(df):
  rows = df.shape[0]
  matrix = np.zeros((rows,rows))

  for row in range(rows):
    for column in range(row+1):
      matrix[row,column] = math.sqrt(((df.iloc[row]-df.iloc[column])**2).sum().item())
  return matrix

In [149]:
def assign_labels(dist_matrix):
  labels = []
  rows = dist_matrix.shape[0]
  for row in range(rows):
    points = set()
    for column in range(row+1):
      if dist_matrix[row,column]<=epsilon:
        points.add(column)
    else:
      labels.append({'Label':'Noise','Points':points})

  for column in range(rows):
    for row in range(column+1,rows):
      if dist_matrix[row,column]<=epsilon:
        labels[column]['Points'].add(row)

  return labels

In [150]:
test = pd.DataFrame([[3,7],[4,6],[5,5],[6,4],[7,3],[6,2],[7,2],[8,4],[3,3],[2,6],[3,5],[2,4]])

In [151]:
test

Unnamed: 0,0,1
0,3,7
1,4,6
2,5,5
3,6,4
4,7,3
5,6,2
6,7,2
7,8,4
8,3,3
9,2,6


In [152]:
def find_core(arr):

  for obj in arr:
    if len(obj['Points']) >= minPts:
      obj['Label'] = 'Core'
  return arr

In [153]:
def find_core_neighbors(labels):
  for point in labels:
    if point['Label'] == 'Core':
      print(point)

  core_sets = [point['Points'] for point in labels if point['Label']=='Core']
  temp = set()
  for s in core_sets:
    temp = temp.union(s)
  return temp

In [154]:
def find_border(labels):

  core_neighbors = find_core_neighbors(labels)

  for idx,noise in enumerate(labels):
    if noise['Label'] == 'Noise':
      if idx in core_neighbors:
        noise['Label'] = 'Border'
  return labels

In [155]:
dist_matrix = calc_dist_matrix(data)

In [156]:
labels = assign_labels(dist_matrix)
print(labels)

[{'Label': 'Noise', 'Points': {0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 14, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 42, 43, 44, 45, 46, 47, 48, 49}}, {'Label': 'Noise', 'Points': {0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 45, 46, 47, 48, 49}}, {'Label': 'Noise', 'Points': {0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 42, 43, 45, 46, 47, 48, 49}}, {'Label': 'Noise', 'Points': {0, 1, 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 34, 35, 37, 38, 39, 40, 41, 42, 43, 45, 46, 47, 49}}, {'Label': 'Noise', 'Points': {0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 42, 43, 44, 45, 46, 47, 48, 49}}, {'Label': 'Noise', 'Points': {0,

In [157]:
labels = find_core(labels)
print(labels)

[{'Label': 'Core', 'Points': {0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 14, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 42, 43, 44, 45, 46, 47, 48, 49}}, {'Label': 'Core', 'Points': {0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 45, 46, 47, 48, 49}}, {'Label': 'Core', 'Points': {0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 42, 43, 45, 46, 47, 48, 49}}, {'Label': 'Core', 'Points': {0, 1, 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 34, 35, 37, 38, 39, 40, 41, 42, 43, 45, 46, 47, 49}}, {'Label': 'Core', 'Points': {0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 42, 43, 44, 45, 46, 47, 48, 49}}, {'Label': 'Core', 'Points': {0, 4, 5,

In [158]:
core_pts = find_core_neighbors(labels)
print(core_pts)

{'Label': 'Core', 'Points': {0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 14, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 42, 43, 44, 45, 46, 47, 48, 49}}
{'Label': 'Core', 'Points': {0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 45, 46, 47, 48, 49}}
{'Label': 'Core', 'Points': {0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 42, 43, 45, 46, 47, 48, 49}}
{'Label': 'Core', 'Points': {0, 1, 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 34, 35, 37, 38, 39, 40, 41, 42, 43, 45, 46, 47, 49}}
{'Label': 'Core', 'Points': {0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 42, 43, 44, 45, 46, 47, 48, 49}}
{'Label': 'Core', 'Points': {0, 4, 5, 7, 10

In [160]:
labels = find_border(labels)
labels

{'Label': 'Core', 'Points': {0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 14, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 42, 43, 44, 45, 46, 47, 48, 49}}
{'Label': 'Core', 'Points': {0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 45, 46, 47, 48, 49}}
{'Label': 'Core', 'Points': {0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 42, 43, 45, 46, 47, 48, 49}}
{'Label': 'Core', 'Points': {0, 1, 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 34, 35, 37, 38, 39, 40, 41, 42, 43, 45, 46, 47, 49}}
{'Label': 'Core', 'Points': {0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 42, 43, 44, 45, 46, 47, 48, 49}}
{'Label': 'Core', 'Points': {0, 4, 5, 7, 10

[{'Label': 'Core',
  'Points': {0,
   1,
   2,
   3,
   4,
   5,
   6,
   7,
   9,
   10,
   11,
   12,
   14,
   16,
   17,
   18,
   19,
   20,
   21,
   22,
   23,
   24,
   25,
   26,
   27,
   28,
   29,
   30,
   31,
   32,
   33,
   34,
   35,
   36,
   37,
   38,
   39,
   40,
   42,
   43,
   44,
   45,
   46,
   47,
   48,
   49}},
 {'Label': 'Core',
  'Points': {0,
   1,
   2,
   3,
   4,
   6,
   7,
   8,
   9,
   10,
   11,
   12,
   13,
   17,
   19,
   20,
   21,
   22,
   23,
   24,
   25,
   26,
   27,
   28,
   29,
   30,
   31,
   34,
   35,
   36,
   37,
   38,
   39,
   40,
   41,
   42,
   43,
   45,
   46,
   47,
   48,
   49}},
 {'Label': 'Core',
  'Points': {0,
   1,
   2,
   3,
   4,
   6,
   7,
   8,
   9,
   10,
   11,
   12,
   13,
   17,
   19,
   20,
   21,
   22,
   23,
   24,
   25,
   26,
   27,
   28,
   29,
   30,
   31,
   34,
   35,
   36,
   37,
   38,
   39,
   40,
   42,
   43,
   45,
   46,
   47,
   48,
   49}},
 {'Label': 'Core',
  'Points': 