In [None]:
# Importing libraries to be used
from scipy.io import arff
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.preprocessing import StandardScaler

# Load the ARFF dataset from a local file; element 0 of the loadarff result feeds the DataFrame
data = arff.loadarff('diabetes1.arff')
df = pd.DataFrame(data[0])

# Keep the first eight columns as a 2-D array
X = df.iloc[:, :8].values

# Standardise X: fit the scaler on X, then overwrite X with the transformed values.
# The fitted scaler stays available as `temp`.
temp = StandardScaler()
temp.fit(X)
X = temp.transform(X)

print(X.shape)
print(X)

In [None]:
# Euclidean distance between two points
def eucl_dist(p,q):
    """Return the square root of the sum of squared element-wise differences of ``p`` and ``q``."""
    delta = p - q
    squared_total = np.sum(np.square(delta))
    return np.sqrt(squared_total)

# Func. to collect the indices of the points lying close to the point at index i
def neighbours_of_i(X, n, i, eps):
    """Return a list of the indices ``j`` in ``range(n)``, ``j != i``, for which
    ``eucl_dist(X[i], X[j])`` is strictly smaller than ``eps``.

    The point ``i`` itself is never part of the returned list, and indices
    appear in increasing order.
    """
    centre = X[i]
    return [j for j in range(n) if j != i and eucl_dist(centre, X[j]) < eps]

# Func. to expand cluster no. 'c_n' starting from the point with index i
def expand_cluster(X, n, result, i, c_n, eps, min_pts):
    """Try to grow cluster ``c_n`` outwards from point ``i``, labelling ``result`` in place.

    Parameters
    ----------
    X : 2-D array
        The data, one row per point.
    n : int
        Number of rows of ``X`` to search for neighbours.
    result : 2-D array-like, modified in place
        One row per point: ``result[k][0]`` is the point type
        ('NA' = not classified yet, 'Core', 'Border' or 'Noise') and
        ``result[k][1]`` is the cluster no.
    i : int
        Index of the point the expansion starts from.
    c_n : int
        Cluster no. written to every point that joins the cluster.
    eps : float
        Radius handed to ``neighbours_of_i``.
    min_pts : int
        Minimum length of a point's ``neighbours_of_i`` list for it to count
        as a core point.

    Returns
    -------
    bool
        True if ``i`` was a core point, i.e. cluster ``c_n`` now exists and the
        caller should move on to the next cluster no.; False otherwise.
    """
    seed_neighbours = neighbours_of_i(X, n, i, eps)

    if not seed_neighbours or len(seed_neighbours) < min_pts:
        # i cannot start a cluster. Whether it is a border point is unknown at
        # this stage (none of its neighbours has been shown to be a core point),
        # so it is recorded as Noise; the loop below promotes it to Border if a
        # core point reaches it later. Its cluster no. is left untouched.
        result[i][0] = 'Noise'
        return False

    # i is a core point: it and all of its direct neighbours join cluster c_n
    result[i][0] = 'Core'
    result[i][1] = c_n
    for nbr in seed_neighbours:
        result[nbr][1] = c_n

    # FIFO work list of points still to be examined. A read cursor replaces
    # list.pop(0) and a set replaces the linear "already queued?" scan.
    queue = list(seed_neighbours)
    ever_queued = set(queue)
    head = 0

    while head < len(queue):
        curr_nbr = queue[head]
        head += 1

        nbrs_curr_nbr = neighbours_of_i(X, n, curr_nbr, eps)

        if len(nbrs_curr_nbr) < min_pts:
            # reached from a core point but not dense itself
            result[curr_nbr][0] = 'Border'
            continue

        result[curr_nbr][0] = 'Core'
        for reached in nbrs_curr_nbr:
            label = result[reached][0]

            if label == 'Core':
                continue                      # core points keep their cluster

            if label == 'NA':
                # never examined: schedule it (once) so the cluster keeps growing.
                # A point only stays 'NA' while it is still waiting in the queue,
                # so "ever queued" is the same as "currently queued" here.
                if reached not in ever_queued:
                    queue.append(reached)
                    ever_queued.add(reached)
            elif label == 'Noise':
                # earlier judged too sparse to start a cluster, but it lies next
                # to a core point, so it is a border point of this cluster
                result[reached][0] = 'Border'

            result[reached][1] = c_n          # 'NA', 'Border' and 'Noise' all take this cluster no.

    return True


# Main driving function for DBSCAN algorithm
def DBSCAN(X, eps, min_pts):
    """Classify every entry of ``X`` by calling ``expand_cluster`` on each unclassified entry.

    Parameters
    ----------
    X : 2-D array
        The data, one row per entry.
    eps, min_pts
        Passed unchanged to ``expand_cluster``.

    Returns
    -------
    numpy.ndarray of str, shape ``(len(X), 2)``
        Column 0 is the entry type; it starts as 'NA' (not classified) and is
        overwritten by ``expand_cluster``. Column 1 is the cluster no. stored
        as text; it starts as '0' (Noise) and cluster numbering begins at 1.
    """
    n_entries = len(X)                        # no. of entries in X
    c_n = 1                                   # cluster no. initialised from '1'

    # Allocate the table with an explicit shape and string dtype so that an
    # empty X still yields a (0, 2) array that callers can slice column-wise.
    # 21 characters is room enough for every type label and any cluster no.
    result = np.empty((n_entries, 2), dtype='U21')
    result[:, 0] = 'NA'
    result[:, 1] = '0'

    for i in range(n_entries):
        # only entries nobody has classified yet may start a new cluster
        if result[i][0] == 'NA':
            if expand_cluster(X, n_entries, result, i, c_n, eps, min_pts):
                # expand_cluster returned True: cluster c_n was created,
                # so the next one gets the next number
                c_n += 1
    return result

In [None]:
# Run the DBSCAN driver on X (eps = 2, min_pts = 5) and keep the returned table in `clusters`
clusters = DBSCAN(X, eps=2, min_pts=5)
# Distinct values present in the cluster-number column
set(clusters[:, 1])

In [None]:
# Group the rows of `clusters` by cluster no.:
# clusters_arr[c] is a list of [row index, type of point] for every row whose cluster no. is c

cluster_n = set(clusters[:,1])

# Size the list by the largest cluster no. rather than by how many distinct
# numbers occur: if no row is left in cluster '0', the numbers present are
# 1..k and indexing a list of length k with k would raise IndexError.
highest_id = max((int(value) for value in cluster_n), default=-1)
clusters_arr = [[] for _ in range(highest_id + 1)]

# One pass over the rows instead of rescanning all rows once per cluster
for index in range(len(clusters)):
    mark, value = clusters[index]
    # store the type as a plain str so the printed lists show bare labels
    clusters_arr[int(value)].append([index, str(mark)])

print("Indexes of cluster points with type of point\n")
# cluster_n holds strings; sort numerically so the printout order is stable
for value in sorted(cluster_n, key=int):
    if value != '0':
        print("Cluster",value,": ",clusters_arr[int(value)],"\n")