In [61]:
import numpy as np 
import pandas as pd
from scipy.io import loadmat
from sklearn.cluster import DBSCAN
from sklearn.metrics import zero_one_loss
import pyqtgraph as pg
from pyqtgraph.Qt import QtCore, QtGui
%gui qt5

import time

import matplotlib.pyplot as plt

import os

### Plotting Setup

In [7]:
#set up plotting GUI
# pg.mkQApp() returns the already-running QApplication or creates one, so the
# cell is safe to re-run and does not depend on pyqtgraph's legacy
# QtGui.QApplication alias.
app = pg.mkQApp()
pg.setConfigOption('background','w')   

In [9]:
# GraphicsLayoutWidget replaces the deprecated pg.GraphicsWindow; show/size/title
# reproduce what GraphicsWindow did implicitly (800x600 window, shown on creation).
win = pg.GraphicsLayoutWidget(show=True, size=(800, 600), title="Occupancy Detection GUI")
plot1 = win.addPlot()
# fixed view: x from -6 to 6 and y from 0 to 6 (axis labels below give metres)
plot1.setXRange(-6,6)
plot1.setYRange(0,6)
plot1.setLabel('left',text = 'Y position (m)')
plot1.setLabel('bottom', text= 'X position (m)')
# scatter-style series (no connecting line) that starts out empty
s1 = plot1.plot([],[],pen=None,symbol='o')

### Helper Functions

In [2]:
def validateChecksum(recieveHeader):
    """Compute the inverted, folded 16-bit word sum of a received header.

    The header is reinterpreted as 16-bit words, the words are summed into a
    32-bit total, the two 16-bit halves of that total are added together and
    truncated to 16 bits, and the result is bitwise inverted.

    Parameters
    ----------
    recieveHeader : numpy.ndarray
        Contiguous array holding the raw header; its byte size must be a
        multiple of two so that it can be viewed as ``uint16``.

    Returns
    -------
    numpy.ndarray
        One-element ``uint16`` array containing the inverted folded sum.
    """
    words = recieveHeader.view(dtype=np.uint16)
    # Accumulate in a wide integer. Summing uint16 scalars with the builtin
    # sum() depends on NumPy's scalar promotion rules and silently wraps at
    # 16 bits on NumPy 2, which corrupts the checksum.
    total = int(words.sum(dtype=np.uint64)) & 0xFFFFFFFF
    # fold the high and low halves of the 32-bit total, keeping 16 bits
    folded = ((total & 0xFFFF) + (total >> 16)) & 0xFFFF
    return np.array([~folded & 0xFFFF], dtype=np.uint16)

def readHeader(recieveHeader):
    """Extract selected fields from a raw frame header.

    Parameters
    ----------
    recieveHeader : numpy.ndarray
        Byte array containing the header (at least 50 bytes are read).

    Returns
    -------
    dict
        ``'magicBytes'``   -- bytes 0..7, returned as-is;
        ``'packetLength'`` -- bytes 20..23 viewed as ``uint32``;
        ``'frameNumber'``  -- bytes 24..27 viewed as ``uint32``;
        ``'numTLVs'``      -- bytes 48..49 viewed as ``uint16``.
        The numeric entries are one-element array views, not Python ints.
    """
    def field(offset, width, dtype):
        # reinterpret `width` bytes starting at `offset` as `dtype`
        return recieveHeader[offset:offset + width].view(dtype=dtype)

    return {
        'magicBytes': recieveHeader[0:8],
        'packetLength': field(20, 4, np.uint32),
        'frameNumber': field(24, 4, np.uint32),
        'numTLVs': field(48, 2, np.uint16),
    }

def tlvParsing(data, dataLength, tlvHeaderLengthInBytes, pointLengthInBytes, targetLengthInBytes):
    """Parse one TLV from the start of ``data`` and return its point cloud.

    Parameters
    ----------
    data : numpy.ndarray
        Byte array that starts with the TLV header: a ``uint32`` type followed
        by a ``uint32`` length (the length includes the header itself).
    dataLength : int
        Number of bytes available; a TLV claiming more than this is rejected.
    tlvHeaderLengthInBytes : int
        Size of the TLV header that precedes the payload.
    pointLengthInBytes : int
        Size of one point record (16 bytes, i.e. four 32-bit floats).
    targetLengthInBytes : int
        Unused; kept so existing callers keep working.

    Returns
    -------
    numpy.ndarray or None
        ``float32`` array of shape ``(4, numberOfPoints)`` with one column per
        point (rows: range, azimuth, doppler, snr) when the TLV type is 6 and
        the payload is non-empty. ``None`` for any other TLV type, an empty
        payload, or an inconsistent TLV length (a message is printed for the
        latter).
    """
    #tlv header parsing - convert to plain ints so the length arithmetic
    #below cannot wrap around in uint32
    tlvType = int(data[0:4].view(dtype=np.uint32)[0])
    tlvLength = int(data[4:8].view(dtype=np.uint32)[0])

    #TLV size check: the TLV must fit in the buffer and at least cover its header
    if tlvLength > dataLength or tlvLength < tlvHeaderLengthInBytes:
        print('TLV SIZE IS WRONG')
        return None

    if tlvType != 6: #only the point cloud TLV is decoded
        return None

    pointCloudDataLength = tlvLength - tlvHeaderLengthInBytes
    if pointCloudDataLength <= 0:
        return None

    payloadStart = tlvHeaderLengthInBytes
    p = data[payloadStart:payloadStart + pointCloudDataLength].view(dtype=np.single)
    #each point is 16 bytes - 4 bytes for each property - range, azimuth, doppler, snr
    #order="F" places each consecutive group of four floats into one column
    numberOfPoints = pointCloudDataLength // pointLengthInBytes
    return np.reshape(p, (4, numberOfPoints), order="F")

def liveParsing(tlvStream):
    """Decode a point-cloud TLV and return the in-range points as x/y positions.

    Parameters
    ----------
    tlvStream : bytes-like
        Buffer starting with an 8-byte TLV header (``uint32`` type, ``uint32``
        length including the header) followed by 16-byte point records of four
        32-bit floats (range, azimuth, doppler, snr).

    Returns
    -------
    (numpy.ndarray, numpy.ndarray) or (None, None)
        ``posX = range * sin(azimuth)`` and ``posY = range * cos(azimuth)`` for
        every point with ``1 < range < 6`` and azimuth strictly within
        +/-50 degrees, in their original order. ``(None, None)`` when the TLV
        type is not 6 or no point passes the filter.

    Raises
    ------
    ValueError
        If the TLV length is smaller than the header or does not match the
        amount of point data in the buffer.
    """
    tlvHeaderLengthInBytes = 8
    pointLengthInBytes = 16
    pointCloudTlvType = 6
    #effective sensor limits
    minRange = 1
    maxRange = 6
    maxAzimuth = 50*np.pi/180

    tlvStream = np.frombuffer(tlvStream, dtype = 'uint8')

    #tlv header parsing - plain ints so the length arithmetic cannot wrap in uint32
    tlvType = int(tlvStream[0:4].view(dtype=np.uint32)[0])
    tlvLength = int(tlvStream[4:8].view(dtype=np.uint32)[0])

    if tlvType != pointCloudTlvType:
        return None, None

    tlvDataLength = tlvLength - tlvHeaderLengthInBytes
    if tlvDataLength < 0:
        raise ValueError('TLV length %d is smaller than the TLV header' % tlvLength)

    payloadStart = tlvHeaderLengthInBytes
    p = tlvStream[payloadStart:payloadStart + tlvDataLength].view(np.single)
    #one column per point: rows are range, azimuth, doppler, snr
    pointCloud = np.reshape(p, (4, tlvDataLength // pointLengthInBytes), order="F")

    #constrain point cloud to within the effective sensor range
    #(vectorised mask instead of growing an array one column at a time)
    pointRange = pointCloud[0, :]
    pointAzimuth = pointCloud[1, :]
    inView = ((pointRange > minRange) & (pointRange < maxRange)
              & (pointAzimuth > -maxAzimuth) & (pointAzimuth < maxAzimuth))

    if not inView.any():
        return None, None

    #polar -> cartesian
    posX = np.multiply(pointRange[inView], np.sin(pointAzimuth[inView]))
    posY = np.multiply(pointRange[inView], np.cos(pointAzimuth[inView]))
    return posX, posY

In [3]:
def iterativeDfs(vertexID, edgeMatrix, startNode):
    """Return the vertices reachable from ``startNode``, in DFS visiting order.

    Parameters
    ----------
    vertexID : array-like of int
        Vertex id for each column of ``edgeMatrix``; ids are also used as row
        indices into ``edgeMatrix``.
    edgeMatrix : numpy.ndarray
        Square weight matrix in which ``NaN`` means "no edge".
    startNode : int
        Vertex at which the traversal starts.

    Returns
    -------
    numpy.ndarray
        Integer array of visited vertex ids, in the order they were visited
        (``startNode`` first).
    """
    vertexID = np.asarray(vertexID)
    visitedOrder = []       #keeps the visiting order for the result
    visitedSet = set()      #constant-time membership test
    dfsStack = [int(startNode)]

    while dfsStack:
        vertex = dfsStack.pop()
        if vertex in visitedSet:
            continue
        visitedSet.add(vertex)
        visitedOrder.append(vertex)
        #neighbours are the columns of this row that still carry a weight
        neighbours = vertexID[np.logical_not(np.isnan(edgeMatrix[vertex, :]))]
        #add unvisited nodes to the stack
        dfsStack.extend(node for node in neighbours.tolist() if node not in visitedSet)

    #the np.int alias was removed from NumPy; the builtin int is the supported spelling
    return np.array(visitedOrder, dtype=int)

In [4]:
def CombinationClustering(posX, posY):
    """Cluster points by graph connectivity followed by DBSCAN; return centroids.

    Points closer than ``weightThreshold`` are linked, connected components are
    found with ``iterativeDfs``, components with fewer than ``minClusterSize``
    points are discarded, and the remaining points are clustered with DBSCAN
    (eps=0.7, min_samples=25). A centroid is the mean of a cluster's core
    samples.

    Parameters
    ----------
    posX, posY : array-like of float
        Point coordinates, same length.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``(yMean, xMean)`` -- note the order -- holding one entry per cluster;
        both are empty when there are fewer than ``minClusterSize`` points or
        no cluster is found.
    """
    weightThreshold = 0.5 #maximum distance for two points to stay linked
    minClusterSize = 20

    xMean = np.array([])
    yMean = np.array([])

    if len(posX) < minClusterSize:
        return yMean, xMean

    posX = np.asarray(posX)
    posY = np.asarray(posY)
    vertexList = np.arange(len(posX))

    #pairwise euclidean distance matrix via broadcasting
    xDifference = posX[:, np.newaxis] - posX[np.newaxis, :]
    yDifference = posY[:, np.newaxis] - posY[np.newaxis, :]
    edgeMatrix = np.sqrt(np.add(np.square(xDifference), np.square(yDifference)))

    #weight based reduction of graph: edges that are too long, and zero-length
    #edges (self loops / coincident points), are removed by marking them NaN
    weightMask = np.logical_or(np.greater(edgeMatrix, weightThreshold), np.equal(edgeMatrix, 0))
    edgeMatrix[weightMask] = np.nan  #np.NaN alias no longer exists in NumPy 2

    #collect the points of every sufficiently large connected component
    pointsX = np.array([])
    pointsY = np.array([])
    vertexID = vertexList.copy()
    while vertexID.size > 0:
        visited = iterativeDfs(vertexList, edgeMatrix, vertexID[0])
        #drop everything this traversal reached from the to-do list
        vertexID = vertexID[np.logical_not(np.isin(vertexID, visited))]
        if visited.size >= minClusterSize:
            pointsX = np.append(pointsX, posX[visited])
            pointsY = np.append(pointsY, posY[visited])

    if pointsX.size == 0:
        return yMean, xMean

    clusterer = DBSCAN(eps=0.7, min_samples=25)
    clusterer.fit(np.column_stack((pointsX, pointsY)))

    coreIndices = clusterer.core_sample_indices_
    if coreIndices.size == 0:
        return yMean, xMean

    #centroid of each cluster, computed over its core samples only
    coreX = pointsX[coreIndices]
    coreY = pointsY[coreIndices]
    coreLabels = clusterer.labels_[coreIndices]
    for centroidNumber in np.unique(coreLabels):
        members = coreLabels == centroidNumber
        xMean = np.append(xMean, np.mean(coreX[members]))
        yMean = np.append(yMean, np.mean(coreY[members]))
    return yMean, xMean

In [5]:
def DBSCANOnlyClustering(pointsX, pointsY, epsValue, minSamplesValue):
    """Cluster 2-D points with DBSCAN and return one centroid per cluster.

    Parameters
    ----------
    pointsX, pointsY : numpy.ndarray
        Point coordinates, same length.
    epsValue : float
        DBSCAN ``eps`` parameter.
    minSamplesValue : int
        DBSCAN ``min_samples`` parameter.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``(yMean, xMean)`` -- note the order -- with the mean position of the
        core samples of each cluster; both are empty when there are no input
        points or DBSCAN finds no core sample.
    """
    noCentroids = np.array([])

    #nothing to cluster
    if len(pointsX) < 1:
        return noCentroids, np.array([])

    model = DBSCAN(eps=epsValue, min_samples=minSamplesValue)
    model.fit(np.column_stack((pointsX, pointsY)))

    coreIdx = model.core_sample_indices_
    if coreIdx.size == 0:
        return noCentroids, np.array([])

    #stack x, y and label of the core samples into one array (rows: x, y, label)
    coreX, coreY, coreLabel = np.array([pointsX[coreIdx],
                                        pointsY[coreIdx],
                                        model.labels_[coreIdx]])

    clusterIds = np.unique(coreLabel)
    xMean = np.array([np.mean(coreX[coreLabel == cid]) for cid in clusterIds], dtype=np.float64)
    yMean = np.array([np.mean(coreY[coreLabel == cid]) for cid in clusterIds], dtype=np.float64)
    return yMean, xMean

### Main Code

In [86]:
#import data (mat file)
# NOTE(review): machine-specific absolute path - point it at your local copy of the recording
parsingMatFile = 'C:\\Users\\hasna\\Documents\\GitHub\\OccupancyDetection\\Data\\Experiment Data 2\\3PeopleWalking.mat'
tlvData = (loadmat(parsingMatFile))['tlvStream'][0]

#ground truth: every frame of this recording is labelled with 3 occupants
trueLabels = np.repeat(3, repeats=len(tlvData))

#parameter sweep
eps = np.arange(start=0.1, stop=0.8, step=0.05)
minSamples = np.arange(start=2, stop=30, step=1)

#parsing does not depend on the clustering parameters, so parse every frame
#once instead of once per (eps, minSamples) combination
parsedFrames = [liveParsing(tlvStream) for tlvStream in tlvData]

sweepRows = []
for epsValue in eps:
    for minSampleValue in minSamples:
        #predicted occupant count per frame; frames without usable points stay 0
        predictedLabels = np.zeros(len(parsedFrames))
        for frameIndex, (posX, posY) in enumerate(parsedFrames):
            if posX is None:
                continue
            #cluster
            yMean, xMean = DBSCANOnlyClustering(posX, posY, epsValue, minSampleValue)
            predictedLabels[frameIndex] = len(yMean)
        #number of frames whose predicted count differs from the ground truth
        misclassifications = zero_one_loss(trueLabels, predictedLabels, normalize=False)
        sweepRows.append([epsValue, minSampleValue, misclassifications])

#one row per parameter combination: eps, minSamples, misclassifications
parameterInformation = np.array(sweepRows)


In [87]:
#save parameter information as csv
DBSCANDf = pd.DataFrame(parameterInformation, columns=['Eps','MinSamples', 'Misclassifications'])
DBSCANDf.to_csv('ClusteringParameterInfo_3PeopleWalking.csv')

In [69]:
#find best parameter set - Dhari
#rows whose misclassification count equals the minimum over the whole sweep
DBSCANDf.loc[DBSCANDf['Misclassifications'] == DBSCANDf['Misclassifications'].min()]

Unnamed: 0,Eps,MinSamples,Misclassifications
336,0.7,2.0,104.0
364,0.75,2.0,104.0


In [65]:
#find best parameter set - Alan
#rows whose misclassification count equals the minimum over the whole sweep
DBSCANDf.loc[DBSCANDf['Misclassifications'] == DBSCANDf['Misclassifications'].min()]

Unnamed: 0,Eps,MinSamples,Misclassifications
364,0.75,2.0,68.0


In [84]:
#find best parameter set - 2 People Walking
#rows whose misclassification count equals the minimum over the whole sweep
print(DBSCANDf.loc[DBSCANDf['Misclassifications'] == DBSCANDf['Misclassifications'].min()])
#row 364 shown for comparison (Eps 0.75 / MinSamples 2 in the recorded output)
DBSCANDf.loc[[364], :]

     Eps  MinSamples  Misclassifications
168  0.4         2.0               247.0


Unnamed: 0,Eps,MinSamples,Misclassifications
364,0.75,2.0,257.0


In [91]:
#find best parameter set - 3PeopleWalking
#rows whose misclassification count equals the minimum over the whole sweep
print(DBSCANDf.loc[DBSCANDf['Misclassifications'] == DBSCANDf['Misclassifications'].min()])
print('')
#row 364 shown for comparison (Eps 0.75 / MinSamples 2 in the recorded output)
print(DBSCANDf.loc[[364], :])

     Eps  MinSamples  Misclassifications
224  0.5         2.0                93.0

      Eps  MinSamples  Misclassifications
364  0.75         2.0               132.0
