In [1]:
import numpy as np
from sklearn.cluster import DBSCAN
import pandas as pd

In [2]:
filePath = r"C:\Users\mhanowar\Box\Iowa State Research\Shared Materials_Abdullah_Hasan\Abdullah Data\ring_centers.npy"
data = np.load(filePath)

data1 = list()
for x in range(236):
    
    data1.append(data[:1000,x,0:3].tolist())

data = data1

In [3]:
# data = data[:200000]
# data = np.array(data)
# data.shape

In [4]:
# cluster = DBSCAN(eps=9, min_samples=4).fit(data[0])

# print("Number of Clusters = ", max(cluster.labels_)+1)

In [5]:
class ConvoyCandidate(object):
    """
    Attributes:
        indices(set): The object indices assigned to the convoy
        is_assigned (bool):
        start_time (int):  The start index of the convoy
        end_time (int):  The last index of the convoy
    """
    __slots__ = ('indices', 'is_assigned', 'start_time', 'end_time')

    def __init__(self, indices, is_assigned, start_time, end_time):
        self.indices = indices
        self.is_assigned = is_assigned
        self.start_time = start_time
        self.end_time = end_time

    def __repr__(self):
        return '<%r %r indices=%r, is_assigned=%r, start_time=%r, end_time=%r>' % (self.__class__.__name__, id(self), self.indices, self.is_assigned, self.start_time, self.end_time)

In [6]:
class CMC(object):
    """Coherence Moving Cluster (CMC) algorithm

    Attributes:
        k (int):  Min number of consecutive timestamps to be considered a convoy
        m (int):  Min number of elements to be considered a convoy
    """
    def __init__(self, clf, k, m):
        self.clf = clf
        self.k = k
        self.m = m

    def fit_predict(self, X, y=None, sample_weight=None):
        convoy_candidates = set()
        columns = len(X[0])
        column_iterator = range(columns)
        output_convoys = []

        for column in column_iterator:
            current_convoy_candidates = set()
            values = [row[column] if isinstance(row[column], (list, set)) else [row[column]] for row in X]
            if len(values) < self.m:
                continue
            clusters = self.clf.fit_predict(values, y=y, sample_weight=sample_weight)
            unique_clusters = set(clusters)
            clusters_indices = dict((cluster, ConvoyCandidate(indices=set(), is_assigned=False, start_time=None, end_time=None)) for cluster in unique_clusters)

            for index, cluster_assignment in enumerate(clusters):
                clusters_indices[cluster_assignment].indices.add(index)

            # update existing convoys
            for convoy_candidate in convoy_candidates:
                convoy_candidate_indices = convoy_candidate.indices
                convoy_candidate.is_assigned = False
                for cluster in unique_clusters:
                    cluster_indices = clusters_indices[cluster].indices
                    cluster_candidate_intersection = cluster_indices & convoy_candidate_indices
                    if len(cluster_candidate_intersection) < self.m:
                        continue
                    convoy_candidate.indices = cluster_candidate_intersection
                    current_convoy_candidates.add(convoy_candidate)
                    convoy_candidate.end_time = column
                    clusters_indices[cluster].is_assigned = convoy_candidate.is_assigned = True

                # check if candidates qualify as convoys
                candidate_life_time = (convoy_candidate.end_time - convoy_candidate.start_time) + 1
                if (not convoy_candidate.is_assigned or column == column_iterator[-1]) and candidate_life_time >= self.k:
                    output_convoys.append(convoy_candidate)

            # create new candidates
            for cluster in unique_clusters:
                cluster_data = clusters_indices[cluster]
                if cluster_data.is_assigned:
                    continue
                cluster_data.start_time = cluster_data.end_time = column
                current_convoy_candidates.add(cluster_data)
            convoy_candidates = current_convoy_candidates
        return output_convoys

In [7]:
# Clustering using DBSCAN
clustering_clf = DBSCAN(eps=8, min_samples=5)


In [8]:
# # # # Test data of 3D Coordinates
# # # # Elements (Molecules) are in row, timesteps are in column

# data = ([[3,1,3],[3,2,4],[2,4,5]],
#         [[4,2,2],[6,2,3],[4,4,5]],
#         [[1,1,3],[51,2,-1],[55,3,0]],
#         [[51,2,1],[52,2,4],[56,-3, 2]])

In [12]:
# Min elements for convoy = m
# Min consecutive timesteps = k

clf = CMC(clustering_clf, k=100, m=5)

# Convoy calculation using Test data
convoys = clf.fit_predict(data)
print(len(convoys))

25


In [11]:
# file = open("output.txt", "w")
for convoy in convoys:
    print('Detected Convoy')
#     for i in convoy.indices:
#         print('%i: %r - Start Time: %r, End Time: %r' % (i, data[i], convoy.start_time, convoy.end_time))
    print(convoy)
#     file.write('%r - S: %r, E: %r\n' % (convoy.indices, convoy.start_time, convoy.end_time))
#     file.write(str(convoy.start_time))
#     file.write(str(convoy.end_time))
# file.close()

Detected Convoy
<'ConvoyCandidate' 1960350765696 indices={161, 145, 152, 153, 157, 158}, is_assigned=False, start_time=0, end_time=10>
Detected Convoy
<'ConvoyCandidate' 1960350807616 indices={19, 21, 24, 28, 29}, is_assigned=False, start_time=7, end_time=39>
Detected Convoy
<'ConvoyCandidate' 1960350732544 indices={66, 71, 57, 61, 62}, is_assigned=False, start_time=29, end_time=45>
Detected Convoy
<'ConvoyCandidate' 1960350620800 indices={162, 166, 167, 170, 174}, is_assigned=False, start_time=0, end_time=50>
Detected Convoy
<'ConvoyCandidate' 1960350807424 indices={129, 132, 135, 136, 137}, is_assigned=False, start_time=10, end_time=50>
Detected Convoy
<'ConvoyCandidate' 1960350639616 indices={175, 178, 179, 180, 188}, is_assigned=False, start_time=27, end_time=52>
Detected Convoy
<'ConvoyCandidate' 1960350755008 indices={194, 181, 184, 185, 189}, is_assigned=False, start_time=21, end_time=57>
Detected Convoy
<'ConvoyCandidate' 1960350755264 indices={16, 19, 21, 24, 29}, is_assigned=