In [1]:
import json, requests, io, zipfile
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, precision_recall_curve, roc_curve

username = 'HDaftary'
token = '<<secret>>'
session = requests.Session()
session.auth = (username, token)

# providing raw url to download data zip from github
zip_url = 'https://raw.githubusercontent.com/HDaftary97/ML/main/KMeans/data.zip'
download = session.get(zip_url)
z = zipfile.ZipFile(io.BytesIO(download.content))
z.extractall()

tracklets.json is a file that contains vehicle tracklets, obtained by running a tracking algorithm
to track vehicles in a video file. This file contains multiple tracklets, each tracklet is supposed to correspond
to one vehicle in the video. Each tracklet is associated with a numerical ID, the vehicle class (among: car,
bike, bus, truck, others), and a list of detections. Each detection is a 5-tuple, where the first entry is the
frame number of the video, and the last four entries are the x; y coordinates of the detected bounding box.
Specifically, the first 2 coordinates correspond to the x; y coordinates of the top left corner of the detected
bounding box and the next 2 coordinates correspond to the x; y coordinates of the bottom right corner of the
detected bounding box.

In [2]:
import random
import collections
import copy
import argparse
import json
from tqdm import tqdm
import os
import numpy as np
from sklearn.cluster import KMeans
import cv2

class TrackletClustering(object):
    tracklet_database = []
    centroids = []

    def __init__(self, num_cluster):
        self.num_cluster = num_cluster

    def convert_tracklet_feature(self, tracklet):
        """
        using the vector connecting the centers of the first and last detected bounding boxes in the tracklet
        """
        category = tracklet['class']
        vehicle_type = None
        if category == 'car':
            vehicle_type = 0
        elif category == 'bike':
            vehicle_type = 1
        if category == 'bus':
            vehicle_type = 2
        if category == 'truck':
            vehicle_type = 3
        if category == 'others':
            vehicle_type = 4

        first_point = tracklet['tracks'][0]
        last_point = tracklet['tracks'][-1]
        x1 = (first_point[0] + first_point[2]) / 2
        y1 = (first_point[1] + first_point[3]) / 2

        x2 = (last_point[0] + last_point[2]) / 2
        y2 = (last_point[1] + last_point[3]) / 2

        feature_list = np.array([vehicle_type, tracklet['length'], x1, y1, x2, y2])
        feature_list = np.array([x1, y1, x2, y2])
        return feature_list

    # Used to convert index list to python dict with key as centroid of cluster k
    # and value as array of indices of features in that centroid
    def convert_index_list_dict(self, ind_list, k):
        classifications = {}
        for i in range(k):
            classifications[i] = []

        # append index value based on key of classifications dict
        for index, ind_value in enumerate(ind_list):
            classifications[ind_value].append(index)

        return classifications

    def add_tracklet(self, tracklet):
        "Add a new tracklet into the database"
        feature_list = self.convert_tracklet_feature(tracklet)
        self.tracklet_database.append(feature_list)

    def build_clustering_model(self):
        "Perform clustering algorithm"
        x_train = np.array(self.tracklet_database)
        prev_ind_list = []
        self.centroids = [x_train[initial] for initial in range(self.num_cluster)]
        for epoch in range(100):
            ind_list = []
            for feature in x_train:  # Runs X.shape[0] == n times
                distances = [np.linalg.norm(feature - centroid) for centroid in self.centroids]
                least_distance = min(distances)  # Data-point is assigned to centroid having least distance
                classification = distances.index(least_distance)
                ind_list.append(classification)

            classifications = self.convert_index_list_dict(ind_list, self.num_cluster)  # re-initialize classifications for each iteration
            print("Epoch ", epoch)
            if collections.Counter(prev_ind_list) == collections.Counter(ind_list):
                print("The assignment list has not changed, thus converged!")
                break
            prev_ind_list = copy.deepcopy(ind_list)

            # prev_centroids = copy.deepcopy(centroids)
            # assign new centroid, change mu, Recenter step
            for classification in classifications:
                new_mean = np.average(x_train[classifications[classification]], axis=0)
                self.centroids[classification] = new_mean
        self.centroids = np.array(self.centroids)
        self.kmeans_sklearn = KMeans(n_clusters=self.num_cluster)
        self.kmeans_sklearn.fit(x_train)

    def get_cluster_id(self, tracklet):
        """
        Assign the cluster ID for a tracklet. This function must return a non-negative integer <= num_cluster
        """
        centroids = self.centroids  # Not giving very good clustering video at end
        centroids = self.kmeans_sklearn.cluster_centers_
        feature_vector = self.convert_tracklet_feature(tracklet)
        distances = [np.linalg.norm(feature_vector - centroid) ** 2 for centroid in centroids]
        least_distance = min(distances)  # Data-point is assigned to centroid having least distance
        classification = distances.index(least_distance)
        return classification + 1


In [3]:
cluster_list = [2,4,3]
tracklet_file_list = ['/content/data/cam_04_debug.json','/content/data/cam_10_debug.json','/content/data/cam_16_debug.json']

for i in range(3):
  clust_obj = TrackletClustering(cluster_list[i])

  with open(tracklet_file_list[i]) as f:
      vehicle_data = json.load(f)
  # Gather the data
  print("First pass over data to gather all tracklets")
  for v_id in tqdm(vehicle_data):
      v_tracklet = vehicle_data[v_id]
      clust_obj.add_tracklet(v_tracklet)

  # build clustering model
  clust_obj.build_clustering_model()

  # perform assignment
  print("Second pass over data to assign tracklets to clusters")
  for v_id in tqdm(vehicle_data):
      v_tracklet = vehicle_data[v_id]
      v_tracklet["direction_id"] = clust_obj.get_cluster_id(v_tracklet) # obtain cluster id and update the data file

  dir_path = os.path.dirname(tracklet_file_list[i])
  out_tracket_file = "{}/out_{}".format(dir_path, os.path.basename(tracklet_file_list[i]))

  # save the results to output data file
  with open(out_tracket_file, "w") as f:
      json.dump(vehicle_data, f, indent=4)

100%|██████████| 422/422 [00:00<00:00, 74629.86it/s]
100%|██████████| 422/422 [00:00<00:00, 17944.55it/s]

First pass over data to gather all tracklets
Epoch  0
Epoch  1
Epoch  2
Epoch  3
Epoch  4
Epoch  5
Epoch  6
The assignment list has not changed, thus converged!
Second pass over data to assign tracklets to clusters



100%|██████████| 1775/1775 [00:00<00:00, 136031.90it/s]

First pass over data to gather all tracklets
Epoch  0
Epoch  1





Epoch  2
Epoch  3
Epoch  4
Epoch  5
Epoch  6
Epoch  7
Epoch  8
Epoch  9
Epoch  10
Epoch  11
Epoch  12
Epoch  13
Epoch  14
Epoch  15
Epoch  16
Epoch  17
Epoch  18
Epoch  19
Epoch  20
Epoch  21
Epoch  22
Epoch  23
Epoch  24
Epoch  25
Epoch  26
Epoch  27
Epoch  28
Epoch  29
Epoch  30
Epoch  31
Epoch  32
The assignment list has not changed, thus converged!


100%|██████████| 1775/1775 [00:00<00:00, 22376.31it/s]

Second pass over data to assign tracklets to clusters



100%|██████████| 974/974 [00:00<00:00, 117995.84it/s]

First pass over data to gather all tracklets
Epoch  0
Epoch  1





Epoch  2
Epoch  3
Epoch  4
Epoch  5
Epoch  6
Epoch  7
Epoch  8
Epoch  9
Epoch  10
Epoch  11
Epoch  12
Epoch  13
Epoch  14
Epoch  15
Epoch  16
Epoch  17
Epoch  18
Epoch  19
Epoch  20
Epoch  21
Epoch  22
Epoch  23
Epoch  24
Epoch  25
Epoch  26
Epoch  27
Epoch  28
Epoch  29
Epoch  30
The assignment list has not changed, thus converged!


100%|██████████| 974/974 [00:00<00:00, 13604.08it/s]

Second pass over data to assign tracklets to clusters





Below function is used to visualize the results in video form

In [4]:
def disp_vehicles(vehicle_file, input_video, output_video):
    with open(vehicle_file) as f:
        vehicle_data = json.load(f)

    disp_data = {}
    max_cluster_id = 0

    print("Processing tracklets")
    for v_id in tqdm(vehicle_data):
        v_tracklet = vehicle_data[v_id]
        v_class = v_tracklet["class"]
        if "direction_id" in v_tracklet:
            v_cluster_id = v_tracklet["direction_id"]
            assert v_cluster_id >=0, "direction_id cannot be negative"
            max_cluster_id = max(max_cluster_id, v_cluster_id)
        else:
            v_cluster_id = 0

        for v_det in v_tracklet["tracks"]:
            frm_id = v_det[0]
            bbox = v_det[1:]

            disp_info = [v_id, v_class, v_cluster_id, bbox]

            if frm_id in disp_data:
                disp_data[frm_id].append(disp_info)
            else:
                disp_data[frm_id] = [disp_info]

    cam = cv2.VideoCapture(input_video)
    fps = cam.get(cv2.CAP_PROP_FPS)
    width = cam.get(cv2.CAP_PROP_FRAME_WIDTH)  # float
    height = cam.get(cv2.CAP_PROP_FRAME_HEIGHT)  # float
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    out_video = cv2.VideoWriter(output_video, fourcc, int(fps), (int(width), int(height)))
    num_frames = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))

    # create colormap for clusters
    num_cluster = max_cluster_id + 1
    colors = [(0, 0, 255), (255, 0, 0), (0, 255, 0), (255, 255, 0), (255, 0, 255), (0, 255, 255), (128, 128, 128)]
    if num_cluster > 7: # add random colors
        for i in range(7, num_cluster):
            color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
            colors.append(color)

    print("Rendering video frames")
    frm_id = 0
    for _ in tqdm(range(num_frames)):
        _, im = cam.read()
        if im is None:
            continue
        frm_id += 1

        if frm_id in disp_data:
            vehicles2disp = disp_data[frm_id]

            for vehicle in vehicles2disp:
                bbox = vehicle[3]
                bbox2 = [int(x) for x in bbox]
                cluster_id = vehicle[2]
                color = colors[cluster_id]
                start_point = (bbox2[0], bbox2[1])
                end_point = (bbox2[2], bbox2[3])
                thickness = 2
                im = cv2.rectangle(im, start_point, end_point, color, thickness)
                if cluster_id == 0:
                    im = cv2.putText(im, "Outlier: {} {}".format(vehicle[1], vehicle[0]), (bbox2[0], bbox2[1]),
                                     cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
                else:
                    im = cv2.putText(im, "{} {}".format(vehicle[1], vehicle[0]), (bbox2[0], bbox2[1]),
                                     cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)

        out_video.write(im)

    out_video.release()


In [5]:
output_tracklet_file_list = ['/content/data/out_cam_04_debug.json','/content/data/out_cam_10_debug.json','/content/data/out_cam_16_debug.json']
input_video_list = ['/content/data/cam_04_debug.mp4','/content/data/cam_10_debug.mp4','/content/data/cam_16_debug.mp4']
output_video_list = ['/content/data/out_cam_04_debug.mp4','/content/data/out_cam_10_debug.mp4','/content/data/out_cam_16_debug.mp4']

In [6]:
for i in range(3):
  disp_vehicles(output_tracklet_file_list[i], input_video_list[i], output_video_list[i])

100%|██████████| 422/422 [00:00<00:00, 23522.17it/s]
  0%|          | 0/1208 [00:00<?, ?it/s]

Processing tracklets
Rendering video frames


100%|██████████| 1208/1208 [00:14<00:00, 81.82it/s]
 25%|██▍       | 443/1775 [00:00<00:00, 4072.76it/s]

Processing tracklets


100%|██████████| 1775/1775 [00:00<00:00, 6945.07it/s]
  1%|          | 7/1200 [00:00<00:20, 58.40it/s]

Rendering video frames


100%|██████████| 1200/1200 [00:19<00:00, 61.83it/s]
100%|██████████| 974/974 [00:00<00:00, 44785.10it/s]
  1%|          | 8/1200 [00:00<00:15, 77.43it/s]

Processing tracklets
Rendering video frames


100%|██████████| 1200/1200 [00:15<00:00, 79.01it/s]
