<a href="https://colab.research.google.com/github/VishnuRathore98/Machine_Learning/blob/master/Speed_Estimation_and_Vehicle_tracking.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Shell command: print the NVIDIA GPU / driver status visible to this runtime
!nvidia-smi

In [None]:
# Install the third-party packages for this notebook (-q keeps pip output quiet).
# NOTE(review): no versions are pinned, so a fresh run installs whatever is latest — consider pinning
!pip install -q supervision inference ultralytics

In [None]:
# Importing libraries

# For computations
import numpy as np

# For computer vision tasks
import cv2
from google.colab.patches import cv2_imshow

# For video utilities, tracking, zones and annotation
import supervision as sv
from supervision.assets import VideoAssets, download_assets
# For displaying video
from IPython.display import Video

# For getting the model for detection
from inference.models.utils import get_roboflow_model

In [None]:
# Download the sample vehicles clip from the supervision asset collection.
# NOTE(review): presumably saved in the working directory as "vehicles.mp4" (the path SOURCE_VIDEO uses) — confirm
download_assets(VideoAssets.VEHICLES)

In [None]:
# Configuration: video paths and the two quadrilaterals used for the perspective mapping

# Input clip and the path for the annotated result
SOURCE_VIDEO = "vehicles.mp4"
TARGET_VIDEO = "vehicles-result.mp4"

# Four (x, y) corners of the region of interest in frame pixel coordinates.
# Some corners lie outside typical frame bounds (negative x, large x).
SOURCE = np.array(
    [
        [1252, 787],
        [2298, 803],
        [5039, 2159],
        [-550, 2159],
    ]
)

# Size of the rectified target plane.
# NOTE(review): presumably real-world units (e.g. metres) — confirm
TARGET_WIDTH = 25
TARGET_HEIGHT = 250

# Corners of the target rectangle, listed in the same order as SOURCE:
# a unit square scaled so the far corner is (TARGET_WIDTH-1, TARGET_HEIGHT-1).
TARGET = np.array([[0, 0], [1, 0], [1, 1], [0, 1]]) * np.array(
    [TARGET_WIDTH - 1, TARGET_HEIGHT - 1]
)

In [None]:
# Playing the video
# display(Video(SOURCE_VIDEO, embed=True))

In [None]:
# Resolves perspective distortion: maps object coordinates in the frame onto the target plane
class ViewTransformer:
  """Perspective mapping from a source quadrilateral to a target quadrilateral.

  The transformation matrix is computed once from the two point sets and
  then applied to arbitrary (x, y) points.
  """

  def __init__(self, source: np.ndarray, target: np.ndarray) -> None:
    """Compute the perspective transformation matrix.

    Args:
      source: Four (x, y) corner points in the input frame.
      target: The four corresponding (x, y) points in the output plane,
        listed in the same order as `source`.
    """
    # OpenCV expects float32 point arrays
    source = source.astype(np.float32)
    target = target.astype(np.float32)
    self.matrix = cv2.getPerspectiveTransform(source, target)

  def transformed_points(self, points: np.ndarray) -> np.ndarray:
    """Apply the perspective mapping to a set of points.

    Args:
      points: Array of (x, y) coordinates, reshapeable to (N, 2).

    Returns:
      A float32 array of shape (N, 2) with the transformed coordinates.
      An empty input yields an empty (0, 2) array.
    """
    points = np.asarray(points)
    # cv2.perspectiveTransform cannot process an empty point set (e.g. when
    # no detections fall inside the zone), so short-circuit here.
    if points.size == 0:
      return np.empty((0, 2), dtype=np.float32)
    # perspectiveTransform wants shape (N, 1, 2) in floating point
    reshaped_points = points.reshape(-1, 1, 2).astype(np.float32)
    transformed_points = cv2.perspectiveTransform(reshaped_points, self.matrix)
    return transformed_points.reshape(-1, 2)

In [None]:
from collections import deque
from typing import DefaultDict
# Read the source video's metadata (its fps and resolution_wh are used below)
video_info = sv.VideoInfo.from_video_path(SOURCE_VIDEO)


# Load the detection model by its Roboflow model id
# NOTE(review): "yolov8x-640" presumably means YOLOv8-x at 640 px input — confirm
model = get_roboflow_model("yolov8x-640")

# ByteTrack follows individual objects across frames and gives each an id;
# it is configured with the video's frame rate
byte_track = sv.ByteTrack(frame_rate=video_info.fps)

# Bounding-box line thickness and label text scale, derived from the video resolution
thickness = sv.calculate_optimal_line_thickness(resolution_wh=video_info.resolution_wh)
text_scale = sv.calculate_optimal_text_scale(resolution_wh=video_info.resolution_wh)

# Annotator that draws the bounding boxes
# NOTE(review): newer supervision releases reportedly rename this class to BoxAnnotator —
# confirm against the installed version, since the install cell does not pin one
bounding_box_annotator = sv.BoundingBoxAnnotator(thickness=thickness)

# Annotator that draws the text label for each bounding box
label_annotator = sv.LabelAnnotator(text_scale=text_scale, text_thickness=thickness)

# Polygon zone built from SOURCE; used to limit detections to that region
# NOTE(review): `frame_resolution_wh` is reportedly deprecated/removed in newer supervision — confirm
polygon_zone = sv.PolygonZone(SOURCE, frame_resolution_wh=video_info.resolution_wh)

# Perspective mapping from the SOURCE quadrilateral to the TARGET rectangle
view_transformer = ViewTransformer(source=SOURCE, target=TARGET)

# Frame generator over the source video
# NOTE: each next() on it consumes a frame, so re-running a cell that pulls from it yields a later frame
frame_generator = sv.get_video_frames_generator(SOURCE_VIDEO)


In [None]:
# Annotating a single frame: pull the next frame from the shared generator
frame = next(iter(frame_generator))

# Run the detector and convert its first result into supervision detections
result = model.infer(frame)[0]
detections = sv.Detections.from_inference(result)

# Keep only detections inside the polygon zone, then let the tracker label them with ids
detections = byte_track.update_with_detections(
    detections=detections[polygon_zone.trigger(detections)]
)

# Bottom-centre anchor of every box, mapped through the view transformer
# and truncated to integer coordinates
points = view_transformer.transformed_points(
    detections.get_anchors_coordinates(anchor=sv.Position.BOTTOM_CENTER)
).astype(int)

# One label per detection showing its transformed coordinates
labels = [f"#x:{x}, y:{y}" for x, y in points]

# Draw on a copy so the original frame stays untouched:
# first the zone polygon, then the boxes, then the labels
annotated_frame = sv.draw_polygon(scene=frame.copy(), polygon=SOURCE, color=sv.Color.RED)
annotated_frame = bounding_box_annotator.annotate(scene=annotated_frame, detections=detections)
annotated_frame = label_annotator.annotate(
    scene=annotated_frame,
    detections=detections,
    labels=labels,
)

sv.plot_image(annotated_frame)

## Looping over frames to annotate vehicles frame by frame
# for frame in frame_generator:
#   result = model.infer(frame)[0]
#   detections = sv.Detections.from_inference(result)

#   annotated_frame = frame.copy()
#   annotated_frame = bounding_box_annotator.annotate(scene=annotated_frame, detections=detections)
#   annotated_frame = label_annotator.annotate(scene=annotated_frame, detections=detections)

#   # cv2_imshow(annotated_frame)
#   if cv2.waitKey(1) & 0xFF == ord("q"):
#     break

# # Closing the window
# cv2.destroyAllWindows()