In [None]:
import cv2
import numpy as np
from tqdm import tqdm


#  Load the Original Video

In [None]:
# Load the original video
video = cv2.VideoCapture('input_video.avi')
if not video.isOpened():
    # VideoCapture does not raise on a missing or unreadable file; without this
    # check every property below would silently read as 0.
    raise IOError("Could not open 'input_video.avi'")

# Named property ids instead of the bare indices 3 and 4.
frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = video.get(cv2.CAP_PROP_FPS)
total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))

print(f"Video loaded: {frame_width}x{frame_height}, {fps} FPS, {total_frames} frames")


# Object Detection and Tracking


In [None]:
# Load the detection network through OpenCV's DNN module.
# NOTE(review): the files are named "yolov5", but the parsing below expects one
# row per candidate laid out as [cx, cy, w, h, <unused>, class scores...] with
# coordinates normalised to [0, 1] -- confirm the weights/cfg really produce that.
net = cv2.dnn.readNet("yolov5.weights", "yolov5.cfg")
layer_names = net.getLayerNames()
# Ask OpenCV for the output layer names directly. Indexing layer_names with the
# result of getUnconnectedOutLayers() fails on older OpenCV releases, where that
# call returns an Nx1 array rather than a flat one.
output_layers = list(net.getUnconnectedOutLayersNames())

# One class label per line; the line index is the class id used by the network.
with open("coco.names", "r") as f:
    classes = [line.strip() for line in f]

# This cell releases `video` when it finishes, so a second run would otherwise
# read zero frames and overwrite the output with an empty file.
if not video.isOpened():
    video = cv2.VideoCapture('input_video.avi')
if not video.isOpened():
    raise IOError("Could not open 'input_video.avi'")

out_writer_object_detection = cv2.VideoWriter('output_object_detection.avi',
                                               cv2.VideoWriter_fourcc(*'XVID'), fps,
                                               (frame_width, frame_height))
if not out_writer_object_detection.isOpened():
    # A writer that failed to open drops every frame without complaining.
    video.release()
    raise IOError("Could not open 'output_object_detection.avi' for writing")

try:
    with tqdm(total=total_frames, desc="Object Detection and Tracking", unit="frame") as pbar:
        while True:
            ret, frame = video.read()
            if not ret:
                break

            # Scale pixels to [0, 1] (0.00392 ~= 1/255), resize to 416x416, swap BGR -> RGB.
            blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
            net.setInput(blob)
            detections = net.forward(output_layers)

            boxes, confidences, class_ids = [], [], []
            for out in detections:
                for detection in out:
                    scores = detection[5:]
                    class_id = np.argmax(scores)
                    confidence = scores[class_id]
                    if confidence > 0.5:
                        # Convert the normalised centre/size box to a pixel top-left/size box.
                        center_x = int(detection[0] * frame_width)
                        center_y = int(detection[1] * frame_height)
                        w = int(detection[2] * frame_width)
                        h = int(detection[3] * frame_height)
                        x = int(center_x - w / 2)
                        y = int(center_y - h / 2)
                        boxes.append([x, y, w, h])
                        confidences.append(float(confidence))
                        class_ids.append(class_id)

            # Non-maximum suppression (score threshold 0.5, overlap threshold 0.4).
            indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
            # Iterate the kept indices directly instead of testing every box with
            # `i in indexes`; flatten() copes with the empty-tuple, flat and Nx1
            # return shapes used by different OpenCV versions.
            for i in np.array(indexes).flatten():
                x, y, w, h = boxes[i]
                label = str(classes[class_ids[i]])
                cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
                cv2.putText(frame, label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

            out_writer_object_detection.write(frame)
            pbar.update(1)
finally:
    # Release even on an exception or a kernel interrupt so the output file is
    # finalised and the input file handle is not left open.
    video.release()
    out_writer_object_detection.release()

print("Object detection and tracking video saved as 'output_object_detection.avi'")


# Background Subtraction

In [None]:

# Re-open the source video from the first frame for this pass.
video = cv2.VideoCapture('input_video.avi')
if not video.isOpened():
    raise IOError("Could not open 'input_video.avi'")

# MOG2 background model with shadow detection enabled.
backgroundObject = cv2.createBackgroundSubtractorMOG2(detectShadows=True)

out_writer_bg_subtracted = cv2.VideoWriter('output_bg_subtracted.avi',
                                           cv2.VideoWriter_fourcc(*'XVID'), fps,
                                           (frame_width, frame_height))
if not out_writer_bg_subtracted.isOpened():
    # A writer that failed to open drops every frame without complaining.
    video.release()
    raise IOError("Could not open 'output_bg_subtracted.avi' for writing")

try:
    with tqdm(total=total_frames, desc="Background Subtraction", unit="frame") as pbar:
        while True:
            ret, frame = video.read()
            if not ret:
                break

            fgmask = backgroundObject.apply(frame)
            # Keep only mask values above 250. With detectShadows=True OpenCV
            # documents shadows as a mid-grey mask value, so this drops them.
            _, fgmask = cv2.threshold(fgmask, 250, 255, cv2.THRESH_BINARY)
            # Erode once to remove speckle, then dilate twice to close small gaps.
            fgmask = cv2.erode(fgmask, None, iterations=1)
            fgmask = cv2.dilate(fgmask, None, iterations=2)

            # Black out everything that is not foreground.
            foregroundPart = cv2.bitwise_and(frame, frame, mask=fgmask)

            out_writer_bg_subtracted.write(foregroundPart)
            pbar.update(1)
finally:
    # Release even on an exception or a kernel interrupt so the output file is
    # finalised and can be read back later.
    video.release()
    out_writer_bg_subtracted.release()

print("Background-subtracted video saved as 'output_bg_subtracted.avi'")


# Combined Video: Background Subtraction with Object Detection and Tracking

In [None]:

# Detection for the combined output runs on the background-subtracted video.
video = cv2.VideoCapture('output_bg_subtracted.avi')
if not video.isOpened():
    raise IOError("Could not open 'output_bg_subtracted.avi'")

out_writer_combined = cv2.VideoWriter('output_combined.avi',
                                      cv2.VideoWriter_fourcc(*'XVID'), fps,
                                      (frame_width, frame_height))
if not out_writer_combined.isOpened():
    # A writer that failed to open drops every frame without complaining.
    video.release()
    raise IOError("Could not open 'output_combined.avi' for writing")

# Bat centre found in the previous frame, or None when no bat was detected there.
prev_bat_pos = None
# Speeds (km/h) that exceeded speed_threshold.
bat_speeds = []
# Metres represented by one pixel.
# NOTE(review): 0.0002645833 m equals 25.4 mm / 96, i.e. the size of a pixel on
# a 96-DPI display, not a scale measured in the filmed scene. Reported speeds
# are only meaningful once this is replaced by a calibrated value.
pixels_to_meters = 0.0002645833
# pixels/second -> km/h: metres per pixel, times 3600 s/h, divided by 1000 m/km.
conversion_factor = pixels_to_meters * 3600 / 1000
# Minimum speed in km/h for a reading to be recorded and drawn.
speed_threshold = 10

def calculate_speed(prev_pos, curr_pos, fps, px_to_kmh=None):
    """Estimate a speed in km/h from the displacement between two frames.

    Args:
        prev_pos: (x, y) pixel position in the earlier frame.
        curr_pos: (x, y) pixel position in the later frame.
        fps: Frame rate of the video. The two positions are treated as being
            exactly one frame apart.
        px_to_kmh: Factor converting pixels/second to km/h. When omitted, the
            notebook-level ``conversion_factor`` is used.

    Returns:
        The estimated speed in km/h.
    """
    if px_to_kmh is None:
        px_to_kmh = conversion_factor
    # Convert to float before subtracting so unsigned integer inputs cannot
    # wrap around and inflate the distance.
    displacement = np.asarray(curr_pos, dtype=float) - np.asarray(prev_pos, dtype=float)
    distance_px = np.linalg.norm(displacement)
    speed_px_s = distance_px * fps
    speed_kmh = speed_px_s * px_to_kmh
    return speed_kmh

try:
    with tqdm(total=total_frames, desc="Combining Background Subtraction with Object Detection", unit="frame") as pbar:
        while True:
            ret, frame = video.read()
            if not ret:
                break

            # Scale pixels to [0, 1] (0.00392 ~= 1/255), resize to 416x416, swap BGR -> RGB.
            blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
            net.setInput(blob)
            detections = net.forward(output_layers)

            boxes, confidences, class_ids = [], [], []
            curr_bat_pos = None
            best_bat_confidence = -1.0

            for out in detections:
                for detection in out:
                    scores = detection[5:]
                    class_id = np.argmax(scores)
                    confidence = scores[class_id]
                    if confidence > 0.5:
                        # Convert the normalised centre/size box to a pixel top-left/size box.
                        center_x = int(detection[0] * frame_width)
                        center_y = int(detection[1] * frame_height)
                        w = int(detection[2] * frame_width)
                        h = int(detection[3] * frame_height)
                        x = int(center_x - w / 2)
                        y = int(center_y - h / 2)
                        boxes.append([x, y, w, h])
                        confidences.append(float(confidence))
                        class_ids.append(class_id)

            # Non-maximum suppression (score threshold 0.5, overlap threshold 0.4).
            indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
            # Iterate the kept indices directly; flatten() copes with the
            # empty-tuple, flat and Nx1 return shapes of different OpenCV versions.
            for i in np.array(indexes).flatten():
                x, y, w, h = boxes[i]
                label = str(classes[class_ids[i]])
                if label == 'baseball bat':  # Replace with relevant label
                    # When several bats survive NMS, follow the most confident
                    # one instead of whichever happens to come last in the list.
                    if confidences[i] > best_bat_confidence:
                        best_bat_confidence = confidences[i]
                        curr_bat_pos = (x + w // 2, y + h // 2)
                    cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
                    cv2.putText(frame, label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

            # A speed is only computed when the bat was found in two consecutive
            # frames, because calculate_speed treats the positions as one frame apart.
            if curr_bat_pos is not None and prev_bat_pos is not None:
                curr_speed_kmh = calculate_speed(prev_bat_pos, curr_bat_pos, fps)
                if curr_speed_kmh > speed_threshold:
                    bat_speeds.append(curr_speed_kmh)
                    cv2.putText(frame, f'Speed: {curr_speed_kmh:.2f} km/h', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

            # Becomes None again whenever the bat is lost, which resets the pairing.
            prev_bat_pos = curr_bat_pos

            out_writer_combined.write(frame)
            pbar.update(1)
finally:
    # Release even on an exception or a kernel interrupt so the output file is
    # finalised and the input file handle is not left open.
    video.release()
    out_writer_combined.release()

print("Combined video saved as 'output_combined.avi'")
