In [None]:
!git clone https://github.com/ultralytics/yolov5.git


Cloning into 'yolov5'...
remote: Enumerating objects: 17067, done.[K
remote: Counting objects: 100% (45/45), done.[K
remote: Compressing objects: 100% (33/33), done.[K
remote: Total 17067 (delta 24), reused 28 (delta 12), pack-reused 17022 (from 1)[K
Receiving objects: 100% (17067/17067), 15.68 MiB | 22.12 MiB/s, done.
Resolving deltas: 100% (11714/11714), done.


In [None]:
%cd /content/yolov5



/content/yolov5


In [None]:
%pip install -r requirements.txt



Collecting thop>=0.1.1 (from -r requirements.txt (line 14))
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl.metadata (2.7 kB)
Collecting ultralytics>=8.2.34 (from -r requirements.txt (line 18))
  Downloading ultralytics-8.3.40-py3-none-any.whl.metadata (35 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics>=8.2.34->-r requirements.txt (line 18))
  Downloading ultralytics_thop-2.0.12-py3-none-any.whl.metadata (9.4 kB)
Downloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)
Downloading ultralytics-8.3.40-py3-none-any.whl (898 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m898.5/898.5 kB[0m [31m18.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.12-py3-none-any.whl (26 kB)
Installing collected packages: ultralytics-thop, thop, ultralytics
Successfully installed thop-0.1.1.post2209072238 ultralytics-8.3.40 ultralytics-thop-2.0.12


In [None]:
import cv2
import os
import torch
from yolov5.models.experimental import attempt_load
from yolov5.utils.torch_utils import select_device
from yolov5.utils.general import non_max_suppression
from yolov5.utils.augmentations import letterbox
from yolov5.utils.plots import Annotator

import numpy as np

# Function to extract frames from a video
def extract_frames(video_path, output_folder):
    os.makedirs(output_folder, exist_ok=True)
    cap = cv2.VideoCapture(video_path)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_rate = int(cap.get(cv2.CAP_PROP_FPS))
    print(f"Extracting {frame_count} frames at {frame_rate} FPS.")

    frame_idx = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame_path = os.path.join(output_folder, f"frame_{frame_idx:04d}.jpg")
        cv2.imwrite(frame_path, frame)
        frame_idx += 1

    cap.release()
    print(f"Frames saved to {output_folder}")
    return frame_rate

# Function to preprocess image for YOLOv5
def preprocess_image(image, img_size=640):
    img_resized = letterbox(image, new_shape=img_size)[0]
    img_resized = img_resized[:, :, ::-1].transpose(2, 0, 1)  # Convert BGR to RGB and reshape
    img_resized = np.ascontiguousarray(img_resized)
    img_tensor = torch.from_numpy(img_resized).float()
    img_tensor /= 255.0  # Normalize
    if img_tensor.ndimension() == 3:
        img_tensor = img_tensor.unsqueeze(0)
    return img_tensor

# Function to perform YOLOv5 inference
def predict_yolo(model, img_tensor, original_shape):
    model.eval()
    with torch.no_grad():
        pred = model(img_tensor)[0]
    pred = non_max_suppression(pred, 0.25, 0.45, classes=None, agnostic=False)

    results = []
    for det in pred:  # Loop through predictions for the frame
        if det is not None and len(det):
            det[:, :4] = det[:, :4].clamp(0)  # Ensure bounding boxes are non-negative
            for *xyxy, conf, cls in det:  # Unpack bounding box, confidence, and class
                xyxy = [int(x.item()) for x in xyxy]  # Convert tensor to list
                results.append((cls, conf, xyxy))  # Append (class, confidence, bbox)
    return results

# Function to draw bounding boxes on an image
def draw_boxes(image, results, model):
    annotator = Annotator(image, line_width=2, font_size=0.5)
    for cls, conf, (x1, y1, x2, y2) in results:
        label = f"{model.names[int(cls)]} {conf:.2f}"
        annotator.box_label([x1, y1, x2, y2], label, color=(0, 255, 0))
    return annotator.result()

# Function to recombine frames into a video
def create_video_from_frames(frames_folder, output_video_path, frame_rate):
    frame_files = sorted([os.path.join(frames_folder, f) for f in os.listdir(frames_folder) if f.endswith('.jpg')])
    if not frame_files:
        raise ValueError("No frames found in the directory.")

    first_frame = cv2.imread(frame_files[0])
    height, width, _ = first_frame.shape
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for output video
    out = cv2.VideoWriter(output_video_path, fourcc, frame_rate, (width, height))

    for frame_file in frame_files:
        frame = cv2.imread(frame_file)
        out.write(frame)

    out.release()
    print(f"Video saved to {output_video_path}")

# Main function
def main(video_path, model_path, output_folder, output_video_path):
    # Load YOLOv5 model
    device = select_device('cpu')
    model = attempt_load(model_path)  # Load the model without specifying map_location
    model.to(device)  # Move the model to the desired device

    # Extract frames
    frame_rate = extract_frames(video_path, output_folder)

    # Process each frame
    for frame_file in sorted(os.listdir(output_folder)):
        frame_path = os.path.join(output_folder, frame_file)
        image = cv2.imread(frame_path)
        original_shape = image.shape[:2]

        # Preprocess image
        img_tensor = preprocess_image(image)

        # YOLOv5 prediction
        results = predict_yolo(model, img_tensor, original_shape)

        # Draw bounding boxes
        processed_image = draw_boxes(image, results, model)

        # Save processed frame
        cv2.imwrite(frame_path, processed_image)

    # Recombine frames into a video
    create_video_from_frames(output_folder, output_video_path, frame_rate)

# Run the pipeline
video_path = '/content/drive/MyDrive/b.mp4'  # Path to input video
model_path = '/content/drive/MyDrive/fine_tuned_yolo_model_50.pt'  # Path to YOLOv5 weights
output_folder = '/content/frames'  # Folder to save extracted and processed frames
output_video_path = '/content/drive/MyDrive/ot.mp4'  # Path to save output video

main(video_path, model_path, output_folder, output_video_path)


YOLOv5 🚀 v7.0-388-g882c35fc Python-3.10.12 torch-2.5.1+cu121 CPU

Fusing layers... 
Model summary: 157 layers, 7066762 parameters, 0 gradients, 15.9 GFLOPs


Extracting 917 frames at 29 FPS.
Frames saved to /content/frames
Video saved to /content/drive/MyDrive/ot.mp4


In [None]:
import cv2
import os
import torch
from yolov5.models.experimental import attempt_load
from yolov5.utils.torch_utils import select_device
from yolov5.utils.general import non_max_suppression
from yolov5.utils.augmentations import letterbox
from yolov5.utils.plots import Annotator
import numpy as np

# Function to extract frames from a video
def extract_frames(video_path, output_folder):
    os.makedirs(output_folder, exist_ok=True)
    cap = cv2.VideoCapture(video_path)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_rate = int(cap.get(cv2.CAP_PROP_FPS))
    print(f"Extracting {frame_count} frames at {frame_rate} FPS.")

    frame_idx = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame_path = os.path.join(output_folder, f"frame_{frame_idx:04d}.jpg")
        cv2.imwrite(frame_path, frame)
        frame_idx += 1

    cap.release()
    print(f"Frames saved to {output_folder}")
    return frame_rate

# Function to preprocess image for YOLOv5
def preprocess_image(image, img_size=640):
    img_resized = letterbox(image, new_shape=img_size)[0]
    img_resized = img_resized[:, :, ::-1].transpose(2, 0, 1)  # Convert BGR to RGB and reshape
    img_resized = np.ascontiguousarray(img_resized)
    img_tensor = torch.from_numpy(img_resized).float()
    img_tensor /= 255.0  # Normalize
    if img_tensor.ndimension() == 3:
        img_tensor = img_tensor.unsqueeze(0)
    return img_tensor

# Function to perform YOLOv5 inference
def predict_yolo(model, img_tensor, original_shape):
    model.eval()
    with torch.no_grad():
        pred = model(img_tensor)[0]
    pred = non_max_suppression(pred, 0.25, 0.45, classes=None, agnostic=False)

    results = []
    for det in pred:  # Loop through predictions for the frame
        if det is not None and len(det):
            det[:, :4] = det[:, :4].clamp(0)  # Ensure bounding boxes are non-negative
            for *xyxy, conf, cls in det:  # Unpack bounding box, confidence, and class
                xyxy = [int(x.item()) for x in xyxy]  # Convert tensor to list
                results.append((cls, conf, xyxy))  # Append (class, confidence, bbox)
    return results

# Function to draw bounding boxes on an image
def draw_boxes(image, results, model):
    annotator = Annotator(image, line_width=2, font_size=0.5)
    for cls, conf, (x1, y1, x2, y2) in results:
        label = f"{model.names[int(cls)]} {conf:.2f}"
        annotator.box_label([x1, y1, x2, y2], label, color=(0, 255, 0))
    return annotator.result()

# Function to recombine frames into a video
def create_video_from_frames(frames_folder, output_video_path, frame_rate):
    frame_files = sorted([os.path.join(frames_folder, f) for f in os.listdir(frames_folder) if f.endswith('.jpg')])
    if not frame_files:
        raise ValueError("No frames found in the directory.")

    first_frame = cv2.imread(frame_files[0])
    height, width, _ = first_frame.shape
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for output video
    out = cv2.VideoWriter(output_video_path, fourcc, frame_rate, (width, height))

    for frame_file in frame_files:
        frame = cv2.imread(frame_file)
        out.write(frame)

    out.release()
    print(f"Video saved to {output_video_path}")

# Function to track and avoid double-counting vehicles across frames
def track_vehicles(frame_results, tracked_vehicles, threshold=0.5):
    new_vehicles = []
    for cls, conf, bbox in frame_results:
        matched = False
        for i, (prev_cls, prev_conf, prev_bbox) in enumerate(tracked_vehicles):
            # Calculate Intersection over Union (IoU) between bounding boxes
            iou = calculate_iou(bbox, prev_bbox)
            if iou > threshold and cls == prev_cls:
                matched = True
                tracked_vehicles[i] = (cls, conf, bbox)  # Update tracked vehicle
                break
        if not matched:
            tracked_vehicles.append((cls, conf, bbox))  # Add new vehicle if no match
            new_vehicles.append((cls, conf, bbox))  # Track this vehicle

    return new_vehicles, tracked_vehicles

# Function to calculate Intersection over Union (IoU)
def calculate_iou(bbox1, bbox2):
    x1, y1, x2, y2 = bbox1
    x1_2, y1_2, x2_2, y2_2 = bbox2
    # Calculate intersection
    inter_x1 = max(x1, x1_2)
    inter_y1 = max(y1, y1_2)
    inter_x2 = min(x2, x2_2)
    inter_y2 = min(y2, y2_2)
    inter_area = max(0, inter_x2 - inter_x1) * max(0, inter_y2 - inter_y1)

    # Calculate union
    area1 = (x2 - x1) * (y2 - y1)
    area2 = (x2_2 - x1_2) * (y2_2 - y1_2)
    union_area = area1 + area2 - inter_area

    # Return IoU
    return inter_area / union_area if union_area > 0 else 0

# Main function
def main(video_path, model_path, output_folder, output_video_path):
    # Load YOLOv5 model
    device = select_device('cpu')
    model = attempt_load(model_path)  # Load the model without specifying map_location
    model.to(device)  # Move the model to the desired device

    # Extract frames
    frame_rate = extract_frames(video_path, output_folder)

    # Vehicle tracking variables
    tracked_vehicles = []
    vehicle_count_per_class = {0: 0}  # Assuming vehicle class index is 0, expand this for multiple classes

    # Process each frame
    for frame_file in sorted(os.listdir(output_folder)):
        frame_path = os.path.join(output_folder, frame_file)
        image = cv2.imread(frame_path)
        original_shape = image.shape[:2]

        # Preprocess image
        img_tensor = preprocess_image(image)

        # YOLOv5 prediction
        results = predict_yolo(model, img_tensor, original_shape)

        # Track vehicles across frames
        new_vehicles, tracked_vehicles = track_vehicles(results, tracked_vehicles)

        # Update vehicle count for each class
        for cls, conf, _ in new_vehicles:
            vehicle_count_per_class[cls] = vehicle_count_per_class.get(cls, 0) + 1

        # Draw bounding boxes
        processed_image = draw_boxes(image, results, model)

        # Save processed frame
        cv2.imwrite(frame_path, processed_image)

    # Recombine frames into a video
    create_video_from_frames(output_folder, output_video_path, frame_rate)

    # Generate theoretical report
    print("Detection Report:")
    for cls, count in vehicle_count_per_class.items():
        print(f"Class {cls}: {count} vehicles detected")

# Run the pipeline
video_path = '/content/drive/MyDrive/b.mp4'  # Path to input video
model_path = '/content/drive/MyDrive/fine_tuned_yolo_model_50.pt'  # Path to YOLOv5 weights
output_folder = '/content/frames'  # Folder to save extracted and processed frames
output_video_path = '/content/drive/MyDrive/ot2.mp4'  # Path to save output video

main(video_path, model_path, output_folder, output_video_path)


YOLOv5 🚀 v7.0-388-g882c35fc Python-3.10.12 torch-2.5.1+cu121 CPU

Fusing layers... 
Model summary: 157 layers, 7066762 parameters, 0 gradients, 15.9 GFLOPs


Extracting 917 frames at 29 FPS.
Frames saved to /content/frames
Video saved to /content/drive/MyDrive/ot2.mp4
Detection Report:
Class 0: 0 vehicles detected
Class 5.0: 1 vehicles detected
Class 4.0: 1 vehicles detected
Class 17.0: 1 vehicles detected
Class 17.0: 1 vehicles detected
Class 17.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 17.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 17.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 4.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 11.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 4.0: 1 vehicles detected
Class 17.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 17.0: 1 vehicles detected
Class 10.0: 1 vehicles detec

In [None]:
import cv2
import os
import torch
from yolov5.models.experimental import attempt_load
from yolov5.utils.torch_utils import select_device
from yolov5.utils.general import non_max_suppression
from yolov5.utils.augmentations import letterbox
from yolov5.utils.plots import Annotator
import numpy as np

# Function to extract frames from a video
def extract_frames(video_path, output_folder):
    os.makedirs(output_folder, exist_ok=True)
    cap = cv2.VideoCapture(video_path)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_rate = int(cap.get(cv2.CAP_PROP_FPS))
    print(f"Extracting {frame_count} frames at {frame_rate} FPS.")

    frame_idx = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame_path = os.path.join(output_folder, f"frame_{frame_idx:04d}.jpg")
        cv2.imwrite(frame_path, frame)
        frame_idx += 1

    cap.release()
    print(f"Frames saved to {output_folder}")
    return frame_rate

# Function to preprocess image for YOLOv5
def preprocess_image(image, img_size=640):
    img_resized = letterbox(image, new_shape=img_size)[0]
    img_resized = img_resized[:, :, ::-1].transpose(2, 0, 1)  # Convert BGR to RGB and reshape
    img_resized = np.ascontiguousarray(img_resized)
    img_tensor = torch.from_numpy(img_resized).float()
    img_tensor /= 255.0  # Normalize
    if img_tensor.ndimension() == 3:
        img_tensor = img_tensor.unsqueeze(0)
    return img_tensor

# Function to perform YOLOv5 inference
def predict_yolo(model, img_tensor, original_shape):
    model.eval()
    with torch.no_grad():
        pred = model(img_tensor)[0]
    pred = non_max_suppression(pred, 0.25, 0.45, classes=None, agnostic=False)

    results = []
    for det in pred:  # Loop through predictions for the frame
        if det is not None and len(det):
            det[:, :4] = det[:, :4].clamp(0)  # Ensure bounding boxes are non-negative
            for *xyxy, conf, cls in det:  # Unpack bounding box, confidence, and class
                xyxy = [int(x.item()) for x in xyxy]  # Convert tensor to list
                results.append((cls, conf, xyxy))  # Append (class, confidence, bbox)
    return results

# Function to draw bounding boxes on an image
def draw_boxes(image, results, model):
    annotator = Annotator(image, line_width=2, font_size=0.5)
    for cls, conf, (x1, y1, x2, y2) in results:
        label = f"{model.names[int(cls)]} {conf:.2f}"
        annotator.box_label([x1, y1, x2, y2], label, color=(0, 255, 0))
    return annotator.result()

# Function to recombine frames into a video
def create_video_from_frames(frames_folder, output_video_path, frame_rate):
    frame_files = sorted([os.path.join(frames_folder, f) for f in os.listdir(frames_folder) if f.endswith('.jpg')])
    if not frame_files:
        raise ValueError("No frames found in the directory.")

    first_frame = cv2.imread(frame_files[0])
    height, width, _ = first_frame.shape
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for output video
    out = cv2.VideoWriter(output_video_path, fourcc, frame_rate, (width, height))

    for frame_file in frame_files:
        frame = cv2.imread(frame_file)
        out.write(frame)

    out.release()
    print(f"Video saved to {output_video_path}")

# Function to track and avoid double-counting vehicles across frames
def track_vehicles(frame_results, tracked_vehicles, threshold=0.5):
    new_vehicles = []
    for cls, conf, bbox in frame_results:
        matched = False
        for i, (prev_cls, prev_conf, prev_bbox) in enumerate(tracked_vehicles):
            # Calculate Intersection over Union (IoU) between bounding boxes
            iou = calculate_iou(bbox, prev_bbox)
            if iou > threshold and cls == prev_cls:
                matched = True
                tracked_vehicles[i] = (cls, conf, bbox)  # Update tracked vehicle
                break
        if not matched:
            tracked_vehicles.append((cls, conf, bbox))  # Add new vehicle if no match
            new_vehicles.append((cls, conf, bbox))  # Track this vehicle

    return new_vehicles, tracked_vehicles

# Function to calculate Intersection over Union (IoU)
def calculate_iou(bbox1, bbox2):
    x1, y1, x2, y2 = bbox1
    x1_2, y1_2, x2_2, y2_2 = bbox2
    # Calculate intersection
    inter_x1 = max(x1, x1_2)
    inter_y1 = max(y1, y1_2)
    inter_x2 = min(x2, x2_2)
    inter_y2 = min(y2, y2_2)
    inter_area = max(0, inter_x2 - inter_x1) * max(0, inter_y2 - inter_y1)

    # Calculate union
    area1 = (x2 - x1) * (y2 - y1)
    area2 = (x2_2 - x1_2) * (y2_2 - y1_2)
    union_area = area1 + area2 - inter_area

    # Return IoU
    return inter_area / union_area if union_area > 0 else 0

# Main function
def main(video_path, model_path, output_folder, output_video_path):
    # Load YOLOv5 model
    device = select_device('cpu')
    model = attempt_load(model_path)  # Load the model without specifying map_location
    model.to(device)  # Move the model to the desired device

    # Extract frames
    frame_rate = extract_frames(video_path, output_folder)

    # Vehicle tracking variables
    tracked_vehicles = []
    vehicle_count_per_class = {0: 0}  # Assuming vehicle class index is 0, expand this for multiple classes

    # Process each frame
    for frame_file in sorted(os.listdir(output_folder)):
        frame_path = os.path.join(output_folder, frame_file)
        image = cv2.imread(frame_path)
        original_shape = image.shape[:2]

        # Preprocess image
        img_tensor = preprocess_image(image)

        # YOLOv5 prediction
        results = predict_yolo(model, img_tensor, original_shape)

        # Track vehicles across frames
        new_vehicles, tracked_vehicles = track_vehicles(results, tracked_vehicles)

        # Update vehicle count for each class
        for cls, conf, _ in new_vehicles:
            vehicle_count_per_class[cls] = vehicle_count_per_class.get(cls, 0) + 1

        # Draw bounding boxes
        processed_image = draw_boxes(image, results, model)

        # Save processed frame
        cv2.imwrite(frame_path, processed_image)

    # Recombine frames into a video
    create_video_from_frames(output_folder, output_video_path, frame_rate)

    # Generate theoretical report
    print("Detection Report:")
    for cls, count in vehicle_count_per_class.items():
        print(f"Class {cls}: {count} vehicles detected")

# Run the pipeline
video_path = '/content/drive/MyDrive/b.mp4'  # Path to input video
model_path = '/content/drive/MyDrive/fine_tuned_yolo_model_50.pt'  # Path to YOLOv5 weights
output_folder = '/content/frames'  # Folder to save extracted and processed frames
output_video_path = '/content/drive/MyDrive/ot3.mp4'  # Path to save output video

main(video_path, model_path, output_folder, output_video_path)


YOLOv5 🚀 v7.0-388-g882c35fc Python-3.10.12 torch-2.5.1+cu121 CPU

Fusing layers... 
Model summary: 157 layers, 7066762 parameters, 0 gradients, 15.9 GFLOPs


Extracting 917 frames at 29 FPS.
Frames saved to /content/frames
Video saved to /content/drive/MyDrive/ot3.mp4
Detection Report:
Class 0: 0 vehicles detected
Class 5.0: 1 vehicles detected
Class 4.0: 1 vehicles detected
Class 17.0: 1 vehicles detected
Class 17.0: 1 vehicles detected
Class 17.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 17.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 17.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 4.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 11.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 4.0: 1 vehicles detected
Class 17.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 17.0: 1 vehicles detected
Class 10.0: 1 vehicles detec

In [None]:
import cv2
import os
import torch
from yolov5.models.experimental import attempt_load
from yolov5.utils.torch_utils import select_device
from yolov5.utils.general import non_max_suppression
from yolov5.utils.augmentations import letterbox
from yolov5.utils.plots import Annotator
import numpy as np

# Function to extract frames from a video
def extract_frames(video_path, output_folder):
    os.makedirs(output_folder, exist_ok=True)
    cap = cv2.VideoCapture(video_path)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_rate = int(cap.get(cv2.CAP_PROP_FPS))
    print(f"Extracting {frame_count} frames at {frame_rate} FPS.")

    frame_idx = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame_path = os.path.join(output_folder, f"frame_{frame_idx:04d}.jpg")
        cv2.imwrite(frame_path, frame)
        frame_idx += 1

    cap.release()
    print(f"Frames saved to {output_folder}")
    return frame_rate

# Function to preprocess image for YOLOv5
def preprocess_image(image, img_size=640):
    img_resized = letterbox(image, new_shape=img_size)[0]
    img_resized = img_resized[:, :, ::-1].transpose(2, 0, 1)  # Convert BGR to RGB and reshape
    img_resized = np.ascontiguousarray(img_resized)
    img_tensor = torch.from_numpy(img_resized).float()
    img_tensor /= 255.0  # Normalize
    if img_tensor.ndimension() == 3:
        img_tensor = img_tensor.unsqueeze(0)
    return img_tensor

# Function to perform YOLOv5 inference
def predict_yolo(model, img_tensor, original_shape):
    model.eval()
    with torch.no_grad():
        pred = model(img_tensor)[0]
    pred = non_max_suppression(pred, 0.25, 0.45, classes=None, agnostic=False)

    results = []
    for det in pred:  # Loop through predictions for the frame
        if det is not None and len(det):
            det[:, :4] = det[:, :4].clamp(0)  # Ensure bounding boxes are non-negative
            for *xyxy, conf, cls in det:  # Unpack bounding box, confidence, and class
                xyxy = [int(x.item()) for x in xyxy]  # Convert tensor to list
                results.append((cls, conf, xyxy))  # Append (class, confidence, bbox)
    return results

# Function to draw bounding boxes on an image
# Function to draw bounding boxes on an image
def draw_boxes(image, results, model):
    annotator = Annotator(image, line_width=3)  # Thicker lines
    for cls, conf, (x1, y1, x2, y2) in results:
        label = f"{model.names[int(cls)]} {conf:.2f}"

        # Draw the bounding box
        color = (0, 255, 0)  # Green color for bounding box
        thickness = 4  # Thicker bounding box
        cv2.rectangle(image, (x1, y1), (x2, y2), color, thickness)

        # Draw the label with thicker text
        font = cv2.FONT_HERSHEY_SIMPLEX
        font_scale = 0.8  # Adjust text size
        font_thickness = 2  # Thicker text
        cv2.putText(image, label, (x1, y1 - 10), font, font_scale, color, font_thickness, lineType=cv2.LINE_AA)

    return image


# Function to recombine frames into a video
def create_video_from_frames(frames_folder, output_video_path, frame_rate):
    frame_files = sorted([os.path.join(frames_folder, f) for f in os.listdir(frames_folder) if f.endswith('.jpg')])
    if not frame_files:
        raise ValueError("No frames found in the directory.")

    first_frame = cv2.imread(frame_files[0])
    height, width, _ = first_frame.shape
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for output video
    out = cv2.VideoWriter(output_video_path, fourcc, frame_rate, (width, height))

    for frame_file in frame_files:
        frame = cv2.imread(frame_file)
        out.write(frame)

    out.release()
    print(f"Video saved to {output_video_path}")

# Function to track and avoid double-counting vehicles across frames
def track_vehicles(frame_results, tracked_vehicles, threshold=0.5):
    new_vehicles = []
    for cls, conf, bbox in frame_results:
        matched = False
        for i, (prev_cls, prev_conf, prev_bbox) in enumerate(tracked_vehicles):
            # Calculate Intersection over Union (IoU) between bounding boxes
            iou = calculate_iou(bbox, prev_bbox)
            if iou > threshold and cls == prev_cls:
                matched = True
                tracked_vehicles[i] = (cls, conf, bbox)  # Update tracked vehicle
                break
        if not matched:
            tracked_vehicles.append((cls, conf, bbox))  # Add new vehicle if no match
            new_vehicles.append((cls, conf, bbox))  # Track this vehicle

    return new_vehicles, tracked_vehicles

# Function to calculate Intersection over Union (IoU)
def calculate_iou(bbox1, bbox2):
    x1, y1, x2, y2 = bbox1
    x1_2, y1_2, x2_2, y2_2 = bbox2
    # Calculate intersection
    inter_x1 = max(x1, x1_2)
    inter_y1 = max(y1, y1_2)
    inter_x2 = min(x2, x2_2)
    inter_y2 = min(y2, y2_2)
    inter_area = max(0, inter_x2 - inter_x1) * max(0, inter_y2 - inter_y1)

    # Calculate union
    area1 = (x2 - x1) * (y2 - y1)
    area2 = (x2_2 - x1_2) * (y2_2 - y1_2)
    union_area = area1 + area2 - inter_area

    # Return IoU
    return inter_area / union_area if union_area > 0 else 0

# Main function
def main(video_path, model_path, output_folder, output_video_path):
    # Load YOLOv5 model
    device = select_device('cpu')
    model = attempt_load(model_path)  # Load the model without specifying map_location
    model.to(device)  # Move the model to the desired device

    # Extract frames
    frame_rate = extract_frames(video_path, output_folder)

    # Vehicle tracking variables
    tracked_vehicles = []
    vehicle_count_per_class = {0: 0}  # Assuming vehicle class index is 0, expand this for multiple classes

    # Process each frame
    for frame_file in sorted(os.listdir(output_folder)):
        frame_path = os.path.join(output_folder, frame_file)
        image = cv2.imread(frame_path)
        original_shape = image.shape[:2]

        # Preprocess image
        img_tensor = preprocess_image(image)

        # YOLOv5 prediction
        results = predict_yolo(model, img_tensor, original_shape)

        # Track vehicles across frames
        new_vehicles, tracked_vehicles = track_vehicles(results, tracked_vehicles)

        # Update vehicle count for each class
        for cls, conf, _ in new_vehicles:
            vehicle_count_per_class[cls] = vehicle_count_per_class.get(cls, 0) + 1

        # Draw bounding boxes
        processed_image = draw_boxes(image, results, model)

        # Save processed frame
        cv2.imwrite(frame_path, processed_image)

    # Recombine frames into a video
    create_video_from_frames(output_folder, output_video_path, frame_rate)

    # Generate theoretical report
    print("Detection Report:")
    for cls, count in vehicle_count_per_class.items():
        print(f"Class {cls}: {count} vehicles detected")

# Run the pipeline
video_path = '/content/drive/MyDrive/b.mp4'  # Path to input video
model_path = '/content/drive/MyDrive/fine_tuned_yolo_model_50.pt'  # Path to YOLOv5 weights
output_folder = '/content/frames'  # Folder to save extracted and processed frames
output_video_path = '/content/drive/MyDrive/ot5.mp4'  # Path to save output video

main(video_path, model_path, output_folder, output_video_path)


YOLOv5 🚀 v7.0-388-g882c35fc Python-3.10.12 torch-2.5.1+cu121 CPU

Fusing layers... 
Model summary: 157 layers, 7066762 parameters, 0 gradients, 15.9 GFLOPs


Extracting 917 frames at 29 FPS.
Frames saved to /content/frames
Video saved to /content/drive/MyDrive/ot5.mp4
Detection Report:
Class 0: 0 vehicles detected
Class 5.0: 1 vehicles detected
Class 4.0: 1 vehicles detected
Class 17.0: 1 vehicles detected
Class 17.0: 1 vehicles detected
Class 17.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 17.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 17.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 4.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 11.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 4.0: 1 vehicles detected
Class 17.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 17.0: 1 vehicles detected
Class 10.0: 1 vehicles detec

In [None]:
import cv2
import os
import torch
from yolov5.models.experimental import attempt_load
from yolov5.utils.torch_utils import select_device
from yolov5.utils.general import non_max_suppression
from yolov5.utils.augmentations import letterbox
from yolov5.utils.plots import Annotator
import numpy as np

# Function to extract frames from a video
def extract_frames(video_path, output_folder):
    os.makedirs(output_folder, exist_ok=True)
    cap = cv2.VideoCapture(video_path)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_rate = int(cap.get(cv2.CAP_PROP_FPS))
    print(f"Extracting {frame_count} frames at {frame_rate} FPS.")

    frame_idx = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame_path = os.path.join(output_folder, f"frame_{frame_idx:04d}.jpg")
        cv2.imwrite(frame_path, frame)
        frame_idx += 1

    cap.release()
    print(f"Frames saved to {output_folder}")
    return frame_rate

# Function to preprocess image for YOLOv5
def preprocess_image(image, img_size=640):
    img_resized = letterbox(image, new_shape=img_size)[0]
    img_resized = img_resized[:, :, ::-1].transpose(2, 0, 1)  # Convert BGR to RGB and reshape
    img_resized = np.ascontiguousarray(img_resized)
    img_tensor = torch.from_numpy(img_resized).float()
    img_tensor /= 255.0  # Normalize
    if img_tensor.ndimension() == 3:
        img_tensor = img_tensor.unsqueeze(0)
    return img_tensor

# Function to perform YOLOv5 inference
def predict_yolo(model, img_tensor, original_shape):
    model.eval()
    with torch.no_grad():
        pred = model(img_tensor)[0]
    pred = non_max_suppression(pred, 0.25, 0.45, classes=None, agnostic=False)

    results = []
    for det in pred:  # Loop through predictions for the frame
        if det is not None and len(det):
            det[:, :4] = det[:, :4].clamp(0)  # Ensure bounding boxes are non-negative
            for *xyxy, conf, cls in det:  # Unpack bounding box, confidence, and class
                xyxy = [int(x.item()) for x in xyxy]  # Convert tensor to list
                results.append((cls, conf, xyxy))  # Append (class, confidence, bbox)
    return results

# Function to draw bounding boxes on an image
# Function to draw bounding boxes on an image
# Function to change vehicle color based on class
def draw_boxes(image, results, model):
    # Define a color palette for different classes (you can customize these colors)
    class_colors = {
        0: (0, 255, 0),     # Green for class 0 (e.g., Car)
        1: (255, 0, 0),     # Blue for class 1 (e.g., Truck)
        2: (0, 0, 255),     # Red for class 2 (e.g., Bus)
        3: (255, 255, 0),   # Cyan for class 3 (e.g., Motorcycle)
        4: (0, 255, 255),   # Yellow for class 4 (e.g., Bicycle)
        5: (255, 0, 255),   # Magenta for class 5 (e.g., Pedestrian)
        6: (255, 165, 0),   # Orange for class 6
        7: (255, 192, 203), # Pink for class 7
        8: (128, 0, 128),   # Purple for class 8
        9: (0, 128, 0),     # Dark Green for class 9
        10: (128, 128, 0),  # Olive for class 10
        11: (0, 0, 128),    # Navy for class 11
        12: (128, 128, 128),# Grey for class 12
        13: (0, 255, 127),  # Spring Green for class 13
        14: (255, 105, 180) # Hot Pink for class 14
    }

    for cls, conf, (x1, y1, x2, y2) in results:
        # Get the color for the class (default to white if class is not defined in the dictionary)
        color = class_colors.get(int(cls), (255, 255, 255))

        # Apply the color to the detected vehicle region
        image[y1:y2, x1:x2] = cv2.addWeighted(image[y1:y2, x1:x2], 0.5, np.full_like(image[y1:y2, x1:x2], color), 0.5, 0)

        # Optionally, add label with confidence (if needed)
        label = f"{model.names[int(cls)]} {conf:.2f}"
        font = cv2.FONT_HERSHEY_SIMPLEX
        font_scale = 0.8  # Adjust text size
        font_thickness = 2  # Thicker text
        cv2.putText(image, label, (x1, y1 - 10), font, font_scale, color, font_thickness, lineType=cv2.LINE_AA)

    return image



# Function to recombine frames into a video
def create_video_from_frames(frames_folder, output_video_path, frame_rate):
    frame_files = sorted([os.path.join(frames_folder, f) for f in os.listdir(frames_folder) if f.endswith('.jpg')])
    if not frame_files:
        raise ValueError("No frames found in the directory.")

    first_frame = cv2.imread(frame_files[0])
    height, width, _ = first_frame.shape
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for output video
    out = cv2.VideoWriter(output_video_path, fourcc, frame_rate, (width, height))

    for frame_file in frame_files:
        frame = cv2.imread(frame_file)
        out.write(frame)

    out.release()
    print(f"Video saved to {output_video_path}")

# Function to track and avoid double-counting vehicles across frames
def track_vehicles(frame_results, tracked_vehicles, threshold=0.5):
    new_vehicles = []
    for cls, conf, bbox in frame_results:
        matched = False
        for i, (prev_cls, prev_conf, prev_bbox) in enumerate(tracked_vehicles):
            # Calculate Intersection over Union (IoU) between bounding boxes
            iou = calculate_iou(bbox, prev_bbox)
            if iou > threshold and cls == prev_cls:
                matched = True
                tracked_vehicles[i] = (cls, conf, bbox)  # Update tracked vehicle
                break
        if not matched:
            tracked_vehicles.append((cls, conf, bbox))  # Add new vehicle if no match
            new_vehicles.append((cls, conf, bbox))  # Track this vehicle

    return new_vehicles, tracked_vehicles

# Function to calculate Intersection over Union (IoU)
def calculate_iou(bbox1, bbox2):
    x1, y1, x2, y2 = bbox1
    x1_2, y1_2, x2_2, y2_2 = bbox2
    # Calculate intersection
    inter_x1 = max(x1, x1_2)
    inter_y1 = max(y1, y1_2)
    inter_x2 = min(x2, x2_2)
    inter_y2 = min(y2, y2_2)
    inter_area = max(0, inter_x2 - inter_x1) * max(0, inter_y2 - inter_y1)

    # Calculate union
    area1 = (x2 - x1) * (y2 - y1)
    area2 = (x2_2 - x1_2) * (y2_2 - y1_2)
    union_area = area1 + area2 - inter_area

    # Return IoU
    return inter_area / union_area if union_area > 0 else 0

# Main function
def main(video_path, model_path, output_folder, output_video_path):
    # Load YOLOv5 model
    device = select_device('cpu')
    model = attempt_load(model_path)  # Load the model without specifying map_location
    model.to(device)  # Move the model to the desired device

    # Extract frames
    frame_rate = extract_frames(video_path, output_folder)

    # Vehicle tracking variables
    tracked_vehicles = []
    vehicle_count_per_class = {0: 0}  # Assuming vehicle class index is 0, expand this for multiple classes

    # Process each frame
    for frame_file in sorted(os.listdir(output_folder)):
        frame_path = os.path.join(output_folder, frame_file)
        image = cv2.imread(frame_path)
        original_shape = image.shape[:2]

        # Preprocess image
        img_tensor = preprocess_image(image)

        # YOLOv5 prediction
        results = predict_yolo(model, img_tensor, original_shape)

        # Track vehicles across frames
        new_vehicles, tracked_vehicles = track_vehicles(results, tracked_vehicles)

        # Update vehicle count for each class
        for cls, conf, _ in new_vehicles:
            vehicle_count_per_class[cls] = vehicle_count_per_class.get(cls, 0) + 1

        # Draw bounding boxes
        processed_image = draw_boxes(image, results, model)

        # Save processed frame
        cv2.imwrite(frame_path, processed_image)

    # Recombine frames into a video
    create_video_from_frames(output_folder, output_video_path, frame_rate)

    # Generate theoretical report
    print("Detection Report:")
    for cls, count in vehicle_count_per_class.items():
        print(f"Class {cls}: {count} vehicles detected")

# Run the pipeline
video_path = '/content/drive/MyDrive/b.mp4'  # Path to input video
model_path = '/content/drive/MyDrive/fine_tuned_yolo_model_50.pt'  # Path to YOLOv5 weights
output_folder = '/content/frames'  # Folder to save extracted and processed frames
output_video_path = '/content/drive/MyDrive/ot6.mp4'  # Path to save output video

main(video_path, model_path, output_folder, output_video_path)


YOLOv5 🚀 v7.0-388-g882c35fc Python-3.10.12 torch-2.5.1+cu121 CPU

Fusing layers... 
Model summary: 157 layers, 7066762 parameters, 0 gradients, 15.9 GFLOPs


Extracting 917 frames at 29 FPS.
Frames saved to /content/frames
Video saved to /content/drive/MyDrive/ot6.mp4
Detection Report:
Class 0: 0 vehicles detected
Class 5.0: 1 vehicles detected
Class 4.0: 1 vehicles detected
Class 17.0: 1 vehicles detected
Class 17.0: 1 vehicles detected
Class 17.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 17.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 17.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 4.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 11.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 4.0: 1 vehicles detected
Class 17.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 17.0: 1 vehicles detected
Class 10.0: 1 vehicles detec

In [None]:
#perfect
import cv2
import os
import torch
from yolov5.models.experimental import attempt_load
from yolov5.utils.torch_utils import select_device
from yolov5.utils.general import non_max_suppression
from yolov5.utils.augmentations import letterbox
from yolov5.utils.plots import Annotator
import numpy as np

# Function to extract frames from a video
def extract_frames(video_path, output_folder):
    os.makedirs(output_folder, exist_ok=True)
    cap = cv2.VideoCapture(video_path)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_rate = int(cap.get(cv2.CAP_PROP_FPS))
    print(f"Extracting {frame_count} frames at {frame_rate} FPS.")

    frame_idx = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame_path = os.path.join(output_folder, f"frame_{frame_idx:04d}.jpg")
        cv2.imwrite(frame_path, frame)
        frame_idx += 1

    cap.release()
    print(f"Frames saved to {output_folder}")
    return frame_rate

# Function to preprocess image for YOLOv5
# Updated preprocess_image function
def preprocess_image(image, img_size=640):
    # Resize image while maintaining aspect ratio
    img_resized, ratio, padding = letterbox(image, new_shape=img_size, auto=True)
    img_resized = img_resized[:, :, ::-1].transpose(2, 0, 1)  # Convert BGR to RGB and reshape
    img_resized = np.ascontiguousarray(img_resized)
    img_tensor = torch.from_numpy(img_resized).float()
    img_tensor /= 255.0  # Normalize to [0, 1]
    if img_tensor.ndimension() == 3:
        img_tensor = img_tensor.unsqueeze(0)
    return img_tensor, ratio, padding


# Updated predict_yolo function
def predict_yolo(model, img_tensor, original_shape, ratio, padding):
    model.eval()
    with torch.no_grad():
        pred = model(img_tensor)[0]
    pred = non_max_suppression(pred, 0.25, 0.45, classes=None, agnostic=False)

    results = []
    for det in pred:
        if det is not None and len(det):
            # Scale boxes back to original image size
            det[:, :4] = scale_coords(img_tensor.shape[2:], det[:, :4], original_shape, ratio_pad=(ratio, padding)).round()
            for *xyxy, conf, cls in det:
                xyxy = [int(x.item()) for x in xyxy]  # Convert tensor to integers
                results.append((cls, conf, xyxy))  # Append (class, confidence, bbox)
    return results

# Scale coordinates from resized image back to original image dimensions
# Scale coordinates from resized image back to original image dimensions
def scale_coords(img1_shape, coords, img0_shape, ratio_pad):
    # Extract gain as a scalar (ensure uniform scaling if ratio_pad[0] is a tuple)
    gain = ratio_pad[0][0] if isinstance(ratio_pad[0], (list, tuple)) else ratio_pad[0]
    pad = ratio_pad[1]  # Extract padding (x, y)

    # Adjust coordinates based on padding
    coords[:, [0, 2]] -= pad[0]  # Subtract padding from x-coordinates
    coords[:, [1, 3]] -= pad[1]  # Subtract padding from y-coordinates

    # Scale coordinates to the original image size
    coords[:, :4] /= gain  # Divide by the scaling factor (now a scalar)

    # Clip coordinates to ensure they are within the image bounds
    coords[:, [0, 2]] = coords[:, [0, 2]].clamp(0, img0_shape[1])  # Clip x-coordinates
    coords[:, [1, 3]] = coords[:, [1, 3]].clamp(0, img0_shape[0])  # Clip y-coordinates

    return coords




# Function to draw bounding boxes on an image
# Function to draw bounding boxes on an image
def draw_boxes(image, results, model):
    annotator = Annotator(image, line_width=3)  # Thicker lines
    for cls, conf, (x1, y1, x2, y2) in results:
        label = f"{model.names[int(cls)]} {conf:.2f}"

        # Draw the bounding box
        color = (0, 255, 0)  # Green color for bounding box
        thickness = 4  # Thicker bounding box
        cv2.rectangle(image, (x1, y1), (x2, y2), color, thickness)

        # Draw the label with thicker text
        font = cv2.FONT_HERSHEY_SIMPLEX
        font_scale = 0.8  # Adjust text size
        font_thickness = 2  # Thicker text
        cv2.putText(image, label, (x1, y1 - 10), font, font_scale, color, font_thickness, lineType=cv2.LINE_AA)

    return image


# Function to recombine frames into a video
def create_video_from_frames(frames_folder, output_video_path, frame_rate):
    frame_files = sorted([os.path.join(frames_folder, f) for f in os.listdir(frames_folder) if f.endswith('.jpg')])
    if not frame_files:
        raise ValueError("No frames found in the directory.")

    first_frame = cv2.imread(frame_files[0])
    height, width, _ = first_frame.shape
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for output video
    out = cv2.VideoWriter(output_video_path, fourcc, frame_rate, (width, height))

    for frame_file in frame_files:
        frame = cv2.imread(frame_file)
        out.write(frame)

    out.release()
    print(f"Video saved to {output_video_path}")

# Function to track and avoid double-counting vehicles across frames
def track_vehicles(frame_results, tracked_vehicles, threshold=0.5):
    new_vehicles = []
    for cls, conf, bbox in frame_results:
        matched = False
        for i, (prev_cls, prev_conf, prev_bbox) in enumerate(tracked_vehicles):
            # Calculate Intersection over Union (IoU) between bounding boxes
            iou = calculate_iou(bbox, prev_bbox)
            if iou > threshold and cls == prev_cls:
                matched = True
                tracked_vehicles[i] = (cls, conf, bbox)  # Update tracked vehicle
                break
        if not matched:
            tracked_vehicles.append((cls, conf, bbox))  # Add new vehicle if no match
            new_vehicles.append((cls, conf, bbox))  # Track this vehicle

    return new_vehicles, tracked_vehicles

# Function to calculate Intersection over Union (IoU)
def calculate_iou(bbox1, bbox2):
    x1, y1, x2, y2 = bbox1
    x1_2, y1_2, x2_2, y2_2 = bbox2
    # Calculate intersection
    inter_x1 = max(x1, x1_2)
    inter_y1 = max(y1, y1_2)
    inter_x2 = min(x2, x2_2)
    inter_y2 = min(y2, y2_2)
    inter_area = max(0, inter_x2 - inter_x1) * max(0, inter_y2 - inter_y1)

    # Calculate union
    area1 = (x2 - x1) * (y2 - y1)
    area2 = (x2_2 - x1_2) * (y2_2 - y1_2)
    union_area = area1 + area2 - inter_area

    # Return IoU
    return inter_area / union_area if union_area > 0 else 0

# Main function
def main(video_path, model_path, output_folder, output_video_path):
    # Load YOLOv5 model
    device = select_device('cpu')
    model = attempt_load(model_path)  # Load the model without specifying map_location
    model.to(device)  # Move the model to the desired device

    # Extract frames
    frame_rate = extract_frames(video_path, output_folder)

    # Vehicle tracking variables
    tracked_vehicles = []
    vehicle_count_per_class = {0: 0}  # Assuming vehicle class index is 0, expand this for multiple classes

    # Process each frame
    for frame_file in sorted(os.listdir(output_folder)):
        frame_path = os.path.join(output_folder, frame_file)
        image = cv2.imread(frame_path)
        original_shape = image.shape[:2]

        # Preprocess image
        img_tensor, ratio, padding = preprocess_image(image)

        # YOLOv5 prediction
        results = predict_yolo(model, img_tensor, original_shape, ratio, padding)
        # Track vehicles across frames
        new_vehicles, tracked_vehicles = track_vehicles(results, tracked_vehicles)


        # Update vehicle count for each class
        for cls, conf, _ in new_vehicles:
            vehicle_count_per_class[cls] = vehicle_count_per_class.get(cls, 0) + 1

        # Draw bounding boxes
        processed_image = draw_boxes(image, results, model)

        # Save processed frame
        cv2.imwrite(frame_path, processed_image)

    # Recombine frames into a video
    create_video_from_frames(output_folder, output_video_path, frame_rate)

    # Generate theoretical report
    print("Detection Report:")
    for cls, count in vehicle_count_per_class.items():
        print(f"Class {cls}: {count} vehicles detected")

# Run the pipeline
video_path = '/content/drive/MyDrive/b.mp4'  # Path to input video
model_path = '/content/drive/MyDrive/fine_tuned_yolo_model_50.pt'  # Path to YOLOv5 weights
output_folder = '/content/frames'  # Folder to save extracted and processed frames
output_video_path = '/content/drive/MyDrive/ot7.mp4'  # Path to save output video

main(video_path, model_path, output_folder, output_video_path)


YOLOv5 🚀 v7.0-388-g882c35fc Python-3.10.12 torch-2.5.1+cu121 CPU

Fusing layers... 
Model summary: 157 layers, 7066762 parameters, 0 gradients, 15.9 GFLOPs


Extracting 917 frames at 29 FPS.


KeyboardInterrupt: 

In [None]:
import cv2
import os
import torch
from yolov5.models.experimental import attempt_load
from yolov5.utils.torch_utils import select_device
from yolov5.utils.general import non_max_suppression
from yolov5.utils.augmentations import letterbox
from yolov5.utils.plots import Annotator
import numpy as np

# Function to extract frames from a video
def extract_frames(video_path, output_folder):
    os.makedirs(output_folder, exist_ok=True)
    cap = cv2.VideoCapture(video_path)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_rate = int(cap.get(cv2.CAP_PROP_FPS))
    print(f"Extracting {frame_count} frames at {frame_rate} FPS.")

    frame_idx = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame_path = os.path.join(output_folder, f"frame_{frame_idx:04d}.jpg")
        cv2.imwrite(frame_path, frame)
        frame_idx += 1

    cap.release()
    print(f"Frames saved to {output_folder}")
    return frame_rate

# Preprocess image for YOLOv5
def preprocess_image(image, img_size=640):
    img_resized, ratio, padding = letterbox(image, new_shape=img_size, auto=True)
    img_resized = img_resized[:, :, ::-1].transpose(2, 0, 1)
    img_resized = np.ascontiguousarray(img_resized)
    img_tensor = torch.from_numpy(img_resized).float()
    img_tensor /= 255.0
    if img_tensor.ndimension() == 3:
        img_tensor = img_tensor.unsqueeze(0)
    return img_tensor, ratio, padding

# Predict using YOLOv5
def predict_yolo(model, img_tensor, original_shape, ratio, padding):
    model.eval()
    with torch.no_grad():
        pred = model(img_tensor)[0]
    pred = non_max_suppression(pred, 0.25, 0.45, classes=None, agnostic=False)
    results = []
    for det in pred:
        if det is not None and len(det):
            det[:, :4] = scale_coords(img_tensor.shape[2:], det[:, :4], original_shape, ratio_pad=(ratio, padding)).round()
            for *xyxy, conf, cls in det:
                xyxy = [int(x.item()) for x in xyxy]
                results.append((cls, conf, xyxy))
    return results

# Scale coordinates from resized image back to original dimensions
def scale_coords(img1_shape, coords, img0_shape, ratio_pad):
    gain = ratio_pad[0][0] if isinstance(ratio_pad[0], (list, tuple)) else ratio_pad[0]
    pad = ratio_pad[1]
    coords[:, [0, 2]] -= pad[0]
    coords[:, [1, 3]] -= pad[1]
    coords[:, :4] /= gain
    coords[:, [0, 2]] = coords[:, [0, 2]].clamp(0, img0_shape[1])
    coords[:, [1, 3]] = coords[:, [1, 3]].clamp(0, img0_shape[0])
    return coords

# Function to draw bounding boxes on an image
def draw_boxes(image, results, model):
    annotator = Annotator(image, line_width=3)
    for cls, conf, (x1, y1, x2, y2) in results:
        label = f"{model.names[int(cls)]} {conf:.2f}"
        color = (0, 255, 0)
        thickness = 4
        cv2.rectangle(image, (x1, y1), (x2, y2), color, thickness)
        font = cv2.FONT_HERSHEY_SIMPLEX
        font_scale = 0.8
        font_thickness = 2
        cv2.putText(image, label, (x1, y1 - 10), font, font_scale, color, font_thickness, lineType=cv2.LINE_AA)
    return image

# Recombine frames into a video
def create_video_from_frames(frames_folder, output_video_path, frame_rate):
    frame_files = sorted([os.path.join(frames_folder, f) for f in os.listdir(frames_folder) if f.endswith('.jpg')])
    if not frame_files:
        raise ValueError("No frames found in the directory.")
    first_frame = cv2.imread(frame_files[0])
    height, width, _ = first_frame.shape
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, frame_rate, (width, height))
    for frame_file in frame_files:
        frame = cv2.imread(frame_file)
        out.write(frame)
    out.release()
    print(f"Video saved to {output_video_path}")

# Track vehicles across frames
def track_vehicles(frame_results, tracked_vehicles, threshold=0.5):
    new_vehicles = []
    for cls, conf, bbox in frame_results:
        matched = False
        for i, (prev_cls, prev_conf, prev_bbox) in enumerate(tracked_vehicles):
            iou = calculate_iou(bbox, prev_bbox)
            if iou > threshold and cls == prev_cls:
                matched = True
                tracked_vehicles[i] = (cls, conf, bbox)
                break
        if not matched:
            tracked_vehicles.append((cls, conf, bbox))
            new_vehicles.append((cls, conf, bbox))
    return new_vehicles, tracked_vehicles

# Calculate IoU
def calculate_iou(bbox1, bbox2):
    x1, y1, x2, y2 = bbox1
    x1_2, y1_2, x2_2, y2_2 = bbox2
    inter_x1 = max(x1, x1_2)
    inter_y1 = max(y1, y1_2)
    inter_x2 = min(x2, x2_2)
    inter_y2 = min(y2, y2_2)
    inter_area = max(0, inter_x2 - inter_x1) * max(0, inter_y2 - inter_y1)
    area1 = (x2 - x1) * (y2 - y1)
    area2 = (x2_2 - x1_2) * (y2_2 - y1_2)
    union_area = area1 + area2 - inter_area
    return inter_area / union_area if union_area > 0 else 0

# Main pipeline
def main(video_path, model_path, output_folder, output_video_path):
    device = select_device('cpu')
    model = attempt_load(model_path)
    model.to(device)
    frame_rate = extract_frames(video_path, output_folder)
    tracked_vehicles = []
    vehicle_count_per_class = {}
    for frame_file in sorted(os.listdir(output_folder)):
        frame_path = os.path.join(output_folder, frame_file)
        image = cv2.imread(frame_path)
        original_shape = image.shape[:2]
        img_tensor, ratio, padding = preprocess_image(image)
        results = predict_yolo(model, img_tensor, original_shape, ratio, padding)
        new_vehicles, tracked_vehicles = track_vehicles(results, tracked_vehicles)
        for cls, conf, _ in new_vehicles:
            vehicle_count_per_class[cls] = vehicle_count_per_class.get(cls, 0) + 1
        processed_image = draw_boxes(image, results, model)
        cv2.imwrite(frame_path, processed_image)
    create_video_from_frames(output_folder, output_video_path, frame_rate)
    print("Detection Report:")
    for cls, count in vehicle_count_per_class.items():
        print(f"Class {cls}: {count} vehicles detected")

# Run the pipeline
video_path = '/content/drive/MyDrive/b.mp4'
model_path = '/content/drive/MyDrive/fine_tuned_yolo_model_50.pt'
output_folder = '/content/frames'
output_video_path = '/content/drive/MyDrive/ot8.mp4'

main(video_path, model_path, output_folder, output_video_path)


YOLOv5 🚀 v7.0-388-g882c35fc Python-3.10.12 torch-2.5.1+cu121 CPU

Fusing layers... 
Model summary: 157 layers, 7066762 parameters, 0 gradients, 15.9 GFLOPs


Extracting 917 frames at 29 FPS.
Frames saved to /content/frames
Video saved to /content/drive/MyDrive/ot8.mp4
Detection Report:
Class 5.0: 1 vehicles detected
Class 4.0: 1 vehicles detected
Class 17.0: 1 vehicles detected
Class 17.0: 1 vehicles detected
Class 17.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 17.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 17.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 4.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 11.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 4.0: 1 vehicles detected
Class 17.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 5.0: 1 vehicles detected
Class 17.0: 1 vehicles detected
Class 10.0: 1 vehicles detected
Class 5.0: 1 vehicles det

In [None]:
import cv2
import os
import torch
from yolov5.models.experimental import attempt_load
from yolov5.utils.torch_utils import select_device
from yolov5.utils.general import non_max_suppression
from yolov5.utils.augmentations import letterbox
from yolov5.utils.plots import Annotator
import numpy as np

# Function to extract frames from a video
def extract_frames(video_path, output_folder):
    os.makedirs(output_folder, exist_ok=True)
    cap = cv2.VideoCapture(video_path)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_rate = int(cap.get(cv2.CAP_PROP_FPS))
    print(f"Extracting {frame_count} frames at {frame_rate} FPS.")

    frame_idx = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame_path = os.path.join(output_folder, f"frame_{frame_idx:04d}.jpg")
        cv2.imwrite(frame_path, frame)
        frame_idx += 1

    cap.release()
    print(f"Frames saved to {output_folder}")
    return frame_rate

# Preprocess image for YOLOv5
def preprocess_image(image, img_size=640):
    img_resized, ratio, padding = letterbox(image, new_shape=img_size, auto=True)
    img_resized = img_resized[:, :, ::-1].transpose(2, 0, 1)
    img_resized = np.ascontiguousarray(img_resized)
    img_tensor = torch.from_numpy(img_resized).float()
    img_tensor /= 255.0
    if img_tensor.ndimension() == 3:
        img_tensor = img_tensor.unsqueeze(0)
    return img_tensor, ratio, padding

# Predict using YOLOv5
def predict_yolo(model, img_tensor, original_shape, ratio, padding):
    model.eval()
    with torch.no_grad():
        pred = model(img_tensor)[0]
    pred = non_max_suppression(pred, 0.25, 0.45, classes=None, agnostic=False)
    results = []
    for det in pred:
        if det is not None and len(det):
            det[:, :4] = scale_coords(img_tensor.shape[2:], det[:, :4], original_shape, ratio_pad=(ratio, padding)).round()
            for *xyxy, conf, cls in det:
                xyxy = [int(x.item()) for x in xyxy]
                results.append((cls, conf, xyxy))
    return results

# Scale coordinates from resized image back to original dimensions
def scale_coords(img1_shape, coords, img0_shape, ratio_pad):
    gain = ratio_pad[0][0] if isinstance(ratio_pad[0], (list, tuple)) else ratio_pad[0]
    pad = ratio_pad[1]
    coords[:, [0, 2]] -= pad[0]
    coords[:, [1, 3]] -= pad[1]
    coords[:, :4] /= gain
    coords[:, [0, 2]] = coords[:, [0, 2]].clamp(0, img0_shape[1])
    coords[:, [1, 3]] = coords[:, [1, 3]].clamp(0, img0_shape[0])
    return coords

# Function to draw bounding boxes on an image
def draw_boxes(image, results, model):
    annotator = Annotator(image, line_width=3)
    for cls, conf, (x1, y1, x2, y2) in results:
        label = f"{model.names[int(cls)]} {conf:.2f}"
        color = (0, 255, 0)
        thickness = 4
        cv2.rectangle(image, (x1, y1), (x2, y2), color, thickness)
        font = cv2.FONT_HERSHEY_SIMPLEX
        font_scale = 0.8
        font_thickness = 2
        cv2.putText(image, label, (x1, y1 - 10), font, font_scale, color, font_thickness, lineType=cv2.LINE_AA)
    return image

# Recombine frames into a video
def create_video_from_frames(frames_folder, output_video_path, frame_rate):
    frame_files = sorted([os.path.join(frames_folder, f) for f in os.listdir(frames_folder) if f.endswith('.jpg')])
    if not frame_files:
        raise ValueError("No frames found in the directory.")
    first_frame = cv2.imread(frame_files[0])
    height, width, _ = first_frame.shape
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, frame_rate, (width, height))
    for frame_file in frame_files:
        frame = cv2.imread(frame_file)
        out.write(frame)
    out.release()
    print(f"Video saved to {output_video_path}")

# Track vehicles across frames
def track_vehicles(frame_results, tracked_vehicles, threshold=0.5):
    new_vehicles = []
    for cls, conf, bbox in frame_results:
        matched = False
        for i, (prev_cls, prev_conf, prev_bbox) in enumerate(tracked_vehicles):
            iou = calculate_iou(bbox, prev_bbox)
            if iou > threshold and cls == prev_cls:
                matched = True
                tracked_vehicles[i] = (cls, conf, bbox)
                break
        if not matched:
            tracked_vehicles.append((cls, conf, bbox))
            new_vehicles.append((cls, conf, bbox))
    return new_vehicles, tracked_vehicles

# Calculate IoU
def calculate_iou(bbox1, bbox2):
    x1, y1, x2, y2 = bbox1
    x1_2, y1_2, x2_2, y2_2 = bbox2
    inter_x1 = max(x1, x1_2)
    inter_y1 = max(y1, y1_2)
    inter_x2 = min(x2, x2_2)
    inter_y2 = min(y2, y2_2)
    inter_area = max(0, inter_x2 - inter_x1) * max(0, inter_y2 - inter_y1)
    area1 = (x2 - x1) * (y2 - y1)
    area2 = (x2_2 - x1_2) * (y2_2 - y1_2)
    union_area = area1 + area2 - inter_area
    return inter_area / union_area if union_area > 0 else 0

# Main pipeline
# Main pipeline
def main(video_path, model_path, output_folder, output_video_path):
    device = select_device('cpu')
    model = attempt_load(model_path)  # Load the model without specifying map_location
    model.to(device)  # Move the model to the desired device


    # Extract frames from video
    frame_rate = extract_frames(video_path, output_folder)

    # Vehicle tracking variables
    tracked_vehicles = []
    vehicle_count_per_class = {}  # To store counts per class

    # Process each frame
    for frame_file in sorted(os.listdir(output_folder)):
        frame_path = os.path.join(output_folder, frame_file)
        image = cv2.imread(frame_path)
        original_shape = image.shape[:2]

        # Preprocess the image
        img_tensor, ratio, padding = preprocess_image(image)

        # YOLOv5 prediction
        results = predict_yolo(model, img_tensor, original_shape, ratio, padding)

        # Track vehicles across frames
        new_vehicles, tracked_vehicles = track_vehicles(results, tracked_vehicles)

        # Update counts for each class
        for cls, conf, _ in new_vehicles:
            vehicle_count_per_class[cls] = vehicle_count_per_class.get(cls, 0) + 1

        # Draw bounding boxes on the image
        processed_image = draw_boxes(image, results, model)

        # Save the processed frame
        cv2.imwrite(frame_path, processed_image)

    # Combine processed frames into a video
    create_video_from_frames(output_folder, output_video_path, frame_rate)

    # Summarize the detection results
    total_vehicles = sum(vehicle_count_per_class.values())
    print("\nDetection Report:")
    print(f"Total Vehicles Detected: {total_vehicles}")
    for cls, count in vehicle_count_per_class.items():
        class_name = model.names[int(cls)] if hasattr(model, 'names') else f"Class {cls}"
        print(f"{class_name}: {count} vehicles detected")

# Run the pipeline
video_path = '/content/drive/MyDrive/b.mp4'  # Path to input video
model_path = '/content/drive/MyDrive/fine_tuned_yolo_model_50.pt'  # YOLOv5 weights path
output_folder = '/content/frames'  # Folder for extracted frames
output_video_path = '/content/drive/MyDrive/ot22.mp4'  # Output video path

main(video_path, model_path, output_folder, output_video_path)


Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


YOLOv5 🚀 v7.0-388-g882c35fc Python-3.10.12 torch-2.5.1+cu121 CPU

Fusing layers... 
Model summary: 157 layers, 7066762 parameters, 0 gradients, 15.9 GFLOPs


Extracting 917 frames at 29 FPS.
Frames saved to /content/frames
Video saved to /content/drive/MyDrive/ot22.mp4

Detection Report:
Total Vehicles Detected: 1108
car: 1 vehicles detected
bus: 1 vehicles detected
three wheelers -CNG-: 1 vehicles detected
three wheelers -CNG-: 1 vehicles detected
three wheelers -CNG-: 1 vehicles detected
car: 1 vehicles detected
car: 1 vehicles detected
three wheelers -CNG-: 1 vehicles detected
car: 1 vehicles detected
three wheelers -CNG-: 1 vehicles detected
car: 1 vehicles detected
bus: 1 vehicles detected
car: 1 vehicles detected
car: 1 vehicles detected
pickup: 1 vehicles detected
car: 1 vehicles detected
car: 1 vehicles detected
bus: 1 vehicles detected
three wheelers -CNG-: 1 vehicles detected
car: 1 vehicles detected
car: 1 vehicles detected
car: 1 vehicles detected
car: 1 vehicles detected
car: 1 vehicles detected
car: 1 vehicles detected
three wheelers -CNG-: 1 vehicles detected
motorbike: 1 vehicles detected
car: 1 vehicles detected
three wheel

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
