In [57]:
import ndjson
import json
import os
import requests
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
import cv2
import csv
import torch
import clip
from PIL import Image
from collections import deque
import datetime
import csv
from pathlib import Path
from ultralytics import YOLO

In [59]:
# path of the labelbox metadata file (an ndjson export, one record per video)
META_DATA_PATH = 'Metadata/Export  project - Fred-Labels - 7_16_2025.ndjson'

# directory to download the videos
VIDEOS_DIRECTORY = r"data/videos"

# directory that holds the frame images extracted from the videos
IMAGES_DIRECTORY = 'data/images'

# CSV file listing every exported frame and its label; it is opened in write
# mode by the dataset-building cell, so it is recreated on every run
CSV_PATH = 'car_state.csv'

# weights file of the YOLO detector used to look for cars in sampled frames
# NOTE(review): absolute, machine-specific Windows path — adjust per environment
YOLO_MODEL_PATH = r"E:\VS Code Folders\yolo_training\runs\yolo11l.pt_2025-07-01_epochs_80_val_0.08\weights\best.pt"


In [60]:
def check_path(video_path):
    """Make sure the directory ``video_path`` exists.

    Creates the directory (including missing parents) when it is absent and
    prints a one-line status message in either case. Returns None.
    """
    already_present = os.path.exists(video_path)
    if already_present:
        print(f"Path {video_path} already exists..!!")
        return

    os.makedirs(video_path)
    print(f"Path {video_path} has been created successfully..!!")

In [61]:
# Create the video and image output directories up front (videos first, then images).
for required_dir in (VIDEOS_DIRECTORY, IMAGES_DIRECTORY):
    check_path(required_dir)

Path data/videos already exists..!!
Path data/images already exists..!!


In [62]:
# Class names for the two frame categories: index 0 is 'offroad' and index 1 is
# 'onroad', matching the 0/1 values written to the CSV's "label" column.
LABELS = ['offroad', 'onroad']

In [63]:
def download_video(video_url, video_name, video_path, timeout=30):
    """
    Download a video from ``video_url`` and save it at ``video_path``.

    The payload is streamed into a temporary ``<video_path>.part`` file that is
    renamed to ``video_path`` only after the whole transfer succeeded. An
    interrupted download therefore never leaves a truncated file behind that a
    later run would skip as "already exists".

    Args:
        video_url (str): The URL of the video to be downloaded.
        video_name (str): Display name of the video; used in log messages only.
        video_path (str): Full path of the file the video is written to.
        timeout (float or tuple): Timeout in seconds handed to ``requests.get``
            so a stalled server cannot hang the notebook. Defaults to 30.

    Returns:
        None. HTTP and network failures are reported with a printed message
        instead of being raised.
    """
    if os.path.exists(video_path):
        print(f"Video '{video_path}' already exists. Skipping download.")
        return

    partial_path = f"{video_path}.part"
    try:
        # The context manager releases the connection even on early return/error.
        with requests.get(video_url, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                print(f"Error downloading video '{video_name}': {response.status_code}")
                return

            with open(partial_path, 'wb') as file:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    if chunk:  # skip keep-alive chunks
                        file.write(chunk)

        # Publish the file atomically only once it is complete.
        os.replace(partial_path, video_path)
        print(f"Video '{video_name}' downloaded successfully.")
    except requests.exceptions.RequestException as e:
        print(f"Error downloading video '{video_name}': {e}")
    finally:
        # Remove leftovers of a failed or interrupted transfer.
        if os.path.exists(partial_path):
            os.remove(partial_path)

In [64]:
def check_if_image_exists(path):
    """Report whether something already exists at ``path``.

    Args:
        path (str): File system path to test.

    Returns:
        bool: True when the path exists (a notice is printed in that case),
        False otherwise. An explicit False is returned rather than an implicit
        None so callers always receive a real boolean.
    """
    if os.path.exists(path):
        print(f"Path: {path} already exists..!!")
        return True
    return False

In [65]:
def save_image(output_path, frame):
    """Write ``frame`` to ``output_path`` unless a file is already there.

    Args:
        output_path (str): Destination image path; the extension selects the format.
        frame: Image array as returned by OpenCV, or None.

    Returns:
        bool: True only when the image was actually written. False when the
        file already existed, when ``frame`` is None, or when ``cv2.imwrite``
        reported a failure (for example because the target directory is missing).
    """
    if check_if_image_exists(output_path):
        print(f"Frame already exists at {output_path}")
        return False

    if frame is None:
        # cv2.imwrite raises on an empty image; report instead of crashing.
        print(f"Error: no frame data to write to {output_path}")
        return False

    # cv2.imwrite signals failure through its return value, not an exception.
    written = bool(cv2.imwrite(output_path, frame))
    if not written:
        print(f"Error: could not write image to {output_path}")
    return written

In [66]:
def extract_frame_by_iteration(video_path, target_frame, output_path=None, save=False):
    """
    Return frame number ``target_frame`` of a video by stepping through it from the start.

    Frame numbers are 1-based: ``target_frame=1`` is the first frame of the video.

    Args:
        video_path (str): Path of the video file to read.
        target_frame (int): 1-based number of the frame to extract.
        output_path (str, optional): Where to store the frame when ``save`` is True.
        save (bool): When True and ``output_path`` is given, the frame is written
            to ``output_path``; if that file already exists it is loaded and
            returned without opening the video at all.

    Returns:
        The frame as an image array, or None when the video cannot be opened or
        does not contain the requested frame.
    """
    if save and output_path and check_if_image_exists(output_path):
        print(f"Frame already exists at {output_path}")
        return cv2.imread(output_path)

    video = cv2.VideoCapture(video_path)
    if not video.isOpened():
        print("Error: Could not open video.")
        video.release()
        return None

    frame = None
    try:
        if target_frame >= 1:
            # Advance past the preceding frames; grab() moves forward without
            # the cost of decoding frames that are thrown away anyway.
            reached_target = True
            for _ in range(target_frame - 1):
                if not video.grab():
                    reached_target = False  # video ended before the target
                    break

            if reached_target:
                success, decoded = video.read()
                # Only accept the frame if the read really succeeded; a failed
                # read yields None, which must not be saved or reported as OK.
                if success:
                    frame = decoded
    finally:
        # Always free the capture handle, even if reading raised.
        video.release()

    if frame is None:
        print(f"Error: Could not extract frame {target_frame}.")
        return None

    if save and output_path:
        if save_image(output_path, frame):
            print(f"Frame {target_frame} extracted and saved successfully.")
    else:
        print(f"Frame {target_frame} extracted successfully (not saved).")

    return frame

# Test
# extract_frame_by_iteration('videos/test.mp4', 105, 'images/test_105.png', True)

In [67]:
def is_offroad(frame_idx, ranges):
    """Tell whether ``frame_idx`` falls inside any of the given ranges.

    ``ranges`` is an iterable of ``(start, end)`` pairs; both ends are inclusive.
    Returns True on the first matching pair, False when none matches.
    """
    for lower, upper in ranges:
        if lower <= frame_idx <= upper:
            return True
    return False

In [68]:
def run_yolo_on_frame(frame, model, conf_threshold=0.8):
    """Check whether the detector finds at least one confident box in ``frame``.

    The model is called with ``verbose=False`` and restricted to class index 0
    (noted as the car class); only the first result is inspected.

    Returns:
        bool: True as soon as one box has a confidence strictly greater than
        ``conf_threshold``, otherwise False.
    """
    first_result = model(frame, verbose=False, classes=[0])[0]  # class 0 = car

    # any() stops at the first box above the threshold, like an early return.
    return any(box.conf.item() > conf_threshold for box in first_result.boxes)


In [69]:
def load_yolo_model(path):
    """Create a ``YOLO`` object for the detection task from the weights at ``path`` and return it."""
    detector = YOLO(path, task='detect')
    return detector

In [70]:
# Load the detector once; the dataset-building cell passes this same instance
# to run_yolo_on_frame for every sampled frame.
yolo_model = load_yolo_model(YOLO_MODEL_PATH)

In [None]:
# Build the labelled-frame dataset from the Labelbox export.
#
# For every video in the metadata file:
#   * every frame inside an annotated offroad range is saved with label 0;
#   * every ONROAD_SAMPLE_STEP-th remaining frame in which YOLO detects a car is
#     saved with label 1.
# All images are stored under IMAGES_DIRECTORY, and the CSV's "image_label"
# column holds the image file name relative to that directory for BOTH labels.
# Frames that could not be extracted get no CSV row.
#
# NOTE(review): extract_frame_by_iteration re-opens and re-reads the video for
# every requested frame, so this cell is slow on long videos.
ONROAD_SAMPLE_STEP = 10

with open(CSV_PATH, 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["video", "image_label","frame_number", "label"])  # 0 = offroad, 1 = onroad

    with open(META_DATA_PATH, 'r') as file:
        metadata = ndjson.load(file)

    for item in metadata:
        video_url = item['data_row']['row_data']
        video_name = item['data_row']['external_id']
        video_path = os.path.join(VIDEOS_DIRECTORY, video_name)
        download_video(video_url, video_name, video_path)

        # A failed download leaves no file; skip instead of failing per frame.
        if not os.path.exists(video_path):
            print(f"Skipping '{video_name}': video file is not available.")
            continue

        frame_count = item['media_attributes']['frame_count']
        project_key = next(iter(item.get("projects", {})), None)
        if not project_key:
            continue

        labels = item["projects"][project_key].get("labels") or []
        if not labels:
            print(f"Skipping '{video_name}': no labels in the export.")
            continue
        frames = labels[0]["annotations"]["frames"]

        # splitext handles any extension length, unlike slicing off 4 characters.
        video_stem = os.path.splitext(video_name)[0]

        # 1. Collect all frames carrying an 'offroad' classification
        offroad_list = sorted(
            int(frame_str)
            for frame_str, annotations in frames.items()
            if any(
                obj.get('value') == 'offroad'
                for obj in annotations.get('classifications', [])
            )
        )

        # 2. Pair consecutive markers into inclusive (start, end) ranges
        if len(offroad_list) % 2 == 0:
            offroad_ranges = list(zip(offroad_list[0::2], offroad_list[1::2]))
        else:
            offroad_ranges = []
            print(
                f"Warning: '{video_name}' has an odd number of offroad markers "
                f"({len(offroad_list)}); they cannot be paired into ranges, so no "
                f"offroad frames are exported and sampled frames may include offroad ones."
            )

        # 3. Save every frame of each offroad range and record it
        for start, end in offroad_ranges:
            for frame_number in range(start, end + 1):
                image_name = f"{video_stem}_{frame_number}_0.png"
                image_path = f"{IMAGES_DIRECTORY}/{image_name}"
                frame = extract_frame_by_iteration(video_path, frame_number, image_path, save=True)
                if frame is None:
                    continue  # no image on disk -> no CSV row
                writer.writerow([video_name, image_name, frame_number, 0])  # Label 0 = offroad

        # 4. Sample every ONROAD_SAMPLE_STEP-th other frame (frames are 1-based,
        #    so the last valid frame number is frame_count itself)
        for frame_number in range(1, frame_count + 1, ONROAD_SAMPLE_STEP):
            if is_offroad(frame_number, offroad_ranges):
                continue  # Already handled

            frame = extract_frame_by_iteration(video_path, frame_number)
            if frame is None:
                continue

            if run_yolo_on_frame(frame, yolo_model):
                image_name = f"{video_stem}_{frame_number}_1.png"
                image_path = f"{IMAGES_DIRECTORY}/{image_name}"
                if save_image(image_path, frame):
                    print(f"Frame {frame_number} extracted and saved successfully.")
                writer.writerow([video_name, image_name, frame_number, 1])  # Label 1 = onroad

Video 'converted_Backtobeginining.-0.2647533527535197,53.80354607135195.1740319068.mp4' downloaded successfully.
Frame 1 extracted successfully (not saved).
Frame 1 extracted and saved successfully.
Frame 11 extracted successfully (not saved).
Frame 11 extracted and saved successfully.
Frame 21 extracted successfully (not saved).
Frame 21 extracted and saved successfully.
Frame 31 extracted successfully (not saved).
Frame 31 extracted and saved successfully.
Frame 41 extracted successfully (not saved).
Frame 41 extracted and saved successfully.
Frame 51 extracted successfully (not saved).
Frame 51 extracted and saved successfully.
Frame 61 extracted successfully (not saved).
Frame 61 extracted and saved successfully.
Frame 71 extracted successfully (not saved).
Frame 71 extracted and saved successfully.
Frame 81 extracted successfully (not saved).
Video 'converted_Backtobeginining.-0.2647729836647066,53.80405026284905.1740320645.mp4' downloaded successfully.
Frame 1 extracted successfu