In [3]:

import os
import string
from pytube import YouTube


def checkFileName(file):
    validChars = "-_.() %s%s" % (string.ascii_letters, string.digits)
    return ''.join(c if c in validChars else '_' for c in file)

# video URLs
urls = [
    "https://www.youtube.com/watch?v=2NFwY15tRtA&t=2s",
    "https://www.youtube.com/watch?v=5dRramZVu2Q",
    "https://www.youtube.com/watch?v=WeF4wpw7w9k"
]


def downloadVideo(video_urls):
    for url in urls:
        try:
            yt = YouTube(url)
            stream = yt.streams.get_highest_resolution()
            if stream:
                print(f"Downloading {yt.title}...")
                stream.download()
                print(f"{yt.title} downloaded successfully.")
            else:
                print(f"No stream found for {yt.title}.")
        except Exception as e:
            print(f"Error downloading {url}: {e}")


downloadVideo(urls)

Downloading Cyclist and vehicle tracking - 2...
Cyclist and vehicle tracking - 2 downloaded successfully.
Downloading Drone Tracking Video...
Drone Tracking Video downloaded successfully.
Downloading Cyclist and vehicle Tracking - 1...
Cyclist and vehicle Tracking - 1 downloaded successfully.


In [5]:
import os
import numpy as np
import cv2

def process_video(video_file, output_folder):
    
    os.makedirs(output_folder, exist_ok=True)
    
    cap = cv2.VideoCapture(video_file)
    if not cap.isOpened():
        print(f"Error: Could not open video file: {video_file}")
        return
    
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    if frame_count == 0:
        print(f"Error: Video file {video_file} has no frames.")
        cap.release()
        return
    
    frames_processed = 0
    
    while frames_processed < frame_count:
        ret, frame = cap.read()
        if not ret:
            break
        
        #  the 7th frame
        if frames_processed % 7 != 0:
            frames_processed += 1
            continue
        
      
        frame_filename = f"frame_{frames_processed}.jpg"
        frame_path = os.path.join(output_folder, frame_filename)
        cv2.imwrite(frame_path, frame)
        
        frames_processed += 1
    
    cap.release()

def process_videos(filenames):
    for filename in filenames:
        print(f"Processing video: {filename}")
        video_name = os.path.splitext(os.path.basename(filename))[0]
        output_folder = f'{video_name}_frames'
        process_video(filename, output_folder)

filenames = [
    'Cyclist and vehicle Tracking - 1.mp4',
    'Cyclist and vehicle Tracking - 2.mp4',
    'Drone Tracking Video.mp4'
]

filenames = [filename.replace("’", "") for filename in filenames]
process_videos(filenames)


Processing video: Cyclist and vehicle Tracking - 1.mp4
Processing video: Cyclist and vehicle Tracking - 2.mp4
Processing video: Drone Tracking Video.mp4


In [18]:
import os
import numpy as np
import cv2

def process_video(video_file, output_folder):

    os.makedirs(output_folder, exist_ok=True)
    
    cap = cv2.VideoCapture(video_file)
    if not cap.isOpened():
        print(f"Error: Could not open video file: {video_file}")
        return
    
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    if frame_count == 0:
        print(f"Error: Video file {video_file} has no frames.")
        cap.release()
        return
    
    frames_processed = 0
    all_frames = []
    
    while frames_processed < frame_count:
        ret, frame = cap.read()
        if not ret:
            break
        
        
        if frames_processed % 7 != 0:
            frames_processed += 1
            continue
        
        
        frame = cv2.resize(frame, (1080, 1920))  
        
    
        
        all_frames.append(frame)
        
        frames_processed += 1
    
    cap.release()
    
    
    frames_array = np.array(all_frames)
    np.save(os.path.join(output_folder, 'frames.npy'), frames_array)

def process_videos(filenames):
    for filename in filenames:
        print(f"Processing video: {filename}")
        video_name = os.path.splitext(os.path.basename(filename))[0]
        output_folder = f'{video_name}_frames'
        process_video(filename, output_folder)

filenames = [
    'Cyclist and vehicle Tracking - 1.mp4',
    'Cyclist and vehicle Tracking - 2.mp4',
    'Drone Tracking Video.mp4'
]

filenames = [filename.replace("’", "") for filename in filenames]
process_videos(filenames)


Processing video: Cyclist and vehicle Tracking - 1.mp4
Processing video: Cyclist and vehicle Tracking - 2.mp4
Processing video: Drone Tracking Video.mp4


In [21]:
import numpy as np
import os
import cv2

# Process image
def processImage(imgPath, res=(1080, 1920)):
    try:
        # Load the image
        img = cv2.imread(imgPath)

        # Resize the image to the given resolution
        resized = cv2.resize(img, res[::-1])

        return resized
    except Exception as e:
        print(f"Error processing image {imgPath}: {e}")
        return None

# List of folder paths containing image files
folderPaths = [
    'Cyclist and vehicle Tracking - 1_frames',
    'Cyclist and vehicle Tracking - 2_frames',
    'Drone Tracking Video_frames'
]


resolution = (1080, 1920)

# Iterate over each folder
for i, folder_path in enumerate(folderPaths, start=1):
    # List to store resized images
    resizedImages = []

    # Iterate over each file in the folder
    for filename in os.listdir(folder_path):
        image_path = os.path.join(folder_path, filename)

       
        resizedImage = processImage(image_path, resolution)
        if resizedImage is not None:
            resizedImages.append(resizedImage)

    # Convert resized images to a npy array
    resizedImgArray = np.array(resizedImages, dtype=np.uint8)

   
    framesFile = f'frames{i}.npy'
    np.save(framesFile, resizedImgArray)

    
    print(f"folder {i} processed and saved successfully.")


All images in folder 1 processed and saved to frames1.npy successfully.
All images in folder 2 processed and saved to frames2.npy successfully.
All images in folder 3 processed and saved to frames3.npy successfully.


In [2]:
import numpy as np
import os
import cv2
import torch

# use yolov5 model
model = torch.hub.load('ultralytics/yolov5', 'yolov5x')  # Adjust the model variant as needed

# only detect car or bicycle objects
classes_of_interest = ['car', 'bicycle']

# function to perform object detection on every colored frame
def detect_objects(frame):
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = model(frame_rgb)
    detections = results.pandas().xyxy[0]  # Use pandas format for easier manipulation
    return detections

# process the video and output the detections to a folder
def process_video(frames_array, output_folder):
    os.makedirs(output_folder, exist_ok=True) 
    
    for frameNum, frame in enumerate(frames_array):
        detections = detect_objects(frame)
        car_bike_detections = detections[detections['name'].isin(classes_of_interest)]
        
        # show the frame with the corresponding bounding boxes (if found)
        for index, detection in car_bike_detections.iterrows():
            bbox = [int(detection['xmin']), int(detection['ymin']), int(detection['xmax']), int(detection['ymax'])]
            class_name = detection['name']
            cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 2)
            cv2.putText(frame, f'{class_name}', (bbox[0], bbox[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        
        output_path = os.path.join(output_folder, f'frame{frameNum}.jpg')
        cv2.imwrite(output_path, frame)
        print(f"Saved frame {frameNum} with detections to {output_path}")

# process each video
frames_files = ["frames1.npy", "frames2.npy", "frames3.npy"]
output_folders = ["video1", "video2", "video3"]
for frames_file, output_folder in zip(frames_files, output_folders):
    frames_array = np.load(frames_file)
    process_video(frames_array, output_folder)

# close OpenCV windows
cv2.destroyAllWindows()


Using cache found in C:\Users\abdul/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2024-3-2 Python-3.10.5 torch-2.2.1+cpu CPU

Fusing layers... 
YOLOv5x summary: 444 layers, 86705005 parameters, 0 gradients, 205.5 GFLOPs
Adding AutoShape... 


Saved frame 0 with detections to video1\frame0.jpg
Saved frame 1 with detections to video1\frame1.jpg
Saved frame 2 with detections to video1\frame2.jpg
Saved frame 3 with detections to video1\frame3.jpg
Saved frame 4 with detections to video1\frame4.jpg
Saved frame 5 with detections to video1\frame5.jpg
Saved frame 6 with detections to video1\frame6.jpg
Saved frame 7 with detections to video1\frame7.jpg
Saved frame 8 with detections to video1\frame8.jpg
Saved frame 9 with detections to video1\frame9.jpg
Saved frame 10 with detections to video1\frame10.jpg
Saved frame 11 with detections to video1\frame11.jpg
Saved frame 12 with detections to video1\frame12.jpg
Saved frame 13 with detections to video1\frame13.jpg
Saved frame 14 with detections to video1\frame14.jpg
Saved frame 15 with detections to video1\frame15.jpg
Saved frame 16 with detections to video1\frame16.jpg
Saved frame 17 with detections to video1\frame17.jpg
Saved frame 18 with detections to video1\frame18.jpg
Saved frame 1

In [25]:
import torch
import cv2
import numpy as np
from filterpy.kalman import KalmanFilter

# Load YOLOv5 model
yolo_model = torch.hub.load('ultralytics/yolov5', 'yolov5x')
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
yolo_model.to(device).eval()

# Define Kalman filter parameters
time_step = 1.0  # time step
kalman_filter = KalmanFilter(dim_x=4, dim_z=2)
kalman_filter.F = np.array([[1, time_step, 0,  0],
                             [0,  1, 0,  0],
                             [0,  0, 1, time_step],
                             [0,  0, 0,  1]])

kalman_filter.H = np.array([[1, 0, 0, 0],
                             [0, 0, 1, 0]])

kalman_filter.R *= 10  
kalman_filter.P *= 1000  
kalman_filter.Q *= 0.01  

objects_of_interest = ['car', 'bicycle']

# Array of video paths and output video names
video_paths = ['Cyclist and vehicle tracking - 2.mp4', 'Drone Tracking Video.mp4']
output_video_names = ['output_video2.mp4', 'output_video3.mp4']

for vid_path, output_vid_name in zip(video_paths, output_video_names):
    
    video_cap = cv2.VideoCapture(vid_path)
    
    # Get video properties
    vid_width = int(video_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    vid_height = int(video_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    vid_fps = int(video_cap.get(cv2.CAP_PROP_FPS))

    # Define codec and create VideoWriter object
    fourcc_codec = cv2.VideoWriter_fourcc(*'mp4v')
    vid_writer = cv2.VideoWriter(output_vid_name, fourcc_codec, vid_fps, (vid_width, vid_height))

    # Initialize variables for object tracking
    tracked_objects_dict = {}  # dictionary to store tracked object states
    obj_id_counter = 0  # counter for assigning object IDs
    frame_counter = 0  # counter for frames
    
    while video_cap.isOpened():
        ret, frame = video_cap.read()
        if not ret:
            break
        
        frame_counter += 1
        if frame_counter % 3 != 0:  # Skip frames that are not multiples of 3
            continue

        # Perform object detection using YOLOv5
        results = yolo_model(frame)
        detections = results.pandas().xyxy[0]  # Use pandas format for easier manipulation
        car_bike_detections = detections[detections['name'].isin(objects_of_interest)]
        
        # Update Kalman filter and track objects
        for index, detection in car_bike_detections.iterrows():
            bbox = [detection['xmin'], detection['ymin'], detection['xmax'], detection['ymax']]
            measurement = np.array([bbox[0] + (bbox[2] - bbox[0]) / 2, bbox[1] + (bbox[3] - bbox[1]) / 2])  
            
            measurement = measurement.reshape(2, 1)

            # Assign unique object ID
            obj_id_counter += 1
            obj_id = obj_id_counter

            if obj_id not in tracked_objects_dict:
                kalman_filter.x[:2] = measurement
                kalman_filter.x[2:] = 0
                tracked_objects_dict[obj_id] = kalman_filter

            tracked_objects_dict[obj_id].predict()
            tracked_objects_dict[obj_id].update(measurement)

            # Draw trajectory lines and object bounding boxes on the frame
            prediction = tracked_objects_dict[obj_id].x[:2].astype(int)
            center = (int(prediction[0]), int(prediction[1]))  # Ensure center coordinates are integers

            if 0 <= center[0] < vid_width and 0 <= center[1] < vid_height:
                radius = 4
                color = (255, 0, 0)  
                thickness = -1  
                cv2.circle(frame, center, radius, color, thickness)
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 255, 0), 2)

        # Write the frame to output video
        vid_writer.write(frame)

    video_cap.release()
    vid_writer.release()
    cv2.destroyAllWindows()


Using cache found in C:\Users\abdul/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2024-3-2 Python-3.10.5 torch-2.2.1+cpu CPU

Fusing layers... 
YOLOv5x summary: 444 layers, 86705005 parameters, 0 gradients, 205.5 GFLOPs
Adding AutoShape... 
  center = (int(prediction[0]), int(prediction[1]))  # Ensure center coordinates are integers
  center = (int(prediction[0]), int(prediction[1]))  # Ensure center coordinates are integers
  center = (int(prediction[0]), int(prediction[1]))  # Ensure center coordinates are integers
  center = (int(prediction[0]), int(prediction[1]))  # Ensure center coordinates are integers
  center = (int(prediction[0]), int(prediction[1]))  # Ensure center coordinates are integers
  center = (int(prediction[0]), int(prediction[1]))  # Ensure center coordinates are integers
  center = (int(prediction[0]), int(prediction[1]))  # Ensure center coordinates are integers
  center = (int(prediction[0]), int(prediction[1]))  # Ensure center coordinates are integers
  