In [3]:
import torch
import cv2
import numpy as np
from matplotlib import pyplot as plt
from ultralytics import YOLO
import os

In [2]:

# Detector instance shared by process_videos, which calls model.predict on the
# last frame of every video; it is created once here and reused for each file.
model = YOLO("yolov8x")


def get_last_frame(video_path):
    """Return the last decodable frame of the video at ``video_path``.

    The fast path seeks directly to the index derived from
    ``CAP_PROP_FRAME_COUNT``. That count comes from container metadata and may
    be missing or larger than the number of frames that actually decode, so
    when the seek-and-read fails the file is reopened and decoded
    sequentially, keeping the last frame that was read successfully.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.

    Returns:
        The frame object produced by ``VideoCapture.read()``.

    Raises:
        ValueError: If the video cannot be opened or yields no frame at all.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            raise ValueError(f"Could not open video: {video_path}")
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if total_frames > 0:
            cap.set(cv2.CAP_PROP_POS_FRAMES, total_frames - 1)
            ret, frame = cap.read()
            if ret:
                return frame
    finally:
        # Always free the decoder, including on the early return above.
        cap.release()

    # Fallback: the reported frame count was unusable. Reopen the file (a
    # failed seek can leave the capture in an undefined position) and walk it
    # frame by frame, remembering the most recent successful read.
    cap = cv2.VideoCapture(video_path)
    last_frame = None
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            last_frame = frame
    finally:
        cap.release()

    if last_frame is None:
        raise ValueError("Could not read the last frame.")
    return last_frame
   

# Regions of interest: each entry is one polygon given as a list of (x, y)
# vertices. The single polygon defined here represents the lane where the cars
# are; process_videos counts a detection when the centre of its bounding box
# lies inside (or on the edge of) any listed polygon.
# NOTE(review): coordinates are presumably pixels in the video frame - confirm.
region_of_interest = [
    [(402, 216), (502,218),(751, 441), (655, 475)]]

# Process every video in a directory and write the per-video car count to a text file.
def _box_centers(boxes):
    """Return the integer (x, y) centre of each ``[x_min, y_min, x_max, y_max]`` box."""
    return [
        (int((box[0] + box[2]) / 2), int((box[1] + box[3]) / 2))
        for box in boxes
    ]


def _count_points_in_regions(points, polygons):
    """Count the points lying inside or on the edge of at least one polygon."""
    return sum(
        1
        for point in points
        # measureDist=False: only the sign is needed (>= 0 means inside/on edge).
        if any(cv2.pointPolygonTest(polygon, point, False) >= 0 for polygon in polygons)
    )


def process_videos(video_directory, output_directory):
    """Count detections inside the region of interest on each video's last frame.

    For every ``.mp4`` file in ``video_directory`` the last frame is fetched
    with ``get_last_frame``, passed to the module-level ``model``, and the
    centres of the returned bounding boxes are tested against the polygons in
    the module-level ``region_of_interest``. The count is written to
    ``<video stem>.txt`` in ``output_directory`` and echoed to stdout.

    Args:
        video_directory: Directory scanned (non-recursively) for ``.mp4`` files.
        output_directory: Directory receiving one ``.txt`` file per video;
            created if it does not exist.

    Returns:
        None. Failures on a single video are reported on stdout and do not
        stop the remaining videos from being processed.
    """
    os.makedirs(output_directory, exist_ok=True)

    # The polygons do not depend on the video, so convert them to the
    # (N, 1, 2) int32 contour layout once instead of once per detection.
    polygons = [
        np.array(region, np.int32).reshape((-1, 1, 2))
        for region in region_of_interest
    ]

    # Sorted so that the processing order and the log are reproducible;
    # os.listdir returns entries in arbitrary order.
    for filename in sorted(os.listdir(video_directory)):
        if not filename.endswith('.mp4'):
            continue

        video_path = os.path.join(video_directory, filename)
        try:
            last_frame = get_last_frame(video_path)

            # Run inference on the last frame only.
            # NOTE(review): classes 2 and 7 are presumably the COCO ids for
            # car and truck - confirm against model.names. The confidence
            # threshold is deliberately low (0.1).
            results = model.predict(last_frame, classes=[2, 7], verbose=False, conf=0.1)
            boxes = results[0].boxes.xyxy.cpu().numpy()

            cars_in_roi = _count_points_in_regions(_box_centers(boxes), polygons)

            # Write the count to a text file named after the video.
            output_filename = os.path.splitext(filename)[0] + '.txt'
            output_path = os.path.join(output_directory, output_filename)
            with open(output_path, 'w') as f:
                f.write(str(cars_in_roi))
            print(f'Processed {filename}: {cars_in_roi} cars in the region of interest.')

        except Exception as e:
            # Best effort: report the failing video and carry on with the rest.
            print(f'Error processing {filename}: {e}')
                
# Look up the expected count stored for a given video / prediction file.
def read_ground_truth(ground_truth_directory, video_filename):
    """Return the integer on the first line of the matching ground-truth file.

    The ground-truth file name is the stem of ``video_filename`` followed by
    ``_gt.txt`` and is looked up inside ``ground_truth_directory``.
    """
    stem, _extension = os.path.splitext(video_filename)
    gt_path = os.path.join(ground_truth_directory, stem + '_gt.txt')
    with open(gt_path, 'r') as handle:
        first_line = handle.readlines()[0]
    return int(first_line.strip())


# Score the written predictions against the ground-truth counts.
def compare_with_ground_truth(predictions_directory, ground_truth_directory):
    """Print every mismatching prediction and the overall exact-match accuracy.

    Each ``.txt`` file in ``predictions_directory`` holds a predicted count on
    its first line; the expected count is obtained through
    ``read_ground_truth`` using the same file name. Accuracy is the share of
    files whose two counts are equal (reported as 0 when there are no files).
    """
    prediction_files = [
        entry for entry in os.listdir(predictions_directory) if entry.endswith('.txt')
    ]

    matches = 0
    for entry in prediction_files:
        with open(os.path.join(predictions_directory, entry), 'r') as handle:
            predicted_count = int(handle.readlines()[0].strip())

        ground_truth_count = read_ground_truth(ground_truth_directory, entry)

        if predicted_count != ground_truth_count:
            print(f'{entry}: Prediction ({predicted_count}) does not match ground truth ({ground_truth_count})')
            continue
        matches += 1

    accuracy = matches / len(prediction_files) if prediction_files else 0
    print(f'Accuracy: {accuracy:.2%}')

               
# Root folder holding the Task 4 videos. Set the TASK4_DIR environment variable
# to run on another machine; the default keeps the original location.
video_directory = os.environ.get(
    'TASK4_DIR', '/home/radu/Facultate/Computer_Vision/Project2/train/Task4/'
)
# Both folders live under the video root. The trailing '' keeps the trailing
# path separator that the original hard-coded literals had.
output_directory = os.path.join(video_directory, 'output', '')
ground_truth_directory = os.path.join(video_directory, 'ground-truth', '')

# Write one prediction file per video.
process_videos(video_directory, output_directory)

# Compare predictions with ground truth
compare_with_ground_truth(output_directory, ground_truth_directory)

  return F.conv2d(input, weight, bias, self.stride,


Processed 15.mp4: 1 cars in the region of interest.
Processed 03.mp4: 0 cars in the region of interest.
Processed 01.mp4: 1 cars in the region of interest.
Processed 06.mp4: 1 cars in the region of interest.
Processed 12.mp4: 1 cars in the region of interest.
Processed 13.mp4: 1 cars in the region of interest.
Processed 08.mp4: 4 cars in the region of interest.
Processed 10.mp4: 1 cars in the region of interest.
Processed 05.mp4: 2 cars in the region of interest.
Processed 02.mp4: 6 cars in the region of interest.
Processed 14.mp4: 1 cars in the region of interest.
Processed 09.mp4: 1 cars in the region of interest.
Processed 11.mp4: 2 cars in the region of interest.
Processed 07.mp4: 3 cars in the region of interest.
Processed 04.mp4: 1 cars in the region of interest.
08.txt: Prediction (4) does not match ground truth (5)
02.txt: Prediction (6) does not match ground truth (4)
Accuracy: 86.67%
