In [4]:
import cv2
from ultralytics import YOLO
from PIL import Image
import matplotlib.pyplot as plt
import cv2 as cv
import os
import numpy as np

In [5]:
# Load the detector once; this same instance is handed to process_videos_for_task2 in the
# driver cell, so the weights are not reloaded per video.
# NOTE(review): "yolov9e" is presumably resolved by Ultralytics to the pretrained
# YOLOv9-E weights file -- confirm the weights are available where this runs.
model = YOLO("yolov9e")

In [6]:
# Parking-slot layout: one entry per slot, each a list of four (x, y) vertices describing
# the slot as a quadrilateral. The trailing "#N" marks the slot number; predict_detections
# uses list index + 1 as that slot number, so the order of the entries matters.
# NOTE(review): coordinates are presumably pixel positions in the video frame at its
# native resolution -- confirm against the camera footage.
parking_slots = [
    [(1603, 834), (1785,885),(1754, 1047), (1560, 1040)],#1
    [(1468, 745), (1615, 799),(1555, 1037), (1401, 928)],#2
    [(1355, 677), (1471,710),(1393, 919), (1268, 832)],#3
    [(1250, 622), (1342,662),(1265, 826), (1164, 752)],#4
    [(1176, 571), (1257,603),(1160, 748), (1077, 683)],#5
    [(1101, 528), (1151,570),(1075, 680), (1001, 627)],#6
    [(1031, 486), (1102,516),(1000, 626), (937, 581)],#7
    [(976, 439), (1037,482),(925, 581), (876, 538)],#8
    [(935, 411), (992,432),(876, 536), (831, 502)],#9
    [(881, 375), (941,418),(830, 494), (786, 471)]#10
] 

In [21]:
#function to get the last frame of the video
def get_last_frame(video_path):
    """Return the last decodable frame of a video.

    The fast path seeks straight to ``frame_count - 1``. The reported frame
    count comes from the container and may not match the number of frames
    that can actually be decoded, so when the seek/read fails the video is
    reopened and decoded sequentially, keeping the final frame that was read.

    Args:
        video_path: Path of the video file to open with ``cv2.VideoCapture``.

    Returns:
        The frame returned by ``VideoCapture.read()`` for the last readable
        position in the video.

    Raises:
        ValueError: If the video cannot be opened or no frame can be read.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            raise ValueError(f"Could not open video: {video_path}")

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if total_frames > 0:
            # Fast path: jump directly to the final frame index.
            cap.set(cv2.CAP_PROP_POS_FRAMES, total_frames - 1)
            success, frame = cap.read()
            if success:
                return frame
    finally:
        # Always free the decoder, including on the early return / raise above.
        cap.release()

    # Slow path: reopen (the failed seek leaves the position undefined) and
    # walk the stream, remembering the most recent frame that decoded.
    cap = cv2.VideoCapture(video_path)
    last_frame = None
    try:
        while True:
            success, frame = cap.read()
            if not success:
                break
            last_frame = frame
    finally:
        cap.release()

    if last_frame is None:
        raise ValueError("Could not read the last frame.")
    return last_frame

# here we check if the center of the car is in the slot
def is_center_in_slot(center, slot):
    """Tell whether a point lies inside (or on the border of) a parking slot.

    Args:
        center: ``(x, y)`` pair for the point to test. Any numeric scalars are
            accepted (Python numbers, NumPy scalars, 0-d tensors).
        slot: Sequence of ``(x, y)`` vertices describing the slot polygon.

    Returns:
        ``True`` when ``cv2.pointPolygonTest`` reports the point as inside the
        polygon or exactly on its edge (result ``>= 0``), ``False`` otherwise.
    """
    polygon = np.array(slot, np.int32).reshape((-1, 1, 2))
    # pointPolygonTest takes a floating-point point; converting to plain Python
    # floats avoids argument-parsing failures that some OpenCV Python builds
    # raise for NumPy integer scalars. Integer inputs convert exactly.
    point = (float(center[0]), float(center[1]))
    return cv2.pointPolygonTest(polygon, point, False) >= 0


#write the detections in a file
def write_in_txt(filename, detected_occupied_slots):
    """Write the occupancy status of each slot to a text file, one per line.

    Args:
        filename: Path of the file to create or overwrite.
        detected_occupied_slots: Mapping of slot number -> status. Only the
            statuses are written, in the mapping's iteration order; the slot
            numbers themselves are not stored in the file.
    """
    with open(filename, 'w') as out:
        # The line position in the file is what identifies the slot.
        out.writelines(
            f"{state}\n" for _, state in detected_occupied_slots.items()
        )

def predict_detections(image, parking_slots, model):
    """Detect vehicles in a frame and mark which parking slots they occupy.

    A slot counts as occupied when the center of at least one detected
    car/truck bounding box falls inside (or on the border of) its polygon.

    Args:
        image: Frame as read by OpenCV (BGR channel order).
        parking_slots: Sequence of slot polygons; the slot at list index ``i``
            is reported under slot number ``i + 1``.
        model: Ultralytics YOLO model exposing ``predict``.

    Returns:
        Dict mapping every slot number ``1..len(parking_slots)`` to ``1``
        (occupied) or ``0`` (free), in ascending slot order.
    """
    # Class ids kept from the detector: 2 = car, 7 = truck.
    vehicle_classes = (2, 7)

    # The frame is passed through unchanged: Ultralytics documents NumPy/OpenCV
    # inputs as BGR and does its own channel handling, so converting to RGB
    # first would feed the network channel-swapped images.
    results = model.predict(image, classes=list(vehicle_classes), verbose=False)

    boxes = results[0].boxes

    # Center point of every vehicle bounding box (xyxy = corner coordinates).
    centers = []
    for detection, cls in zip(boxes.xyxy, boxes.cls):
        if int(cls) not in vehicle_classes:
            continue
        x_min, y_min, x_max, y_max = detection[:4]
        center_x = int((x_min + x_max) / 2)
        center_y = int((y_min + y_max) / 2)
        centers.append((center_x, center_y))

    # Start with every slot free. Sizing from parking_slots (instead of a fixed
    # 10) keeps the dict -- and the file written from it -- in step with the
    # layout actually supplied.
    detected_occupied_slots = {
        slot_num: 0 for slot_num in range(1, len(parking_slots) + 1)
    }
    for point in centers:
        for index, slot in enumerate(parking_slots):
            if is_center_in_slot(point, slot):
                detected_occupied_slots[index + 1] = 1

    return detected_occupied_slots



#we handle the videos from the folder
def process_videos_for_task2(video_folder, output_folder, parking_slots, model):
    """Generate one slot-occupancy file per ``.mp4`` video in a folder.

    For each video the last frame is extracted, vehicles are detected on it,
    and the per-slot status is written to ``<video name>_det.txt`` inside
    ``output_folder``.

    Relies on ``tqdm`` being imported at notebook level before this function
    is called (the import lives in the cell that invokes it).

    Args:
        video_folder: Directory scanned (non-recursively) for ``.mp4`` files.
        output_folder: Directory for the result files; created if missing.
        parking_slots: Slot polygons forwarded to ``predict_detections``.
        model: Detector forwarded to ``predict_detections``.

    Raises:
        ValueError: Propagated from ``get_last_frame`` when a video yields no
            frame; processing stops at that video.
    """
    os.makedirs(output_folder, exist_ok=True)

    # os.listdir gives no ordering guarantee; sort for a reproducible run order.
    video_files = sorted(f for f in os.listdir(video_folder) if f.endswith('.mp4'))

    for filename in tqdm(video_files, desc="Processing videos"):
        video_path = os.path.join(video_folder, filename)

        # Strip only the trailing extension. str.replace would also rewrite any
        # other ".mp4" occurring earlier in the file name.
        stem, _ = os.path.splitext(filename)
        output_path = os.path.join(output_folder, stem + '_det.txt')

        last_frame = get_last_frame(video_path)
        detected_occupied_slots = predict_detections(last_frame, parking_slots, model)
        write_in_txt(output_path, detected_occupied_slots)

# # Function to compare generated files with ground truth and visualize discrepancies
# def compare_with_ground_truth(ground_truth_folder, output_folder, video_folder, parking_slots):
#     ground_truth_files = [f for f in os.listdir(ground_truth_folder) if f.endswith('_gt.txt')]
#     total_correct = 0
#     total_slots = 0
#     mismatches = []
# 
#     for gt_file in ground_truth_files:
#         gt_path = os.path.join(ground_truth_folder, gt_file)
#         output_path = os.path.join(output_folder, gt_file.replace('_gt.txt', '_det.txt'))
#         video_path = os.path.join(video_folder, gt_file.replace('_gt.txt', '.mp4'))
# 
#         if not os.path.exists(output_path):
#             print(f"Missing detection file for {gt_file}")
#             continue
# 
#         with open(gt_path, 'r') as gt, open(output_path, 'r') as out:
#             gt_slots = [int(line.strip()) for line in gt.readlines()]
#             out_slots = [int(line.strip()) for line in out.readlines()]
# 
#             if len(gt_slots) != len(out_slots):
#                 print(f"Mismatch in number of slots for {gt_file}")
#                 continue
# 
#             for idx, (gt_status, out_status) in enumerate(zip(gt_slots, out_slots)):
#                 if gt_status != out_status:
#                     mismatches.append((video_path, idx + 1, gt_status, out_status))
#                 else:
#                     total_correct += 1
#                 total_slots += 1
# 
#     accuracy = total_correct / total_slots if total_slots > 0 else 0
#     print(f'Accuracy: {accuracy:.2f}')



In [25]:
import os
from tqdm import tqdm

# Input/output locations for Task 2. These are absolute paths tied to one machine;
# adjust them before running the notebook anywhere else.
video_folder = '/home/radu/Facultate/Computer_Vision/Project2/train/Task2/'
output_folder = '/home/radu/Facultate/Computer_Vision/Project2/train/Task2/output/'
# In the visible cells this path is only referenced by the disabled comparison call below.
ground_truth_folder = '/home/radu/Facultate/Computer_Vision/Project2/train/Task2/ground-truth/'


# Run the detector on the last frame of every video and write one *_det.txt file per video.
process_videos_for_task2(video_folder, output_folder, parking_slots, model)
print("The txt files are generated")
# Optional evaluation against the ground truth. Disabled: the helper it calls is
# commented out in the definitions cell, so enabling this line alone would fail.
# compare_with_ground_truth(ground_truth_folder, output_folder, video_folder, parking_slots)

Processing videos: 100%|██████████| 15/15 [00:03<00:00,  4.87it/s]

The txt files are generated



