In [51]:
import torch
import os
import cv2

In [52]:
# Replace 'model.pt' with the actual path to your model file
model = torch.hub.load('yolov5', 'custom', path='yolov5m_Objects365.pt', source='local')


YOLOv5 🚀 2024-4-24 Python-3.11.7 torch-2.2.2 CUDA:0 (NVIDIA A40, 45403MiB)

Fusing layers... 


YOLOv5m summary: 290 layers, 22323858 parameters, 0 gradients, 52.6 GFLOPs
Adding AutoShape... 


In [53]:
model_name = "yolo5"
results = {}
for folders in os.listdir(f'data/frames'):
    output_directory = f'data/results/{model_name}/{folders}'
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    predictions = {}
    folder_path = os.path.join('data/frames', folders)
    for j, file in enumerate(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, file)
        frame = cv2.imread(file_path)
        height, width, channels = frame.shape

        # Detecting objects
        res = model(frame)
        # Initialize dictionary for this frame
        prediction = {}

        for d in res.pandas().xyxy[0].to_dict(orient="records"):
            # Extract class and confidence
            class_name = d["name"]
            confidence = d["confidence"]

            # Extract bounding box coordinates
            x1 = int(d["xmin"])
            y1 = int(d["ymin"])
            x2 = int(d["xmax"])
            y2 = int(d["ymax"])

            # Draw bounding box
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, f"{class_name} {confidence:.2f}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2)
            # Store prediction in dictionary
            prediction[class_name] = confidence


        # Save the annotated frame
        annotated_frame_path = os.path.join(output_directory, f"result_{j}.jpg")
        cv2.imwrite(annotated_frame_path, frame)

        # Store frame info in the predictions dictionary
        for k in prediction.keys():
            if k in predictions:
                predictions[k] = max(predictions[k], prediction[k])
            else:
                predictions[k] = prediction[k]

    # Store predictions in the results dictionary
    predictions = dict(sorted(predictions.items(), key=lambda item: item[1], reverse=True))
    results[folders] = predictions
    
    with open(f"{output_directory}/results.txt", "w") as f:
        for key in predictions:
            f.write(f"{key}: {predictions[key]}\n")

# Save the results dictionary to a file
output_file = f"{model_name}/results.json"

print(results)

{'video_1': {'Person': 0.9616837501525879, 'Hat': 0.9586727023124695, 'High Heels': 0.9048386812210083, 'Handbag/Satchel': 0.8986946940422058, 'Tie': 0.8888458609580994, 'Belt': 0.8234562873840332, 'Leather Shoes': 0.7883317470550537, 'Car': 0.7359131574630737, 'Sports Car': 0.5407417416572571, 'Other Shoes': 0.5004479289054871, 'Wild Bird': 0.4408917725086212, 'Ring': 0.4271615445613861, 'Book': 0.4010309875011444, 'Gloves': 0.34225064516067505, 'Glasses': 0.2980033755302429, 'Cell Phone': 0.25908422470092773}, 'video_2': {'Person': 0.9550806283950806, 'Hat': 0.9131593108177185, 'Clock': 0.7713071703910828, 'Cup': 0.7681269645690918, 'Helmet': 0.751285195350647, 'Tie': 0.7476629018783569, 'Glasses': 0.7330009341239929, 'Airplane': 0.6994612216949463, 'Horse': 0.5765413641929626, 'Lamp': 0.5695498585700989, 'Street Lights': 0.5459650158882141, 'Drum': 0.5176036953926086, 'Leather Shoes': 0.5149014592170715, 'Candle': 0.4996538758277893, 'Boots': 0.48709529638290405, 'Umbrella': 0.48274