In [14]:
!pip install gdown ultralytics

import gdown
import zipfile
import os
import cv2
from ultralytics import YOLO
from IPython.display import display, HTML
from PIL import Image
import matplotlib.pyplot as plt



In [15]:
# Step 1: Download zipped file from Google Drive
file_id = '1OFwygBHu97EMxBxzKMuJzq0yeSCiKL9U'
url = f'https://drive.google.com/uc?id={file_id}'
zip_path = 'aerialtest.zip'

gdown.download(url, zip_path, quiet=False)

# Unzip to folder
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall('unseen_datas')

print("Extracted files:", os.listdir('unseen_datas'))

Downloading...
From: https://drive.google.com/uc?id=1OFwygBHu97EMxBxzKMuJzq0yeSCiKL9U
To: /content/aerialtest.zip
100%|██████████| 7.48M/7.48M [00:00<00:00, 15.5MB/s]

Extracted files: ['aerialtest']





In [16]:
# Step 3: Load your trained YOLOv8 model
model = YOLO('/content/best.pt')

In [17]:
# Step 4: Process videos and images
input_folder = 'unseen_datas/aerialtest'
output_folder = 'processed_datas'
os.makedirs(output_folder, exist_ok=True)

video_exts = ('.mp4', '.avi', '.mov')
image_exts = ('.jpg', '.jpeg', '.png')

files = os.listdir(input_folder)
video_files = [f for f in files if f.endswith(video_exts)]
image_files = [f for f in files if f.endswith(image_exts)]

In [18]:
# --- Process Videos ---
for video_file in video_files:
    input_path = os.path.join(input_folder, video_file)
    output_path = os.path.join(output_folder, f'detected_{os.path.splitext(video_file)[0]}.avi')

    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        print(f"ERROR: Could not open video {input_path}")
        continue

    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    if fps == 0 or fps != fps:
        fps = 20.0

    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    print(f"Processing video: {video_file}")

    frame_count = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        results = model(frame)[0]

        boxes = results.boxes.xyxy.cpu().numpy()
        confidences = results.boxes.conf.cpu().numpy()
        classes = results.boxes.cls.cpu().numpy().astype(int)

        for box, conf, cls in zip(boxes, confidences, classes):
            xmin, ymin, xmax, ymax = map(int, box)
            label = model.names[cls]
            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
            cv2.putText(frame, f"{label} {conf:.2f}", (xmin, ymin - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        out.write(frame)
        frame_count += 1

    cap.release()
    out.release()
    print(f"Finished {video_file}. Frames saved: {frame_count}")

Processing video: soldiersvideo3.mp4

0: 384x640 2 Soldiers, 64.2ms
Speed: 10.0ms preprocess, 64.2ms inference, 19.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 Soldiers, 76.0ms
Speed: 2.8ms preprocess, 76.0ms inference, 5.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Soldier, 72.1ms
Speed: 2.9ms preprocess, 72.1ms inference, 4.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 Soldiers, 61.1ms
Speed: 21.4ms preprocess, 61.1ms inference, 2.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 Soldiers, 46.4ms
Speed: 3.1ms preprocess, 46.4ms inference, 2.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Soldier, 52.2ms
Speed: 3.0ms preprocess, 52.2ms inference, 2.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 Soldiers, 32.4ms
Speed: 3.7ms preprocess, 32.4ms inference, 2.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 Soldiers, 60.1ms
Speed: 3.1ms preprocess, 60.1ms inf

In [19]:
# --- Process Images ---
for image_file in image_files:
    input_path = os.path.join(input_folder, image_file)
    output_path = os.path.join(output_folder, f'detected_{image_file}')

    image = cv2.imread(input_path)
    if image is None:
        print(f"ERROR: Could not read image {input_path}")
        continue

    results = model(image)[0]

    boxes = results.boxes.xyxy.cpu().numpy()
    confidences = results.boxes.conf.cpu().numpy()
    classes = results.boxes.cls.cpu().numpy().astype(int)

    for box, conf, cls in zip(boxes, confidences, classes):
        xmin, ymin, xmax, ymax = map(int, box)
        label = model.names[cls]
        cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
        cv2.putText(image, f"{label} {conf:.2f}", (xmin, ymin - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    cv2.imwrite(output_path, image)
    print(f"Processed image: {image_file}")

print("All videos and images processed. Check the 'processed_datas' folder.")


0: 352x640 12 Soldiers, 17.1ms
Speed: 3.1ms preprocess, 17.1ms inference, 1.6ms postprocess per image at shape (1, 3, 352, 640)
Processed image: soldiers2.jpg

0: 384x640 14 Civilians, 4 Soldiers, 13.7ms
Speed: 2.7ms preprocess, 13.7ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)
Processed image: civilians2.jpg

0: 448x640 6 Soldiers, 13.5ms
Speed: 3.0ms preprocess, 13.5ms inference, 1.9ms postprocess per image at shape (1, 3, 448, 640)
Processed image: soldiers3.jpg
All videos and images processed. Check the 'processed_datas' folder.
