## Importing libraries
First, we'll start by importing the necessary libraries and modules.

In [2]:
import os

import pandas as pd
import torch
from tqdm import tqdm

from PytorchWildlife import utils as pw_utils
from PytorchWildlife.models import detection as pw_detection

## Model Initialization
We will initialize the MegaDetectorV6 model for image detection. This model is designed for detecting animals in images.

In [3]:
# Choose the compute device: use the GPU when PyTorch can see one, otherwise the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
    # Make GPU index 0 the current CUDA device.
    torch.cuda.set_device(0)
else:
    DEVICE = "cpu"

# Build the MegaDetectorV6 detector with pretrained weights on the chosen device.
# Accepted `version` values: MDV6-yolov9-c, MDV6-yolov9-e, MDV6-yolov10-c,
# MDV6-yolov10-e or MDV6-rtdetr-c.
detection_model = pw_detection.MegaDetectorV6(
    device=DEVICE,
    pretrained=True,
    version="MDV6-yolov10-e",
)

# To switch to MegaDetectorV5, uncomment the line below; it rebinds `detection_model`.
#detection_model = pw_detection.MegaDetectorV5(device=DEVICE, pretrained=True, version="a")

Ultralytics 8.3.75 🚀 Python-3.10.16 torch-2.6.0+cu124 CUDA:0 (NVIDIA GeForce GTX 1650 with Max-Q Design, 3897MiB)
YOLOv10x summary (fused): 503 layers, 31,589,858 parameters, 0 gradients, 169.8 GFLOPs


## Load ROF Data

We load the table of image paths from a CSV file using pandas.

In [4]:
# Read the CSV that lists the images; pandas is imported in the notebook's
# import cell at the top so every dependency is declared in one place.
images = pd.read_csv("../../../data/tabular/none_images_df.csv")

# Pull the `img_path` column out as a plain Python list. Bracket access is used
# because, unlike attribute access, it cannot be shadowed by a DataFrame attribute.
images_paths = list(images["img_path"])

# Last expression of the cell: display how many image paths were loaded.
len(images_paths)

35784

## Batch Image Detection
Next, we'll demonstrate how to process multiple images in batches. This is useful when you have a large number of images and want to process them efficiently.

In [None]:
# Ask PyTorch to release its unused cached GPU memory before the long detection run.
torch.cuda.empty_cache()

# Run the detector over the given image directory with a batch size of 1 and
# keep the returned detections in `results` for the export cells below.
results = detection_model.batch_image_detection(
    "../../../data/images/md-test/0_10",
    batch_size=1,
)

 71%|███████   | 2531/3569 [15:12<07:16,  2.38it/s]


0: 480x640 1 animal, 85.5ms
Speed: 7.2ms preprocess, 85.5ms inference, 1.4ms postprocess per image at shape (1, 3, 480, 640)


 71%|███████   | 2532/3569 [15:12<07:24,  2.33it/s]


0: 384x640 1 animal, 67.9ms
Speed: 4.7ms preprocess, 67.9ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)


 71%|███████   | 2533/3569 [15:13<06:55,  2.49it/s]


0: 480x640 (no detections), 90.6ms
Speed: 5.7ms preprocess, 90.6ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


 71%|███████   | 2534/3569 [15:13<07:03,  2.45it/s]


0: 384x640 1 animal, 68.7ms
Speed: 7.8ms preprocess, 68.7ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)


 71%|███████   | 2535/3569 [15:14<06:49,  2.53it/s]


0: 480x640 (no detections), 92.8ms
Speed: 6.0ms preprocess, 92.8ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 640)


 71%|███████   | 2536/3569 [15:14<06:47,  2.53it/s]


0: 480x640 (no detections), 90.2ms
Speed: 5.5ms preprocess, 90.2ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 640)


 71%|███████   | 2537/3569 [15:14<06:43,  2.56it/s]


0: 480x640 1 animal, 86.4ms
Speed: 6.6ms preprocess, 86.4ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 640)


 71%|███████   | 2538/3569 [15:15<06:42,  2.56it/s]


0: 384x640 1 animal, 69.2ms
Speed: 5.5ms preprocess, 69.2ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)


 71%|███████   | 2539/3569 [15:15<06:20,  2.71it/s]


0: 480x640 1 animal, 91.8ms
Speed: 11.3ms preprocess, 91.8ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 640)


 71%|███████   | 2540/3569 [15:15<06:39,  2.58it/s]


0: 480x640 (no detections), 86.8ms
Speed: 5.7ms preprocess, 86.8ms inference, 1.4ms postprocess per image at shape (1, 3, 480, 640)


 71%|███████   | 2541/3569 [15:16<06:40,  2.57it/s]


0: 384x640 1 animal, 68.2ms
Speed: 5.3ms preprocess, 68.2ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)


 71%|███████   | 2542/3569 [15:16<06:11,  2.77it/s]


0: 480x640 2 animals, 90.4ms
Speed: 5.0ms preprocess, 90.4ms inference, 1.7ms postprocess per image at shape (1, 3, 480, 640)


 71%|███████▏  | 2543/3569 [15:16<06:09,  2.78it/s]


0: 480x640 1 animal, 86.1ms
Speed: 5.8ms preprocess, 86.1ms inference, 1.8ms postprocess per image at shape (1, 3, 480, 640)


 71%|███████▏  | 2544/3569 [15:17<06:09,  2.78it/s]


0: 480x640 1 animal, 86.7ms
Speed: 5.9ms preprocess, 86.7ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 640)


 71%|███████▏  | 2545/3569 [15:17<06:29,  2.63it/s]


0: 480x640 1 animal, 87.9ms
Speed: 6.3ms preprocess, 87.9ms inference, 2.5ms postprocess per image at shape (1, 3, 480, 640)


 71%|███████▏  | 2546/3569 [15:18<06:33,  2.60it/s]


0: 384x640 (no detections), 66.6ms
Speed: 4.6ms preprocess, 66.6ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)


 71%|███████▏  | 2547/3569 [15:18<06:26,  2.64it/s]


0: 480x640 1 animal, 90.8ms
Speed: 6.2ms preprocess, 90.8ms inference, 1.2ms postprocess per image at shape (1, 3, 480, 640)


 71%|███████▏  | 2548/3569 [15:18<06:39,  2.55it/s]


0: 384x640 1 animal, 66.8ms
Speed: 4.7ms preprocess, 66.8ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)


 71%|███████▏  | 2549/3569 [15:19<06:10,  2.75it/s]


0: 384x640 1 animal, 65.3ms
Speed: 5.4ms preprocess, 65.3ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)


 71%|███████▏  | 2550/3569 [15:19<05:45,  2.95it/s]


0: 480x640 1 animal, 88.9ms
Speed: 5.5ms preprocess, 88.9ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


 71%|███████▏  | 2551/3569 [15:19<05:48,  2.92it/s]


0: 480x640 (no detections), 88.1ms
Speed: 5.2ms preprocess, 88.1ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 640)


 72%|███████▏  | 2552/3569 [15:20<05:40,  2.98it/s]


0: 384x640 1 animal, 68.3ms
Speed: 4.0ms preprocess, 68.3ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)


 72%|███████▏  | 2553/3569 [15:20<05:24,  3.13it/s]


0: 480x640 (no detections), 89.8ms
Speed: 7.7ms preprocess, 89.8ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 640)


 72%|███████▏  | 2554/3569 [15:20<05:32,  3.05it/s]


0: 480x640 1 animal, 88.3ms
Speed: 5.9ms preprocess, 88.3ms inference, 1.9ms postprocess per image at shape (1, 3, 480, 640)


 72%|███████▏  | 2555/3569 [15:21<05:35,  3.02it/s]


0: 480x640 (no detections), 86.3ms
Speed: 5.7ms preprocess, 86.3ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 640)


 72%|███████▏  | 2556/3569 [15:21<05:34,  3.03it/s]


0: 384x640 1 animal, 66.1ms
Speed: 4.5ms preprocess, 66.1ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)


 72%|███████▏  | 2557/3569 [15:21<05:18,  3.18it/s]


0: 480x640 1 animal, 90.3ms
Speed: 5.0ms preprocess, 90.3ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 640)


 72%|███████▏  | 2558/3569 [15:22<05:27,  3.09it/s]


0: 480x640 1 animal, 87.0ms
Speed: 5.8ms preprocess, 87.0ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 640)


 72%|███████▏  | 2559/3569 [15:22<05:31,  3.04it/s]


0: 480x640 (no detections), 89.4ms
Speed: 4.9ms preprocess, 89.4ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 640)


 72%|███████▏  | 2560/3569 [15:22<05:35,  3.01it/s]


0: 480x640 1 animal, 86.1ms
Speed: 6.9ms preprocess, 86.1ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 640)


 72%|███████▏  | 2561/3569 [15:23<05:40,  2.96it/s]


0: 480x640 1 animal, 88.4ms
Speed: 7.6ms preprocess, 88.4ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 640)


 72%|███████▏  | 2562/3569 [15:23<05:46,  2.91it/s]


0: 384x640 1 animal, 66.8ms
Speed: 6.2ms preprocess, 66.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)


 72%|███████▏  | 2563/3569 [15:23<05:33,  3.02it/s]


0: 480x640 2 animals, 89.5ms
Speed: 4.7ms preprocess, 89.5ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 640)


 72%|███████▏  | 2564/3569 [15:24<05:44,  2.91it/s]


0: 480x640 1 animal, 88.0ms
Speed: 5.5ms preprocess, 88.0ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 640)


 72%|███████▏  | 2565/3569 [15:24<05:38,  2.96it/s]


0: 480x640 1 animal, 86.4ms
Speed: 5.2ms preprocess, 86.4ms inference, 1.2ms postprocess per image at shape (1, 3, 480, 640)


 72%|███████▏  | 2566/3569 [15:24<05:39,  2.95it/s]


0: 480x640 1 animal, 86.4ms
Speed: 6.1ms preprocess, 86.4ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


 72%|███████▏  | 2567/3569 [15:25<05:32,  3.01it/s]


0: 480x640 2 animals, 87.7ms
Speed: 4.8ms preprocess, 87.7ms inference, 2.4ms postprocess per image at shape (1, 3, 480, 640)


 72%|███████▏  | 2568/3569 [15:25<05:32,  3.01it/s]


0: 480x640 2 animals, 86.3ms
Speed: 6.5ms preprocess, 86.3ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 640)


 72%|███████▏  | 2569/3569 [15:25<05:34,  2.99it/s]


0: 480x640 1 vehicle, 86.5ms
Speed: 7.9ms preprocess, 86.5ms inference, 1.2ms postprocess per image at shape (1, 3, 480, 640)


 72%|███████▏  | 2570/3569 [15:26<05:25,  3.07it/s]


0: 384x640 1 animal, 68.0ms
Speed: 3.9ms preprocess, 68.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)


 72%|███████▏  | 2571/3569 [15:26<05:00,  3.32it/s]


0: 480x640 (no detections), 89.6ms
Speed: 4.9ms preprocess, 89.6ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


 72%|███████▏  | 2572/3569 [15:26<05:11,  3.20it/s]


0: 480x640 (no detections), 87.6ms
Speed: 7.1ms preprocess, 87.6ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 640)


 72%|███████▏  | 2573/3569 [15:27<05:16,  3.15it/s]


0: 480x640 (no detections), 87.8ms
Speed: 8.0ms preprocess, 87.8ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 640)


 72%|███████▏  | 2574/3569 [15:27<05:28,  3.03it/s]


0: 480x640 1 animal, 86.1ms
Speed: 5.6ms preprocess, 86.1ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 640)


 72%|███████▏  | 2575/3569 [15:27<05:29,  3.01it/s]


0: 384x640 (no detections), 67.7ms
Speed: 4.6ms preprocess, 67.7ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


 72%|███████▏  | 2576/3569 [15:27<05:06,  3.24it/s]


0: 480x640 1 animal, 89.4ms
Speed: 5.2ms preprocess, 89.4ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


 72%|███████▏  | 2577/3569 [15:28<05:15,  3.14it/s]


0: 480x640 (no detections), 86.9ms
Speed: 4.5ms preprocess, 86.9ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


 72%|███████▏  | 2578/3569 [15:28<05:24,  3.05it/s]


0: 480x640 1 animal, 87.6ms
Speed: 4.6ms preprocess, 87.6ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


 72%|███████▏  | 2579/3569 [15:28<05:19,  3.09it/s]


0: 480x640 4 animals, 88.4ms
Speed: 5.3ms preprocess, 88.4ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


 72%|███████▏  | 2580/3569 [15:29<05:26,  3.03it/s]


0: 480x640 (no detections), 89.2ms
Speed: 4.6ms preprocess, 89.2ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


 72%|███████▏  | 2581/3569 [15:29<05:22,  3.06it/s]


0: 384x640 1 animal, 68.2ms
Speed: 3.9ms preprocess, 68.2ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)


 72%|███████▏  | 2582/3569 [15:29<05:10,  3.18it/s]


0: 384x640 (no detections), 65.1ms
Speed: 4.1ms preprocess, 65.1ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


 72%|███████▏  | 2583/3569 [15:30<05:16,  3.12it/s]


0: 480x640 (no detections), 90.3ms
Speed: 5.8ms preprocess, 90.3ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 640)


 72%|███████▏  | 2584/3569 [15:30<05:27,  3.01it/s]


0: 480x640 1 animal, 87.2ms
Speed: 7.3ms preprocess, 87.2ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 640)


 72%|███████▏  | 2585/3569 [15:30<05:31,  2.97it/s]


0: 480x640 2 animals, 88.2ms
Speed: 5.2ms preprocess, 88.2ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


 72%|███████▏  | 2586/3569 [15:31<05:29,  2.98it/s]


0: 480x640 1 animal, 87.2ms
Speed: 4.8ms preprocess, 87.2ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


 72%|███████▏  | 2587/3569 [15:31<05:25,  3.02it/s]


0: 480x640 1 animal, 86.0ms
Speed: 4.8ms preprocess, 86.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


 73%|███████▎  | 2588/3569 [15:31<05:30,  2.97it/s]


0: 480x640 1 animal, 90.3ms
Speed: 4.6ms preprocess, 90.3ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 640)


 73%|███████▎  | 2589/3569 [15:32<05:29,  2.97it/s]


0: 480x640 2 animals, 86.0ms
Speed: 6.0ms preprocess, 86.0ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 640)


 73%|███████▎  | 2590/3569 [15:32<05:27,  2.99it/s]


0: 480x640 2 animals, 89.1ms
Speed: 4.8ms preprocess, 89.1ms inference, 1.8ms postprocess per image at shape (1, 3, 480, 640)


 73%|███████▎  | 2591/3569 [15:33<05:39,  2.88it/s]


0: 480x640 (no detections), 88.0ms
Speed: 9.6ms preprocess, 88.0ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 640)


 73%|███████▎  | 2592/3569 [15:33<05:52,  2.77it/s]


0: 480x640 1 animal, 88.5ms
Speed: 4.9ms preprocess, 88.5ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 640)


 73%|███████▎  | 2593/3569 [15:33<05:54,  2.75it/s]


0: 480x640 (no detections), 87.2ms
Speed: 5.6ms preprocess, 87.2ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 640)


 73%|███████▎  | 2594/3569 [15:34<05:50,  2.78it/s]


0: 480x640 1 animal, 89.1ms
Speed: 4.6ms preprocess, 89.1ms inference, 1.8ms postprocess per image at shape (1, 3, 480, 640)


 73%|███████▎  | 2595/3569 [15:34<05:43,  2.84it/s]


0: 480x640 (no detections), 89.5ms
Speed: 4.7ms preprocess, 89.5ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 640)


 73%|███████▎  | 2596/3569 [15:34<05:39,  2.87it/s]


0: 480x640 2 animals, 90.1ms
Speed: 5.8ms preprocess, 90.1ms inference, 1.7ms postprocess per image at shape (1, 3, 480, 640)


 73%|███████▎  | 2597/3569 [15:35<05:38,  2.87it/s]


0: 480x640 2 animals, 88.7ms
Speed: 5.0ms preprocess, 88.7ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


 73%|███████▎  | 2598/3569 [15:35<05:49,  2.78it/s]


KeyboardInterrupt: 

## Output Results
PytorchWildlife can output detection results in multiple formats. Here are some examples:

### 1. Annotated Images:
This will output the images with bounding boxes drawn around the detected animals. The images will be saved in the specified output directory.

In [5]:
# Disabled by default. Uncomment the call below to save the annotated images
# for `results` into the "batch_output" directory.
# pw_utils.save_detection_images(results, "batch_output", overwrite=False)

### 2. Cropped Images:
This will output the cropped images of the detected animals. The cropping is done around the detection bounding box. The images will be saved in the specified output directory.

In [6]:
# Disabled by default. Uncomment the call below to save the cropped detections
# for `results` into the "crop_output" directory.
# pw_utils.save_crop_images(results, "crop_output", overwrite=False)

### 3. JSON Format:
This will output the detection results in JSON format. The JSON file will be saved in the specified output directory.

In [29]:
# Write the batch detections to ./batch_output.json.
# - `categories` uses the class names exposed by the loaded detection model.
# - `exclude_category_ids` is empty, so no category is filtered out; the IDs can
#   be found in the definition of each model.
# - `exclude_file_path` is left as None.
pw_utils.save_detection_json(
    results,
    os.path.join(".", "batch_output.json"),
    categories=detection_model.CLASS_NAMES,
    exclude_category_ids=[],
    exclude_file_path=None,
)

### Copyright (c) Microsoft Corporation. All rights reserved.
### Licensed under the MIT License.