In [None]:
import cv2
import supervision as sv
from rfdetr import RFDETRNano
import os
import json
import pandas as pd
from pathlib import Path
import numpy as np

# --- CONFIGURATION ---
# Where the ground-truth file and the input videos live, and where the
# annotated videos and metric reports are written.
ANNOTATIONS_PATH = r"D:\Projects\RF_DETR_Wetland\rf_detr\_annotations.coco.json"
SOURCE_FOLDER = r"D:\Projects\RF_DETR_Wetland\rf_detr\videos"
TARGET_FOLDER = r"D:\Projects\RF_DETR_Wetland\annotated_videos"
os.makedirs(TARGET_FOLDER, exist_ok=True)

# Video range, used as slice bounds on the sorted list of .mp4 files:
# START_INDEX is zero-based and inclusive, END_INDEX is exclusive.
# Example: 39 and 70 select the 40th through the 70th video.
START_INDEX, END_INDEX = 39, 70

# 1. Initialize Model
model = RFDETRNano(
    pretrain_weights="rf-detr-nano.pth",
    resolution=384,
    patch_size=16,
    positional_encoding_size=24,
    num_windows=2,
    dec_layers=2,
    out_feature_indexes=[3, 6, 9, 12],
)
# Optional: model.optimize_for_inference()

# 2. Load and Index Ground Truth
# Builds `filename_to_gt`: image file name -> list of [x1, y1, x2, y2] boxes.
# The encoding is stated explicitly because JSON is UTF-8 by specification,
# while open() would otherwise fall back to the platform's locale encoding.
with open(ANNOTATIONS_PATH, 'r', encoding='utf-8') as f:
    coco_data = json.load(f)

image_id_to_filename = {img['id']: img['file_name'] for img in coco_data['images']}
filename_to_gt = {}
for ann in coco_data['annotations']:
    fname = image_id_to_filename[ann['image_id']]
    # COCO [x,y,w,h] -> Supervision [x1,y1,x2,y2]
    x, y, w, h = ann['bbox']
    # Only images that have at least one annotation end up as keys here.
    filename_to_gt.setdefault(fname, []).append([x, y, x + w, y + h])

# 3. Setup Video List with Limits
# Alphabetical order makes START_INDEX/END_INDEX refer to stable positions.
all_video_files = [f for f in os.listdir(SOURCE_FOLDER) if f.endswith('.mp4')]
all_video_files.sort()
selected_videos = all_video_files[START_INDEX:END_INDEX]

# 4. Storage for Evaluation
# One entry per processed frame; the two lists stay index-aligned.
all_predictions, all_ground_truths = [], []

# Shared drawing helpers used for every frame.
box_annotator = sv.BoxAnnotator()
label_annotator = sv.LabelAnnotator()

def process_frame(frame: np.ndarray, index: int, video_stem: str) -> np.ndarray:
    """Detect birds in one frame, record the evaluation pair, and draw the result.

    Args:
        frame: The video frame handed to ``model.predict``.
        index: Frame index within the video; part of the ground-truth lookup key.
        video_stem: Video file name without extension; part of the lookup key.

    Returns:
        A copy of ``frame`` with boxes and ``Bird <confidence>`` labels drawn.

    Side effects:
        Appends the frame's predictions to ``all_predictions`` and its ground
        truth to ``all_ground_truths`` (both module-level lists).
    """
    # Lookup key in the COCO JSON style, e.g. 001-white_wagtail_000000.jpg
    gt_key = f"{video_stem}_{index:06d}.jpg"

    # Run the detector, keep class id 16 (COCO 'Bird') and relabel it as
    # class 0 so predictions and ground truth share a single class.
    detections = model.predict(frame, confidence=0.15)
    birds = detections[detections.class_id == 16]
    birds.class_id = np.zeros_like(birds.class_id)

    # A frame without an entry in the lookup is scored as having no ground truth.
    boxes = filename_to_gt.get(gt_key, [])
    if not boxes:
        truth = sv.Detections.empty()
    else:
        truth = sv.Detections(
            xyxy=np.array(boxes),
            class_id=np.zeros(len(boxes), dtype=int),
        )

    all_predictions.append(birds)
    all_ground_truths.append(truth)

    # Draw boxes first, then the confidence captions on top of them.
    captions = [f"Bird {conf:.2f}" for conf in birds.confidence]
    canvas = box_annotator.annotate(scene=frame.copy(), detections=birds)
    return label_annotator.annotate(scene=canvas, detections=birds, labels=captions)

# 5. Execution Loop
# Annotates each selected video and, through process_frame, accumulates the
# per-frame predictions and ground truth used by the metrics below.
print(f"üöÄ Starting processing for videos {START_INDEX} through {END_INDEX}...")

for video_name in selected_videos:
    stem = Path(video_name).stem
    source_path = os.path.join(SOURCE_FOLDER, video_name)
    target_path = os.path.join(TARGET_FOLDER, f"{stem}_annotated.mp4")
    print(f"üé¨ Processing: {video_name}")

    # The default argument pins this iteration's stem to the callback.
    sv.process_video(
        source_path=source_path,
        target_path=target_path,
        callback=lambda f, i, _stem=stem: process_frame(f, i, _stem),
    )

# 6. Final Metrics Calculation
print("\nüìä Calculating Performance Metrics...")

# mAP
# The legacy `from_detections` constructors name the ground-truth argument
# `targets`; passing `ground_truth=` raises a TypeError.
map_metric = sv.MeanAveragePrecision.from_detections(
    predictions=all_predictions,
    targets=all_ground_truths,
)

# P, R, F1 via Confusion Matrix
# NOTE(review): ConfusionMatrix.from_detections applies its own default
# conf_threshold / iou_threshold, while predictions were generated with
# confidence=0.15 -- confirm the defaults are the intended operating point.
confusion_matrix = sv.ConfusionMatrix.from_detections(
    predictions=all_predictions,
    targets=all_ground_truths,
    classes=['Bird'],
)

# With a single class the matrix is 2x2. Rows index the ground-truth class and
# columns the predicted class; the extra last row/column means "no object".
tp = confusion_matrix.matrix[0, 0]
fn = confusion_matrix.matrix[0, 1]  # ground-truth bird with no matching prediction
fp = confusion_matrix.matrix[1, 0]  # predicted bird with no matching ground truth

# Guard every ratio against an empty denominator (e.g. no detections at all).
precision = tp / (tp + fp) if (tp + fp) > 0 else 0
recall = tp / (tp + fn) if (tp + fn) > 0 else 0
f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

# 7. Save Reports
# Collect the headline numbers (rounded to four decimals) in a flat dict so the
# same record can be written as JSON and as a one-row CSV.
metrics_report = {"Range_Processed": f"{START_INDEX} to {END_INDEX}"}
metrics_report.update({
    key: round(float(score), 4)
    for key, score in (
        ("mAP_50", map_metric.map50),
        ("mAP_50_95", map_metric.map50_95),
        ("Precision", precision),
        ("Recall", recall),
        ("F1_Score", f1),
    )
})
metrics_report["Total_Frames"] = len(all_predictions)

# Export
with open(os.path.join(TARGET_FOLDER, "range_eval_results.json"), "w") as f:
    f.write(json.dumps(metrics_report, indent=4))

pd.DataFrame([metrics_report]).to_csv(
    os.path.join(TARGET_FOLDER, "range_eval_summary.csv"),
    index=False,
)

print(f"\n‚úÖ Done! Performance for this range: F1={metrics_report['F1_Score']}")

In [None]:
# import cv2
# import supervision as sv
# from rfdetr import RFDETRNano

# # 1. Initialize Model (Same settings as your inference script)
# model = RFDETRNano(
#     patch_size=16,
#     positional_encoding_size=24,
#     resolution=384,
#     out_feature_indexes=[3, 6, 9, 12],
#     num_windows=2,
#     dec_layers=2,
#     pretrain_weights="rf-detr-nano.pth"
# )

# # 2. Paths
# SOURCE_VIDEO = r"D:\Projects\RF_DETR_Wetland\rf_detr\videos\013-black_headed_gull.mp4"
# TARGET_VIDEO = r"D:\Projects\RF_DETR_Wetland\annotated_videos\013-black_headed_gull_annotated.mp4"

# # 3. Setup Video Info and Tools
# video_info = sv.VideoInfo.from_video_path(SOURCE_VIDEO)
# box_annotator = sv.BoxAnnotator()
# label_annotator = sv.LabelAnnotator()

# # 4. Processing Function
# def process_frame(frame: cv2.typing.MatLike, index: int) -> cv2.typing.MatLike:
#     # Run AI prediction on the current frame
#     results = model.predict(frame, confidence=0.15)
    
#     # Filter for ID 16 (Bird)
#     bird_detections = results[results.class_id == 16]
    
#     # Annotate
#     labels = [f"Bird {conf:.2f}" for conf in bird_detections.confidence]
#     annotated_frame = box_annotator.annotate(scene=frame.copy(), detections=bird_detections)
#     annotated_frame = label_annotator.annotate(scene=annotated_frame, detections=bird_detections, labels=labels)
    
#     return annotated_frame

# # 5. Execute
# print(f"Processing {SOURCE_VIDEO}...")
# sv.process_video(
#     source_path=SOURCE_VIDEO,
#     target_path=TARGET_VIDEO,
#     callback=process_frame
# )
# print(f"Video saved at: {TARGET_VIDEO}")

import cv2
import supervision as sv
from rfdetr import RFDETRNano
import os
from pathlib import Path

# 1. Initialize Model (same configuration as the other Nano cells)
model = RFDETRNano(
    patch_size=16, positional_encoding_size=24, resolution=384,
    out_feature_indexes=[3, 6, 9, 12], num_windows=2, dec_layers=2,
    pretrain_weights="rf-detr-nano.pth",
)

# 2. Setup Directories
# Videos are read from SOURCE_FOLDER; annotated copies go to TARGET_FOLDER,
# which is created on demand.
SOURCE_FOLDER = r"D:\Projects\RF_DETR_Wetland\rf_detr\videos"
TARGET_FOLDER = r"D:\Projects\RF_DETR_Wetland\annotated_videos"
os.makedirs(TARGET_FOLDER, exist_ok=True)

# 3. Setup Annotators
box_annotator, label_annotator = sv.BoxAnnotator(), sv.LabelAnnotator()

# 4. Processing Function
def process_frame(frame: cv2.typing.MatLike, index: int) -> cv2.typing.MatLike:
    """Draw bird detections on a single video frame.

    Args:
        frame: The frame handed to ``model.predict``.
        index: Frame index supplied by the video processor; not used here.

    Returns:
        A copy of ``frame`` with boxes and ``Bird <confidence>`` labels drawn.
    """
    # Keep only class id 16 (Bird in COCO/General weights).
    detections = model.predict(frame, confidence=0.15)
    birds = detections[detections.class_id == 16]

    # Boxes go on a copy of the frame; captions are layered on top.
    captions = [f"Bird {conf:.2f}" for conf in birds.confidence]
    canvas = box_annotator.annotate(scene=frame.copy(), detections=birds)
    return label_annotator.annotate(scene=canvas, detections=birds, labels=captions)

# 5. Batch Execution Loop
# Get all .mp4 files in the folder. os.listdir() returns entries in arbitrary
# order, so the list is sorted to keep the processing order (and the log)
# reproducible, matching the evaluation cells of this notebook.
video_files = sorted(f for f in os.listdir(SOURCE_FOLDER) if f.endswith('.mp4'))

print(f"Found {len(video_files)} videos. Starting batch processing...")

for video_filename in video_files:
    source_path = os.path.join(SOURCE_FOLDER, video_filename)

    # Create target filename (e.g., bird_video.mp4 -> bird_video_annotated.mp4)
    target_filename = f"{Path(video_filename).stem}_annotated.mp4"
    target_path = os.path.join(TARGET_FOLDER, target_filename)

    print(f"\n--- Processing: {video_filename} ---")

    # Best-effort batch: a failing video is reported and the loop moves on.
    try:
        sv.process_video(
            source_path=source_path,
            target_path=target_path,
            callback=process_frame
        )
        print(f"Successfully saved to: {target_path}")
    except Exception as e:
        print(f"Error processing {video_filename}: {e}")

print("\n‚úÖ All videos processed!")

In [None]:
# --- EVALUATION SCRIPT ---
# This code evaluates the performance of the RF-DETR Nano model on a custom dataset (the visual wetland dataset) using COCO-format annotations and supervision metrics for mAP, precision, recall, and F1 score. It processes a specified range of videos, extracts frames, runs predictions, and compares them to ground-truth annotations to compute performance metrics, which are then saved in JSON and CSV formats for analysis.
# The model is initialized with specific configurations and pre-trained weights. The script loads the COCO annotations, maps them to filenames, and processes videos in a defined range. For each frame, it predicts bird detections, compares them to the ground truth, and accumulates results for the final metric calculations. Finally, it computes mAP, precision, recall, and F1 score, saving the results in both JSON and CSV formats for further analysis.
# The code uses supervision metrics for mAP, precision, recall, and F1 score, and can be easily modified to work with different datasets and models.
# The ground truths for each frame have been extracted from the original visual wetland dataset and converted into the _annotations.coco.json format for evaluation.
# NOTE(review): as written, the code in this cell evaluates every 5th frame, reports mAP only (50:95, 50, 75), and writes a single CSV report; precision/recall/F1 and the JSON export are produced by the range-evaluation cell at the top of the notebook.


import cv2
import supervision as sv
from rfdetr import RFDETRNano
import os
import json
import pandas as pd
from pathlib import Path
import numpy as np

# --- CONFIG ---
SOURCE_FOLDER = r"D:\Projects\RF_DETR_Wetland\rf_detr\videos"
ANNOTATIONS_PATH = r"D:\Projects\RF_DETR_Wetland\rf_detr\_annotations.coco.json"
TARGET_FOLDER = r"D:\Projects\RF_DETR_Wetland\evaluation_only"
os.makedirs(TARGET_FOLDER, exist_ok=True)

# Slice bounds applied to the sorted video list (start inclusive, stop exclusive).
START_IDX = 0
STOP_IDX = 178

# 1. Initialize Model
model = RFDETRNano(
    patch_size=16, positional_encoding_size=24, resolution=384,
    out_feature_indexes=[3, 6, 9, 12], num_windows=2, dec_layers=2,
    pretrain_weights="rf-detr-nano.pth"
)
# Optimization is best-effort: a failure is reported and the evaluation
# continues with the unoptimized model. Catching Exception (instead of a bare
# except) keeps KeyboardInterrupt/SystemExit working and makes the skip visible.
try:
    model.optimize_for_inference()
except Exception as e:
    print(f"Optimization skipped: {e}")

# 2. Initialize Evaluator (Updated for v0.27.0+)
# We use the full path to the Metric class to avoid the DataClass conflict
map_metric = sv.metrics.MeanAveragePrecision()

# 3. Load Ground Truth Mapping
# Builds `filename_to_gt`: image file name -> list of [x1, y1, x2, y2] boxes.
# The encoding is stated explicitly because JSON is UTF-8 by specification,
# while open() would otherwise fall back to the platform's locale encoding.
with open(ANNOTATIONS_PATH, 'r', encoding='utf-8') as f:
    coco_data = json.load(f)

image_id_to_filename = {img['id']: img['file_name'] for img in coco_data['images']}
filename_to_gt = {}
for ann in coco_data['annotations']:
    fname = image_id_to_filename[ann['image_id']]
    # COCO [x, y, w, h] -> [x1, y1, x2, y2]
    x, y, w, h = ann['bbox']
    # Only images that have at least one annotation end up as keys here.
    filename_to_gt.setdefault(fname, []).append([x, y, x + w, y + h])

# 4. Prepare Videos
all_video_files = sorted([f for f in os.listdir(SOURCE_FOLDER) if f.endswith('.mp4')])
video_batch = all_video_files[START_IDX:STOP_IDX]

print(f"üöÄ Starting evaluation on {len(video_batch)} videos...")

# 5. Process Videos
# Every 5th frame of each video is scored against its ground truth.
for video_name in video_batch:
    stem = Path(video_name).stem
    source_path = os.path.join(SOURCE_FOLDER, video_name)
    cap = cv2.VideoCapture(source_path)
    frame_idx = 0

    print(f"üé¨ Processing: {video_name}")

    # try/finally guarantees the capture is released even if prediction or the
    # metric update raises part-way through a video.
    try:
        if not cap.isOpened():
            # Previously an unreadable video was skipped without any trace.
            print(f"Could not open video, skipping: {source_path}")
            continue

        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                break

            if frame_idx % 5 == 0:
                frame_filename = f"{stem}_{frame_idx:06d}.jpg"

                # Predict
                results = model.predict(frame, confidence=0.15)
                bird_preds = results[results.class_id == 16]
                # Force class 0 for agnostic evaluation
                bird_preds.class_id = np.zeros_like(bird_preds.class_id)

                # Get GT
                # NOTE(review): a frame with no entry in filename_to_gt is scored as
                # "no birds present", so every prediction on it counts against the
                # model -- confirm every sampled frame exists in the COCO file.
                gt_boxes = filename_to_gt.get(frame_filename, [])
                gt_detections = sv.Detections(
                    xyxy=np.array(gt_boxes),
                    class_id=np.zeros(len(gt_boxes), dtype=int)
                ) if gt_boxes else sv.Detections.empty()

                # Update evaluator incrementally
                map_metric.update(bird_preds, gt_detections)

            frame_idx += 1
    finally:
        cap.release()

# 6. Compute & Save
print("\nüìä Generating Full Dataset Report...")
result = map_metric.compute()

# Console summary of the three headline mAP values between two rulers.
print(
    "-" * 30,
    f"mAP @ 50:95: {result.map50_95:.4f}",
    f"mAP @ 50:    {result.map50:.4f}",
    f"mAP @ 75:    {result.map75:.4f}",
    "-" * 30,
    sep="\n",
)

# 7. Save to CSV
# The metric result is exported through its to_pandas() frame, without the index.
csv_path = os.path.join(TARGET_FOLDER, "evaluation_report.csv")
report_df = result.to_pandas()
report_df.to_csv(csv_path, index=False)

print(f"‚úÖ Results saved to {csv_path}")

In [None]:
import cv2
import supervision as sv
from rfdetr import RFDETRMedium  # Changed from RFDETRNano
import os
import json
import pandas as pd
from pathlib import Path
import numpy as np
# import time

# --- CONFIG ---
# Input videos, COCO ground truth, and the folder receiving the CSV report.
SOURCE_FOLDER = r"D:\Projects\RF_DETR_Wetland\rf_detr\videos"
ANNOTATIONS_PATH = r"D:\Projects\RF_DETR_Wetland\rf_detr\_annotations.coco.json"
TARGET_FOLDER = r"D:\Projects\RF_DETR_Wetland\evaluation_only"
os.makedirs(TARGET_FOLDER, exist_ok=True)

# Slice bounds applied to the sorted video list (start inclusive, stop exclusive).
START_IDX, STOP_IDX = 0, 178

# 1. Initialize Model (Medium variant)
model = RFDETRMedium(
    pretrain_weights="rf-detr-medium.pth",
    resolution=416,
    patch_size=16,
)

# Optional: optimize for inference. Best-effort -- a failure is reported and
# the evaluation continues with the unoptimized model.
try:
    model.optimize_for_inference()
except Exception as e:
    print(f"‚ö†Ô∏è Optimization skipped: {e}")

# 2. Initialize Evaluator
map_metric = sv.metrics.MeanAveragePrecision()

# 3. Load Ground Truth Mapping
# Builds `filename_to_gt`: image file name -> list of [x1, y1, x2, y2] boxes.
# The encoding is stated explicitly because JSON is UTF-8 by specification,
# while open() would otherwise fall back to the platform's locale encoding.
with open(ANNOTATIONS_PATH, 'r', encoding='utf-8') as f:
    coco_data = json.load(f)

image_id_to_filename = {img['id']: img['file_name'] for img in coco_data['images']}
filename_to_gt = {}
for ann in coco_data['annotations']:
    fname = image_id_to_filename[ann['image_id']]
    # COCO [x, y, w, h] -> [x1, y1, x2, y2]
    x, y, w, h = ann['bbox']
    # Only images that have at least one annotation end up as keys here.
    filename_to_gt.setdefault(fname, []).append([x, y, x + w, y + h])

# 4. Prepare Videos
all_video_files = sorted([f for f in os.listdir(SOURCE_FOLDER) if f.endswith('.mp4')])
video_batch = all_video_files[START_IDX:STOP_IDX]

print(f"üöÄ Starting evaluation on {len(video_batch)} videos using RF-DETR-Medium...")

# 5. Process Videos
# Every 5th frame of each video is scored against its ground truth.
for video_name in video_batch:
    stem = Path(video_name).stem
    source_path = os.path.join(SOURCE_FOLDER, video_name)
    cap = cv2.VideoCapture(source_path)
    frame_idx = 0

    print(f"üé¨ Processing: {video_name}")

    # try/finally guarantees the capture is released even if prediction or the
    # metric update raises part-way through a video.
    try:
        if not cap.isOpened():
            # Previously an unreadable video was skipped without any trace.
            print(f"Could not open video, skipping: {source_path}")
            continue

        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                break

            if frame_idx % 5 == 0:
                frame_filename = f"{stem}_{frame_idx:06d}.jpg"

                # Predict
                # start_time = time.time()
                results = model.predict(frame, confidence=0.15)
                # inference_time = time.time() - start_time
                # Filter for bird class (ID 16 in COCO)
                bird_preds = results[results.class_id == 16]
                # Force class 0 for agnostic evaluation
                bird_preds.class_id = np.zeros_like(bird_preds.class_id)

                # print(f"Frame: {frame_idx} | Time: {inference_time:.4f}s ({1/inference_time:.1f} FPS) | Birds: {len(bird_preds)}")

                # Get GT
                # NOTE(review): a frame with no entry in filename_to_gt is scored as
                # "no birds present", so every prediction on it counts against the
                # model -- confirm every sampled frame exists in the COCO file.
                gt_boxes = filename_to_gt.get(frame_filename, [])
                gt_detections = sv.Detections(
                    xyxy=np.array(gt_boxes),
                    class_id=np.zeros(len(gt_boxes), dtype=int)
                ) if gt_boxes else sv.Detections.empty()

                # Update evaluator incrementally
                map_metric.update(bird_preds, gt_detections)

            frame_idx += 1
    finally:
        cap.release()

# 6. Compute & Save
print("\nüìä Generating Full Dataset Report...")
result = map_metric.compute()

# Console summary of the three headline mAP values between two rulers.
print(
    "-" * 30,
    f"mAP @ 50:95: {result.map50_95:.4f}",
    f"mAP @ 50:    {result.map50:.4f}",
    f"mAP @ 75:    {result.map75:.4f}",
    "-" * 30,
    sep="\n",
)

# 7. Save to CSV
# The metric result is exported through its to_pandas() frame, without the index.
csv_path = os.path.join(TARGET_FOLDER, "evaluation_report_medium_10.csv")
report_df = result.to_pandas()
report_df.to_csv(csv_path, index=False)

print(f"‚úÖ Results saved to {csv_path}")

In [None]:
# from rfdetr.main import download_pretrain_weights

# try:
#     print("Attempting to force-download Base weights...")
#     download_pretrain_weights("rf-detr-base.pth", redownload=True)
#     print("Download successful!")
# except Exception as e:
#     print(f"Download failed: {e}")

In [None]:
#test on base model 

import cv2
import supervision as sv
from rfdetr import RFDETRBase # Changed from RFDETRBase
import os
import json
import pandas as pd
from pathlib import Path
import numpy as np
# import time

# --- CONFIG ---
# Input videos, COCO ground truth, and the folder receiving the CSV report.
SOURCE_FOLDER = r"D:\Projects\RF_DETR_Wetland\rf_detr\videos"
ANNOTATIONS_PATH = r"D:\Projects\RF_DETR_Wetland\rf_detr\_annotations.coco.json"
TARGET_FOLDER = r"D:\Projects\RF_DETR_Wetland\evaluation_only"
os.makedirs(TARGET_FOLDER, exist_ok=True)

# Slice bounds applied to the sorted video list (start inclusive, stop exclusive).
START_IDX, STOP_IDX = 0, 50

# 1. Initialize Model (Base variant, default architecture settings)
model = RFDETRBase(pretrain_weights="rf-detr-base.pth")

# Optional: optimize for inference. Best-effort -- a failure is reported and
# the evaluation continues with the unoptimized model.
try:
    model.optimize_for_inference()
except Exception as e:
    print(f"‚ö†Ô∏è Optimization skipped: {e}")

# 2. Initialize Evaluator
map_metric = sv.metrics.MeanAveragePrecision()

# 3. Load Ground Truth Mapping
# Builds `filename_to_gt`: image file name -> list of [x1, y1, x2, y2] boxes.
# The encoding is stated explicitly because JSON is UTF-8 by specification,
# while open() would otherwise fall back to the platform's locale encoding.
with open(ANNOTATIONS_PATH, 'r', encoding='utf-8') as f:
    coco_data = json.load(f)

image_id_to_filename = {img['id']: img['file_name'] for img in coco_data['images']}
filename_to_gt = {}
for ann in coco_data['annotations']:
    fname = image_id_to_filename[ann['image_id']]
    # COCO [x, y, w, h] -> [x1, y1, x2, y2]
    x, y, w, h = ann['bbox']
    # Only images that have at least one annotation end up as keys here.
    filename_to_gt.setdefault(fname, []).append([x, y, x + w, y + h])

# 4. Prepare Videos
all_video_files = sorted([f for f in os.listdir(SOURCE_FOLDER) if f.endswith('.mp4')])
video_batch = all_video_files[START_IDX:STOP_IDX]

print(f"üöÄ Starting evaluation on {len(video_batch)} videos using RF-DETR-Base...")

# 5. Process Videos
# Every 5th frame of each video is scored against its ground truth.
for video_name in video_batch:
    stem = Path(video_name).stem
    source_path = os.path.join(SOURCE_FOLDER, video_name)
    cap = cv2.VideoCapture(source_path)
    frame_idx = 0

    print(f"üé¨ Processing: {video_name}")

    # try/finally guarantees the capture is released even if prediction or the
    # metric update raises part-way through a video.
    try:
        if not cap.isOpened():
            # Previously an unreadable video was skipped without any trace.
            print(f"Could not open video, skipping: {source_path}")
            continue

        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                break

            if frame_idx % 5 == 0:
                frame_filename = f"{stem}_{frame_idx:06d}.jpg"

                # Predict
                # start_time = time.time()
                results = model.predict(frame, confidence=0.15)
                # inference_time = time.time() - start_time
                # Filter for bird class (ID 16 in COCO)
                bird_preds = results[results.class_id == 16]
                # Force class 0 for agnostic evaluation
                bird_preds.class_id = np.zeros_like(bird_preds.class_id)

                # print(f"Frame: {frame_idx} | Time: {inference_time:.4f}s ({1/inference_time:.1f} FPS) | Birds: {len(bird_preds)}")

                # Get GT
                # NOTE(review): a frame with no entry in filename_to_gt is scored as
                # "no birds present", so every prediction on it counts against the
                # model -- confirm every sampled frame exists in the COCO file.
                gt_boxes = filename_to_gt.get(frame_filename, [])
                gt_detections = sv.Detections(
                    xyxy=np.array(gt_boxes),
                    class_id=np.zeros(len(gt_boxes), dtype=int)
                ) if gt_boxes else sv.Detections.empty()

                # Update evaluator incrementally
                map_metric.update(bird_preds, gt_detections)

            frame_idx += 1
    finally:
        cap.release()

# 6. Compute & Save
print("\nüìä Generating Full Dataset Report...")
result = map_metric.compute()

# Console summary of the three headline mAP values between two rulers.
print(
    "-" * 30,
    f"mAP @ 50:95: {result.map50_95:.4f}",
    f"mAP @ 50:    {result.map50:.4f}",
    f"mAP @ 75:    {result.map75:.4f}",
    "-" * 30,
    sep="\n",
)

# 7. Save to CSV
# NOTE(review): the file name says "full", but this cell's STOP_IDX is 50 --
# confirm the name matches the range that was actually evaluated.
csv_path = os.path.join(TARGET_FOLDER, "evaluation_report_base_full.csv")
report_df = result.to_pandas()
report_df.to_csv(csv_path, index=False)

print(f"‚úÖ Results saved to {csv_path}")

[2026-02-17 19:01:04] [INFO] rf-detr - Loading pretrain weights


`loss_type=None` was set in the config but it is unrecognized. Using the default loss: `ForCausalLMLoss`.


üöÄ Starting evaluation on 50 videos using RF-DETR-Base...
üé¨ Processing: 001-white_wagtail.mp4
üé¨ Processing: 002-squacco_heron.mp4
üé¨ Processing: 003-squacco_heron.mp4
üé¨ Processing: 004-squacco_heron.mp4
üé¨ Processing: 005-squacco_heron.mp4
üé¨ Processing: 006-yellow_legged_gull.mp4
üé¨ Processing: 007-yellow_legged_gull.mp4
üé¨ Processing: 008-yellow_legged_gull.mp4
üé¨ Processing: 009-yellow_legged_gull.mp4
üé¨ Processing: 010-yellow_legged_gull.mp4
üé¨ Processing: 011-yellow_legged_gull.mp4
