In [1]:
#Import required libraries and functions
from sahi import AutoDetectionModel
from sahi.utils.cv import read_image, read_image_as_pil
from sahi.utils.file import Path, increment_path, list_files, save_json, save_pickle, download_from_url
from sahi.predict import get_prediction, get_sliced_prediction, predict, agg_prediction, get_prediction_batched, get_sliced_prediction_batched, predict 
from sahi.prediction import visualize_object_predictions
from sahi.postprocess.utils import ObjectPredictionList, has_match, merge_object_prediction_pair
from IPython.display import Image
from numpy import asarray
from tqdm import tqdm
from PIL import Image
from sahi.prediction import ObjectPrediction, PredictionResult
from pathlib import Path
from multiprocessing import Pool, cpu_count
from torchvision.ops import nms, batched_nms, clip_boxes_to_image
import cv2
import math
import os
import time
import json
import numpy as np
import time
import torch

In [3]:
#create the model instance
#pretrained_yolo_model_path = 'models/yolov8/last.pt'
#yolov8_model_path_baseline = 'models/baseline/yolov8n.pt'
#fined_tuned_yolov8_model_path = 'models/fine-tuned/full/YOLOv8n/last.pt'
fined_tuned__mixed_yolov8_model_path = 'models/fine-tuned/mixed/YOLOv8n/last.pt'
detection_model = AutoDetectionModel.from_pretrained(
        model_type='yolov8',
        model_path=fined_tuned__mixed_yolov8_model_path,
        confidence_threshold=0.25,
        device="cuda:0", # or 'cpu'
)

In [4]:
#functions to set the overlap ratio and slice size as per detected objects count in each slice
def get_slice_parameters(object_density, slice_size):
    
    if object_density >= 50:
        #slice_size = min_dim // 4
        slice_width = slice_size
        slice_height = slice_size
        overlap_width_ratio = 0.5
        overlap_height_ratio = 0.5
    elif 25 <= object_density < 50:
        #slice_size = min_dim // 2
        slice_width = slice_size
        slice_height = slice_size
        overlap_width_ratio = 0.25
        overlap_height_ratio = 0.25
    elif 10 <= object_density < 25:
        #slice_size = min_dim // 2
        slice_width = slice_size
        slice_height = slice_size
        overlap_width_ratio = 0.15
        overlap_height_ratio = 0.15
    else:
        return

    return slice_width, slice_height, overlap_width_ratio, overlap_height_ratio

In [5]:
def merged_preds_to_coco(preds, image_id):
    coco_predictions = []
    for pred in preds:
        try:
            coco = pred.to_coco_prediction(image_id=image_id)
            coco_dict = {
                "image_id": coco.image_id,
                "bbox": coco.bbox,
                "score": coco.score,
                "category_id": coco.category_id,
                "segmentation": coco.segmentation,
                "iscrowd": coco.iscrowd,
                "area": coco.area,
            }
            # Optionally include category_name (not used in COCO eval)
            # coco_dict["category_name"] = coco.category_name

            # Only add if bbox is valid (non-empty)
            if coco_dict["bbox"]:
                coco_predictions.append(coco_dict)
        except Exception as e:
            print(f"Failed to convert to COCO dict: {e}")
    return coco_predictions

In [6]:
def nms_merge(predictions, iou_threshold=0.5):
    if not predictions:
        return []
    boxes = torch.tensor([p.bbox.to_xyxy() for p in predictions])
    scores = torch.tensor([p.score.value for p in predictions])
    keep_indices = nms(boxes, scores, iou_threshold).tolist()
    return [predictions[i] for i in keep_indices]

In [7]:
# Apply Non-Maximum Suppression (NMS)
def apply_nms(detections, iou_threshold=0.5):
    boxes, scores, labels = [], [], []
    for det in detections:
        boxes.append(det.bbox.to_xyxy())
        scores.append(det.score.value)
        labels.append(det.category.id)
    boxes = torch.tensor(boxes, dtype=torch.float32)
    scores = torch.tensor(scores, dtype=torch.float32)
    labels = torch.tensor(labels)

    keep_indices = []
    unique_labels = labels.unique()
    for label in unique_labels:
        idxs = (labels == label).nonzero(as_tuple=False).squeeze(1)
        class_boxes = boxes[idxs]
        class_scores = scores[idxs]
        keep = nms(class_boxes, class_scores, iou_threshold)
        keep_indices.extend(idxs[keep].tolist())

    return [detections[i] for i in keep_indices]

In [8]:
# Function to get image details by image_id
def get_image_id(coco_data, image_name):
    for image in coco_data["images"]:
        file_name = Path(image['file_name']).stem
        if file_name == image_name:
            return image['id']
    return None

In [9]:
# --- add these imports at the top if not present ---
import torch
import torch.nn as nn
import numpy as np
from PIL import Image

# --- VDSR model (standard 20-layer, 64 features) ---
class VDSR(nn.Module):
    def __init__(self, num_channels=1, depth=20, features=64):
        super().__init__()
        layers = [nn.Conv2d(num_channels, features, 3, 1, 1), nn.ReLU(inplace=True)]
        for _ in range(depth - 2):
            layers += [nn.Conv2d(features, features, 3, 1, 1), nn.ReLU(inplace=True)]
        layers += [nn.Conv2d(features, num_channels, 3, 1, 1)]
        self.net = nn.Sequential(*layers)

    @torch.inference_mode()
    def forward(self, x):
        return self.net(x)

def _rgb_to_ycbcr(img):
    img = img.astype(np.float32)
    r,g,b = img[...,0], img[...,1], img[...,2]
    y  =  0.299*r + 0.587*g + 0.114*b
    cb = -0.168736*r - 0.331264*g + 0.5*b + 128.0
    cr =  0.5*r - 0.418688*g - 0.081312*b + 128.0
    return y, cb, cr

def _ycbcr_to_rgb(y, cb, cr):
    y  = y.astype(np.float32)
    cb = cb.astype(np.float32) - 128.0
    cr = cr.astype(np.float32) - 128.0
    r = y + 1.402 * cr
    g = y - 0.344136 * cb - 0.714136 * cr
    b = y + 1.772 * cb
    return np.clip(np.stack([r,g,b], -1), 0, 255).astype(np.uint8)

def _bicubic_resize(arr_u8, scale):
    H, W = arr_u8.shape[:2]
    return np.array(Image.fromarray(arr_u8).resize((W*scale, H*scale), Image.BICUBIC))

@torch.inference_mode()
def apply_vdsr_2x_on_pil(pil_img, vdsr_model, device='cuda'):
    """
    2× VDSR on Y channel; Cb/Cr bicubic. Returns upscaled PIL.Image.
    Assumes vdsr_model expects Y in [0,1] and outputs residual.
    """
    rgb = np.array(pil_img.convert('RGB'))
    y, cb, cr = _rgb_to_ycbcr(rgb)
    y2  = _bicubic_resize(y.astype(np.uint8), 2).astype(np.float32)
    cb2 = _bicubic_resize(cb.astype(np.uint8), 2).astype(np.float32)
    cr2 = _bicubic_resize(cr.astype(np.uint8), 2).astype(np.float32)

    dev = device if (device == 'cuda' and torch.cuda.is_available()) else 'cpu'
    vdsr_model = vdsr_model.to(dev).eval()

    tin = torch.from_numpy(y2/255.0).float().unsqueeze(0).unsqueeze(0).to(dev)
    res = vdsr_model(tin)
    y_sr = (tin + res).clamp(0,1).squeeze().cpu().numpy() * 255.0
    y_sr = np.clip(y_sr, 0, 255).astype(np.uint8)

    out = _ycbcr_to_rgb(y_sr, cb2, cr2)
    return Image.fromarray(out)


In [15]:
def predict_multilevel_fine_sliced_images(
    input_folder,
    dataset_json_path,
    detection_model,
    base_slice_size=512,
    # --- new optional knobs (all default off for backward compatibility) ---
    vdsr_model=None,            # pass a torch.nn.Module with VDSR weights for 2x
    vdsr_device='cuda',
    apply_vdsr_on_fine=True     # set False to disable even if model is provided
):
    print("\n***********************************************")
    print("Adaptive MultiLevel SAHI Prediction")
    print("*************************************************")
    
    name = "exp"
    save_dir = Path(increment_path(Path("sliced_predictions") / name, exist_ok=False))
    os.makedirs(save_dir, exist_ok=True)

    if dataset_json_path:
        with open(dataset_json_path, "r") as file:
            data = json.load(file)
        
    vis_params = {
        "bbox_thickness": 2,
        "text_size": 0.5,
        "text_thickness": 1,
        "hide_labels": False,
        "hide_conf": False,
        "format": "png"
    }

    image_files = [
        f for f in os.listdir(input_folder)
        if f.lower().endswith((".jpg", ".jpeg", ".png"))
    ]

    print(f"\nRunning fine slicing prediction on {len(image_files)} image(s)...")
    total_prediction_time = 0.0
    grand_total_predictions = 0
    all_coco_preds = []

    for filename in image_files:
        image_path = os.path.join(input_folder, filename)
        image_pil = Image.open(image_path).convert("RGB")
        image_np = np.array(image_pil)
        image_h, image_w = image_np.shape[:2]
        filename_wo_ext = Path(filename).stem
        total_prediction_count = 0
        print("*****************************************")
        print("File Name: ", filename_wo_ext)
        print("*****************************************")
        
        img_id = get_image_id(data, filename_wo_ext) if dataset_json_path else None
        all_object_predictions = []

        # Base prediction
        t0 = time.time()
        intial_prediction = get_prediction(image_path, detection_model)
        t_base = (time.time() - t0)
        print(f"Base pred time: {t_base * 1000:.2f} ms")
        total_prediction_time += t_base
        print(f"Total Based pred time: {total_prediction_time * 1000:.2f} ms")
        full_preds = intial_prediction.object_prediction_list
        full_object_density = len(full_preds)
        print("Full Image Object Density:", full_object_density)

        for pred in full_preds:
            all_object_predictions.append(pred)
        total_prediction_count += len(all_object_predictions)
    
        # Split image into 2x2 grid
        grid_h, grid_w = image_h // 2, image_w // 2

        for row in range(2):
            for col in range(2):
                x1, y1 = col * grid_w, row * grid_h
                x2, y2 = min(x1 + grid_w, image_w), min(y1 + grid_h, image_h)
                sub_img = image_pil.crop((x1, y1, x2, y2))
                print("Cropped Image Dimension:", x1, y1, x2, y2)

                time_start = time.time()
                base_pred = get_prediction(sub_img, detection_model)
                time_end = time.time() - time_start
                print("Initial Prediction time is: {:.2f} ms".format(time_end * 1000))

                coarse_preds = base_pred.object_prediction_list
                object_density = len(coarse_preds)
                print("Slice Object Density:", object_density)
                
                slice_params = get_slice_parameters(object_density, base_slice_size)
                iteration_time = time_end
                preds = coarse_preds.copy()
                
                if slice_params:
                    slice_width, slice_height, overlap_w, overlap_h = slice_params
                    print("********* Slice Parameters ***********")
                    print("Slice Width: ", slice_width)
                    print("Slice Height: ", slice_height)
                    print("Overlap Width Ratio: ", overlap_w)
                    print("Overlap Height Ratio: ", overlap_h)

                    # -------- VDSR fine-level hook (2× on sub_img BEFORE fine slicing) --------
                    use_vdsr = (vdsr_model is not None) and apply_vdsr_on_fine
                    if use_vdsr:
                        t_sr0 = time.time()
                        sub_img_sr = apply_vdsr_2x_on_pil(sub_img, vdsr_model, device=vdsr_device)
                        t_sr = time.time() - t_sr0
                        print(f"VDSR(2x) time: {t_sr*1000:.2f} ms on this sub-image")

                        # run fine slicing on the SR image with scaled slice sizes
                        sr_slice_w  = slice_width  * 2
                        sr_slice_h  = slice_height * 2
                        min_area_sr = int(16 * (2**2))  # keep min-area consistent after downscale

                        time_start_slice = time.time()
                        sliced_pred = get_sliced_prediction(
                            sub_img_sr,
                            detection_model,
                            slice_height=sr_slice_h,
                            slice_width=sr_slice_w,
                            overlap_height_ratio=overlap_h,
                            overlap_width_ratio=overlap_w,
                            postprocess_type="NMS",
                            postprocess_match_metric="IOU",
                            postprocess_match_threshold=0.3,
                            postprocess_min_area=min_area_sr,
                            verbose=0
                        )
                        time_end_slice = time.time() - time_start_slice
                        print("Sliced Prediction (VDSR) time is: {:.2f} ms".format(time_end_slice * 1000))

                        # scale predictions back from SR (2×) to sub_img scale
                        fine_preds = sliced_pred.object_prediction_list
                        for p in fine_preds:
                            p.bbox.minx *= 0.5
                            p.bbox.maxx *= 0.5
                            p.bbox.miny *= 0.5
                            p.bbox.maxy *= 0.5
                    else:
                        # regular fine slicing on the original sub_img
                        time_start_slice = time.time()
                        sliced_pred = get_sliced_prediction(
                            sub_img,
                            detection_model,
                            slice_height=slice_height,
                            slice_width=slice_width,
                            overlap_height_ratio=overlap_h,
                            overlap_width_ratio=overlap_w,
                            postprocess_type="NMS",
                            postprocess_match_metric="IOU",
                            postprocess_match_threshold=0.3,
                            postprocess_min_area=16,
                            verbose=0
                        )
                        time_end_slice = time.time() - time_start_slice
                        print("Sliced Prediction time is: {:.2f} ms".format(time_end_slice * 1000))
                        fine_preds = sliced_pred.object_prediction_list
                    # -----------------------------------------------------------------------

                    iteration_time += time_end_slice
                    print("Pre Adaptive Slicing: ", len(preds))
                    preds.extend(fine_preds)
                    print("Post Adaptive Slicing: ", len(preds))
                else:
                    print("Prediction time is: {:.2f} ms".format(time_end * 1000))

                total_prediction_time += iteration_time
                
                # Offset bounding boxes back to original image space
                for pred in preds:
                    pred.bbox.minx += x1
                    pred.bbox.maxx += x1
                    pred.bbox.miny += y1
                    pred.bbox.maxy += y1
                    all_object_predictions.append(pred)
        
                total_prediction_count += len(preds)

        print("________________________________________________")
        print("Total Prediction Count (Crop+Slice): ", len(all_object_predictions))
        
        # Merge all predictions for image
        merged_preds = apply_nms(all_object_predictions, iou_threshold=0.5)
        
        # Convert merged predictions to COCO format
        if dataset_json_path:
            coco_preds = merged_preds_to_coco(merged_preds, img_id)
            all_coco_preds.extend(coco_preds)
            
        # Visualization
        visualize_object_predictions(
            image=np.ascontiguousarray(image_pil),
            object_prediction_list=merged_preds,
            rect_th=vis_params["bbox_thickness"],
            text_size=vis_params["text_size"],
            text_th=vis_params["text_thickness"],
            hide_labels=vis_params["hide_labels"],
            hide_conf=vis_params["hide_conf"],
            output_dir=save_dir,
            file_name=filename_wo_ext,
            export_format=vis_params["format"]
        )

        grand_total_predictions += total_prediction_count

    if dataset_json_path:
        save_path = str(save_dir / "result.json")
        save_json(all_coco_preds, save_path)
   
    print(f"\nCompleted {len(image_files)} images.")
    print("Total Prediction Count (Before NMS Merge): ", grand_total_predictions)
    print("Total Prediction Count (After Final NMS Merge): ", len(merged_preds))
    print("Total Prediction time for all images is: {:.2f} ms".format(total_prediction_time * 1000))
    print(f"Prediction results are successfully exported to {save_dir}")


#### **Single Image**

In [12]:
# 1) Load your pretrained VDSR x2 checkpoint
vdsr = VDSR()
state = torch.load("./VDSR-PyTorch/weights/vdsr-TB291-fef487db.pth.tar", map_location="cpu")
sd = state.get("state_dict", state)
sd = {k.replace("module.", ""): v for k, v in sd.items()}
vdsr.load_state_dict(sd, strict=False)

source_folder = './single_test/images'
json_path = "./subset_visdrone_test_990.json"
slice_size = 256

# 2) Call with SR enabled on fine slices
predict_multilevel_fine_sliced_images(
    input_folder=source_folder,
    dataset_json_path=json_path,
    detection_model=detection_model,
    base_slice_size=slice_size,
    vdsr_model=vdsr,      # enables SR
    #sr_scale=2,           # keep 2× for VDSR
    vdsr_device='cuda'    # or 'cpu'
)


***********************************************
Adaptive MultiLevel SAHI Prediction
*************************************************

Running fine slicing prediction on 1 image(s)...
*****************************************
File Name:  0000006_06773_d_0000018
*****************************************


  return F.conv2d(input, weight, bias, self.stride,


Base pred time: 593.40 ms
Total Based pred time: 593.40 ms
Full Image Object Density: 47
Cropped Image Dimension: 0 0 680 382
Initial Prediction time is: 9.50 ms
Slice Object Density: 20
********* Slice Parameters ***********
Slice Width:  256
Slice Height:  256
Overlap Width Ratio:  0.15
Overlap Height Ratio:  0.15
VDSR(2x) time: 86.89 ms on this sub-image
Sliced Boxes Count: 6
POST PROCESS:  NMS
Confidence Score:  0.3
Original Prediction Count 42
Final Bounding Box Count (NMS):  13
Final Bounding Box Count (NMS):  1
Final Bounding Box Count (NMS):  5
Final Bounding Box Count (NMS):  2
Final Bounding Box Count (NMS):  3
Sliced Prediction (VDSR) time is: 111.61 ms
Pre Adaptive Slicing:  20
Post Adaptive Slicing:  44
Cropped Image Dimension: 680 0 1360 382
Initial Prediction time is: 7.76 ms
Slice Object Density: 7
Prediction time is: 7.76 ms
Cropped Image Dimension: 0 382 680 764
Initial Prediction time is: 7.60 ms
Slice Object Density: 11
********* Slice Parameters ***********
Slice W

In [13]:
#USING Adaptive-Optimized-NMS-IoU METHOD (NMS) --> min_area - 16
!sahi coco evaluate --dataset_json_path './subset_visdrone_test_990.json' --result_json_path './sliced_predictions/exp1072/result.json'

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.01s).
Accumulating evaluation results...
DONE (t=0.00s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 ] = 0.449
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=500 ] = 0.864
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=500 ] = 0.486
 Average Precision  (AP) @[ IoU=0.50      | area= small | maxDets=500 ] = 1.000
 Average Precision  (AP) @[ IoU=0.50      | area=medium | maxDets=500 ] = 0.832
 Average Precision  (AP) @[ IoU=0.50      | area= large | maxDets=500 ] = 0.851
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=500 ] = 0.432
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=500 ] = 0.495
 Average Precision  (AP) @[ IoU=0.50:0.95 | a

In [14]:
!python ../GOIS/scripts/evaluate_prediction.py --ground_truth_path "../sahi/subset_visdrone_test_990.json" --predictions_path "./sliced_predictions/exp1072/result.json" --iou_type bbox

Evaluating predictions...
Ground Truth: ../sahi/subset_visdrone_test_990.json
Predictions: ./sliced_predictions/exp1072/result.json
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.01s).
Accumulating evaluation results...
DONE (t=0.01s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.449
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.864
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.486
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.432
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.495
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.361
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.041
 Average Reca

#### **Subset of 15 images**

In [16]:
# Load VDSR x2 weights (e.g., from Lornatang or your own)  --> mixed yolov8
vdsr = VDSR()
state = torch.load("./VDSR-PyTorch/weights/vdsr-TB291-fef487db.pth.tar", map_location='cpu')
sd = state.get('state_dict', state)
sd = {k.replace('module.', ''): v for k, v in sd.items()}
vdsr.load_state_dict(sd, strict=False)

source_folder = './test_subsets/v5/images'
json_path = "./subset_visdrone_test_data_15_v5.json"
slice_size = 256

# Call your function (unchanged calls still work; these args are optional)
predict_multilevel_fine_sliced_images(
    input_folder=source_folder,
    dataset_json_path=json_path,
    detection_model=detection_model,
    base_slice_size=slice_size,
    vdsr_model=vdsr,          # <— enable VDSR on fine level
    vdsr_device='cuda',       # or 'cpu'
    apply_vdsr_on_fine=True
)



***********************************************
Adaptive MultiLevel SAHI Prediction
*************************************************

Running fine slicing prediction on 15 image(s)...
*****************************************
File Name:  9999963_00000_d_0000057
*****************************************
Base pred time: 94.44 ms
Total Based pred time: 94.44 ms
Full Image Object Density: 35
Cropped Image Dimension: 0 0 700 525
Initial Prediction time is: 9.19 ms
Slice Object Density: 2
Prediction time is: 9.19 ms
Cropped Image Dimension: 700 0 1400 525
Initial Prediction time is: 8.90 ms
Slice Object Density: 22
********* Slice Parameters ***********
Slice Width:  256
Slice Height:  256
Overlap Width Ratio:  0.15
Overlap Height Ratio:  0.15
VDSR(2x) time: 119.00 ms on this sub-image
Sliced Boxes Count: 12
POST PROCESS:  NMS
Confidence Score:  0.3
Original Prediction Count 58
Final Bounding Box Count (NMS):  21
Final Bounding Box Count (NMS):  1
Final Bounding Box Count (NMS):  1
Sliced 

In [17]:
#USING Adaptive-Optimized-NMS-IoU METHOD (NMS) --> min_area - 16
!sahi coco evaluate --dataset_json_path './subset_visdrone_test_data_15_v5.json' --result_json_path './sliced_predictions/exp1073/result.json'

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.23s).
Accumulating evaluation results...
DONE (t=0.01s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 ] = 0.175
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=500 ] = 0.311
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=500 ] = 0.157
 Average Precision  (AP) @[ IoU=0.50      | area= small | maxDets=500 ] = 0.203
 Average Precision  (AP) @[ IoU=0.50      | area=medium | maxDets=500 ] = 0.398
 Average Precision  (AP) @[ IoU=0.50      | area= large | maxDets=500 ] = 0.644
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=500 ] = 0.103
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=500 ] = 0.218
 Average Precision  (AP) @[ IoU=0.50:0.95 | a

In [10]:
# Load VDSR x2 weights (e.g., from Lornatang or your own)
vdsr = VDSR()
state = torch.load("./VDSR-PyTorch/weights/vdsr-TB291-fef487db.pth.tar", map_location='cpu')
sd = state.get('state_dict', state)
sd = {k.replace('module.', ''): v for k, v in sd.items()}
vdsr.load_state_dict(sd, strict=False)

source_folder = './test_subsets/v5/images'
json_path = "./subset_visdrone_test_data_15_v5.json"
slice_size = 256

# Call your function (unchanged calls still work; these args are optional)
predict_multilevel_fine_sliced_images(
    input_folder=source_folder,
    dataset_json_path=json_path,
    detection_model=detection_model,
    base_slice_size=slice_size,
    vdsr_model=vdsr,          # <— enable VDSR on fine level
    vdsr_device='cuda',       # or 'cpu'
    apply_vdsr_on_fine=True
)



***********************************************
Adaptive MultiLevel SAHI Prediction
*************************************************

Running fine slicing prediction on 15 image(s)...
*****************************************
File Name:  9999963_00000_d_0000057
*****************************************


  return F.conv2d(input, weight, bias, self.stride,


Base pred time: 548.17 ms
Total Based pred time: 548.17 ms
Full Image Object Density: 27
Cropped Image Dimension: 0 0 700 525
Initial Prediction time is: 9.45 ms
Slice Object Density: 2
Prediction time is: 9.45 ms
Cropped Image Dimension: 700 0 1400 525
Initial Prediction time is: 9.03 ms
Slice Object Density: 21
********* Slice Parameters ***********
Slice Width:  256
Slice Height:  256
Overlap Width Ratio:  0.15
Overlap Height Ratio:  0.15
VDSR(2x) time: 133.53 ms on this sub-image
Sliced Boxes Count: 12
POST PROCESS:  NMS
Confidence Score:  0.3
Original Prediction Count 54
Final Bounding Box Count (NMS):  19
Final Bounding Box Count (NMS):  1
Final Bounding Box Count (NMS):  1
Sliced Prediction (VDSR) time is: 204.12 ms
Pre Adaptive Slicing:  21
Post Adaptive Slicing:  42
Cropped Image Dimension: 0 525 700 1050
Initial Prediction time is: 8.75 ms
Slice Object Density: 4
Prediction time is: 8.75 ms
Cropped Image Dimension: 700 525 1400 1050
Initial Prediction time is: 8.72 ms
Slice O

In [11]:
#USING Adaptive-Optimized-NMS-IoU METHOD (NMS) --> min_area - 16
!sahi coco evaluate --dataset_json_path './subset_visdrone_test_data_15_v5.json' --result_json_path './sliced_predictions/exp1058/result.json'

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.21s).
Accumulating evaluation results...
DONE (t=0.01s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 ] = 0.169
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=500 ] = 0.287
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=500 ] = 0.152
 Average Precision  (AP) @[ IoU=0.50      | area= small | maxDets=500 ] = 0.177
 Average Precision  (AP) @[ IoU=0.50      | area=medium | maxDets=500 ] = 0.342
 Average Precision  (AP) @[ IoU=0.50      | area= large | maxDets=500 ] = 0.642
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=500 ] = 0.094
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=500 ] = 0.206
 Average Precision  (AP) @[ IoU=0.50:0.95 | a

In [None]:
# Load VDSR x2 weights (e.g., from Lornatang or your own)
vdsr = VDSR()
state = torch.load("./VDSR-PyTorch/weights/vdsr-TB291-fef487db.pth.tar", map_location='cpu')
sd = state.get('state_dict', state)
sd = {k.replace('module.', ''): v for k, v in sd.items()}
vdsr.load_state_dict(sd, strict=False)

source_folder = './test_subsets/v5/images'
json_path = "./subset_visdrone_test_data_15_v5.json"
slice_size = 256

# Call your function (unchanged calls still work; these args are optional)
predict_multilevel_fine_sliced_images(
    input_folder=source_folder,
    dataset_json_path=json_path,
    detection_model=detection_model,
    base_slice_size=slice_size,
    vdsr_model=vdsr,          # <— enable VDSR on fine level
    vdsr_device='cuda',       # or 'cpu'
    apply_vdsr_on_fine=True
)


In [22]:
# Example setup
from RealESRGAN_base.RealESRGAN import RealESRGAN

source_folder = './test_subsets/v5/images'
json_path = "./subset_visdrone_test_data_15_v5.json"
slice_size = 256
model_path = './RealESRGAN_base/weights/RealESRGAN_x2.pth'

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
realesr = RealESRGAN(device, scale=2)
realesr.load_weights(model_path, download=False)


# Call your function
predict_multilevel_sliced_images_full_upscaled(
    input_folder=source_folder,
    dataset_json_path=json_path,
    detection_model=detection_model,
    base_slice_size=slice_size,
    sr_model=realesr.predict  # <= will be called on each 2x2 sub-image
)


Running fine slicing prediction on 15 image(s)...
*****************************************
File Name 9999963_00000_d_0000057
(sx, sy):  (2.0, 2.0)
Base pred time: 954.08 ms
Total Based pred time: 954.08 ms
Full Image Object Density: 36
Cropped Image: 0 0 700 525
(sx, sy):  (2.0, 2.0)
Initial (SR) Prediction time is: 255.32 ms
Object Density: 6
Prediction time is: 255.32 ms
Cropped Image: 700 0 1400 525
(sx, sy):  (2.0, 2.0)
Initial (SR) Prediction time is: 253.85 ms
Object Density: 21
********* Slice Parameters ***********
Slice Width:  256
Slice Height:  256
Overlap Width Ratio:  0.15
Overlap Height Ratio:  0.15
Sliced Boxes Count: 12
POST PROCESS:  NMS
Confidence Score:  0.3
Original Prediction Count 66
Final Bounding Box Count (NMS):  27
Final Bounding Box Count (NMS):  2
Final Bounding Box Count (NMS):  1
Final Bounding Box Count (NMS):  2
Sliced (SR) Prediction time is: 207.80 ms
Pre Slicing:  21
Post Slicing:  53
Cropped Image: 0 525 700 1050
(sx, sy):  (2.0, 2.0)
Initial (SR) 

In [23]:
#USING Adaptive-Optimized-NMS-IoU METHOD (NMS) --> min_area - 16
!sahi coco evaluate --dataset_json_path './subset_visdrone_test_data_15_v5.json' --result_json_path './sliced_predictions/exp1019/result.json'

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.25s).
Accumulating evaluation results...
DONE (t=0.01s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 ] = 0.140
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=500 ] = 0.232
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=500 ] = 0.148
 Average Precision  (AP) @[ IoU=0.50      | area= small | maxDets=500 ] = 0.158
 Average Precision  (AP) @[ IoU=0.50      | area=medium | maxDets=500 ] = 0.262
 Average Precision  (AP) @[ IoU=0.50      | area= large | maxDets=500 ] = 0.529
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=500 ] = 0.087
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=500 ] = 0.169
 Average Precision  (AP) @[ IoU=0.50:0.95 | a

In [11]:
# Example setup
import os 
import torch
from PIL import Image
import numpy as np
from RealESRGAN_base.RealESRGAN import RealESRGAN

source_folder = './test_subsets/v5/images'
json_path = "./subset_visdrone_test_data_15_v5.json"
slice_size = 256
model_path = './RealESRGAN_base/weights/RealESRGAN_x2.pth'

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
realesr = RealESRGAN(device, scale=2)
realesr.load_weights(model_path, download=False)

# Call your function
predict_multilevel_sliced_images_upscaled_modified(
    input_folder=source_folder,
    dataset_json_path=json_path,
    detection_model=detection_model,
    base_slice_size=slice_size,
    sr_model=realesr.predict  # <= will be called on each 2x2 sub-image
)


Running fine slicing prediction on 15 image(s)...
*****************************************
File Name 9999963_00000_d_0000057


  return F.conv2d(input, weight, bias, self.stride,


Base pred time: 591.96 ms
Total Based pred time: 591.96 ms
Full Image Object Density: 33
Cropped Image: 0 0 700 525
Initial Prediction (no SR) time is: 9.71 ms
Object Density: 3
Prediction time is: 9.71 ms
Cropped Image: 700 0 1400 525
Initial Prediction (no SR) time is: 9.26 ms
Object Density: 20
********* Slice Parameters ***********
Slice Width:  256
Slice Height:  256
Overlap Width Ratio:  0.15
Overlap Height Ratio:  0.15
Sliced Prediction time is: 349.79 ms
Pre Slicing:  20
Post Slicing:  66
Cropped Image: 0 525 700 1050
Initial Prediction (no SR) time is: 8.99 ms
Object Density: 7
Prediction time is: 8.99 ms
Cropped Image: 700 525 1400 1050
Initial Prediction (no SR) time is: 8.70 ms
Object Density: 2
Prediction time is: 8.70 ms
________________________________________________
*****************************************
File Name 9999963_00000_d_0000006
Base pred time: 31.01 ms
Total Based pred time: 1009.43 ms
Full Image Object Density: 36
Cropped Image: 0 0 700 525
Initial Predic

In [12]:
#USING Adaptive-Optimized-NMS-IoU METHOD (NMS) --> min_area - 16
!sahi coco evaluate --dataset_json_path './subset_visdrone_test_data_15_v5.json' --result_json_path './sliced_predictions/exp1014/result.json'

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.24s).
Accumulating evaluation results...
DONE (t=0.01s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 ] = 0.163
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=500 ] = 0.257
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=500 ] = 0.179
 Average Precision  (AP) @[ IoU=0.50      | area= small | maxDets=500 ] = 0.151
 Average Precision  (AP) @[ IoU=0.50      | area=medium | maxDets=500 ] = 0.322
 Average Precision  (AP) @[ IoU=0.50      | area= large | maxDets=500 ] = 0.367
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=500 ] = 0.085
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=500 ] = 0.205
 Average Precision  (AP) @[ IoU=0.50:0.95 | a

In [13]:
# Example setup
import os 
import torch
from PIL import Image
import numpy as np
from RealESRGAN_base.RealESRGAN import RealESRGAN

source_folder = './test_subsets/v5/images'
json_path = "./subset_visdrone_test_data_15_v5.json"
slice_size = 256
model_path = './RealESRGAN_base/weights/RealESRGAN_x2.pth'

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
realesr = RealESRGAN(device, scale=2)
realesr.load_weights(model_path, download=False)

# Call your function
predict_multilevel_sliced_images_upscaled(
    input_folder=source_folder,
    dataset_json_path=json_path,
    detection_model=detection_model,
    base_slice_size=slice_size,
    sr_model=realesr.predict  # <= will be called on each 2x2 sub-image
)


Running fine slicing prediction on 15 image(s)...
*****************************************
File Name 9999963_00000_d_0000057
Base pred time: 30.25 ms
Total Based pred time: 30.25 ms
Full Image Object Density: 33
Cropped Image: 0 0 700 525
(sx, sy):  (2.0, 2.0)
Initial (SR) Prediction time is: 244.38 ms
Object Density: 6
Prediction time is: 244.38 ms
Cropped Image: 700 0 1400 525
(sx, sy):  (2.0, 2.0)
Initial (SR) Prediction time is: 244.74 ms
Object Density: 21
********* Slice Parameters ***********
Slice Width:  256
Slice Height:  256
Overlap Width Ratio:  0.15
Overlap Height Ratio:  0.15
Sliced Boxes Count: 12
POST PROCESS:  NMS
Confidence Score:  0.3
Original Prediction Count 66
Final Bounding Box Count (NMS):  27
Final Bounding Box Count (NMS):  2
Final Bounding Box Count (NMS):  1
Final Bounding Box Count (NMS):  2
Sliced (SR) Prediction time is: 209.10 ms
Pre Slicing:  21
Post Slicing:  53
Cropped Image: 0 525 700 1050
(sx, sy):  (2.0, 2.0)
Initial (SR) Prediction time is: 245.

In [14]:
#USING Adaptive-Optimized-NMS-IoU METHOD (NMS) --> min_area - 16
!sahi coco evaluate --dataset_json_path './subset_visdrone_test_data_15_v5.json' --result_json_path './sliced_predictions/exp1015/result.json'

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.24s).
Accumulating evaluation results...
DONE (t=0.01s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 ] = 0.142
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=500 ] = 0.234
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=500 ] = 0.149
 Average Precision  (AP) @[ IoU=0.50      | area= small | maxDets=500 ] = 0.160
 Average Precision  (AP) @[ IoU=0.50      | area=medium | maxDets=500 ] = 0.253
 Average Precision  (AP) @[ IoU=0.50      | area= large | maxDets=500 ] = 0.593
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=500 ] = 0.087
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=500 ] = 0.165
 Average Precision  (AP) @[ IoU=0.50:0.95 | a

In [15]:
# Example setup
import os 
import torch
from PIL import Image
import numpy as np
from RealESRGAN_base.RealESRGAN import RealESRGAN

source_folder = './custom_utils/testdev_subsets/high/v1/images'
json_path = "./custom_utils/annotations/subset_visdrone_test_data_50_high_v1.json"
slice_size = 256
model_path = './RealESRGAN_base/weights/RealESRGAN_x2.pth'

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
realesr = RealESRGAN(device, scale=2)
realesr.load_weights(model_path, download=False)

# Call your function
predict_multilevel_sliced_images_upscaled(
    input_folder=source_folder,
    dataset_json_path=json_path,
    detection_model=detection_model,
    base_slice_size=slice_size,
    sr_model=realesr.predict  # <= will be called on each 2x2 sub-image
)


Running fine slicing prediction on 50 image(s)...
*****************************************
File Name 9999938_00000_d_0000372
Base pred time: 27.70 ms
Total Based pred time: 27.70 ms
Full Image Object Density: 35
Cropped Image: 0 0 700 394
(sx, sy):  (2.0, 2.0)
Initial (SR) Prediction time is: 234.45 ms
Object Density: 28
********* Slice Parameters ***********
Slice Width:  256
Slice Height:  256
Overlap Width Ratio:  0.25
Overlap Height Ratio:  0.25
Sliced Boxes Count: 8
POST PROCESS:  NMS
Confidence Score:  0.3
Original Prediction Count 99
Final Bounding Box Count (NMS):  1
Final Bounding Box Count (NMS):  24
Final Bounding Box Count (NMS):  11
Final Bounding Box Count (NMS):  3
Final Bounding Box Count (NMS):  8
Sliced (SR) Prediction time is: 149.59 ms
Pre Slicing:  28
Post Slicing:  75
Cropped Image: 700 0 1400 394
(sx, sy):  (2.0, 2.0)
Initial (SR) Prediction time is: 230.87 ms
Object Density: 9
Prediction time is: 230.87 ms
Cropped Image: 0 394 700 788
(sx, sy):  (2.0, 2.0)
Ini

In [16]:
#USING Adaptive-Optimized-NMS-IoU METHOD (NMS) --> min_area - 16
3!sahi coco evaluate --dataset_json_path './custom_utils/annotations/subset_visdrone_test_data_50_high_v1.json' --result_json_path './sliced_predictions/exp1016/result.json'

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.89s).
Accumulating evaluation results...
DONE (t=0.03s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 ] = 0.149
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=500 ] = 0.265
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=500 ] = 0.152
 Average Precision  (AP) @[ IoU=0.50      | area= small | maxDets=500 ] = 0.190
 Average Precision  (AP) @[ IoU=0.50      | area=medium | maxDets=500 ] = 0.415
 Average Precision  (AP) @[ IoU=0.50      | area= large | maxDets=500 ] = 0.591
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=500 ] = 0.097
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=500 ] = 0.221
 Average Precision  (AP) @[ IoU=0.50:0.95 | a

In [26]:
# Example setup
import os 
import torch
from PIL import Image
import numpy as np
from RealESRGAN_base.RealESRGAN import RealESRGAN

source_folder = './custom_utils/testdev_subsets/high/v1/images'
json_path = "./custom_utils/annotations/subset_visdrone_test_data_50_high_v1.json"
slice_size = 256
model_path = './RealESRGAN_base/weights/RealESRGAN_x2.pth'

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
realesr = RealESRGAN(device, scale=2)
realesr.load_weights(model_path, download=False)

# Call your function
predict_multilevel_sliced_images_full_upscaled(
    input_folder=source_folder,
    dataset_json_path=json_path,
    detection_model=detection_model,
    base_slice_size=slice_size,
    sr_model=realesr.predict  # <= will be called on each 2x2 sub-image
)


Running fine slicing prediction on 50 image(s)...
*****************************************
File Name 9999938_00000_d_0000372
(sx, sy):  (2.0, 2.0)
Base pred time: 788.08 ms
Total Based pred time: 788.08 ms
Full Image Object Density: 34
Cropped Image: 0 0 700 394
(sx, sy):  (2.0, 2.0)
Initial (SR) Prediction time is: 243.56 ms
Object Density: 28
********* Slice Parameters ***********
Slice Width:  256
Slice Height:  256
Overlap Width Ratio:  0.25
Overlap Height Ratio:  0.25
Sliced Boxes Count: 8
POST PROCESS:  NMS
Confidence Score:  0.3
Original Prediction Count 99
Final Bounding Box Count (NMS):  1
Final Bounding Box Count (NMS):  24
Final Bounding Box Count (NMS):  11
Final Bounding Box Count (NMS):  3
Final Bounding Box Count (NMS):  8
Sliced (SR) Prediction time is: 148.25 ms
Pre Slicing:  28
Post Slicing:  75
Cropped Image: 700 0 1400 394
(sx, sy):  (2.0, 2.0)
Initial (SR) Prediction time is: 234.41 ms
Object Density: 9
Prediction time is: 234.41 ms
Cropped Image: 0 394 700 788
(

In [28]:
#USING Adaptive-Optimized-NMS-IoU METHOD (NMS) --> min_area - 16
!sahi coco evaluate --dataset_json_path './custom_utils/annotations/subset_visdrone_test_data_50_high_v1.json' --result_json_path './sliced_predictions/exp1021/result.json'

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.90s).
Accumulating evaluation results...
DONE (t=0.03s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 ] = 0.142
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=500 ] = 0.251
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=500 ] = 0.142
 Average Precision  (AP) @[ IoU=0.50      | area= small | maxDets=500 ] = 0.178
 Average Precision  (AP) @[ IoU=0.50      | area=medium | maxDets=500 ] = 0.350
 Average Precision  (AP) @[ IoU=0.50      | area= large | maxDets=500 ] = 0.564
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=500 ] = 0.087
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=500 ] = 0.209
 Average Precision  (AP) @[ IoU=0.50:0.95 | a

#### **Subset of 50 images**

**1. Full-Coarse-Fine SR**

In [24]:
# Example setup (Yolov8n - Mixed trained)
import os 
import torch
from PIL import Image
import numpy as np
from RealESRGAN_base.RealESRGAN import RealESRGAN

source_folder = './test_subsets/v6/images'
json_path = "./subset_visdrone_test_data_50_v1.json"
slice_size = 256
model_path = './RealESRGAN_base/weights/RealESRGAN_x2.pth'

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
realesr = RealESRGAN(device, scale=2)
realesr.load_weights(model_path, download=False)

# Call your function 
predict_multilevel_sliced_images_full_upscaled(
    input_folder=source_folder,
    dataset_json_path=json_path,
    detection_model=detection_model,
    base_slice_size=slice_size,
    sr_model=realesr.predict  # <= will be called on each 2x2 sub-image
)


Running fine slicing prediction on 50 image(s)...
*****************************************
File Name 9999938_00000_d_0000283
(sx, sy):  (2.0, 2.0)
Base pred time: 785.13 ms
Total Based pred time: 785.13 ms
Full Image Object Density: 43
Cropped Image: 0 0 700 394
(sx, sy):  (2.0, 2.0)
Initial (SR) Prediction time is: 241.90 ms
Object Density: 18
********* Slice Parameters ***********
Slice Width:  256
Slice Height:  256
Overlap Width Ratio:  0.15
Overlap Height Ratio:  0.15
Sliced Boxes Count: 8
POST PROCESS:  NMS
Confidence Score:  0.3
Original Prediction Count 53
Final Bounding Box Count (NMS):  3
Final Bounding Box Count (NMS):  15
Final Bounding Box Count (NMS):  3
Final Bounding Box Count (NMS):  4
Sliced (SR) Prediction time is: 141.05 ms
Pre Slicing:  18
Post Slicing:  43
Cropped Image: 700 0 1400 394
(sx, sy):  (2.0, 2.0)
Initial (SR) Prediction time is: 235.85 ms
Object Density: 4
Prediction time is: 235.85 ms
Cropped Image: 0 394 700 788
(sx, sy):  (2.0, 2.0)
Initial (SR) Pr

In [25]:
#USING Adaptive-Optimized-NMS-IoU METHOD (NMS) --> min_area - 16
!sahi coco evaluate --dataset_json_path './subset_visdrone_test_data_50_v1.json' --result_json_path './sliced_predictions/exp1027/result.json'

loading annotations into memory...
Done (t=0.04s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.56s).
Accumulating evaluation results...
DONE (t=0.02s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 ] = 0.184
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=500 ] = 0.310
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=500 ] = 0.193
 Average Precision  (AP) @[ IoU=0.50      | area= small | maxDets=500 ] = 0.244
 Average Precision  (AP) @[ IoU=0.50      | area=medium | maxDets=500 ] = 0.362
 Average Precision  (AP) @[ IoU=0.50      | area= large | maxDets=500 ] = 0.538
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=500 ] = 0.136
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=500 ] = 0.212
 Average Precision  (AP) @[ IoU=0.50:0.95 | a

In [32]:
# Example setup (YOLOv8n Full image trained)
import os 
import torch
from PIL import Image
import numpy as np
from RealESRGAN_base.RealESRGAN import RealESRGAN

source_folder = './test_subsets/v6/images'
json_path = "./subset_visdrone_test_data_50_v1.json"
slice_size = 256
model_path = './RealESRGAN_base/weights/RealESRGAN_x2.pth'

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
realesr = RealESRGAN(device, scale=2)
realesr.load_weights(model_path, download=False)

# Call your function 
predict_multilevel_sliced_images_full_upscaled(
    input_folder=source_folder,
    dataset_json_path=json_path,
    detection_model=detection_model,
    base_slice_size=slice_size,
    sr_model=realesr.predict  # <= will be called on each 2x2 sub-image
)


Running fine slicing prediction on 50 image(s)...
*****************************************
File Name 9999938_00000_d_0000283
(sx, sy):  (2.0, 2.0)
Base pred time: 850.74 ms
Total Based pred time: 850.74 ms
Full Image Object Density: 43
Cropped Image: 0 0 700 394
(sx, sy):  (2.0, 2.0)
Initial (SR) Prediction time is: 234.45 ms
Object Density: 18
********* Slice Parameters ***********
Slice Width:  256
Slice Height:  256
Overlap Width Ratio:  0.15
Overlap Height Ratio:  0.15
Sliced Boxes Count: 8
POST PROCESS:  NMS
Confidence Score:  0.3
Original Prediction Count 53
Final Bounding Box Count (NMS):  3
Final Bounding Box Count (NMS):  15
Final Bounding Box Count (NMS):  3
Final Bounding Box Count (NMS):  4
Sliced (SR) Prediction time is: 144.22 ms
Pre Slicing:  18
Post Slicing:  43
Cropped Image: 700 0 1400 394
(sx, sy):  (2.0, 2.0)
Initial (SR) Prediction time is: 240.91 ms
Object Density: 4
Prediction time is: 240.91 ms
Cropped Image: 0 394 700 788
(sx, sy):  (2.0, 2.0)
Initial (SR) Pr

In [33]:
#USING Adaptive-Optimized-NMS-IoU METHOD (NMS) --> min_area - 16
!sahi coco evaluate --dataset_json_path './subset_visdrone_test_data_50_v1.json' --result_json_path './sliced_predictions/exp1029/result.json'

loading annotations into memory...
Done (t=0.05s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.56s).
Accumulating evaluation results...
DONE (t=0.02s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 ] = 0.184
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=500 ] = 0.310
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=500 ] = 0.193
 Average Precision  (AP) @[ IoU=0.50      | area= small | maxDets=500 ] = 0.244
 Average Precision  (AP) @[ IoU=0.50      | area=medium | maxDets=500 ] = 0.362
 Average Precision  (AP) @[ IoU=0.50      | area= large | maxDets=500 ] = 0.538
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=500 ] = 0.136
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=500 ] = 0.212
 Average Precision  (AP) @[ IoU=0.50:0.95 | a

In [28]:
#USING Adaptive-Optimized-NMS-IoU METHOD (NMS) --> min_area - 16
!sahi coco evaluate --dataset_json_path './subset_visdrone_test_data_50_v1.json' --result_json_path './sliced_predictions/exp1028/result.json'

loading annotations into memory...
Done (t=0.04s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.56s).
Accumulating evaluation results...
DONE (t=0.02s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 ] = 0.184
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=500 ] = 0.310
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=500 ] = 0.193
 Average Precision  (AP) @[ IoU=0.50      | area= small | maxDets=500 ] = 0.244
 Average Precision  (AP) @[ IoU=0.50      | area=medium | maxDets=500 ] = 0.362
 Average Precision  (AP) @[ IoU=0.50      | area= large | maxDets=500 ] = 0.538
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=500 ] = 0.136
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=500 ] = 0.212
 Average Precision  (AP) @[ IoU=0.50:0.95 | a

**2. Coarse-Fine SR**

In [17]:
# Example setup
import os 
import torch
from PIL import Image
import numpy as np
from RealESRGAN_base.RealESRGAN import RealESRGAN

source_folder = './test_subsets/v6/images'
json_path = "./subset_visdrone_test_data_50_v1.json"
slice_size = 256
model_path = './RealESRGAN_base/weights/RealESRGAN_x2.pth'

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
realesr = RealESRGAN(device, scale=2)
realesr.load_weights(model_path, download=False)

# Call your function 
predict_multilevel_sliced_images_upscaled(
    input_folder=source_folder,
    dataset_json_path=json_path,
    detection_model=detection_model,
    base_slice_size=slice_size,
    sr_model=realesr.predict  # <= will be called on each 2x2 sub-image
)


Running fine slicing prediction on 50 image(s)...
*****************************************
File Name 9999938_00000_d_0000283
Base pred time: 26.85 ms
Total Based pred time: 26.85 ms
Full Image Object Density: 35
Cropped Image: 0 0 700 394
(sx, sy):  (2.0, 2.0)
Initial (SR) Prediction time is: 233.45 ms
Object Density: 21
********* Slice Parameters ***********
Slice Width:  256
Slice Height:  256
Overlap Width Ratio:  0.15
Overlap Height Ratio:  0.15
Sliced Boxes Count: 8
POST PROCESS:  NMS
Confidence Score:  0.3
Original Prediction Count 54
Final Bounding Box Count (NMS):  2
Final Bounding Box Count (NMS):  16
Final Bounding Box Count (NMS):  3
Final Bounding Box Count (NMS):  2
Final Bounding Box Count (NMS):  2
Final Bounding Box Count (NMS):  1
Sliced (SR) Prediction time is: 141.25 ms
Pre Slicing:  21
Post Slicing:  47
Cropped Image: 700 0 1400 394
(sx, sy):  (2.0, 2.0)
Initial (SR) Prediction time is: 234.67 ms
Object Density: 2
Prediction time is: 234.67 ms
Cropped Image: 0 394

In [18]:
#USING Adaptive-Optimized-NMS-IoU METHOD (NMS) --> min_area - 16
!sahi coco evaluate --dataset_json_path './subset_visdrone_test_data_50_v1.json' --result_json_path './sliced_predictions/exp1025/result.json'

loading annotations into memory...
Done (t=0.04s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.57s).
Accumulating evaluation results...
DONE (t=0.02s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 ] = 0.172
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=500 ] = 0.289
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=500 ] = 0.178
 Average Precision  (AP) @[ IoU=0.50      | area= small | maxDets=500 ] = 0.207
 Average Precision  (AP) @[ IoU=0.50      | area=medium | maxDets=500 ] = 0.394
 Average Precision  (AP) @[ IoU=0.50      | area= large | maxDets=500 ] = 0.503
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=500 ] = 0.111
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=500 ] = 0.247
 Average Precision  (AP) @[ IoU=0.50:0.95 | a

In [20]:
# Example setup
import os 
import torch
from PIL import Image
import numpy as np
from RealESRGAN_base.RealESRGAN import RealESRGAN

source_folder = './test_subsets/v6/images'
json_path = "./subset_visdrone_test_data_50_v1.json"
slice_size = 256
model_path = './RealESRGAN_base/weights/RealESRGAN_x2.pth'

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
realesr = RealESRGAN(device, scale=2)
realesr.load_weights(model_path, download=False)

# Call your function
predict_multilevel_sliced_images_upscaled(
    input_folder=source_folder,
    dataset_json_path=json_path,
    detection_model=detection_model,
    base_slice_size=slice_size,
    sr_model=realesr.predict  # <= will be called on each 2x2 sub-image
)


Running fine slicing prediction on 50 image(s)...
*****************************************
File Name 9999938_00000_d_0000283
Base pred time: 86.69 ms
Total Based pred time: 86.69 ms
Full Image Object Density: 41
Cropped Image: 0 0 700 394
(sx, sy):  (2.0, 2.0)
Initial (SR) Prediction time is: 233.29 ms
Object Density: 18
********* Slice Parameters ***********
Slice Width:  256
Slice Height:  256
Overlap Width Ratio:  0.15
Overlap Height Ratio:  0.15
Sliced Boxes Count: 8
POST PROCESS:  NMS
Confidence Score:  0.3
Original Prediction Count 53
Final Bounding Box Count (NMS):  3
Final Bounding Box Count (NMS):  15
Final Bounding Box Count (NMS):  3
Final Bounding Box Count (NMS):  4
Sliced (SR) Prediction time is: 141.55 ms
Pre Slicing:  18
Post Slicing:  43
Cropped Image: 700 0 1400 394
(sx, sy):  (2.0, 2.0)
Initial (SR) Prediction time is: 231.02 ms
Object Density: 4
Prediction time is: 231.02 ms
Cropped Image: 0 394 700 788
(sx, sy):  (2.0, 2.0)
Initial (SR) Prediction time is: 232.92

In [21]:
#USING Adaptive-Optimized-NMS-IoU METHOD (NMS) --> min_area - 16
!sahi coco evaluate --dataset_json_path './subset_visdrone_test_data_50_v1.json' --result_json_path './sliced_predictions/exp1026/result.json'

loading annotations into memory...
Done (t=0.04s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.56s).
Accumulating evaluation results...
DONE (t=0.02s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 ] = 0.185
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=500 ] = 0.316
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=500 ] = 0.195
 Average Precision  (AP) @[ IoU=0.50      | area= small | maxDets=500 ] = 0.246
 Average Precision  (AP) @[ IoU=0.50      | area=medium | maxDets=500 ] = 0.363
 Average Precision  (AP) @[ IoU=0.50      | area= large | maxDets=500 ] = 0.558
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=500 ] = 0.136
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=500 ] = 0.215
 Average Precision  (AP) @[ IoU=0.50:0.95 | a

#### **Sparse Images**

In [36]:
# Example setup  --> 150 epochs Mixed
import os 
import torch
from PIL import Image
import numpy as np
from RealESRGAN_base.RealESRGAN import RealESRGAN

source_folder = './custom_utils/testdev_subsets/sparse/v1/images'
json_path = "./custom_utils/annotations/subset_visdrone_test_data_50_sparse_v1.json"
slice_size = 256
model_path = './RealESRGAN_base/weights/RealESRGAN_x2.pth'

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
realesr = RealESRGAN(device, scale=2)
realesr.load_weights(model_path, download=False)

# Call your function
predict_multilevel_sliced_images_upscaled(
    input_folder=source_folder,
    dataset_json_path=json_path,
    detection_model=detection_model,
    base_slice_size=slice_size,
    sr_model=realesr.predict  # <= will be called on each 2x2 sub-image
)


Running fine slicing prediction on 50 image(s)...
*****************************************
File Name 9999973_00000_d_0000054
Base pred time: 105.27 ms
Total Based pred time: 105.27 ms
Full Image Object Density: 12
Cropped Image: 0 0 958 539
(sx, sy):  (2.0, 2.0)
Initial (SR) Prediction time is: 375.90 ms
Object Density: 5
Prediction time is: 375.90 ms
Cropped Image: 958 0 1916 539
(sx, sy):  (2.0, 2.0)
Initial (SR) Prediction time is: 380.56 ms
Object Density: 8
Prediction time is: 380.56 ms
Cropped Image: 0 539 958 1078
(sx, sy):  (2.0, 2.0)
Initial (SR) Prediction time is: 374.06 ms
Object Density: 0
Prediction time is: 374.06 ms
Cropped Image: 958 539 1916 1078
(sx, sy):  (2.0, 2.0)
Initial (SR) Prediction time is: 374.85 ms
Object Density: 8
Prediction time is: 374.85 ms
________________________________________________
*****************************************
File Name 9999986_00000_d_0000027
Base pred time: 28.93 ms
Total Based pred time: 1639.56 ms
Full Image Object Density: 4

In [37]:
#USING Adaptive-Optimized-NMS-IoU METHOD (NMS) --> min_area - 16
!sahi coco evaluate --dataset_json_path './custom_utils/annotations/subset_visdrone_test_data_50_sparse_v1.json' --result_json_path './sliced_predictions/exp1030/result.json'

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.13s).
Accumulating evaluation results...
DONE (t=0.01s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 ] = 0.239
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=500 ] = 0.359
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=500 ] = 0.297
 Average Precision  (AP) @[ IoU=0.50      | area= small | maxDets=500 ] = 0.128
 Average Precision  (AP) @[ IoU=0.50      | area=medium | maxDets=500 ] = 0.611
 Average Precision  (AP) @[ IoU=0.50      | area= large | maxDets=500 ] = 0.646
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=500 ] = 0.076
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=500 ] = 0.419
 Average Precision  (AP) @[ IoU=0.50:0.95 | a

In [17]:
# Example setup
import os 
import torch
from PIL import Image
import numpy as np
from RealESRGAN_base.RealESRGAN import RealESRGAN

source_folder = './custom_utils/testdev_subsets/sparse/v1/images'
json_path = "./custom_utils/annotations/subset_visdrone_test_data_50_sparse_v1.json"
slice_size = 256
model_path = './RealESRGAN_base/weights/RealESRGAN_x2.pth'

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
realesr = RealESRGAN(device, scale=2)
realesr.load_weights(model_path, download=False)

# Call your function
predict_multilevel_sliced_images_upscaled(
    input_folder=source_folder,
    dataset_json_path=json_path,
    detection_model=detection_model,
    base_slice_size=slice_size,
    sr_model=realesr.predict  # <= will be called on each 2x2 sub-image
)


Running fine slicing prediction on 50 image(s)...
*****************************************
File Name 9999973_00000_d_0000054
Base pred time: 43.50 ms
Total Based pred time: 43.50 ms
Full Image Object Density: 14
Cropped Image: 0 0 958 539
(sx, sy):  (2.0, 2.0)
Initial (SR) Prediction time is: 367.19 ms
Object Density: 6
Prediction time is: 367.19 ms
Cropped Image: 958 0 1916 539
(sx, sy):  (2.0, 2.0)
Initial (SR) Prediction time is: 368.19 ms
Object Density: 8
Prediction time is: 368.19 ms
Cropped Image: 0 539 958 1078
(sx, sy):  (2.0, 2.0)
Initial (SR) Prediction time is: 368.09 ms
Object Density: 0
Prediction time is: 368.09 ms
Cropped Image: 958 539 1916 1078
(sx, sy):  (2.0, 2.0)
Initial (SR) Prediction time is: 366.52 ms
Object Density: 6
Prediction time is: 366.52 ms
________________________________________________
*****************************************
File Name 9999986_00000_d_0000027
Base pred time: 30.53 ms
Total Based pred time: 1544.03 ms
Full Image Object Density: 5
C

In [18]:
#USING Adaptive-Optimized-NMS-IoU METHOD (NMS) --> min_area - 16
!sahi coco evaluate --dataset_json_path './custom_utils/annotations/subset_visdrone_test_data_50_sparse_v1.json' --result_json_path './sliced_predictions/exp1017/result.json'

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.13s).
Accumulating evaluation results...
DONE (t=0.01s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 ] = 0.224
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=500 ] = 0.357
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=500 ] = 0.281
 Average Precision  (AP) @[ IoU=0.50      | area= small | maxDets=500 ] = 0.144
 Average Precision  (AP) @[ IoU=0.50      | area=medium | maxDets=500 ] = 0.618
 Average Precision  (AP) @[ IoU=0.50      | area= large | maxDets=500 ] = 0.513
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=500 ] = 0.076
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=500 ] = 0.421
 Average Precision  (AP) @[ IoU=0.50:0.95 | a

In [24]:
# Example setup
import os 
import torch
from PIL import Image
import numpy as np
from RealESRGAN_base.RealESRGAN import RealESRGAN

source_folder = './custom_utils/testdev_subsets/sparse/v1/images'
json_path = "./custom_utils/annotations/subset_visdrone_test_data_50_sparse_v1.json"
slice_size = 256
model_path = './RealESRGAN_base/weights/RealESRGAN_x2.pth'

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
realesr = RealESRGAN(device, scale=2)
realesr.load_weights(model_path, download=False)

# Call your function
predict_multilevel_sliced_images_full_upscaled(
    input_folder=source_folder,
    dataset_json_path=json_path,
    detection_model=detection_model,
    base_slice_size=slice_size,
    sr_model=realesr.predict  # <= will be called on each 2x2 sub-image
)


Running fine slicing prediction on 50 image(s)...
*****************************************
File Name 9999973_00000_d_0000054
(sx, sy):  (2.0, 2.0)
Base pred time: 1340.35 ms
Total Based pred time: 1340.35 ms
Full Image Object Density: 12
Cropped Image: 0 0 958 539
(sx, sy):  (2.0, 2.0)
Initial (SR) Prediction time is: 390.06 ms
Object Density: 6
Prediction time is: 390.06 ms
Cropped Image: 958 0 1916 539
(sx, sy):  (2.0, 2.0)
Initial (SR) Prediction time is: 373.97 ms
Object Density: 8
Prediction time is: 373.97 ms
Cropped Image: 0 539 958 1078
(sx, sy):  (2.0, 2.0)
Initial (SR) Prediction time is: 373.63 ms
Object Density: 0
Prediction time is: 373.63 ms
Cropped Image: 958 539 1916 1078
(sx, sy):  (2.0, 2.0)
Initial (SR) Prediction time is: 379.93 ms
Object Density: 6
Prediction time is: 379.93 ms
________________________________________________
*****************************************
File Name 9999986_00000_d_0000027
(sx, sy):  (2.0, 2.0)
Base pred time: 946.62 ms
Total Based pre

In [25]:
#USING Adaptive-Optimized-NMS-IoU METHOD (NMS) --> min_area - 16
!sahi coco evaluate --dataset_json_path './custom_utils/annotations/subset_visdrone_test_data_50_sparse_v1.json' --result_json_path './sliced_predictions/exp1020/result.json'

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.13s).
Accumulating evaluation results...
DONE (t=0.01s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 ] = 0.225
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=500 ] = 0.354
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=500 ] = 0.284
 Average Precision  (AP) @[ IoU=0.50      | area= small | maxDets=500 ] = 0.139
 Average Precision  (AP) @[ IoU=0.50      | area=medium | maxDets=500 ] = 0.613
 Average Precision  (AP) @[ IoU=0.50      | area= large | maxDets=500 ] = 0.519
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=500 ] = 0.076
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=500 ] = 0.429
 Average Precision  (AP) @[ IoU=0.50:0.95 | a

In [17]:
#Using NMS
!python sahi/scripts/coco_base_metrics.py './subset_visdrone_test_990.json' './sliced_predictions/exp590/result.json'

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.01s).
Accumulating evaluation results...
DONE (t=0.01s).
******************************************************************
IoU threshold 0.50: TP=58, FP=44, FN=18
IoU threshold 0.55: TP=58, FP=44, FN=18
IoU threshold 0.60: TP=56, FP=46, FN=20
IoU threshold 0.65: TP=50, FP=52, FN=26
IoU threshold 0.70: TP=44, FP=58, FN=32
IoU threshold 0.75: TP=42, FP=60, FN=34
IoU threshold 0.80: TP=28, FP=74, FN=48
IoU threshold 0.85: TP=14, FP=88, FN=62
IoU threshold 0.90: TP=4, FP=98, FN=72
IoU threshold 0.95: TP=0, FP=102, FN=76
******************************************************************


In [5]:
#Using coarse and fine SR
!python sahi/scripts/coco_base_metrics.py './subset_visdrone_test_990.json' './sliced_predictions/exp1009/result.json'

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.01s).
Accumulating evaluation results...
DONE (t=0.01s).
IoU threshold 0.50: TP=46, FP=44, FN=30
IoU threshold 0.55: TP=46, FP=44, FN=30
IoU threshold 0.60: TP=44, FP=46, FN=32
IoU threshold 0.65: TP=40, FP=50, FN=36
IoU threshold 0.70: TP=38, FP=52, FN=38
IoU threshold 0.75: TP=30, FP=60, FN=46
IoU threshold 0.80: TP=20, FP=70, FN=56
IoU threshold 0.85: TP=8, FP=82, FN=68
IoU threshold 0.90: TP=4, FP=86, FN=72
IoU threshold 0.95: TP=0, FP=90, FN=76


In [6]:
#Using pbde-nms
!python sahi/scripts/coco_base_metrics.py './subset_visdrone_test_990.json' './sliced_predictions/exp1012/result.json'

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.01s).
Accumulating evaluation results...
DONE (t=0.01s).
IoU threshold 0.50: TP=58, FP=44, FN=18
IoU threshold 0.55: TP=58, FP=44, FN=18
IoU threshold 0.60: TP=56, FP=46, FN=20
IoU threshold 0.65: TP=50, FP=52, FN=26
IoU threshold 0.70: TP=44, FP=58, FN=32
IoU threshold 0.75: TP=42, FP=60, FN=34
IoU threshold 0.80: TP=28, FP=74, FN=48
IoU threshold 0.85: TP=14, FP=88, FN=62
IoU threshold 0.90: TP=4, FP=98, FN=72
IoU threshold 0.95: TP=0, FP=102, FN=76


In [19]:
#Using pbde-nms
!python sahi/scripts/coco_evaluation.py --dataset_json_path './subset_visdrone_test_990.json' --result_json_path './sliced_predictions/exp1012/result.json'

******************************************************************
Max Dets:  500
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.01s).
Accumulating evaluation results...
DONE (t=0.00s).
******************************************************************
IoU threshold 0.50: TP=58, FP=44, FN=18
IoU threshold 0.55: TP=58, FP=44, FN=18
IoU threshold 0.60: TP=56, FP=46, FN=20
IoU threshold 0.65: TP=50, FP=52, FN=26
IoU threshold 0.70: TP=44, FP=58, FN=32
IoU threshold 0.75: TP=42, FP=60, FN=34
IoU threshold 0.80: TP=28, FP=74, FN=48
IoU threshold 0.85: TP=14, FP=88, FN=62
IoU threshold 0.90: TP=4, FP=98, FN=72
IoU threshold 0.95: TP=0, FP=102, FN=76
******************************************************************
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 

#### **Mix of 500 images**

In [15]:
# Example setup
import os 
import torch
from PIL import Image
import numpy as np
from RealESRGAN_base.RealESRGAN import RealESRGAN

source_folder = './custom_utils/testdev_subsets/mix/v1/images'
json_path = "./custom_utils/annotations/subset_visdrone_test_data_500_mix_v1.json"
slice_size = 256
model_path = './RealESRGAN_base/weights/RealESRGAN_x2.pth'

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
realesr = RealESRGAN(device, scale=2)
realesr.load_weights(model_path, download=False)

# Call your function
predict_multilevel_sliced_images_upscaled(
    input_folder=source_folder,
    dataset_json_path=json_path,
    detection_model=detection_model,
    base_slice_size=slice_size,
    sr_model=realesr.predict  # <= will be called on each 2x2 sub-image
)


Running fine slicing prediction on 500 image(s)...
*****************************************
File Name 0000320_00300_d_0000004


  return F.conv2d(input, weight, bias, self.stride,


Base pred time: 582.70 ms
Total Based pred time: 582.70 ms
Full Image Object Density: 42
Cropped Image: 0 0 680 382
(sx, sy):  (2.0, 2.0)
Initial (SR) Prediction time is: 887.58 ms
Object Density: 24
********* Slice Parameters ***********
Slice Width:  256
Slice Height:  256
Overlap Width Ratio:  0.15
Overlap Height Ratio:  0.15
Sliced Boxes Count: 6
POST PROCESS:  NMS
Confidence Score:  0.3
Original Prediction Count 60
Final Bounding Box Count (NMS):  14
Final Bounding Box Count (NMS):  2
Final Bounding Box Count (NMS):  1
Final Bounding Box Count (NMS):  12
Final Bounding Box Count (NMS):  3
Final Bounding Box Count (NMS):  1
Final Bounding Box Count (NMS):  3
Sliced (SR) Prediction time is: 184.49 ms
Pre Slicing:  24
Post Slicing:  60
Cropped Image: 680 0 1360 382
(sx, sy):  (2.0, 2.0)
Initial (SR) Prediction time is: 232.12 ms
Object Density: 25
********* Slice Parameters ***********
Slice Width:  256
Slice Height:  256
Overlap Width Ratio:  0.25
Overlap Height Ratio:  0.25
Sliced 

In [16]:
#USING Adaptive-Optimized-NMS-IoU METHOD (NMS) --> min_area - 16
!sahi coco evaluate --dataset_json_path './custom_utils/annotations/subset_visdrone_test_data_500_mix_v1.json' --result_json_path './sliced_predictions/exp1024/result.json'

loading annotations into memory...
Done (t=0.15s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=9.51s).
Accumulating evaluation results...
DONE (t=0.17s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 ] = 0.130
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=500 ] = 0.224
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=500 ] = 0.130
 Average Precision  (AP) @[ IoU=0.50      | area= small | maxDets=500 ] = 0.147
 Average Precision  (AP) @[ IoU=0.50      | area=medium | maxDets=500 ] = 0.308
 Average Precision  (AP) @[ IoU=0.50      | area= large | maxDets=500 ] = 0.394
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=500 ] = 0.075
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=500 ] = 0.181
 Average Precision  (AP) @[ IoU=0.50:0.95 | a