In [1]:
!pip install -U sahi ultralytics huggingface_hub --quiet

[33mDEPRECATION: pytorch-lightning 1.5.4 has a non-standard dependency specifier torch>=1.7.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063[0m[33m
[0m

In [3]:
import fiftyone as fo
import fiftyone.zoo as foz
import fiftyone.utils.huggingface as fouh
from fiftyone import ViewField as F

In [4]:
dataset = fouh.load_from_hub("jamarks/VisDrone2019-DET", name="sahi-test", max_samples=100, overwrite=True)

Downloading config file fiftyone.yml from jamarks/VisDrone2019-DET


fiftyone.yml:   0%|          | 0.00/113 [00:00<?, ?B/s]

Loading dataset
Importing samples...
 100% |█████████████████| 100/100 [33.5ms elapsed, 0s remaining, 3.0K samples/s]      
Downloading 100 media files...


In [5]:
session = fo.launch_app(dataset)

Connected to FiftyOne on port 5151 at localhost.
If you are not connecting to a remote session, you may need to start a new session and specify a port


In [6]:
from ultralytics import YOLO

ckpt_path = "yolov8l.pt"
model = YOLO(ckpt_path)
## fiftyone will work directly with the Ultralytics.YOLO model

dataset.apply_model(model, label_field="base_model")

Downloading https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8l.pt to 'yolov8l.pt'...


100%|██████████| 83.7M/83.7M [00:03<00:00, 26.1MB/s]


 100% |█████████████████| 100/100 [7.3s elapsed, 0s remaining, 15.7 samples/s]      


In [7]:
session = fo.launch_app(dataset)

In [8]:
mapping = {"pedestrians": "person", "people": "person", "van": "car"}
mapped_view = dataset.map_labels("ground_truth", mapping)

In [9]:
def get_label_fields(sample_collection):
    """Get the (detection) label fields of a Dataset or DatasetView."""
    label_fields = list(
        sample_collection.get_field_schema(embedded_doc_type=fo.Detections).keys()
    )
    return label_fields

def filter_all_labels(sample_collection):
    label_fields = get_label_fields(sample_collection)

    filtered_view = sample_collection

    for lf in label_fields:
        filtered_view = filtered_view.filter_labels(
            lf, F("label").is_in(["person", "car", "truck"]), only_matches=False
        )
    return filtered_view

In [10]:
filtered_view = filter_all_labels(mapped_view)

In [11]:
session.view = filtered_view.view()

In [12]:
from sahi import AutoDetectionModel
from sahi.predict import get_prediction, get_sliced_prediction

In [13]:
detection_model = AutoDetectionModel.from_pretrained(
    model_type='yolov8',
    model_path=ckpt_path,
    confidence_threshold=0.25, ## same as the default value for our base model
    image_size=640,
    device="cpu", # or 'cuda'
)

In [14]:
result = get_prediction(dataset.first().filepath, detection_model, verbose=0)
print(result)

<sahi.prediction.PredictionResult object at 0x741938980eb0>


In [15]:
print(result.to_fiftyone_detections())

[<Detection: {
    'id': '663c3d12ebcc6fc2030a2250',
    'attributes': {},
    'tags': [],
    'label': 'car',
    'bounding_box': [
        0.6646394729614258,
        0.7850866247106482,
        0.06464214324951172,
        0.09088355170355902,
    ],
    'mask': None,
    'confidence': 0.8933133482933044,
    'index': None,
}>, <Detection: {
    'id': '663c3d12ebcc6fc2030a2251',
    'attributes': {},
    'tags': [],
    'label': 'car',
    'bounding_box': [
        0.6196376800537109,
        0.7399617513020833,
        0.06670347849527995,
        0.09494832356770834,
    ],
    'mask': None,
    'confidence': 0.8731603622436523,
    'index': None,
}>, <Detection: {
    'id': '663c3d12ebcc6fc2030a2252',
    'attributes': {},
    'tags': [],
    'label': 'car',
    'bounding_box': [
        0.5853352228800456,
        0.7193766276041667,
        0.06686935424804688,
        0.07682359483506944,
    ],
    'mask': None,
    'confidence': 0.859582781791687,
    'index': None,
}>, <Det

In [16]:
sliced_result = get_sliced_prediction(
    dataset.skip(40).first().filepath,
    detection_model,
    slice_height = 320,
    slice_width = 320,
    overlap_height_ratio = 0.2,
    overlap_width_ratio = 0.2,
)

Performing prediction on 8 number of slices.


In [17]:
num_sliced_dets = len(sliced_result.to_fiftyone_detections())
num_orig_dets = len(result.to_fiftyone_detections())

print(f"Detections predicted without slicing: {num_orig_dets}")
print(f"Detections predicted with slicing: {num_sliced_dets}")

Detections predicted without slicing: 26
Detections predicted with slicing: 73


In [18]:
def predict_with_slicing(sample, label_field, **kwargs):
    result = get_sliced_prediction(
        sample.filepath, detection_model, verbose=0, **kwargs
    )
    sample[label_field] = fo.Detections(detections=result.to_fiftyone_detections())

In [19]:
kwargs = {"overlap_height_ratio": 0.2, "overlap_width_ratio": 0.2}

for sample in dataset.iter_samples(progress=True, autosave=True):
    predict_with_slicing(sample, label_field="small_slices", slice_height=320, slice_width=320, **kwargs)
    predict_with_slicing(sample, label_field="large_slices", slice_height=480, slice_width=480, **kwargs)

   1% |-----------------|   1/100 [2.1m elapsed, 3.4h remaining, 0.0 samples/s] 


05/08/2024 23:06:42 - INFO - eta.core.utils -      1% |-----------------|   1/100 [2.1m elapsed, 3.4h remaining, 0.0 samples/s] 


KeyboardInterrupt: 

In [None]:
filtered_view = filter_all_labels(mapped_view)

In [None]:
session = fo.launch_app(filtered_view, auto=False)

In [None]:
base_results = filtered_view.evaluate_detections("base_model", gt_field="ground_truth", eval_key="eval_base_model")
large_slice_results = filtered_view.evaluate_detections("large_slices", gt_field="ground_truth", eval_key="eval_large_slices")
small_slice_results = filtered_view.evaluate_detections("small_slices", gt_field="ground_truth", eval_key="eval_small_slices")

In [None]:
print("Base model results:")
base_results.print_report()

print("-" * 50)
print("Large slice results:")
large_slice_results.print_report()

print("-" * 50)
print("Small slice results:")
small_slice_results.print_report()

In [None]:
## Filtering for only small boxes

box_width, box_height = F("bounding_box")[2], F("bounding_box")[3]
rel_bbox_area = box_width * box_height

im_width, im_height = F("$metadata.width"), F("$metadata.height")
abs_area = rel_bbox_area * im_width * im_height

small_boxes_view = filtered_view
for lf in get_label_fields(filtered_view):
    small_boxes_view = small_boxes_view.filter_labels(lf, abs_area < 32**2, only_matches=False)

In [None]:
session.view = small_boxes_view.view()

In [None]:
small_boxes_base_results = small_boxes_view.evaluate_detections("base_model", gt_field="ground_truth", eval_key="eval_small_boxes_base_model")
small_boxes_large_slice_results = small_boxes_view.evaluate_detections("large_slices", gt_field="ground_truth", eval_key="eval_small_boxes_large_slices")
small_boxes_small_slice_results = small_boxes_view.evaluate_detections("small_slices", gt_field="ground_truth", eval_key="eval_small_boxes_small_slices")