In [1]:
import csv
from pathlib import Path

from ns_vfs.common.utility import save_frames
from ns_vfs.config.loader import load_config
from ns_vfs.data.frame import BenchmarkLTLFrame, FramesofInterest
from ns_vfs.frame_searcher import FrameSearcher
from ns_vfs.model.vision.grounding_dino import GroundingDino
from ns_vfs.processor.benchmark_video_processor import BenchmarkVideoFrameProcessor
from ns_vfs.video_to_automaton import VideotoAutomaton
from common import get_available_benchmark_video
from ns_vfs.model.vision.yolo import Yolo
from common import get_available_benchmark_video, get_precision_recall_f1_score

  from .autonotebook import tqdm as notebook_tqdm


**Global Variables**

In [2]:
# Project configuration object; later cells read model checkpoints and the
# artifact path from it.
config = load_config()

# NOTE(review): absolute, machine-specific path. Consider deriving it from
# `config` or an environment variable so the notebook runs on other machines.
benchmark_frame_video_root_dir = Path(
    "/opt/Neuro-Symbolic-Video-Frame-Search/artifacts/benchmark_frame_video/"
)

# Sub-directories of the benchmark root (e.g. `cifar10` in the recorded output).
# `Path.iterdir()` yields entries in arbitrary order, so sort them to make the
# processing order (and therefore the logs) reproducible across runs.
benchmark_image_set_dir = sorted(
    x for x in benchmark_frame_video_root_dir.iterdir() if x.is_dir()
)

# Names of the CV detection models to evaluate; each name must be one that the
# experiment cell knows how to instantiate.
cv_model_list = ["grounding_dino", "yolo"]

**Experiment: frame-of-interest search for each CV detection model**

In [3]:
# Go over every available benchmark video and search for the frames of interest
# with each CV detection model listed in `cv_model_list`.
# + No manual confidence score
#
# Layout of `result`:
#   result[benchmark name (str)][ltl rule directory (Path)][ltl formula (str)]
#       -> list with one entry per benchmark video, each entry being the value
#          returned by `get_precision_recall_f1_score`.


def _build_detector(cv_model):
    """Instantiate the detection model named by ``cv_model`` from ``config``.

    Args:
        cv_model: Either ``"yolo"`` or ``"grounding_dino"``.

    Returns:
        A freshly constructed ``Yolo`` or ``GroundingDino`` instance.

    Raises:
        ValueError: If ``cv_model`` is not one of the supported names. Without
            this check an unknown name would either raise ``NameError`` or
            silently reuse the detector left over from the previous iteration.
    """
    if cv_model == "yolo":
        return Yolo(config=config.YOLO, weight_path=config.YOLO.YOLO_CHECKPOINT_PATH)
    if cv_model == "grounding_dino":
        return GroundingDino(
            config=config.GROUNDING_DINO,
            weight_path=config.GROUNDING_DINO.GROUNDING_DINO_CHECKPOINT_PATH,
            config_path=config.GROUNDING_DINO.GROUNDING_DINO_CONFIG_PATH,
        )
    raise ValueError(f"Unsupported cv detection model: {cv_model!r}")


result = {}
for benchmark_name_dir in benchmark_image_set_dir:
    # Sorted so that the LTL rule directories are visited in a reproducible order
    # (`Path.iterdir()` gives no ordering guarantee).
    ltl_video_dir_set = sorted(x for x in benchmark_name_dir.iterdir() if x.is_dir())
    if not ltl_video_dir_set:
        # Nothing to evaluate for this benchmark; it gets no entry in `result`.
        continue

    print(f"--processing {benchmark_name_dir.name}--")
    print(f"number of ltl rule: {len(ltl_video_dir_set)}")
    result_per_benchmark = {}
    result[benchmark_name_dir.name] = result_per_benchmark

    for ltl_video_dir in ltl_video_dir_set:
        # NOTE(review): this level is keyed by the Path object itself, while the
        # benchmark level above is keyed by `.name`. Kept as-is because code
        # outside this cell may index by the Path.
        result_per_rule = {}
        result_per_benchmark[ltl_video_dir] = result_per_rule

        benchmark_video_file_list = get_available_benchmark_video(ltl_video_dir)
        print(f"number of examples of {ltl_video_dir.name}: {len(benchmark_video_file_list)}")

        for benchmark_video_file in benchmark_video_file_list:
            # The formula is the part of the file name after the last "_ltl_",
            # cut at the first "." of the name.
            ltl_formula = benchmark_video_file.name.split(".")[0].split("_ltl_")[-1]
            search_result_per_video = {}

            for cv_model in cv_model_list:
                # NOTE(review): a detector is rebuilt for every video and model
                # (the recorded output shows the text encoder being loaded each
                # time). Building each detector once up front would likely be
                # much faster, but confirm first that detectors keep no
                # per-video state.
                cv_detection_model = _build_detector(cv_model)

                benchmark_video_processor = BenchmarkVideoFrameProcessor(
                    video_path=benchmark_video_file,
                    artifact_dir=config.VERSION_AND_PATH.ARTIFACTS_PATH,
                )
                benchmark_video: BenchmarkLTLFrame = benchmark_video_processor.benchmark_image_frames

                video_automata_builder = VideotoAutomaton(
                    detector=cv_detection_model,
                    video_processor=benchmark_video_processor,
                    artifact_dir=config.VERSION_AND_PATH.ARTIFACTS_PATH,
                    proposition_set=benchmark_video.proposition,
                    save_annotation=False,  # TODO: Debug only
                    save_image=False,  # TODO: Debug only
                    # The benchmark formula is wrapped as "P>=0.80 [...]";
                    # presumably a probability threshold for the checker — confirm.
                    ltl_formula=f"P>=0.80 [{benchmark_video.ltl_formula}]",
                    verbose=False,
                )
                frame_searcher = FrameSearcher(
                    video_automata_builder=video_automata_builder,
                    video_processor=benchmark_video_processor,
                )
                frame_of_interest = frame_searcher.search()

                # One entry per model, plus the benchmark frames under a fixed key
                # (rewritten on each model iteration, as in the original cell).
                search_result_per_video["benchmark_video"] = benchmark_video
                search_result_per_video[cv_model] = frame_of_interest

            # classification_metrics
            search_result_per_video = get_precision_recall_f1_score(search_result_per_video)
            # `setdefault` keeps results of earlier videos that share the same
            # formula; resetting the list per video would silently drop them.
            result_per_rule.setdefault(ltl_formula, []).append(search_result_per_video)



--processing cifar10--
number of ltl rule: 3
number of examples of prop1Uprop2: 12
final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.3ms
Speed: 5.6ms preprocess, 6.3ms inference, 10.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.6ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 8.7ms
Speed: 3.0ms preprocess, 8.7ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.9ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.6ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.6ms
Speed: 1.7ms preprocess, 4.6ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.6ms
Speed: 1.8ms preprocess, 4.6ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.2ms preprocess, 6.3ms inference, 1

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.3ms
Speed: 2.3ms preprocess, 6.3ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 2.8ms preprocess, 6.3ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.6ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.1ms preprocess, 6.3ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.3ms
Speed: 3.1ms preprocess, 4.3ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.7ms
Speed: 1.7ms preprocess, 4.7ms inference, 5

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.3ms
Speed: 2.3ms preprocess, 6.3ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 5.3ms
Speed: 1.8ms preprocess, 5.3ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.0ms preprocess, 6.4ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.1ms preprocess, 6.3ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.7ms
Speed: 1.6ms preprocess, 4.7ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.5ms
Speed: 1.8ms preprocess, 6.5ms inference, 1

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.4ms
Speed: 7.6ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.2ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.5ms preprocess, 6.4ms inference, 14.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.5ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.6ms preprocess, 6.4ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.3ms
Speed: 1.7ms preprocess, 4.3ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.7ms
Speed: 1.7ms preprocess, 4.7ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.7ms preprocess, 6.3ms inference, 11

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.4ms
Speed: 2.4ms preprocess, 6.4ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 13.3ms
Speed: 4.2ms preprocess, 13.3ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.8ms preprocess, 6.4ms inference, 10.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.4ms
Speed: 1.7ms preprocess, 4.4ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.7ms
Speed: 1.5ms preprocess, 4.7ms inference, 3.4ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.0ms preprocess, 6.3ms inference, 18.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 5.6ms
Speed: 1.7ms preprocess, 5.6ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference,

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.3ms
Speed: 2.3ms preprocess, 6.3ms inference, 17.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 18.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.6ms
Speed: 1.7ms preprocess, 4.6ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.6ms
Speed: 1.7ms preprocess, 4.6ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 2.0ms preprocess, 6.3ms inference, 1

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 4.7ms
Speed: 5.1ms preprocess, 4.7ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 5.2ms
Speed: 1.7ms preprocess, 5.2ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 2.2ms preprocess, 6.3ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.5ms
Speed: 2.1ms preprocess, 6.5ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.2ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.6ms
Speed: 3.1ms preprocess, 4.6ms inference, 

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.4ms
Speed: 2.3ms preprocess, 6.4ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.9ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.2ms preprocess, 6.4ms inference, 6.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.4ms
Speed: 1.7ms preprocess, 4.4ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.1ms preprocess, 6.3ms inference, 11.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.2ms preprocess, 6.3ms inference, 7.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.5ms
Speed: 3.4ms preprocess, 6.5ms inference, 11

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 4.5ms
Speed: 1.6ms preprocess, 4.5ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 1.7ms preprocess, 6.3ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.1ms preprocess, 6.3ms inference, 10.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 1.9ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.2ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.2ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.9ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.8ms
Speed: 3.2ms preprocess, 4.8ms inference, 

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 4.5ms
Speed: 1.7ms preprocess, 4.5ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.4ms
Speed: 1.7ms preprocess, 4.4ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.3ms preprocess, 6.3ms inference, 11.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.1ms preprocess, 6.3ms inference, 7.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.6ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.3ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.2ms preprocess, 6.3ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.2ms preprocess, 6.4ms inference, 11

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.4ms
Speed: 6.9ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.7ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 5.2ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.9ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.4ms
Speed: 3.0ms preprocess, 4.4ms inference, 0.4ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.7ms
Speed: 1.7ms preprocess, 4.7ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 5.8ms
Speed: 1.7ms preprocess, 5.8ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.2ms
Speed: 3.4ms preprocess, 6.2ms inference, 1

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.4ms
Speed: 2.3ms preprocess, 6.4ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.8ms preprocess, 6.4ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.5ms
Speed: 1.8ms preprocess, 6.5ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.1ms
Speed: 1.9ms preprocess, 6.1ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.1ms preprocess, 6.3ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.8ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.0ms preprocess, 6.4ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.0ms preprocess, 6.3ms inference, 

number of examples of Fprop1: 12
final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.4ms
Speed: 2.3ms preprocess, 6.4ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.8ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.0ms preprocess, 6.3ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.4ms
Speed: 1.8ms preprocess, 4.4ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.5ms
Speed: 1.7ms preprocess, 4.5ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.7ms preprocess, 6.3ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.2ms preprocess, 6.3ms inference, 5.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.5ms
Speed: 3.2ms preprocess, 6.5ms inference, 11

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.8ms
Speed: 5.6ms preprocess, 6.8ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.3ms preprocess, 6.4ms inference, 11.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 5.2ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.8ms
Speed: 3.1ms preprocess, 4.8ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.6ms
Speed: 1.7ms preprocess, 4.6ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.6ms
Speed: 1.8ms preprocess, 4.6ms inference, 10.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.0ms preprocess, 6.3ms inference, 1

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.4ms
Speed: 2.3ms preprocess, 6.4ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.9ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.8ms
Speed: 3.2ms preprocess, 4.8ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.5ms
Speed: 1.7ms preprocess, 4.5ms inference, 0.4ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.6ms
Speed: 1.7ms preprocess, 4.6ms inference, 11.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.2ms preprocess, 6.3ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.7ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.2ms preprocess, 6.4ms inference, 1

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.4ms
Speed: 2.3ms preprocess, 6.4ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 2.9ms preprocess, 6.3ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.3ms
Speed: 1.7ms preprocess, 4.3ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.8ms
Speed: 1.7ms preprocess, 4.8ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.2ms preprocess, 6.3ms inference, 11.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.0ms preprocess, 6.4ms inference, 11.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.2ms preprocess, 6.4ms inference, 1

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.3ms
Speed: 6.5ms preprocess, 6.3ms inference, 3.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 1.9ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.5ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.6ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.6ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.6ms preprocess, 6.4ms inference, 8.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.9ms
Speed: 1.6ms preprocess, 4.9ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.6ms
Speed: 1.6ms preprocess, 4.6ms inference, 0.

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.4ms
Speed: 2.3ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.0ms preprocess, 6.4ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.2ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 3.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.6ms
Speed: 1.7ms preprocess, 4.6ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 1.8ms preprocess, 6.3ms inference, 11.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 7.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.5ms
Speed: 2.8ms preprocess, 6.5ms inference, 11

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.3ms
Speed: 2.3ms preprocess, 6.3ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 5.2ms
Speed: 1.7ms preprocess, 5.2ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.7ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.2ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.2ms preprocess, 6.4ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 4.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.7ms
Speed: 1.7ms preprocess, 4.7ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.1ms preprocess, 6.3ms inference, 1

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.4ms
Speed: 6.8ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.7ms preprocess, 6.4ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.3ms preprocess, 6.4ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 5.5ms
Speed: 1.7ms preprocess, 5.5ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 5.8ms
Speed: 1.7ms preprocess, 5.8ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 2.8ms preprocess, 6.3ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.0ms preprocess, 6.3ms inference, 6

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.3ms
Speed: 2.3ms preprocess, 6.3ms inference, 10.4ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.3ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.1ms preprocess, 6.3ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 5.5ms
Speed: 3.0ms preprocess, 5.5ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 5.6ms
Speed: 1.6ms preprocess, 5.6ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.0ms
Speed: 1.7ms preprocess, 6.0ms inference, 6

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.3ms
Speed: 3.3ms preprocess, 6.3ms inference, 11.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.4ms preprocess, 6.3ms inference, 7.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.5ms
Speed: 2.2ms preprocess, 6.5ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.9ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.9ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.6ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.9ms
Speed: 3.0ms preprocess, 4.9ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.6ms
Speed: 2.1ms preprocess, 4.6ms inference, 0

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.3ms
Speed: 2.4ms preprocess, 6.3ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.9ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.2ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.2ms preprocess, 6.3ms inference, 10.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.3ms
Speed: 1.7ms preprocess, 4.3ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.4ms
Speed: 1.7ms preprocess, 4.4ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.1ms preprocess, 6.3ms inference, 11.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.0ms preprocess, 6.4ms inference, 8

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.3ms
Speed: 6.5ms preprocess, 6.3ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.0ms
Speed: 2.1ms preprocess, 6.0ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 8.7ms
Speed: 2.6ms preprocess, 8.7ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.6ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.7ms preprocess, 6.4ms inference, 18.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.7ms preprocess, 6.4ms inference, 4.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.3ms
Speed: 1.7ms preprocess, 4.3ms inference, 0.4ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.8ms
Speed: 1.6ms preprocess, 4.8ms inference, 0

number of examples of Gprop1: 12
final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.6ms
Speed: 1.7ms preprocess, 6.6ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 2.7ms preprocess, 6.3ms inference, 10.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.9ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.1ms preprocess, 6.3ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.6ms preprocess, 6.4ms inference, 18.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.6ms
Speed: 1.8ms preprocess, 4.6ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.6ms
Speed: 1.7ms preprocess, 4.6ms inference, 

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.3ms
Speed: 2.3ms preprocess, 6.3ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.2ms
Speed: 2.1ms preprocess, 6.2ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.6ms preprocess, 6.4ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.6ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.6ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.6ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.6ms
Speed: 1.6ms preprocess, 4.6ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 1.7ms preprocess, 6.3ms inference, 

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.7ms
Speed: 6.0ms preprocess, 6.7ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.6ms preprocess, 6.4ms inference, 18.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.0ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.9ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.6ms
Speed: 1.8ms preprocess, 4.6ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.6ms
Speed: 1.9ms preprocess, 4.6ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.6ms
Speed: 1.8ms preprocess, 6.6ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.1ms preprocess, 6.3ms inference, 1

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.3ms
Speed: 2.3ms preprocess, 6.3ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.7ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.1ms preprocess, 6.3ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.2ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.3ms
Speed: 3.1ms preprocess, 4.3ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.4ms
Speed: 1.7ms preprocess, 4.4ms inference, 0.4ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.6ms
Speed: 1.8ms preprocess, 4.6ms inference, 4

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.2ms
Speed: 7.7ms preprocess, 6.2ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.6ms preprocess, 6.4ms inference, 3.4ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 8.7ms
Speed: 2.8ms preprocess, 8.7ms inference, 11.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.3ms preprocess, 6.4ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.5ms
Speed: 1.7ms preprocess, 4.5ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.6ms
Speed: 1.8ms preprocess, 4.6ms inference, 0

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.3ms
Speed: 2.3ms preprocess, 6.3ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.3ms preprocess, 6.4ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.3ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.6ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.6ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.6ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.6ms preprocess, 6.4ms inference, 8.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.5ms
Speed: 1.7ms preprocess, 4.5ms inference, 0

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 4.8ms
Speed: 2.0ms preprocess, 4.8ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.5ms
Speed: 1.8ms preprocess, 4.5ms inference, 5.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.2ms
Speed: 3.0ms preprocess, 6.2ms inference, 11.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.5ms
Speed: 3.1ms preprocess, 4.5ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.5ms
Speed: 2.4ms preprocess, 6.5ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 11

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.3ms
Speed: 2.3ms preprocess, 6.3ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 5.6ms
Speed: 1.7ms preprocess, 5.6ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.2ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.2ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.2ms preprocess, 6.4ms inference, 3.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.3ms
Speed: 1.6ms preprocess, 4.3ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.7ms
Speed: 1.7ms preprocess, 4.7ms inference, 0

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.4ms
Speed: 7.4ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.7ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.1ms preprocess, 6.3ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.6ms
Speed: 3.1ms preprocess, 4.6ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.7ms
Speed: 1.7ms preprocess, 4.7ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.1ms preprocess, 6.3ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.5ms preprocess, 6.4ms inference, 

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.4ms
Speed: 2.3ms preprocess, 6.4ms inference, 5.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.8ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.2ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.2ms preprocess, 6.3ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.4ms
Speed: 2.9ms preprocess, 4.4ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.5ms
Speed: 1.7ms preprocess, 4.5ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 5.8ms
Speed: 1.7ms preprocess, 5.8ms inference, 11

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 4.4ms
Speed: 2.3ms preprocess, 4.4ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.3ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.2ms preprocess, 6.3ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.2ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.4ms
Speed: 3.1ms preprocess, 4.4ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 8.6ms
Speed: 2.7ms preprocess, 8.6ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 8.6ms
Speed: 3.7ms preprocess, 8.6ms inference, 11

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 4.6ms
Speed: 5.8ms preprocess, 4.6ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.7ms
Speed: 1.7ms preprocess, 4.7ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 2.6ms preprocess, 6.3ms inference, 11.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 8.6ms
Speed: 3.0ms preprocess, 8.6ms inference, 5.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.0ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.2ms preprocess, 6.3ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 11

--processing cifar100--
number of ltl rule: 3
number of examples of prop1Uprop2: 12
final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.3ms
Speed: 6.5ms preprocess, 6.3ms inference, 7.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.8ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 8.7ms
Speed: 3.1ms preprocess, 8.7ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.2ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.5ms
Speed: 1.7ms preprocess, 4.5ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.6ms
Speed: 1.7ms preprocess, 4.6ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.0ms
Speed: 1.7ms preprocess, 6.0ms inference, 11

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 4.6ms
Speed: 5.8ms preprocess, 4.6ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 5.2ms
Speed: 1.8ms preprocess, 5.2ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 2.4ms preprocess, 6.3ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.5ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.0ms preprocess, 6.3ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.2ms preprocess, 6.4ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.7ms
Speed: 3.1ms preprocess, 4.7ms inference, 

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.4ms
Speed: 2.4ms preprocess, 6.4ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.5ms
Speed: 1.7ms preprocess, 4.5ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 1.7ms preprocess, 6.3ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.2ms preprocess, 6.3ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 1.9ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.2ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.0ms preprocess, 6.4ms inference, 

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.4ms
Speed: 6.6ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.4ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 5.1ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.7ms
Speed: 1.7ms preprocess, 4.7ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 13.8ms
Speed: 3.4ms preprocess, 13.8ms inference, 11.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.5ms preprocess, 6.4ms inference, 3.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.9ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.2ms preprocess, 6.4ms inference,

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.4ms
Speed: 2.4ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.8ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.0ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 9.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 5.4ms
Speed: 1.8ms preprocess, 5.4ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 7.5ms
Speed: 1.7ms preprocess, 7.5ms inference, 18.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.7ms preprocess, 6.4ms inference, 6.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.5ms preprocess, 6.4ms inference, 18

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.4ms
Speed: 6.6ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.7ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 5.2ms preprocess, 6.4ms inference, 5.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.5ms
Speed: 1.7ms preprocess, 4.5ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.6ms
Speed: 1.7ms preprocess, 4.6ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.6ms preprocess, 6.3ms inference, 11.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.9ms preprocess, 6.4ms inference, 5.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 11.

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.3ms
Speed: 2.3ms preprocess, 6.3ms inference, 5.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.8ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.2ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 2.8ms preprocess, 6.3ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.7ms
Speed: 3.2ms preprocess, 4.7ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.6ms
Speed: 1.8ms preprocess, 4.6ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.0ms preprocess, 6.3ms inference, 11

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.4ms
Speed: 6.5ms preprocess, 6.4ms inference, 3.4ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.3ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.9ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.0ms preprocess, 6.3ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.0ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.5ms
Speed: 3.0ms preprocess, 4.5ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 5.1ms
Speed: 1.8ms preprocess, 5.1ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.8ms preprocess, 6.3ms inference, 11

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.2ms
Speed: 2.3ms preprocess, 6.2ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.9ms
Speed: 1.7ms preprocess, 4.9ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.5ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.0ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.6ms preprocess, 6.4ms inference, 4.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.5ms
Speed: 1.8ms preprocess, 4.5ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.7ms
Speed: 1.7ms preprocess, 4.7ms inference, 3

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.3ms
Speed: 6.5ms preprocess, 6.3ms inference, 3.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.0ms preprocess, 6.4ms inference, 18.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 8.6ms
Speed: 3.1ms preprocess, 8.6ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 9.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.6ms
Speed: 1.7ms preprocess, 4.6ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 2.0ms preprocess, 6.3ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.2ms preprocess, 6.3ms inference, 8.

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 4.8ms
Speed: 1.8ms preprocess, 4.8ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.5ms
Speed: 1.6ms preprocess, 4.5ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 1.8ms preprocess, 6.3ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.1ms preprocess, 6.3ms inference, 10.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.9ms preprocess, 6.4ms inference, 11.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 2.6ms preprocess, 6.3ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.0ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.5ms preprocess, 6.4ms inference, 1

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 8.5ms
Speed: 6.5ms preprocess, 8.5ms inference, 11.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.0ms preprocess, 6.4ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 8.7ms
Speed: 3.7ms preprocess, 8.7ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.6ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 8.7ms
Speed: 2.8ms preprocess, 8.7ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 5.1ms
Speed: 1.9ms preprocess, 5.1ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 7.1ms
Speed: 1.7ms preprocess, 7.1ms inference, 18.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.7ms preprocess, 6.4ms inference, 

number of examples of Fprop1: 12
final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.4ms
Speed: 2.1ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.8ms preprocess, 6.4ms inference, 11.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.6ms
Speed: 1.7ms preprocess, 4.6ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.4ms preprocess, 6.4ms inference, 11.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.2ms preprocess, 6.4ms inference, 8

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.4ms
Speed: 6.5ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 2.6ms preprocess, 6.3ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 5.2ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.8ms
Speed: 1.7ms preprocess, 4.8ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.6ms
Speed: 1.5ms preprocess, 4.6ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.6ms preprocess, 6.3ms inference, 11.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.9ms preprocess, 6.4ms inference, 5.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.5ms
Speed: 3.1ms preprocess, 6.5ms inference, 11

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.3ms
Speed: 2.3ms preprocess, 6.3ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 5.2ms
Speed: 1.7ms preprocess, 5.2ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.2ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 13.3ms
Speed: 4.3ms preprocess, 13.3ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 5.3ms
Speed: 1.8ms preprocess, 5.3ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.6ms
Speed: 1.7ms preprocess, 4.6ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.3ms preprocess, 6.3ms inference,

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.5ms
Speed: 1.7ms preprocess, 6.5ms inference, 11.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 2.8ms preprocess, 6.3ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 1.8ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.2ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 5.3ms
Speed: 2.7ms preprocess, 5.3ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 5.0ms
Speed: 1.8ms preprocess, 5.0ms inference, 

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.4ms
Speed: 6.1ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.0ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 8.6ms
Speed: 3.1ms preprocess, 8.6ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.9ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 5.0ms
Speed: 3.1ms preprocess, 5.0ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.8ms
Speed: 1.7ms preprocess, 4.8ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.1ms preprocess, 6.3ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.1ms preprocess, 6.3ms inference, 5

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.3ms
Speed: 2.4ms preprocess, 6.3ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.7ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.6ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.0ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.6ms preprocess, 6.4ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.5ms
Speed: 1.7ms preprocess, 4.5ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 5.2ms
Speed: 1.7ms preprocess, 5.2ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 1

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.4ms
Speed: 2.3ms preprocess, 6.4ms inference, 8.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.0ms preprocess, 6.4ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.8ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.9ms preprocess, 6.4ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.0ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 5.5ms
Speed: 3.1ms preprocess, 5.5ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 5.7ms
Speed: 2.4ms preprocess, 5.7ms inference, 12.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 2.9ms preprocess, 6.3ms inference, 1

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.4ms
Speed: 2.4ms preprocess, 6.4ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.9ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.2ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.2ms preprocess, 6.3ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.4ms
Speed: 1.7ms preprocess, 4.4ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.6ms preprocess, 6.3ms inference, 11.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 2.7ms preprocess, 6.3ms inference, 7

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.5ms
Speed: 7.8ms preprocess, 6.5ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 1.9ms preprocess, 6.4ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 8.7ms
Speed: 2.5ms preprocess, 8.7ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.5ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 5.1ms
Speed: 2.5ms preprocess, 5.1ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.8ms
Speed: 1.6ms preprocess, 4.8ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 2.6ms preprocess, 6.3ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.1ms preprocess, 6.3ms inference, 5

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.2ms
Speed: 2.3ms preprocess, 6.2ms inference, 9.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.2ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.2ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.1ms preprocess, 6.3ms inference, 18.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.2ms preprocess, 6.3ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.5ms
Speed: 1.7ms preprocess, 4.5ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.7ms
Speed: 1.7ms preprocess, 4.7ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 2.7ms preprocess, 6.3ms inference, 11

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.4ms
Speed: 2.0ms preprocess, 6.4ms inference, 12.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.6ms
Speed: 3.9ms preprocess, 6.6ms inference, 12.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 7.1ms
Speed: 3.6ms preprocess, 7.1ms inference, 15.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.2ms
Speed: 5.6ms preprocess, 6.2ms inference, 22.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.8ms
Speed: 4.4ms preprocess, 6.8ms inference, 9.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.1ms
Speed: 2.5ms preprocess, 6.1ms inference, 5.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.2ms
Speed: 1.8ms preprocess, 6.2ms inference, 17.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 4.0ms preprocess, 6.4ms inference, 2

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.5ms
Speed: 8.6ms preprocess, 6.5ms inference, 10.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 12.6ms
Speed: 4.3ms preprocess, 12.6ms inference, 22.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 5.3ms preprocess, 6.3ms inference, 17.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.6ms
Speed: 3.7ms preprocess, 4.6ms inference, 12.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.6ms
Speed: 3.6ms preprocess, 6.6ms inference, 19.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 4.3ms preprocess, 6.4ms inference, 14.4ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.5ms
Speed: 3.9ms preprocess, 6.5ms inference, 8.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.2ms
Speed: 3.9ms preprocess, 6.2ms inference

number of examples of Gprop1: 12
final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.6ms
Speed: 2.4ms preprocess, 6.6ms inference, 6.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.4ms
Speed: 1.6ms preprocess, 4.4ms inference, 17.4ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 5.6ms preprocess, 6.3ms inference, 22.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.4ms
Speed: 2.1ms preprocess, 4.4ms inference, 12.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.6ms
Speed: 3.8ms preprocess, 6.6ms inference, 12.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 4.5ms preprocess, 6.3ms inference, 21.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 7.0ms
Speed: 2.7ms preprocess, 7.0ms inference, 6.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 5.0ms
Speed: 2.0ms preprocess, 5.0ms inference, 6

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 7.4ms
Speed: 8.6ms preprocess, 7.4ms inference, 14.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.0ms preprocess, 6.4ms inference, 12.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.6ms
Speed: 3.6ms preprocess, 6.6ms inference, 12.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 5.6ms
Speed: 1.9ms preprocess, 5.6ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.7ms
Speed: 2.0ms preprocess, 4.7ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 5.7ms
Speed: 1.5ms preprocess, 5.7ms inference, 18.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 10.6ms
Speed: 4.9ms preprocess, 10.6ms inference, 12.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 3.8ms preprocess, 6.3ms inference,

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 8.3ms
Speed: 2.3ms preprocess, 8.3ms inference, 13.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 7.2ms
Speed: 2.2ms preprocess, 7.2ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.6ms
Speed: 2.9ms preprocess, 6.6ms inference, 12.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.6ms
Speed: 3.9ms preprocess, 6.6ms inference, 12.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 7.2ms
Speed: 4.1ms preprocess, 7.2ms inference, 32.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 5.5ms preprocess, 6.4ms inference, 15.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 5.7ms
Speed: 4.1ms preprocess, 5.7ms inference, 5.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 7.4ms
Speed: 3.1ms preprocess, 7.4ms inference, 

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.3ms
Speed: 2.5ms preprocess, 6.3ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.0ms
Speed: 2.1ms preprocess, 6.0ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.4ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.5ms
Speed: 3.1ms preprocess, 6.5ms inference, 12.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.6ms
Speed: 2.5ms preprocess, 6.6ms inference, 12.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.9ms
Speed: 1.7ms preprocess, 6.9ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.6ms
Speed: 1.7ms preprocess, 4.6ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.3ms preprocess, 6.4ms inference, 2

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.4ms
Speed: 6.5ms preprocess, 6.4ms inference, 12.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.5ms
Speed: 3.0ms preprocess, 6.5ms inference, 8.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 9.0ms
Speed: 3.1ms preprocess, 9.0ms inference, 12.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.5ms
Speed: 3.6ms preprocess, 6.5ms inference, 19.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.6ms
Speed: 3.6ms preprocess, 6.6ms inference, 12.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.5ms
Speed: 3.5ms preprocess, 6.5ms inference, 11.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.5ms
Speed: 1.6ms preprocess, 4.5ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.6ms
Speed: 1.8ms preprocess, 4.6ms inference, 1

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.3ms
Speed: 4.6ms preprocess, 6.3ms inference, 20.4ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.1ms
Speed: 2.2ms preprocess, 6.1ms inference, 21.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 4.0ms preprocess, 6.3ms inference, 21.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.8ms preprocess, 6.4ms inference, 21.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.2ms
Speed: 3.6ms preprocess, 6.2ms inference, 12.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.7ms
Speed: 3.6ms preprocess, 6.7ms inference, 14.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.5ms
Speed: 1.9ms preprocess, 4.5ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 5.6ms
Speed: 1.6ms preprocess, 5.6ms inference, 

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.4ms
Speed: 2.3ms preprocess, 6.4ms inference, 5.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.6ms
Speed: 3.3ms preprocess, 6.6ms inference, 12.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.6ms
Speed: 2.6ms preprocess, 6.6ms inference, 12.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 7.1ms
Speed: 2.7ms preprocess, 7.1ms inference, 21.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 6.1ms preprocess, 6.3ms inference, 19.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.8ms
Speed: 2.4ms preprocess, 6.8ms inference, 10.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.0ms
Speed: 2.1ms preprocess, 6.0ms inference, 13.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 9.5ms
Speed: 2.0ms preprocess, 9.5ms inference, 

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.4ms
Speed: 2.3ms preprocess, 6.4ms inference, 3.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.6ms
Speed: 3.3ms preprocess, 6.6ms inference, 12.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.6ms
Speed: 1.7ms preprocess, 6.6ms inference, 12.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.4ms
Speed: 4.1ms preprocess, 4.4ms inference, 19.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.2ms
Speed: 5.5ms preprocess, 6.2ms inference, 21.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.7ms
Speed: 3.9ms preprocess, 6.7ms inference, 6.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.7ms
Speed: 2.2ms preprocess, 4.7ms inference, 12.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 5.0ms
Speed: 1.9ms preprocess, 5.0ms inference, 2

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 7.4ms
Speed: 8.6ms preprocess, 7.4ms inference, 15.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.5ms
Speed: 3.9ms preprocess, 6.5ms inference, 11.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.6ms
Speed: 2.3ms preprocess, 6.6ms inference, 12.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 5.5ms
Speed: 2.0ms preprocess, 5.5ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.8ms
Speed: 1.7ms preprocess, 4.8ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 8.8ms
Speed: 4.0ms preprocess, 8.8ms inference, 22.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 5.7ms preprocess, 6.4ms inference, 15.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.7ms
Speed: 2.8ms preprocess, 6.7ms inference, 1

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.6ms
Speed: 3.3ms preprocess, 6.6ms inference, 12.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.6ms
Speed: 2.2ms preprocess, 6.6ms inference, 12.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.4ms
Speed: 2.7ms preprocess, 4.4ms inference, 14.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 2.2ms preprocess, 6.3ms inference, 22.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 2.4ms preprocess, 6.3ms inference, 14.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.7ms
Speed: 2.8ms preprocess, 4.7ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 7.0ms
Speed: 2.9ms preprocess, 7.0ms inference, 4.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.9ms
Speed: 1.7ms preprocess, 4.9ms inference, 1

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.2ms
Speed: 8.6ms preprocess, 6.2ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.8ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 2.6ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.2ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.0ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.7ms
Speed: 2.9ms preprocess, 4.7ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.9ms
Speed: 1.8ms preprocess, 4.9ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.3ms
Speed: 1.9ms preprocess, 6.3ms inference, 1

final text_encoder_type: bert-base-uncased



0: 640x640 (no detections), 6.3ms
Speed: 2.3ms preprocess, 6.3ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.7ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.4ms
Speed: 3.0ms preprocess, 6.4ms inference, 11.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.8ms
Speed: 3.2ms preprocess, 4.8ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4.7ms
Speed: 1.8ms preprocess, 4.7ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 6.8ms
Speed: 1.7ms preprocess, 6.8ms inference, 1