In [6]:
import gradio as gr
from hf_samdino import process_video

In [10]:
def gradio_process_video(
    video_file, detector,
    box_threshold_1, box_threshold_2,
    text_threshold_1, text_threshold_2,
    confidence_1, confidence_2,
    iou_1, iou_2,
    frame_len, frame_stride, gif_duration,
):
    """Adapt the flat list of Gradio inputs to the ``process_video`` call.

    Gradio passes every input component as a separate positional argument,
    while ``process_video`` takes each per-class setting as a two-element
    list. The ``*_1`` value is placed at index 0 and the ``*_2`` value at
    index 1 (per the slider labels in the UI cell these correspond to
    "Player/Referee" and "Basketball" respectively).

    Args:
        video_file: Value of the video input; forwarded as ``video_path``.
        detector: Selected detector name; forwarded unchanged.
        box_threshold_1, box_threshold_2: Grounding DINO box thresholds.
        text_threshold_1, text_threshold_2: Grounding DINO text thresholds.
        confidence_1, confidence_2: YOLO confidence values.
        iou_1, iou_2: YOLO IoU thresholds.
        frame_len, frame_stride, gif_duration: Forwarded unchanged.

    Returns:
        Whatever ``process_video`` returns.
    """
    # Both detector parameter sets are always forwarded, regardless of which
    # detector was selected.
    pipeline_kwargs = dict(
        video_path=video_file,
        detector=detector,
        dino_box_threshold=[box_threshold_1, box_threshold_2],
        dino_text_threshold=[text_threshold_1, text_threshold_2],
        yolo_confidence=[confidence_1, confidence_2],
        yolo_iou_threshold=[iou_1, iou_2],
        frame_len=frame_len,
        frame_stride=frame_stride,
        gif_duration=gif_duration,
    )
    return process_video(**pipeline_kwargs)

In [11]:
def update_inputs(detector):
    """Toggle which detector-specific slider group is visible.

    Args:
        detector: The currently selected detector label.

    Returns:
        A two-element list of ``gr.update`` objects, ordered as
        ``[dino_group_update, yolo_group_update]``. The DINO group is shown
        only when ``detector`` equals ``"Grounding DINO"``; any other value
        shows the YOLO group instead.
    """
    show_dino = detector == "Grounding DINO"
    # Exactly one of the two groups is visible at any time.
    return [gr.update(visible=show_dino), gr.update(visible=not show_dino)]

In [None]:
# Build the demo UI: inputs on the left, rendered result on the right.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column(scale=1):  # Left half for inputs
            video = gr.Video(label="Upload Video")
            # Pre-select a detector so that a matching slider group is visible
            # on first load and "Process" never submits detector=None.
            detector = gr.Radio(
                choices=["Grounding DINO", "YOLO"],
                value="Grounding DINO",
                label="Choose a detector",
            )

            # Visible initially because "Grounding DINO" is the default choice.
            with gr.Group(visible=True) as dino_sliders:
                box_threshold_1 = gr.Slider(0, 1, value=0.35, step=0.05, label="Box Threshold (DINO): Player/Referee")
                box_threshold_2 = gr.Slider(0, 1, value=0.35, step=0.05, label="Box Threshold (DINO): Basketball")
                text_threshold_1 = gr.Slider(0, 1, value=0.35, step=0.05, label="Text Threshold (DINO): Player/Referee")
                text_threshold_2 = gr.Slider(0, 1, value=0.35, step=0.05, label="Text Threshold (DINO): Basketball")

            # Hidden until the user switches the detector to "YOLO".
            with gr.Group(visible=False) as yolo_sliders:
                confidence_1 = gr.Slider(0, 1, value=0.3, step=0.05, label="Confidence (YOLO): Player/Referee")
                confidence_2 = gr.Slider(0, 1, value=0.75, step=0.05, label="Confidence (YOLO): Basketball")
                iou_1 = gr.Slider(0, 1, value=0.7, step=0.05, label="IOU Threshold (YOLO): Player/Referee")
                iou_2 = gr.Slider(0, 1, value=0.5, step=0.05, label="IOU Threshold (YOLO): Basketball")

            # Settings shared by both detectors.
            frame_len = gr.Slider(1, 500, value=100, step=1, label="Frame Length")
            frame_stride = gr.Slider(1, 50, value=3, step=1, label="Frame Stride")
            gif_duration = gr.Slider(1, 500, value=100, step=1, label="GIF Duration")

            submit = gr.Button("Process")

        with gr.Column(scale=1):  # Right half for outputs
            output = gr.Image(type="filepath", label="Output")

    # Adjust outputs: first dino_sliders group, then yolo_sliders group
    # (this order must match the list returned by update_inputs).
    detector.change(
        update_inputs,
        inputs=[detector],
        outputs=[dino_sliders, yolo_sliders]
    )

    # Submit button action: the order of `inputs` must match the positional
    # parameters of gradio_process_video.
    submit.click(
        gradio_process_video,
        inputs=[video, detector, box_threshold_1, box_threshold_2,
                text_threshold_1, text_threshold_2,
                confidence_1, confidence_2, iou_1, iou_2,
                frame_len, frame_stride, gif_duration],
        outputs=output
    )

# NOTE(review): share=True publishes the app on a temporary public
# gradio.live URL; anyone with the link can upload videos to this machine.
demo.launch(share=True)

* Running on local URL:  http://127.0.0.1:7865
* Running on public URL: https://96edfa21b3a14599fa.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




Converting video to video frames
Extracted 648 frames to 'output_video_frames'.
using device: cuda
Object detection of starting frame using YOLO




Frame  0
Frame  1
Frame  2
Frame  3
Frame  4
Frame  5
Frame  6
Frame  7
Frame  8
Frame  9
Frame  10
Frame  11
Frame  12
Frame  13
Frame  14
Frame  15
Frame  16
Frame  17
Frame  18
Frame  19
Frame  20
Frame  21
Frame  22
Frame  23
Frame  24
Frame  25
Frame  26
Frame  27
Frame  28
Frame  29
Frame  30
Frame  31
Frame  32
Frame  33
Frame  34
Frame  35
Frame  36
Frame  37
Frame  38
Frame  39
Frame  40
Frame  41
Frame  42
Frame  43
Frame  44
Frame  45
Frame  46
Frame  47
Frame  48
Frame  49
Frame  50
Frame  51




Loading SAM2
Initializing SAM2


Falling back to all available kernels for scaled_dot_product_attention (which may have a slower speed).
propagate in video: 100%|██████████| 100/100 [01:01<00:00,  1.62it/s]


Saving annotations
Saving segmented frames


100%|██████████| 100/100 [00:45<00:00,  2.19it/s]


Creating video and gif
Video saved to output\output.mp4
