In [1]:
import sys, os, json, time, subprocess, pathlib
from pathlib import Path
from davis2017.davis import DAVIS
import imageio.v3 as iio
import numpy as np
from tqdm import tqdm
import torch

# ── USER‐CONFIGURABLE PATHS ──────────────────────────────────────────────────
DAVIS_ROOT  = Path("./data/DAVIS")          # ← point this at your DAVIS folder
OUT_DIR     = Path("./data/DAVIS2017_mgfs_naive_optical")           # ← where we’ll write out PNGs
OUT_DIR.mkdir(parents=True, exist_ok=True)

# ── STEP 1: load the DAVIS “val” split (semi‐supervised task) ───────────────────
ds = DAVIS(str(DAVIS_ROOT), task="semi-supervised", subset="val", resolution="480p")
print(f"Loaded {len(ds.sequences)} validation sequences")

# ── STEP 2: build SAM 2 video predictor ─────────────────────────────────────────
from sam2.build_sam import build_sam2_video_predictor

device = "cuda" if torch.cuda.is_available() else "cpu"
sam2_checkpoint = "./checkpoints/sam2.1_hiera_large.pt"   # ← adjust if needed
model_cfg       = "configs/sam2.1/sam2.1_hiera_l.yaml"   # ← adjust if needed

predictor = build_sam2_video_predictor(model_cfg, sam2_checkpoint, device=device)
predictor.to(device)   # make sure model is on CUDA if available

Loaded 30 validation sequences


SAM2VideoPredictor(
  (image_encoder): ImageEncoder(
    (trunk): Hiera(
      (patch_embed): PatchEmbed(
        (proj): Conv2d(3, 144, kernel_size=(7, 7), stride=(4, 4), padding=(3, 3))
      )
      (blocks): ModuleList(
        (0-1): 2 x MultiScaleBlock(
          (norm1): LayerNorm((144,), eps=1e-06, elementwise_affine=True)
          (attn): MultiScaleAttention(
            (qkv): Linear(in_features=144, out_features=432, bias=True)
            (proj): Linear(in_features=144, out_features=144, bias=True)
          )
          (drop_path): Identity()
          (norm2): LayerNorm((144,), eps=1e-06, elementwise_affine=True)
          (mlp): MLP(
            (layers): ModuleList(
              (0): Linear(in_features=144, out_features=576, bias=True)
              (1): Linear(in_features=576, out_features=144, bias=True)
            )
            (act): GELU(approximate='none')
          )
        )
        (2): MultiScaleBlock(
          (norm1): LayerNorm((144,), eps=1e-06, elemen

In [2]:
# ── STEP 3: for each DAVIS sequence, run SAM 2 and save outputs ───────────────
for seq in ds.sequences:
    print(f"\n=== Processing sequence: {seq} ===")

    # 3a) directories of images & GT masks for this sequence
    img_dir  = DAVIS_ROOT / "JPEGImages"  / "480p" / seq
    mask_dir = DAVIS_ROOT / "Annotations" / "480p" / seq

    img_paths  = sorted(img_dir.glob("*.jpg"))
    mask_paths = sorted(mask_dir.glob("*.png"))
    assert len(img_paths) == len(mask_paths), (
        f"Image/mask count mismatch in {seq}: {len(img_paths)} vs {len(mask_paths)}"
    )

    # 3b) initialize inference state by “loading” this sequence as a video
    #     We pass the *directory* of frames to init_state.  Internally, it will call
    #     `load_video_frames(video_path=video_dir, ...)` and store all frames in memory.
    video_dir = str(img_dir)  # e.g. "./data/davis/DAVIS/JPEGImages/480p/<seq>"
    inference_state = predictor.init_state(
        video_path=video_dir,
        offload_video_to_cpu=False,
        offload_state_to_cpu=False,
        async_loading_frames=False
    )

# (b) load the single “00000.png” which contains two different colored regions
    rgb = iio.imread(str(mask_dir / "00000.png"))  # shape (H, W, 3)
    H, W, C = rgb.shape
    assert C == 3, "Expected a 3‐channel (RGB) first‐frame mask."

    # (c) find all unique RGB colors except black
    flat = rgb.reshape(-1, 3)                        # shape (H*W, 3)
    uniq_colors = np.unique(flat, axis=0)            # shape (K, 3), where K ≤ (H*W)
    # Remove the black color (0,0,0) if present
    non_black = [tuple(c) for c in uniq_colors if not np.all(c == 0)]
    if len(non_black) == 0:
        raise RuntimeError(f"No non‐black colors found in {seq}/00000.png")

    # (d) for each unique non‐black color, build a 2D boolean mask and register it
    print(f"Found {len(non_black)} unique non‐black colors in {seq}/00000.png")
    for idx, color in enumerate(non_black):
        # color is something like (200, 0, 0) or (0, 200, 0)
        R, G, B = color
        # build a binary mask: True where pixel == this color
        bin_mask = np.logical_and.reduce([
            rgb[:, :, 0] == R,
            rgb[:, :, 1] == G,
            rgb[:, :, 2] == B
        ])  # shape (H, W), dtype=bool

        # wrap as torch.bool on the same device as SAM 2
        mask2d = torch.from_numpy(bin_mask).to(device)

        # register this mask as object `idx`
        predictor.add_new_mask(
            inference_state=inference_state,
            frame_idx=0,
            obj_id=idx,  # choose 0,1,2,… per color
            mask=mask2d
        )

    # 3e) now propagate through all frames.  As each new frame is processed,
    #     propagate_in_video yields (frame_idx, [obj_ids], video_res_masks).
    #
    #     We’ll save each mask as “00000.png”, “00001.png”, … under OUT_DIR/<seq>/
    seq_out_dir = OUT_DIR / seq
    seq_out_dir.mkdir(parents=True, exist_ok=True)

    for frame_idx, obj_ids, video_res_masks in tqdm(
        predictor.propagate_in_video(inference_state),
        total=len(img_paths)-1,
        desc=f"Propagating {seq}"
    ):
        # # ‣ frame_idx is an integer (1,2,3,…).  video_res_masks is a tensor of shape
        # #   (num_objects, H, W).  For DAVIS, num_objects==1.
        # #
        # # ‣ Thresholding has already happened internally; `video_res_masks` is
        # #   a float‐tensor where positive values correspond to predicted “object.”
        # mask_np = (video_res_masks[0].cpu().numpy() > 0.0).astype(np.uint8) * 255

        # # Save with zero‐padded five digits to match DAVIS naming:
        # save_name = f"{frame_idx:05d}.png"
        # save_path = seq_out_dir / save_name
        # iio.imwrite(str(save_path), mask_np)

        # Suppose `video_res_masks` is whatever you get from propagate_in_video:
        #   • If there is only one object, it may be a 2D tensor of shape (H, W)
        #   • If there are multiple objects, it will be a 3D tensor of shape (O, H, W)

        pred_np = video_res_masks.cpu().numpy()   # dtype=float32 or float; # ───────────────────────────────────────────────────────────────
        # Assume you already did:
        #   pred_np = video_res_masks.cpu().numpy()

        # 1) Check how many dimensions `pred_np` has:
        if pred_np.ndim == 2:
            # Case A: single object, shape = (H, W)
            H, W = pred_np.shape
            O = 1
            pred_np = pred_np[np.newaxis, ...]  # -> now shape (1, H, W)

        elif pred_np.ndim == 3:
            # Could be either:
            #  (A) shape = (1, H, W)   ← single object with a leading axis
            #  (B) shape = (O, H, W)   ← multiple objects, no extra channel axis
            if pred_np.shape[0] == 1:
                # Treat as “one‐object” → squeeze to (1, H, W) (already fits our convention)
                O, H, W = pred_np.shape
            else:
                # Multi‐object already: (O, H, W)
                O, H, W = pred_np.shape
            # (no need to reshape because it’s already (O, H, W))

        elif pred_np.ndim == 4:
            # Some SAM 2 builds return (O, 1, H, W). In that case:
            #   • pred_np.shape = (O, 1, H, W)
            #   → we want to drop the “channel” dimension (axis=1).
            O = pred_np.shape[0]
            H = pred_np.shape[2]
            W = pred_np.shape[3]
            pred_np = pred_np[:, 0, :, :]  # now shape (O, H, W)

        else:
            raise RuntimeError(f"Unexpected mask array with ndim={pred_np.ndim}, shape={pred_np.shape}")

        # At this point:
        #   • pred_np is guaranteed to have shape (O, H, W)
        #   • O, H, W are set correctly
        # ───────────────────────────────────────────────────────────────

        # Now you can build your colored output exactly as before:

        colored = np.zeros((H, W, 3), dtype=np.uint8)

        for i in range(O):
            mask_i = (pred_np[i] > 0.0)   # boolean mask (H, W)
            if not mask_i.any():
                continue
            R, G, B = non_black[i]  # the original RGB for object i
            colored[mask_i, 0] = R
            colored[mask_i, 1] = G
            colored[mask_i, 2] = B

        save_name = f"{frame_idx:05d}.png"
        save_path = seq_out_dir / save_name
        iio.imwrite(str(save_path), colored)


    print(f"→ Saved all predicted masks for {seq} in {seq_out_dir}")

print("\nAll sequences processed.")
print(f"Your SAM 2 masks live under: {OUT_DIR}")



=== Processing sequence: bike-packing ===


frame loading (JPEG): 100%|██████████| 69/69 [00:02<00:00, 34.37it/s]


Found 2 unique non‐black colors in bike-packing/00000.png



Skipping the post-processing step due to the error above. You can still use SAM 2 and it's OK to ignore the error above, although some post-processing functionality may be limited (which doesn't affect the results in most cases; see https://github.com/facebookresearch/sam2/blob/main/INSTALL.md).
  pred_masks_gpu = fill_holes_in_mask_scores(
Propagating bike-packing:   0%|          | 0/68 [00:00<?, ?it/s]

Skipping frame 1 due to low MAD (0.04)


Propagating bike-packing:   4%|▍         | 3/68 [00:00<00:09,  6.86it/s]

Skipping frame 3 due to low MAD (0.04)


Propagating bike-packing:  41%|████      | 28/68 [00:10<00:17,  2.26it/s]

Skipping frame 28 due to low MAD (0.04)


Propagating bike-packing:  44%|████▍     | 30/68 [00:11<00:13,  2.89it/s]

Skipping frame 30 due to low MAD (0.03)
Avg FPS last 30 frames: 2.62
Skipping frame 31 due to low MAD (0.05)


Propagating bike-packing:  49%|████▊     | 33/68 [00:11<00:08,  3.91it/s]

Skipping frame 33 due to low MAD (0.05)


Propagating bike-packing:  90%|████████▉ | 61/68 [00:23<00:03,  2.26it/s]

Avg FPS last 30 frames: 2.41


propagate in video: 100%|██████████| 69/69 [00:27<00:00,  2.55it/s]5it/s]
Propagating bike-packing: 69it [00:27,  2.55it/s]                        


Skipping frame 68 due to low MAD (0.04)
Skipped 7 frames due to low MAD.
→ Saved all predicted masks for bike-packing in data/DAVIS2017_mgfs_naive_optical/bike-packing

=== Processing sequence: blackswan ===


frame loading (JPEG): 100%|██████████| 50/50 [00:01<00:00, 35.07it/s]


Found 1 unique non‐black colors in blackswan/00000.png


Propagating blackswan:  63%|██████▎   | 31/49 [00:10<00:06,  2.87it/s]

Avg FPS last 30 frames: 2.93


propagate in video: 100%|██████████| 50/50 [00:16<00:00,  2.97it/s]   
Propagating blackswan: 50it [00:16,  2.97it/s]


→ Saved all predicted masks for blackswan in data/DAVIS2017_mgfs_naive_optical/blackswan

=== Processing sequence: bmx-trees ===


frame loading (JPEG): 100%|██████████| 80/80 [00:02<00:00, 35.06it/s]


Found 2 unique non‐black colors in bmx-trees/00000.png


Propagating bmx-trees:  39%|███▉      | 31/79 [00:12<00:21,  2.28it/s]

Avg FPS last 30 frames: 2.35


Propagating bmx-trees:  77%|███████▋  | 61/79 [00:25<00:07,  2.28it/s]

Avg FPS last 30 frames: 2.28


propagate in video: 100%|██████████| 80/80 [00:34<00:00,  2.34it/s]   
Propagating bmx-trees: 80it [00:34,  2.34it/s]


→ Saved all predicted masks for bmx-trees in data/DAVIS2017_mgfs_naive_optical/bmx-trees

=== Processing sequence: breakdance ===


frame loading (JPEG): 100%|██████████| 84/84 [00:02<00:00, 33.21it/s]


Found 1 unique non‐black colors in breakdance/00000.png


Propagating breakdance:  37%|███▋      | 31/83 [00:10<00:18,  2.88it/s]

Avg FPS last 30 frames: 2.94


Propagating breakdance:  73%|███████▎  | 61/83 [00:20<00:07,  2.84it/s]

Avg FPS last 30 frames: 2.88


propagate in video: 100%|██████████| 84/84 [00:28<00:00,  2.93it/s]    
Propagating breakdance: 84it [00:28,  2.93it/s]


→ Saved all predicted masks for breakdance in data/DAVIS2017_mgfs_naive_optical/breakdance

=== Processing sequence: camel ===


frame loading (JPEG): 100%|██████████| 90/90 [00:02<00:00, 33.32it/s]


Found 1 unique non‐black colors in camel/00000.png


Propagating camel:  35%|███▍      | 31/89 [00:10<00:20,  2.89it/s]

Avg FPS last 30 frames: 2.93


Propagating camel:  69%|██████▊   | 61/89 [00:20<00:09,  2.89it/s]

Avg FPS last 30 frames: 2.88


propagate in video: 100%|██████████| 90/90 [00:30<00:00,  2.92it/s]
Propagating camel: 90it [00:30,  2.92it/s]


→ Saved all predicted masks for camel in data/DAVIS2017_mgfs_naive_optical/camel

=== Processing sequence: car-roundabout ===


frame loading (JPEG): 100%|██████████| 75/75 [00:02<00:00, 36.16it/s]


Found 1 unique non‐black colors in car-roundabout/00000.png


Propagating car-roundabout:  42%|████▏     | 31/74 [00:10<00:15,  2.86it/s]

Avg FPS last 30 frames: 2.94


Propagating car-roundabout:  82%|████████▏ | 61/74 [00:20<00:04,  2.89it/s]

Avg FPS last 30 frames: 2.88


propagate in video: 100%|██████████| 75/75 [00:25<00:00,  2.95it/s]        
Propagating car-roundabout: 75it [00:25,  2.94it/s]


→ Saved all predicted masks for car-roundabout in data/DAVIS2017_mgfs_naive_optical/car-roundabout

=== Processing sequence: car-shadow ===


frame loading (JPEG): 100%|██████████| 40/40 [00:01<00:00, 32.92it/s]


Found 1 unique non‐black colors in car-shadow/00000.png


Propagating car-shadow:  79%|███████▉  | 31/39 [00:10<00:02,  2.88it/s]

Avg FPS last 30 frames: 2.95


propagate in video: 100%|██████████| 40/40 [00:13<00:00,  3.00it/s]    
Propagating car-shadow: 40it [00:13,  3.00it/s]


→ Saved all predicted masks for car-shadow in data/DAVIS2017_mgfs_naive_optical/car-shadow

=== Processing sequence: cows ===


frame loading (JPEG): 100%|██████████| 104/104 [00:03<00:00, 34.58it/s]


Found 1 unique non‐black colors in cows/00000.png


Propagating cows:  30%|███       | 31/103 [00:10<00:25,  2.88it/s]

Avg FPS last 30 frames: 2.93


Propagating cows:  59%|█████▉    | 61/103 [00:20<00:14,  2.88it/s]

Avg FPS last 30 frames: 2.88


Propagating cows:  88%|████████▊ | 91/103 [00:31<00:04,  2.82it/s]

Avg FPS last 30 frames: 2.85


propagate in video: 100%|██████████| 104/104 [00:35<00:00,  2.91it/s]
Propagating cows: 104it [00:35,  2.91it/s]


→ Saved all predicted masks for cows in data/DAVIS2017_mgfs_naive_optical/cows

=== Processing sequence: dance-twirl ===


frame loading (JPEG): 100%|██████████| 90/90 [00:02<00:00, 34.28it/s]


Found 1 unique non‐black colors in dance-twirl/00000.png


Propagating dance-twirl:  35%|███▍      | 31/89 [00:10<00:20,  2.87it/s]

Avg FPS last 30 frames: 2.95


Propagating dance-twirl:  69%|██████▊   | 61/89 [00:20<00:09,  2.88it/s]

Avg FPS last 30 frames: 2.88


propagate in video: 100%|██████████| 90/90 [00:36<00:00,  2.47it/s]     
Propagating dance-twirl: 90it [00:36,  2.47it/s]


→ Saved all predicted masks for dance-twirl in data/DAVIS2017_mgfs_naive_optical/dance-twirl

=== Processing sequence: dog ===


frame loading (JPEG): 100%|██████████| 60/60 [00:01<00:00, 30.28it/s]


Found 1 unique non‐black colors in dog/00000.png


Propagating dog:  53%|█████▎    | 31/59 [00:18<00:17,  1.60it/s]

Avg FPS last 30 frames: 1.65


propagate in video: 100%|██████████| 60/60 [00:34<00:00,  1.72it/s]
Propagating dog: 60it [00:34,  1.72it/s]


→ Saved all predicted masks for dog in data/DAVIS2017_mgfs_naive_optical/dog

=== Processing sequence: dogs-jump ===


frame loading (JPEG): 100%|██████████| 66/66 [00:01<00:00, 34.08it/s]


Found 3 unique non‐black colors in dogs-jump/00000.png


Propagating dogs-jump:  35%|███▌      | 23/65 [00:20<00:41,  1.02it/s]

Skipping frame 23 due to low MAD (0.05)


Propagating dogs-jump:  38%|███▊      | 25/65 [00:21<00:30,  1.32it/s]

Skipping frame 25 due to low MAD (0.04)


Propagating dogs-jump:  42%|████▏     | 27/65 [00:22<00:24,  1.53it/s]

Skipping frame 27 due to low MAD (0.04)


Propagating dogs-jump:  45%|████▍     | 29/65 [00:23<00:21,  1.66it/s]

Skipping frame 29 due to low MAD (0.04)


Propagating dogs-jump:  48%|████▊     | 31/65 [00:24<00:19,  1.76it/s]

Avg FPS last 30 frames: 1.23
Skipping frame 31 due to low MAD (0.04)


Propagating dogs-jump:  51%|█████     | 33/65 [00:25<00:15,  2.01it/s]

Skipping frame 33 due to low MAD (0.05)


Propagating dogs-jump:  94%|█████████▍| 61/65 [00:49<00:03,  1.03it/s]

Avg FPS last 30 frames: 1.21
Skipping frame 61 due to low MAD (0.05)


Propagating dogs-jump:  97%|█████████▋| 63/65 [00:50<00:01,  1.32it/s]

Skipping frame 63 due to low MAD (0.05)


propagate in video: 100%|██████████| 66/66 [00:51<00:00,  1.29it/s]/s]
Propagating dogs-jump: 66it [00:51,  1.29it/s]                        


Skipping frame 65 due to low MAD (0.04)
Skipped 9 frames due to low MAD.
→ Saved all predicted masks for dogs-jump in data/DAVIS2017_mgfs_naive_optical/dogs-jump

=== Processing sequence: drift-chicane ===


frame loading (JPEG): 100%|██████████| 52/52 [00:01<00:00, 32.77it/s]


Found 1 unique non‐black colors in drift-chicane/00000.png


Propagating drift-chicane:   0%|          | 0/51 [00:00<?, ?it/s]

Skipping frame 1 due to low MAD (0.02)
Skipping frame 2 due to low MAD (0.03)
Skipping frame 3 due to low MAD (0.04)


Propagating drift-chicane:  10%|▉         | 5/51 [00:00<00:05,  7.79it/s]

Skipping frame 5 due to low MAD (0.02)
Skipping frame 6 due to low MAD (0.04)
Skipping frame 7 due to low MAD (0.05)


Propagating drift-chicane:  18%|█▊        | 9/51 [00:01<00:06,  6.62it/s]

Skipping frame 9 due to low MAD (0.03)
Skipping frame 10 due to low MAD (0.04)


Propagating drift-chicane:  24%|██▎       | 12/51 [00:01<00:06,  5.65it/s]

Skipping frame 12 due to low MAD (0.02)
Skipping frame 13 due to low MAD (0.04)


Propagating drift-chicane:  29%|██▉       | 15/51 [00:02<00:06,  5.26it/s]

Skipping frame 15 due to low MAD (0.03)
Skipping frame 16 due to low MAD (0.04)
Skipping frame 17 due to low MAD (0.05)


Propagating drift-chicane:  37%|███▋      | 19/51 [00:03<00:05,  5.55it/s]

Skipping frame 19 due to low MAD (0.03)
Skipping frame 20 due to low MAD (0.04)


Propagating drift-chicane:  43%|████▎     | 22/51 [00:03<00:05,  5.24it/s]

Skipping frame 22 due to low MAD (0.03)
Skipping frame 23 due to low MAD (0.04)
Skipping frame 24 due to low MAD (0.05)


Propagating drift-chicane:  51%|█████     | 26/51 [00:04<00:04,  5.42it/s]

Skipping frame 26 due to low MAD (0.02)
Skipping frame 27 due to low MAD (0.03)
Skipping frame 28 due to low MAD (0.04)


Propagating drift-chicane:  59%|█████▉    | 30/51 [00:05<00:03,  5.54it/s]

Skipping frame 30 due to low MAD (0.03)
Avg FPS last 30 frames: 5.62
Skipping frame 31 due to low MAD (0.04)


Propagating drift-chicane:  65%|██████▍   | 33/51 [00:05<00:03,  5.24it/s]

Skipping frame 33 due to low MAD (0.03)
Skipping frame 34 due to low MAD (0.04)


Propagating drift-chicane:  71%|███████   | 36/51 [00:06<00:02,  5.02it/s]

Skipping frame 36 due to low MAD (0.03)
Skipping frame 37 due to low MAD (0.04)
Skipping frame 38 due to low MAD (0.05)


Propagating drift-chicane:  78%|███████▊  | 40/51 [00:07<00:02,  5.32it/s]

Skipping frame 40 due to low MAD (0.03)
Skipping frame 41 due to low MAD (0.03)
Skipping frame 42 due to low MAD (0.04)
Skipping frame 43 due to low MAD (0.05)


Propagating drift-chicane:  88%|████████▊ | 45/51 [00:07<00:01,  5.95it/s]

Skipping frame 45 due to low MAD (0.03)
Skipping frame 46 due to low MAD (0.05)


Propagating drift-chicane:  94%|█████████▍| 48/51 [00:08<00:00,  5.57it/s]

Skipping frame 48 due to low MAD (0.04)


Propagating drift-chicane:  98%|█████████▊| 50/51 [00:09<00:00,  4.82it/s]

Skipping frame 50 due to low MAD (0.05)


propagate in video: 100%|██████████| 52/52 [00:09<00:00,  5.22it/s]       
Propagating drift-chicane: 52it [00:09,  5.22it/s]


Skipped 36 frames due to low MAD.
→ Saved all predicted masks for drift-chicane in data/DAVIS2017_mgfs_naive_optical/drift-chicane

=== Processing sequence: drift-straight ===


frame loading (JPEG): 100%|██████████| 50/50 [00:01<00:00, 31.41it/s]


Found 1 unique non‐black colors in drift-straight/00000.png


Propagating drift-straight:  63%|██████▎   | 31/49 [00:18<00:11,  1.57it/s]

Avg FPS last 30 frames: 1.62


propagate in video: 100%|██████████| 50/50 [00:27<00:00,  1.81it/s]        
Propagating drift-straight: 50it [00:27,  1.81it/s]


→ Saved all predicted masks for drift-straight in data/DAVIS2017_mgfs_naive_optical/drift-straight

=== Processing sequence: goat ===


frame loading (JPEG): 100%|██████████| 90/90 [00:02<00:00, 33.58it/s]


Found 1 unique non‐black colors in goat/00000.png


Propagating goat:  35%|███▍      | 31/89 [00:18<00:36,  1.57it/s]

Avg FPS last 30 frames: 1.60


Propagating goat:  69%|██████▊   | 61/89 [00:37<00:17,  1.60it/s]

Avg FPS last 30 frames: 1.57


propagate in video: 100%|██████████| 90/90 [00:53<00:00,  1.69it/s]
Propagating goat: 90it [00:53,  1.69it/s]


→ Saved all predicted masks for goat in data/DAVIS2017_mgfs_naive_optical/goat

=== Processing sequence: gold-fish ===


frame loading (JPEG): 100%|██████████| 78/78 [00:02<00:00, 30.58it/s]


Found 5 unique non‐black colors in gold-fish/00000.png


Propagating gold-fish:  40%|████      | 31/77 [00:38<01:01,  1.34s/it]

Avg FPS last 30 frames: 0.79


Propagating gold-fish:  79%|███████▉  | 61/77 [01:15<00:21,  1.34s/it]

Avg FPS last 30 frames: 0.80


propagate in video: 100%|██████████| 78/78 [01:36<00:00,  1.24s/it]   
Propagating gold-fish: 78it [01:36,  1.24s/it]


→ Saved all predicted masks for gold-fish in data/DAVIS2017_mgfs_naive_optical/gold-fish

=== Processing sequence: horsejump-high ===


frame loading (JPEG): 100%|██████████| 50/50 [00:01<00:00, 34.24it/s]


Found 2 unique non‐black colors in horsejump-high/00000.png


Propagating horsejump-high:  63%|██████▎   | 31/49 [00:20<00:07,  2.26it/s]

Avg FPS last 30 frames: 1.45


propagate in video: 100%|██████████| 50/50 [00:34<00:00,  1.45it/s]        
Propagating horsejump-high: 50it [00:34,  1.44it/s]


→ Saved all predicted masks for horsejump-high in data/DAVIS2017_mgfs_naive_optical/horsejump-high

=== Processing sequence: india ===


frame loading (JPEG): 100%|██████████| 81/81 [00:02<00:00, 33.14it/s]


Found 3 unique non‐black colors in india/00000.png


Propagating india:  39%|███▉      | 31/80 [00:28<00:48,  1.02it/s]

Avg FPS last 30 frames: 1.05


Propagating india:  76%|███████▋  | 61/80 [00:54<00:16,  1.12it/s]

Avg FPS last 30 frames: 1.15


propagate in video: 100%|██████████| 81/81 [01:14<00:00,  1.09it/s]
Propagating india: 81it [01:14,  1.09it/s]


→ Saved all predicted masks for india in data/DAVIS2017_mgfs_naive_optical/india

=== Processing sequence: judo ===


frame loading (JPEG): 100%|██████████| 34/34 [00:01<00:00, 31.32it/s]


Found 2 unique non‐black colors in judo/00000.png


Propagating judo:  12%|█▏        | 4/33 [00:00<00:00, 33.55it/s]

Skipping frame 1 due to low MAD (0.01)
Skipping frame 2 due to low MAD (0.02)
Skipping frame 3 due to low MAD (0.03)
Skipping frame 4 due to low MAD (0.03)
Skipping frame 5 due to low MAD (0.04)
Skipping frame 6 due to low MAD (0.04)
Skipping frame 7 due to low MAD (0.05)


Propagating judo:  36%|███▋      | 12/33 [00:01<00:02,  8.33it/s]

Skipping frame 9 due to low MAD (0.02)
Skipping frame 10 due to low MAD (0.03)
Skipping frame 11 due to low MAD (0.04)
Skipping frame 12 due to low MAD (0.05)


Propagating judo:  42%|████▏     | 14/33 [00:01<00:03,  5.24it/s]

Skipping frame 14 due to low MAD (0.03)
Skipping frame 15 due to low MAD (0.04)
Skipping frame 16 due to low MAD (0.05)


Propagating judo:  55%|█████▍    | 18/33 [00:02<00:03,  4.87it/s]

Skipping frame 18 due to low MAD (0.02)
Skipping frame 19 due to low MAD (0.04)
Skipping frame 20 due to low MAD (0.04)


Propagating judo:  67%|██████▋   | 22/33 [00:03<00:02,  4.77it/s]

Skipping frame 22 due to low MAD (0.03)


Propagating judo:  85%|████████▍ | 28/33 [00:07<00:02,  1.84it/s]

Skipping frame 28 due to low MAD (0.05)


Propagating judo:  91%|█████████ | 30/33 [00:08<00:01,  2.00it/s]

Skipping frame 30 due to low MAD (0.04)
Avg FPS last 30 frames: 3.47


Propagating judo:  97%|█████████▋| 32/33 [00:09<00:00,  2.12it/s]

Skipping frame 32 due to low MAD (0.04)


propagate in video: 100%|██████████| 34/34 [00:10<00:00,  3.29it/s]
Propagating judo: 34it [00:10,  3.29it/s]


Skipped 21 frames due to low MAD.
→ Saved all predicted masks for judo in data/DAVIS2017_mgfs_naive_optical/judo

=== Processing sequence: kite-surf ===


frame loading (JPEG): 100%|██████████| 50/50 [00:01<00:00, 30.67it/s]


Found 3 unique non‐black colors in kite-surf/00000.png


Propagating kite-surf:  63%|██████▎   | 31/49 [00:25<00:17,  1.03it/s]

Avg FPS last 30 frames: 1.16


propagate in video: 100%|██████████| 50/50 [00:44<00:00,  1.13it/s]   
Propagating kite-surf: 50it [00:44,  1.13it/s]


→ Saved all predicted masks for kite-surf in data/DAVIS2017_mgfs_naive_optical/kite-surf

=== Processing sequence: lab-coat ===


frame loading (JPEG): 100%|██████████| 47/47 [00:01<00:00, 32.95it/s]


Found 5 unique non‐black colors in lab-coat/00000.png


Propagating lab-coat:  67%|██████▋   | 31/46 [00:35<00:19,  1.32s/it]

Avg FPS last 30 frames: 0.84


propagate in video: 100%|██████████| 47/47 [00:57<00:00,  1.22s/it]  
Propagating lab-coat: 47it [00:57,  1.22s/it]


→ Saved all predicted masks for lab-coat in data/DAVIS2017_mgfs_naive_optical/lab-coat

=== Processing sequence: libby ===


frame loading (JPEG): 100%|██████████| 49/49 [00:01<00:00, 30.97it/s]


Found 1 unique non‐black colors in libby/00000.png


Propagating libby:  65%|██████▍   | 31/48 [00:14<00:07,  2.15it/s]

Avg FPS last 30 frames: 2.00


propagate in video: 100%|██████████| 49/49 [00:26<00:00,  1.86it/s]
Propagating libby: 49it [00:26,  1.86it/s]


→ Saved all predicted masks for libby in data/DAVIS2017_mgfs_naive_optical/libby

=== Processing sequence: loading ===


frame loading (JPEG): 100%|██████████| 50/50 [00:01<00:00, 31.32it/s]


Found 3 unique non‐black colors in loading/00000.png


Propagating loading:  63%|██████▎   | 31/49 [00:25<00:17,  1.04it/s]

Avg FPS last 30 frames: 1.16


propagate in video: 100%|██████████| 50/50 [00:44<00:00,  1.12it/s] 
Propagating loading: 50it [00:44,  1.12it/s]


→ Saved all predicted masks for loading in data/DAVIS2017_mgfs_naive_optical/loading

=== Processing sequence: mbike-trick ===


frame loading (JPEG): 100%|██████████| 79/79 [00:02<00:00, 30.82it/s]


Found 2 unique non‐black colors in mbike-trick/00000.png


Propagating mbike-trick:   5%|▌         | 4/78 [00:01<00:38,  1.95it/s]

Skipping frame 4 due to low MAD (0.04)


Propagating mbike-trick:   8%|▊         | 6/78 [00:02<00:31,  2.25it/s]

Skipping frame 6 due to low MAD (0.04)


Propagating mbike-trick:  14%|█▍        | 11/78 [00:05<00:43,  1.54it/s]

Skipping frame 11 due to low MAD (0.04)


Propagating mbike-trick:  18%|█▊        | 14/78 [00:07<00:38,  1.65it/s]

Skipping frame 14 due to low MAD (0.05)


Propagating mbike-trick:  21%|██        | 16/78 [00:08<00:32,  1.88it/s]

Skipping frame 16 due to low MAD (0.03)
Skipping frame 17 due to low MAD (0.03)


Propagating mbike-trick:  26%|██▌       | 20/78 [00:09<00:28,  2.06it/s]

Skipping frame 20 due to low MAD (0.03)


Propagating mbike-trick:  28%|██▊       | 22/78 [00:10<00:26,  2.15it/s]

Skipping frame 22 due to low MAD (0.04)


Propagating mbike-trick:  40%|███▉      | 31/78 [00:17<00:35,  1.33it/s]

Avg FPS last 30 frames: 1.74


Propagating mbike-trick:  41%|████      | 32/78 [00:18<00:35,  1.29it/s]

Skipping frame 32 due to low MAD (0.04)


Propagating mbike-trick:  44%|████▎     | 34/78 [00:18<00:26,  1.63it/s]

Skipping frame 34 due to low MAD (0.04)


Propagating mbike-trick:  50%|█████     | 39/78 [00:22<00:26,  1.48it/s]

Skipping frame 39 due to low MAD (0.04)


Propagating mbike-trick:  59%|█████▉    | 46/78 [00:24<00:14,  2.21it/s]

Skipping frame 46 due to low MAD (0.04)


Propagating mbike-trick:  78%|███████▊  | 61/78 [00:36<00:13,  1.24it/s]

Avg FPS last 30 frames: 1.59


propagate in video: 100%|██████████| 79/79 [00:50<00:00,  1.56it/s]     
Propagating mbike-trick: 79it [00:50,  1.56it/s]


Skipped 12 frames due to low MAD.
→ Saved all predicted masks for mbike-trick in data/DAVIS2017_mgfs_naive_optical/mbike-trick

=== Processing sequence: motocross-jump ===


frame loading (JPEG): 100%|██████████| 40/40 [00:01<00:00, 31.44it/s]


Found 2 unique non‐black colors in motocross-jump/00000.png


Propagating motocross-jump:  79%|███████▉  | 31/39 [00:23<00:06,  1.25it/s]

Avg FPS last 30 frames: 1.28


propagate in video: 100%|██████████| 40/40 [00:29<00:00,  1.34it/s].25it/s]
Propagating motocross-jump: 40it [00:29,  1.34it/s]                        


Skipping frame 39 due to low MAD (0.04)
Skipped 1 frames due to low MAD.
→ Saved all predicted masks for motocross-jump in data/DAVIS2017_mgfs_naive_optical/motocross-jump

=== Processing sequence: paragliding-launch ===


frame loading (JPEG): 100%|██████████| 80/80 [00:02<00:00, 29.62it/s]


Found 3 unique non‐black colors in paragliding-launch/00000.png


Propagating paragliding-launch:  39%|███▉      | 31/79 [00:28<00:46,  1.03it/s]

Avg FPS last 30 frames: 1.06


Propagating paragliding-launch:  77%|███████▋  | 61/79 [00:55<00:17,  1.02it/s]

Avg FPS last 30 frames: 1.10


propagate in video: 100%|██████████| 80/80 [01:14<00:00,  1.08it/s]            
Propagating paragliding-launch: 80it [01:14,  1.08it/s]


→ Saved all predicted masks for paragliding-launch in data/DAVIS2017_mgfs_naive_optical/paragliding-launch

=== Processing sequence: parkour ===


frame loading (JPEG): 100%|██████████| 100/100 [00:03<00:00, 26.44it/s]


Found 1 unique non‐black colors in parkour/00000.png


Propagating parkour:  31%|███▏      | 31/99 [00:18<00:42,  1.59it/s]

Avg FPS last 30 frames: 1.65


Propagating parkour:  62%|██████▏   | 61/99 [00:36<00:23,  1.60it/s]

Avg FPS last 30 frames: 1.60


Propagating parkour:  92%|█████████▏| 91/99 [00:55<00:05,  1.60it/s]

Avg FPS last 30 frames: 1.58


propagate in video: 100%|██████████| 100/100 [01:01<00:00,  1.62it/s]
Propagating parkour: 100it [01:01,  1.62it/s]


→ Saved all predicted masks for parkour in data/DAVIS2017_mgfs_naive_optical/parkour

=== Processing sequence: pigs ===


frame loading (JPEG): 100%|██████████| 79/79 [00:02<00:00, 29.22it/s]


Found 3 unique non‐black colors in pigs/00000.png


Propagating pigs:   3%|▎         | 2/78 [00:00<00:26,  2.92it/s]

Skipping frame 2 due to low MAD (0.05)


Propagating pigs:   5%|▌         | 4/78 [00:01<00:27,  2.68it/s]

Skipping frame 4 due to low MAD (0.05)


Propagating pigs:   8%|▊         | 6/78 [00:02<00:29,  2.42it/s]

Skipping frame 6 due to low MAD (0.05)


Propagating pigs:  10%|█         | 8/78 [00:03<00:31,  2.22it/s]

Skipping frame 8 due to low MAD (0.05)


Propagating pigs:  13%|█▎        | 10/78 [00:04<00:32,  2.12it/s]

Skipping frame 10 due to low MAD (0.04)


Propagating pigs:  15%|█▌        | 12/78 [00:05<00:31,  2.07it/s]

Skipping frame 12 due to low MAD (0.05)


Propagating pigs:  18%|█▊        | 14/78 [00:06<00:31,  2.03it/s]

Skipping frame 14 due to low MAD (0.04)


Propagating pigs:  21%|██        | 16/78 [00:07<00:30,  2.01it/s]

Skipping frame 16 due to low MAD (0.04)


Propagating pigs:  23%|██▎       | 18/78 [00:08<00:30,  1.99it/s]

Skipping frame 18 due to low MAD (0.04)


Propagating pigs:  26%|██▌       | 20/78 [00:09<00:26,  2.19it/s]

Skipping frame 20 due to low MAD (0.04)


Propagating pigs:  28%|██▊       | 22/78 [00:09<00:22,  2.48it/s]

Skipping frame 22 due to low MAD (0.03)


Propagating pigs:  35%|███▍      | 27/78 [00:10<00:12,  4.14it/s]

Skipping frame 24 due to low MAD (0.03)
Skipping frame 25 due to low MAD (0.03)
Skipping frame 26 due to low MAD (0.05)


Propagating pigs:  37%|███▋      | 29/78 [00:11<00:13,  3.61it/s]

Skipping frame 28 due to low MAD (0.05)


Propagating pigs:  40%|███▉      | 31/78 [00:13<00:22,  2.11it/s]

Avg FPS last 30 frames: 2.31


Propagating pigs:  41%|████      | 32/78 [00:13<00:26,  1.76it/s]

Skipping frame 32 due to low MAD (0.05)


Propagating pigs:  46%|████▌     | 36/78 [00:16<00:30,  1.39it/s]

Skipping frame 36 due to low MAD (0.05)


Propagating pigs:  49%|████▊     | 38/78 [00:17<00:25,  1.58it/s]

Skipping frame 38 due to low MAD (0.04)


Propagating pigs:  51%|█████▏    | 40/78 [00:18<00:22,  1.71it/s]

Skipping frame 40 due to low MAD (0.04)


Propagating pigs:  54%|█████▍    | 42/78 [00:19<00:19,  1.82it/s]

Skipping frame 42 due to low MAD (0.05)


Propagating pigs:  60%|██████    | 47/78 [00:22<00:16,  1.85it/s]

Skipping frame 47 due to low MAD (0.04)


Propagating pigs:  64%|██████▍   | 50/78 [00:24<00:17,  1.61it/s]

Skipping frame 50 due to low MAD (0.00)
Skipping frame 51 due to low MAD (0.05)


Propagating pigs:  68%|██████▊   | 53/78 [00:25<00:12,  2.04it/s]

Skipping frame 53 due to low MAD (0.05)


Propagating pigs:  71%|███████   | 55/78 [00:26<00:11,  2.03it/s]

Skipping frame 55 due to low MAD (0.04)


Propagating pigs:  73%|███████▎  | 57/78 [00:27<00:10,  2.01it/s]

Skipping frame 57 due to low MAD (0.04)


Propagating pigs:  76%|███████▌  | 59/78 [00:28<00:09,  2.01it/s]

Skipping frame 59 due to low MAD (0.04)


Propagating pigs:  78%|███████▊  | 61/78 [00:29<00:08,  2.00it/s]

Avg FPS last 30 frames: 1.82
Skipping frame 61 due to low MAD (0.04)


Propagating pigs:  81%|████████  | 63/78 [00:30<00:07,  2.00it/s]

Skipping frame 63 due to low MAD (0.04)


Propagating pigs:  83%|████████▎ | 65/78 [00:31<00:06,  1.99it/s]

Skipping frame 65 due to low MAD (0.04)


Propagating pigs:  86%|████████▌ | 67/78 [00:32<00:05,  1.99it/s]

Skipping frame 67 due to low MAD (0.03)
Skipping frame 68 due to low MAD (0.05)


Propagating pigs:  90%|████████▉ | 70/78 [00:33<00:03,  2.27it/s]

Skipping frame 70 due to low MAD (0.03)
Skipping frame 71 due to low MAD (0.04)


Propagating pigs:  97%|█████████▋| 76/78 [00:34<00:00,  3.55it/s]

Skipping frame 73 due to low MAD (0.03)
Skipping frame 74 due to low MAD (0.04)
Skipping frame 75 due to low MAD (0.04)


propagate in video: 100%|██████████| 79/79 [00:35<00:00,  2.20it/s]
Propagating pigs: 79it [00:35,  2.20it/s]                        


Skipping frame 77 due to low MAD (0.03)
Skipping frame 78 due to low MAD (0.04)
Skipped 39 frames due to low MAD.
→ Saved all predicted masks for pigs in data/DAVIS2017_mgfs_naive_optical/pigs

=== Processing sequence: scooter-black ===


frame loading (JPEG): 100%|██████████| 43/43 [00:01<00:00, 32.46it/s]


Found 2 unique non‐black colors in scooter-black/00000.png


Propagating scooter-black:  74%|███████▍  | 31/42 [00:23<00:08,  1.25it/s]

Avg FPS last 30 frames: 1.28


propagate in video: 100%|██████████| 43/43 [00:31<00:00,  1.36it/s]       
Propagating scooter-black: 43it [00:31,  1.36it/s]


→ Saved all predicted masks for scooter-black in data/DAVIS2017_mgfs_naive_optical/scooter-black

=== Processing sequence: shooting ===


frame loading (JPEG): 100%|██████████| 40/40 [00:01<00:00, 30.24it/s]


Found 3 unique non‐black colors in shooting/00000.png


Propagating shooting:  79%|███████▉  | 31/39 [00:28<00:07,  1.01it/s]

Avg FPS last 30 frames: 1.04


propagate in video: 100%|██████████| 40/40 [00:37<00:00,  1.06it/s]  
Propagating shooting: 40it [00:37,  1.06it/s]


→ Saved all predicted masks for shooting in data/DAVIS2017_mgfs_naive_optical/shooting

=== Processing sequence: soapbox ===


frame loading (JPEG): 100%|██████████| 99/99 [00:03<00:00, 28.21it/s]


Found 3 unique non‐black colors in soapbox/00000.png


Propagating soapbox:   4%|▍         | 4/98 [00:02<00:55,  1.70it/s]

Skipping frame 4 due to low MAD (0.05)


Propagating soapbox:  32%|███▏      | 31/98 [00:26<01:05,  1.03it/s]

Avg FPS last 30 frames: 1.15


Propagating soapbox:  62%|██████▏   | 61/98 [00:53<00:36,  1.03it/s]

Avg FPS last 30 frames: 1.09


Propagating soapbox:  93%|█████████▎| 91/98 [01:23<00:06,  1.00it/s]

Avg FPS last 30 frames: 1.01


propagate in video: 100%|██████████| 99/99 [01:30<00:00,  1.09it/s] 
Propagating soapbox: 99it [01:30,  1.09it/s]

Skipped 1 frames due to low MAD.
→ Saved all predicted masks for soapbox in data/DAVIS2017_mgfs_naive_optical/soapbox

All sequences processed.
Your SAM 2 masks live under: data/DAVIS2017_mgfs_naive_optical



