In [None]:
!pip install compressai



In [None]:
import os, glob, argparse, math, itertools
import torch, torch.nn.functional as F
from PIL import Image
from tqdm import tqdm
from torchvision import transforms
from compressai.zoo import bmshj2018_factorized, ssf2020

  @amp.autocast(enabled=False)


In [None]:
# Single-sequence experiment: where the source frames live and where the
# compressed bitstreams are written.
mot17_root = "BoostTrack/data/MOT17/test"
sequence = "MOT17-01-DPM"

# Frames of a MOT17 sequence are stored in its `img1` sub-folder.
img_folder = os.path.join(mot17_root, sequence, "img1")

# One output directory per sequence; created up front so later saves succeed.
output_folder = os.path.join("BoostTrack/data/MOT17-compressed", sequence)
os.makedirs(output_folder, exist_ok=True)

In [None]:
# Set to True to fetch and unpack the MOT17 archive in the download cell below.
download = False

In [None]:
import os
import zipfile
import gdown

# Source archive (served from motchallenge.net) and the local file name it is
# saved under while it is being unpacked.
url = "https://motchallenge.net/data/MOT17.zip"  # MOT17-test.zip
output = "MOT17-test.zip"

if download:
    # Fetch the archive into the working directory.
    gdown.download(url, output, quiet=False)

    # Unpack everything below the BoostTrack data directory.
    with zipfile.ZipFile(output, 'r') as archive:
        archive.extractall("BoostTrack/data/")

    # The zip is no longer needed once extracted.
    os.remove(output)

In [None]:
def pad_to_multiple(x, m):
    """
    Pad on the bottom/right so (H, W) becomes a multiple of m.

    SSF / other video codecs need m = 128; most image codecs work with 64.

    Args:
        x: 4-D tensor unpacked as (B, C, H, W).
        m: the multiple the padded height and width must be divisible by.

    Returns:
        (padded, (H, W)): the padded tensor and the original spatial size, so
        a reconstruction can be cropped back with ``[..., :H, :W]``.

    Reflect padding is used whenever it is legal. PyTorch requires a reflect
    pad to be smaller than the dimension being padded, so inputs that are
    smaller than the padding they need fall back to edge replication instead
    of raising.
    """
    _, _, H, W = x.shape
    Hp, Wp = (m - H % m) % m, (m - W % m) % m
    # reflect mirrors interior pixels and therefore needs pad < size per axis
    mode = "reflect" if (Hp < H and Wp < W) else "replicate"
    return F.pad(x, (0, Wp, 0, Hp), mode=mode), (H, W)

def bits_in(strings):
    """Return the total size, in bits, of every leaf yielded by ``flatten(strings)``."""
    n_bytes = 0
    for chunk in flatten(strings):
        n_bytes += len(chunk)
    # each element of a byte string is 8 bits
    return n_bytes * 8

def flatten(l):
    """
    Yield the leaf items of an arbitrarily nested container, depth first.

    Lists and tuples are descended into; dicts are descended into through
    their *values* (keys are skipped). Anything else is yielded as-is.

    Dict support matters for bit counting: a codec may return a frame's
    bitstreams as a mapping (e.g. one entry per sub-stream). Iterating such a
    mapping directly would yield its key names instead of the payloads.
    """
    if isinstance(l, dict):
        l = l.values()
    for el in l:
        if isinstance(el, (list, tuple, dict)):
            yield from flatten(el)
        else:
            yield el

In [None]:
# Codec quality level requested from the CompressAI model zoo.
quality = 1

# Run on the GPU when one is visible, otherwise on the CPU.
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'

# Pretrained scale-space-flow video codec, moved to the chosen device.
model = ssf2020(quality=quality, metric='mse', pretrained=True)
model = model.to(device)
# Switch to inference mode; as the cell's last expression this also prints the model.
model.eval()

ScaleSpaceFlow(
  (img_encoder): Encoder(
    (0): Conv2d(3, 128, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): Conv2d(128, 128, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
    (3): ReLU(inplace=True)
    (4): Conv2d(128, 128, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
    (5): ReLU(inplace=True)
    (6): Conv2d(128, 192, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
  )
  (img_decoder): Decoder(
    (0): ConvTranspose2d(192, 128, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), output_padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): ConvTranspose2d(128, 128, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), output_padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): ConvTranspose2d(128, 128, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), output_padding=(1, 1))
    (5): ReLU(inplace=True)
    (6): ConvTranspose2d(128, 3, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), output_padding=(1, 1))
  )
  (img_hyperprio

In [None]:
# The loaded model is a video codec: the clip is encoded/decoded as a whole.
video_codec = True
# Spatial size of every frame is padded up to a multiple of this value.
PAD_M = 128
# PIL image -> tensor converter used by the loading and scoring cells.
to_tensor = transforms.ToTensor()

# Collect every JPEG/PNG frame of the sequence in sorted (i.e. temporal) order.
frames = []
for pattern in ("*.jpg", "*.png"):
    frames.extend(glob.glob(os.path.join(img_folder, pattern)))
frames.sort()

In [None]:
# Temporal subsampling: keep every 10th frame of the sorted frame list.
frame_paths = frames[::10]

In [None]:
# Echo the frame directory as the cell output (quick sanity check of the path).
img_folder

'BoostTrack/data/MOT17/test/MOT17-01-DPM/img1'

In [None]:
# Accumulators filled by this cell and read by the reporting/scoring cells.
total_bits = 0
orig_hws = []
strings_list, shapes_list = [], []

# Load every selected frame as a padded 1xCxHxW tensor on the target device.
clip = []
for fp in tqdm(frame_paths, desc="Loading frames"):
    frame = to_tensor(Image.open(fp).convert("RGB"))
    padded, size = pad_to_multiple(frame.unsqueeze(0).to(device), PAD_M)
    clip.append(padded)
    orig_hws.append(size)   # original (H, W), needed to crop after decoding

# Encode the whole clip in one call; no gradients are needed for inference.
with torch.no_grad():
    strings_list, shapes_list = model.compress(clip)

# Persist one file per frame and tally the size of its bitstreams.
for idx, payload in enumerate(zip(strings_list, shapes_list, orig_hws)):
    frame_strings, frame_shape, size = payload
    target = os.path.join(output_folder, f"{idx:06d}.pth")
    torch.save({"strings": frame_strings, "shape": frame_shape, "orig_hw": size},
               target)
    total_bits += bits_in(frame_strings)

Loading frames: 100%|██████████| 45/45 [00:03<00:00, 13.96it/s]


In [None]:
# Report where the bitstreams went and their combined size (bits -> bytes -> units of 1024 bytes).
size_kb = total_bits / 8 / 1024
print(f"Compressed → {output_folder}")
print(f"Total size : {size_kb:.1f} kB")

Compressed → BoostTrack/data/MOT17-compressed/MOT17-01-DPM
Total size : 19.9 kB


In [None]:
psnr_sum = 0.0
n_pixels = 0

# Decode the bitstreams produced by the compression cell and score every
# reconstruction against its source frame.
with torch.no_grad():
    if video_codec:
        # A video codec decodes the whole clip in a single call.
        recon_clip = model.decompress(strings_list, shapes_list)

    for i, fp in enumerate(tqdm(frame_paths, desc="Scoring")):
        if video_codec:
            x_hat = recon_clip[i]
        else:
            # NOTE(review): per-frame decode path; assumes decompress accepts
            # single-item lists and returns an indexable result — confirm
            # against the image-codec API before relying on this branch.
            x_hat = model.decompress([strings_list[i]], [shapes_list[i]])[0]

        # Drop the padding added before encoding and keep values in [0, 1].
        H, W  = orig_hws[i]
        x_hat = x_hat[..., :H, :W].clamp_(0, 1)

        x_ref = to_tensor(Image.open(fp).convert("RGB")).to(device)
        # The reference is loaded without a batch axis; view it with the same
        # shape as the reconstruction so mse_loss compares equally shaped
        # tensors instead of broadcasting (which emits a UserWarning).
        x_ref = x_ref.expand_as(x_hat)
        mse   = F.mse_loss(x_hat, x_ref)
        # Pixel values are in [0, 1], so PSNR reduces to -10 * log10(MSE).
        psnr  = -10 * torch.log10(mse)

        psnr_sum += psnr.item()
        n_pixels += H * W

avg_psnr = psnr_sum / len(frame_paths)
bpp      = total_bits / n_pixels

print(f"\nSequence average →  {bpp:.4f} bpp   |   {avg_psnr:.2f} dB PSNR")

  mse   = F.mse_loss(x_hat, x_ref)
Scoring: 100%|██████████| 45/45 [00:02<00:00, 19.63it/s]


Sequence average →  0.0017 bpp   |   23.08 dB PSNR





In [None]:
import os, glob, math, itertools, shutil, gc
import torch, torch.nn.functional as F
from PIL import Image
from tqdm.auto import tqdm

import numpy as _np
# Compatibility shim: when this NumPy build exposes no `object` attribute,
# alias it to the builtin `object`. Presumably required by a dependency that
# still references `np.object` — TODO confirm which one needs it.
if not hasattr(_np, "object"):
    _np.object = object

from torchvision import transforms
from compressai.zoo import bmshj2018_factorized, ssf2020

import cv2

  if not hasattr(_np, "object"):


In [None]:
# Codec selection for the batch run over all sequences.
codec_name = "ssf2020"
quality = 1

# Input split and the root under which re-encoded sequences are written.
mot17_root = "BoostTrack/data/MOT17/test"
out_root = "BoostTrack/data/MOT17_processed/test"
os.makedirs(out_root, exist_ok=True)

# Number of frames encoded per model call for video codecs.
# Set GOP_SIZE=None to encode an entire sequence in one call.
if codec_name.startswith("ssf"):
    GOP_SIZE = 1
else:
    GOP_SIZE = None

# Prefer the GPU when one is available.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"
print("Using", device)

Using cuda


In [None]:
def pad_to_multiple(x, m):
    """
    Pad on the bottom/right so (H, W) is divisible by *m*.

    Args:
        x: 4-D tensor unpacked as (B, C, H, W).
        m: the multiple the padded height and width must be divisible by.

    Returns:
        (padded, (H, W)): the padded tensor and the original spatial size, so
        a reconstruction can be cropped back with ``[..., :H, :W]``.

    Reflect padding is used whenever it is legal. PyTorch requires a reflect
    pad to be smaller than the dimension being padded, so inputs that are
    smaller than the padding they need fall back to edge replication instead
    of raising.
    """
    _, _, H, W = x.shape
    Hp, Wp = (m - H % m) % m, (m - W % m) % m
    # reflect mirrors interior pixels and therefore needs pad < size per axis
    mode = "reflect" if (Hp < H and Wp < W) else "replicate"
    return F.pad(x, (0, Wp, 0, Hp), mode=mode), (H, W)

def flatten(nested):
    """
    Yield the leaf items of an arbitrarily nested container, depth first.

    Lists and tuples are descended into; dicts are descended into through
    their *values* (keys are skipped). Anything else is yielded as-is.

    Dict support matters for bit counting: a codec may return a frame's
    bitstreams as a mapping (e.g. one entry per sub-stream). Iterating such a
    mapping directly would yield its key names instead of the payloads.
    """
    if isinstance(nested, dict):
        nested = nested.values()
    for el in nested:
        if isinstance(el, (list, tuple, dict)):
            yield from flatten(el)
        else:
            yield el

def bits_in(strings):
    """Return the total size, in bits, of every leaf yielded by ``flatten(strings)``."""
    n_bytes = 0
    for chunk in flatten(strings):
        n_bytes += len(chunk)
    # each element of a byte string is 8 bits
    return n_bytes * 8

# Shared PIL-image -> tensor converter used by the processing loops below.
to_tensor = transforms.ToTensor()

In [None]:
# Supported codecs: zoo factory, whether it is a video codec, and the multiple
# the frame size must be padded to (SSF ↓16, hyper-prior ↑8 → needs /128).
_codec_table = {
    "bmshj2018_factorized": (bmshj2018_factorized, False, 64),
    "ssf2020":              (ssf2020,              True,  128),
}
if codec_name not in _codec_table:
    raise ValueError(f"Unknown codec {codec_name}")

_factory, _is_video, _pad_multiple = _codec_table[codec_name]
# Pretrained weights, inference mode, on the selected device.
model      = _factory(quality=quality, pretrained=True).eval().to(device)
video_code = _is_video
PAD_M      = _pad_multiple

print(f"Loaded {codec_name}-Q{quality}  (video={video_code})")

Loaded ssf2020-Q1  (video=True)


In [None]:
# A sequence is any entry of the split directory that contains an `img1`
# frame folder; the result is kept in sorted order.
sequences = sorted(
    entry for entry in os.listdir(mot17_root)
    if os.path.isdir(os.path.join(mot17_root, entry, "img1"))
)
print("Found", len(sequences), "sequences:")
for name in sequences:
    print(" •", name)

Found 21 sequences:
 • MOT17-01-DPM
 • MOT17-01-FRCNN
 • MOT17-01-SDP
 • MOT17-03-DPM
 • MOT17-03-FRCNN
 • MOT17-03-SDP
 • MOT17-06-DPM
 • MOT17-06-FRCNN
 • MOT17-06-SDP
 • MOT17-07-DPM
 • MOT17-07-FRCNN
 • MOT17-07-SDP
 • MOT17-08-DPM
 • MOT17-08-FRCNN
 • MOT17-08-SDP
 • MOT17-12-DPM
 • MOT17-12-FRCNN
 • MOT17-12-SDP
 • MOT17-14-DPM
 • MOT17-14-FRCNN
 • MOT17-14-SDP


In [None]:
# Release cached, currently unused GPU memory before starting the batch run.
torch.cuda.empty_cache()

In [None]:
# Re-encode a temporal subsample of every sequence with the loaded codec,
# write the reconstructions and collect rate/distortion statistics.
summary = []          # will collect (seq, frames, bpp, psnr)
for seq in sequences:
    print(f"\n=== {seq} ===")
    img_dir = os.path.join(mot17_root, seq, "img1")
    out_dir = os.path.join(out_root,   seq)
    os.makedirs(out_dir, exist_ok=True)

    out_dir_img1 = os.path.join(out_root, seq, "img1")
    os.makedirs(out_dir_img1, exist_ok=True)

    # Mirror the detections and sequence metadata next to the re-encoded frames.
    # NOTE(review): frames are subsampled and renumbered 1..N below while det/
    # and seqinfo.ini are copied unchanged — presumably their frame ids and
    # sequence length no longer match the written images; confirm downstream.
    det_src = os.path.join(mot17_root, seq, "det")
    det_dst = os.path.join(out_root,   seq, "det")
    if not os.path.exists(det_dst):
        shutil.copytree(det_src, det_dst)
    shutil.copy(os.path.join(mot17_root, seq, "seqinfo.ini"),
                os.path.join(out_root,   seq, "seqinfo.ini"))

    frame_paths = sorted(glob.glob(os.path.join(img_dir, "*.jpg")) +
                         glob.glob(os.path.join(img_dir, "*.png")))
    # Keep roughly 15 evenly spaced frames. The stride is clamped to 1 so a
    # sequence with fewer than 15 frames does not produce a zero slice step.
    frame_paths = frame_paths[::max(1, len(frame_paths) // 15)]
    n_frames = len(frame_paths)
    print("Frames:", n_frames)
    if n_frames == 0:
        # Nothing to encode; skipping avoids dividing by zero in the averages.
        print("→ no frames found, skipping")
        continue

    total_bits, psnr_sum, n_pixels = 0, 0.0, 0

    if video_code:
        # ── process in chunks (=GOPs) to control memory
        gop = n_frames if GOP_SIZE is None else GOP_SIZE
        for g in range(0, n_frames, gop):
            end = min(g + gop, n_frames)
            clip, orig_hws = [], []
            for fp in frame_paths[g:end]:
                img = Image.open(fp).convert("RGB")
                x, hw = pad_to_multiple(to_tensor(img).unsqueeze(0).to(device), PAD_M)
                clip.append(x); orig_hws.append(hw)

            # Encode AND decode without autograd: decoding outside no_grad
            # would build a graph through the decoder and waste GPU memory.
            with torch.no_grad():
                strings, shapes = model.compress(clip)
                recon = model.decompress(strings, shapes)

            # metrics + reconstruction export (bitstreams are not persisted here)
            for i, (st, hw, x_hat, x_pad) in enumerate(zip(
                    strings, orig_hws, recon, clip)):
                idx = g + i
                total_bits += bits_in(st)

                H, W = hw
                x_hat = x_hat[..., :H, :W].clamp_(0, 1)
                # The un-padded region of the encoder input is the source
                # frame; it has the same batched shape as x_hat, so mse_loss
                # does not need to broadcast (which emitted a UserWarning).
                x_ref = x_pad[..., :H, :W]
                mse   = F.mse_loss(x_hat, x_ref)
                # values are in [0, 1], so PSNR = -10 * log10(MSE)
                frame_psnr = -10 * torch.log10(mse)
                psnr_sum += frame_psnr.item()
                n_pixels += H * W

                # 1xCxHxW float in [0, 1] -> HxWxC uint8
                rgb8 = (x_hat.squeeze(0).permute(1, 2, 0).cpu().detach().numpy() * 255).round().astype('uint8')

                # write JPEG with MOT naming
                cv2.imwrite(os.path.join(out_dir_img1, f"{idx+1:06d}.jpg"),
                            cv2.cvtColor(rgb8, cv2.COLOR_RGB2BGR),
                            [cv2.IMWRITE_JPEG_QUALITY, 95])

            # free GPU mem each GOP
            del clip, recon, strings, shapes
            torch.cuda.empty_cache(); gc.collect()

    else:  # ── image codec
        for idx, fp in enumerate(tqdm(frame_paths, desc="Compressing")):
            img = Image.open(fp).convert("RGB")
            x, hw = pad_to_multiple(to_tensor(img).unsqueeze(0).to(device), PAD_M)
            with torch.no_grad():
                out = model.compress(x)
                # CompressAI image models return a dict from decompress; the
                # reconstruction lives under "x_hat" (indexing with 0 fails).
                x_hat = model.decompress(out["strings"], out["shape"])["x_hat"]

            torch.save({"strings": out["strings"], "shape": out["shape"], "orig_hw": hw},
                       os.path.join(out_dir, f"{idx:06d}.pth"))
            total_bits += bits_in(out["strings"])

            H, W  = hw
            x_hat = x_hat[..., :H, :W].clamp_(0, 1)
            # compare against the un-padded encoder input (same batched shape)
            mse   = F.mse_loss(x_hat, x[..., :H, :W])
            frame_psnr = -10 * torch.log10(mse)
            psnr_sum += frame_psnr.item()
            n_pixels += H * W

    # Per-sequence rate (bits per original pixel) and mean per-frame PSNR.
    bpp  = total_bits / n_pixels
    psnr = psnr_sum / n_frames
    summary.append((seq, n_frames, bpp, psnr))
    print(f"→ {bpp:.4f} bpp   |   {psnr:.2f} dB PSNR")


=== MOT17-01-DPM ===
Frames: 15


  mse   = F.mse_loss(x_hat, x_ref)


→ 0.0751 bpp   |   29.13 dB PSNR

=== MOT17-01-FRCNN ===
Frames: 15
→ 0.0751 bpp   |   27.92 dB PSNR

=== MOT17-01-SDP ===
Frames: 15
→ 0.0751 bpp   |   28.51 dB PSNR

=== MOT17-03-DPM ===
Frames: 15
→ 0.0630 bpp   |   32.29 dB PSNR

=== MOT17-03-FRCNN ===
Frames: 15
→ 0.0630 bpp   |   31.99 dB PSNR

=== MOT17-03-SDP ===
Frames: 15
→ 0.0630 bpp   |   31.34 dB PSNR

=== MOT17-06-DPM ===
Frames: 16


  mse   = F.mse_loss(x_hat, x_ref)


→ 0.0622 bpp   |   32.18 dB PSNR

=== MOT17-06-FRCNN ===
Frames: 16
→ 0.0622 bpp   |   32.18 dB PSNR

=== MOT17-06-SDP ===
Frames: 16
→ 0.0622 bpp   |   32.18 dB PSNR

=== MOT17-07-DPM ===
Frames: 16
→ 0.0748 bpp   |   29.35 dB PSNR

=== MOT17-07-FRCNN ===
Frames: 16
→ 0.0748 bpp   |   30.24 dB PSNR

=== MOT17-07-SDP ===
Frames: 16
→ 0.0748 bpp   |   30.74 dB PSNR

=== MOT17-08-DPM ===
Frames: 16
→ 0.1234 bpp   |   28.92 dB PSNR

=== MOT17-08-FRCNN ===
Frames: 16
→ 0.1234 bpp   |   28.52 dB PSNR

=== MOT17-08-SDP ===
Frames: 16
→ 0.1234 bpp   |   28.92 dB PSNR

=== MOT17-12-DPM ===
Frames: 15
→ 0.0567 bpp   |   32.63 dB PSNR

=== MOT17-12-FRCNN ===
Frames: 15
→ 0.0567 bpp   |   33.85 dB PSNR

=== MOT17-12-SDP ===
Frames: 15
→ 0.0567 bpp   |   32.63 dB PSNR

=== MOT17-14-DPM ===
Frames: 15
→ 0.0783 bpp   |   32.23 dB PSNR

=== MOT17-14-FRCNN ===
Frames: 15
→ 0.0783 bpp   |   31.09 dB PSNR

=== MOT17-14-SDP ===
Frames: 15
→ 0.0783 bpp   |   32.23 dB PSNR


In [None]:
# Per-sequence table followed by frame-weighted overall averages.
print("\n===========  SUMMARY  ===========")
print(f"{'Sequence':20}  Frames   BPP     PSNR")

total_frames = 0
weighted_bpp = 0
weighted_psnr = 0
for name, count, seq_bpp, seq_psnr in summary:
    print(f"{name:20}  {count:6d}   {seq_bpp:5.4f}   {seq_psnr:6.2f}")
    # weight every sequence by the number of frames it contributed
    total_frames += count
    weighted_bpp += seq_bpp * count
    weighted_psnr += seq_psnr * count

overall_bpp = weighted_bpp / total_frames
overall_psnr = weighted_psnr / total_frames
print("----------------------------------------------")
print(f"{'Overall':20}          {overall_bpp:5.4f}   {overall_psnr:6.2f}")


Sequence              Frames   BPP     PSNR
MOT17-01-DPM              15   0.0751    29.13
MOT17-01-FRCNN            15   0.0751    27.92
MOT17-01-SDP              15   0.0751    28.51
MOT17-03-DPM              15   0.0630    32.29
MOT17-03-FRCNN            15   0.0630    31.99
MOT17-03-SDP              15   0.0630    31.34
MOT17-06-DPM              16   0.0622    32.18
MOT17-06-FRCNN            16   0.0622    32.18
MOT17-06-SDP              16   0.0622    32.18
MOT17-07-DPM              16   0.0748    29.35
MOT17-07-FRCNN            16   0.0748    30.24
MOT17-07-SDP              16   0.0748    30.74
MOT17-08-DPM              16   0.1234    28.92
MOT17-08-FRCNN            16   0.1234    28.52
MOT17-08-SDP              16   0.1234    28.92
MOT17-12-DPM              15   0.0567    32.63
MOT17-12-FRCNN            15   0.0567    33.85
MOT17-12-SDP              15   0.0567    32.63
MOT17-14-DPM              15   0.0783    32.23
MOT17-14-FRCNN            15   0.0783    31.09
MOT17-14-SDP   

In [None]:
# Point the input and output roots at the MOT17 train split.
mot17_root = "BoostTrack/data/MOT17/train"
out_root   = "BoostTrack/data/MOT17_processed/train"

In [None]:
# A sequence is any entry of the split directory that contains an `img1`
# frame folder; the result is kept in sorted order.
sequences = sorted(
    entry for entry in os.listdir(mot17_root)
    if os.path.isdir(os.path.join(mot17_root, entry, "img1"))
)
print("Found", len(sequences), "sequences:")
for name in sequences:
    print(" •", name)

Found 21 sequences:
 • MOT17-02-DPM
 • MOT17-02-FRCNN
 • MOT17-02-SDP
 • MOT17-04-DPM
 • MOT17-04-FRCNN
 • MOT17-04-SDP
 • MOT17-05-DPM
 • MOT17-05-FRCNN
 • MOT17-05-SDP
 • MOT17-09-DPM
 • MOT17-09-FRCNN
 • MOT17-09-SDP
 • MOT17-10-DPM
 • MOT17-10-FRCNN
 • MOT17-10-SDP
 • MOT17-11-DPM
 • MOT17-11-FRCNN
 • MOT17-11-SDP
 • MOT17-13-DPM
 • MOT17-13-FRCNN
 • MOT17-13-SDP


In [None]:
# Release cached, currently unused GPU memory before starting the batch run.
torch.cuda.empty_cache()

In [None]:
# Re-encode a temporal subsample of every sequence with the loaded codec,
# write the reconstructions and collect rate/distortion statistics.
summary = []          # will collect (seq, frames, bpp, psnr)
for seq in sequences:
    print(f"\n=== {seq} ===")
    img_dir = os.path.join(mot17_root, seq, "img1")
    out_dir = os.path.join(out_root,   seq)
    os.makedirs(out_dir, exist_ok=True)

    out_dir_img1 = os.path.join(out_root, seq, "img1")
    os.makedirs(out_dir_img1, exist_ok=True)

    # Mirror the detections and sequence metadata next to the re-encoded frames.
    # NOTE(review): frames are subsampled and renumbered 1..N below while det/
    # and seqinfo.ini are copied unchanged — presumably their frame ids and
    # sequence length no longer match the written images; confirm downstream.
    det_src = os.path.join(mot17_root, seq, "det")
    det_dst = os.path.join(out_root,   seq, "det")
    if not os.path.exists(det_dst):
        shutil.copytree(det_src, det_dst)
    shutil.copy(os.path.join(mot17_root, seq, "seqinfo.ini"),
                os.path.join(out_root,   seq, "seqinfo.ini"))

    frame_paths = sorted(glob.glob(os.path.join(img_dir, "*.jpg")) +
                         glob.glob(os.path.join(img_dir, "*.png")))
    # Keep roughly 15 evenly spaced frames. The stride is clamped to 1 so a
    # sequence with fewer than 15 frames does not produce a zero slice step.
    frame_paths = frame_paths[::max(1, len(frame_paths) // 15)]
    n_frames = len(frame_paths)
    print("Frames:", n_frames)
    if n_frames == 0:
        # Nothing to encode; skipping avoids dividing by zero in the averages.
        print("→ no frames found, skipping")
        continue

    total_bits, psnr_sum, n_pixels = 0, 0.0, 0

    if video_code:
        # ── process in chunks (=GOPs) to control memory
        gop = n_frames if GOP_SIZE is None else GOP_SIZE
        for g in range(0, n_frames, gop):
            end = min(g + gop, n_frames)
            clip, orig_hws = [], []
            for fp in frame_paths[g:end]:
                img = Image.open(fp).convert("RGB")
                x, hw = pad_to_multiple(to_tensor(img).unsqueeze(0).to(device), PAD_M)
                clip.append(x); orig_hws.append(hw)

            # Encode AND decode without autograd: decoding outside no_grad
            # would build a graph through the decoder and waste GPU memory.
            with torch.no_grad():
                strings, shapes = model.compress(clip)
                recon = model.decompress(strings, shapes)

            # metrics + reconstruction export (bitstreams are not persisted here)
            for i, (st, hw, x_hat, x_pad) in enumerate(zip(
                    strings, orig_hws, recon, clip)):
                idx = g + i
                total_bits += bits_in(st)

                H, W = hw
                x_hat = x_hat[..., :H, :W].clamp_(0, 1)
                # The un-padded region of the encoder input is the source
                # frame; it has the same batched shape as x_hat, so mse_loss
                # does not need to broadcast (which emitted a UserWarning).
                x_ref = x_pad[..., :H, :W]
                mse   = F.mse_loss(x_hat, x_ref)
                # values are in [0, 1], so PSNR = -10 * log10(MSE)
                frame_psnr = -10 * torch.log10(mse)
                psnr_sum += frame_psnr.item()
                n_pixels += H * W

                # 1xCxHxW float in [0, 1] -> HxWxC uint8
                rgb8 = (x_hat.squeeze(0).permute(1, 2, 0).cpu().detach().numpy() * 255).round().astype('uint8')

                # write JPEG with MOT naming
                cv2.imwrite(os.path.join(out_dir_img1, f"{idx+1:06d}.jpg"),
                            cv2.cvtColor(rgb8, cv2.COLOR_RGB2BGR),
                            [cv2.IMWRITE_JPEG_QUALITY, 95])

            # free GPU mem each GOP
            del clip, recon, strings, shapes
            torch.cuda.empty_cache(); gc.collect()

    else:  # ── image codec
        for idx, fp in enumerate(tqdm(frame_paths, desc="Compressing")):
            img = Image.open(fp).convert("RGB")
            x, hw = pad_to_multiple(to_tensor(img).unsqueeze(0).to(device), PAD_M)
            with torch.no_grad():
                out = model.compress(x)
                # CompressAI image models return a dict from decompress; the
                # reconstruction lives under "x_hat" (indexing with 0 fails).
                x_hat = model.decompress(out["strings"], out["shape"])["x_hat"]

            torch.save({"strings": out["strings"], "shape": out["shape"], "orig_hw": hw},
                       os.path.join(out_dir, f"{idx:06d}.pth"))
            total_bits += bits_in(out["strings"])

            H, W  = hw
            x_hat = x_hat[..., :H, :W].clamp_(0, 1)
            # compare against the un-padded encoder input (same batched shape)
            mse   = F.mse_loss(x_hat, x[..., :H, :W])
            frame_psnr = -10 * torch.log10(mse)
            psnr_sum += frame_psnr.item()
            n_pixels += H * W

    # Per-sequence rate (bits per original pixel) and mean per-frame PSNR.
    bpp  = total_bits / n_pixels
    psnr = psnr_sum / n_frames
    summary.append((seq, n_frames, bpp, psnr))
    print(f"→ {bpp:.4f} bpp   |   {psnr:.2f} dB PSNR")


=== MOT17-02-DPM ===
Frames: 15


  mse   = F.mse_loss(x_hat, x_ref)


→ 0.0843 bpp   |   29.51 dB PSNR

=== MOT17-02-FRCNN ===
Frames: 15
→ 0.0843 bpp   |   30.66 dB PSNR

=== MOT17-02-SDP ===
Frames: 15
→ 0.0843 bpp   |   30.01 dB PSNR

=== MOT17-04-DPM ===
Frames: 15
→ 0.0528 bpp   |   33.58 dB PSNR

=== MOT17-04-FRCNN ===
Frames: 15
→ 0.0528 bpp   |   33.07 dB PSNR

=== MOT17-04-SDP ===
Frames: 15
→ 0.0528 bpp   |   33.00 dB PSNR

=== MOT17-05-DPM ===
Frames: 16


  mse   = F.mse_loss(x_hat, x_ref)


→ 0.0828 bpp   |   31.28 dB PSNR

=== MOT17-05-FRCNN ===
Frames: 16
→ 0.0828 bpp   |   30.47 dB PSNR

=== MOT17-05-SDP ===
Frames: 16
→ 0.0828 bpp   |   31.28 dB PSNR

=== MOT17-09-DPM ===
Frames: 15
→ 0.0803 bpp   |   30.56 dB PSNR

=== MOT17-09-FRCNN ===
Frames: 15
→ 0.0803 bpp   |   30.74 dB PSNR

=== MOT17-09-SDP ===
Frames: 15
→ 0.0803 bpp   |   28.55 dB PSNR

=== MOT17-10-DPM ===
Frames: 16
→ 0.0533 bpp   |   31.79 dB PSNR

=== MOT17-10-FRCNN ===
Frames: 16
→ 0.0533 bpp   |   32.94 dB PSNR

=== MOT17-10-SDP ===
Frames: 16
→ 0.0533 bpp   |   33.44 dB PSNR

=== MOT17-11-DPM ===
Frames: 15
→ 0.0630 bpp   |   33.28 dB PSNR

=== MOT17-11-FRCNN ===
Frames: 15
→ 0.0630 bpp   |   31.60 dB PSNR

=== MOT17-11-SDP ===
Frames: 15
→ 0.0630 bpp   |   33.28 dB PSNR

=== MOT17-13-DPM ===
Frames: 15
→ 0.0672 bpp   |   33.25 dB PSNR

=== MOT17-13-FRCNN ===
Frames: 15
→ 0.0672 bpp   |   31.15 dB PSNR

=== MOT17-13-SDP ===
Frames: 15
→ 0.0672 bpp   |   33.25 dB PSNR


In [None]:
# Per-sequence table followed by frame-weighted overall averages.
print("\n===========  SUMMARY  ===========")
print(f"{'Sequence':20}  Frames   BPP     PSNR")

total_frames = 0
weighted_bpp = 0
weighted_psnr = 0
for name, count, seq_bpp, seq_psnr in summary:
    print(f"{name:20}  {count:6d}   {seq_bpp:5.4f}   {seq_psnr:6.2f}")
    # weight every sequence by the number of frames it contributed
    total_frames += count
    weighted_bpp += seq_bpp * count
    weighted_psnr += seq_psnr * count

overall_bpp = weighted_bpp / total_frames
overall_psnr = weighted_psnr / total_frames
print("----------------------------------------------")
print(f"{'Overall':20}          {overall_bpp:5.4f}   {overall_psnr:6.2f}")


Sequence              Frames   BPP     PSNR
MOT17-02-DPM              15   0.0843    29.51
MOT17-02-FRCNN            15   0.0843    30.66
MOT17-02-SDP              15   0.0843    30.01
MOT17-04-DPM              15   0.0528    33.58
MOT17-04-FRCNN            15   0.0528    33.07
MOT17-04-SDP              15   0.0528    33.00
MOT17-05-DPM              16   0.0828    31.28
MOT17-05-FRCNN            16   0.0828    30.47
MOT17-05-SDP              16   0.0828    31.28
MOT17-09-DPM              15   0.0803    30.56
MOT17-09-FRCNN            15   0.0803    30.74
MOT17-09-SDP              15   0.0803    28.55
MOT17-10-DPM              16   0.0533    31.79
MOT17-10-FRCNN            16   0.0533    32.94
MOT17-10-SDP              16   0.0533    33.44
MOT17-11-DPM              15   0.0630    33.28
MOT17-11-FRCNN            15   0.0630    31.60
MOT17-11-SDP              15   0.0630    33.28
MOT17-13-DPM              15   0.0672    33.25
MOT17-13-FRCNN            15   0.0672    31.15
MOT17-13-SDP   

In [None]:
# Switch to quality level 9; takes effect once the codec-loading cell is re-run.
quality  = 9

In [None]:
# Supported codecs: zoo factory, whether it is a video codec, and the multiple
# the frame size must be padded to (SSF ↓16, hyper-prior ↑8 → needs /128).
_codec_table = {
    "bmshj2018_factorized": (bmshj2018_factorized, False, 64),
    "ssf2020":              (ssf2020,              True,  128),
}
if codec_name not in _codec_table:
    raise ValueError(f"Unknown codec {codec_name}")

_factory, _is_video, _pad_multiple = _codec_table[codec_name]
# Pretrained weights, inference mode, on the selected device.
model      = _factory(quality=quality, pretrained=True).eval().to(device)
video_code = _is_video
PAD_M      = _pad_multiple

print(f"Loaded {codec_name}-Q{quality}  (video={video_code})")

Loaded ssf2020-Q9  (video=True)


In [None]:
# A sequence is any entry of the split directory that contains an `img1`
# frame folder; the result is kept in sorted order.
sequences = sorted(
    entry for entry in os.listdir(mot17_root)
    if os.path.isdir(os.path.join(mot17_root, entry, "img1"))
)
print("Found", len(sequences), "sequences:")
for name in sequences:
    print(" •", name)

Found 21 sequences:
 • MOT17-02-DPM
 • MOT17-02-FRCNN
 • MOT17-02-SDP
 • MOT17-04-DPM
 • MOT17-04-FRCNN
 • MOT17-04-SDP
 • MOT17-05-DPM
 • MOT17-05-FRCNN
 • MOT17-05-SDP
 • MOT17-09-DPM
 • MOT17-09-FRCNN
 • MOT17-09-SDP
 • MOT17-10-DPM
 • MOT17-10-FRCNN
 • MOT17-10-SDP
 • MOT17-11-DPM
 • MOT17-11-FRCNN
 • MOT17-11-SDP
 • MOT17-13-DPM
 • MOT17-13-FRCNN
 • MOT17-13-SDP


In [None]:
# Release cached, currently unused GPU memory before starting the batch run.
torch.cuda.empty_cache()

In [None]:
# Re-encode a temporal subsample of every sequence with the loaded codec,
# write the reconstructions and collect rate/distortion statistics.
summary = []          # will collect (seq, frames, bpp, psnr)
for seq in sequences:
    print(f"\n=== {seq} ===")
    img_dir = os.path.join(mot17_root, seq, "img1")
    out_dir = os.path.join(out_root,   seq)
    os.makedirs(out_dir, exist_ok=True)

    out_dir_img1 = os.path.join(out_root, seq, "img1")
    os.makedirs(out_dir_img1, exist_ok=True)

    # Mirror the detections and sequence metadata next to the re-encoded frames.
    # NOTE(review): frames are subsampled and renumbered 1..N below while det/
    # and seqinfo.ini are copied unchanged — presumably their frame ids and
    # sequence length no longer match the written images; confirm downstream.
    det_src = os.path.join(mot17_root, seq, "det")
    det_dst = os.path.join(out_root,   seq, "det")
    if not os.path.exists(det_dst):
        shutil.copytree(det_src, det_dst)
    shutil.copy(os.path.join(mot17_root, seq, "seqinfo.ini"),
                os.path.join(out_root,   seq, "seqinfo.ini"))

    frame_paths = sorted(glob.glob(os.path.join(img_dir, "*.jpg")) +
                         glob.glob(os.path.join(img_dir, "*.png")))
    # Keep roughly 15 evenly spaced frames. The stride is clamped to 1 so a
    # sequence with fewer than 15 frames does not produce a zero slice step.
    frame_paths = frame_paths[::max(1, len(frame_paths) // 15)]
    n_frames = len(frame_paths)
    print("Frames:", n_frames)
    if n_frames == 0:
        # Nothing to encode; skipping avoids dividing by zero in the averages.
        print("→ no frames found, skipping")
        continue

    total_bits, psnr_sum, n_pixels = 0, 0.0, 0

    if video_code:
        # ── process in chunks (=GOPs) to control memory
        gop = n_frames if GOP_SIZE is None else GOP_SIZE
        for g in range(0, n_frames, gop):
            end = min(g + gop, n_frames)
            clip, orig_hws = [], []
            for fp in frame_paths[g:end]:
                img = Image.open(fp).convert("RGB")
                x, hw = pad_to_multiple(to_tensor(img).unsqueeze(0).to(device), PAD_M)
                clip.append(x); orig_hws.append(hw)

            # Encode AND decode without autograd: decoding outside no_grad
            # would build a graph through the decoder and waste GPU memory.
            with torch.no_grad():
                strings, shapes = model.compress(clip)
                recon = model.decompress(strings, shapes)

            # metrics + reconstruction export (bitstreams are not persisted here)
            for i, (st, hw, x_hat, x_pad) in enumerate(zip(
                    strings, orig_hws, recon, clip)):
                idx = g + i
                total_bits += bits_in(st)

                H, W = hw
                x_hat = x_hat[..., :H, :W].clamp_(0, 1)
                # The un-padded region of the encoder input is the source
                # frame; it has the same batched shape as x_hat, so mse_loss
                # does not need to broadcast (which emitted a UserWarning).
                x_ref = x_pad[..., :H, :W]
                mse   = F.mse_loss(x_hat, x_ref)
                # values are in [0, 1], so PSNR = -10 * log10(MSE)
                frame_psnr = -10 * torch.log10(mse)
                psnr_sum += frame_psnr.item()
                n_pixels += H * W

                # 1xCxHxW float in [0, 1] -> HxWxC uint8
                rgb8 = (x_hat.squeeze(0).permute(1, 2, 0).cpu().detach().numpy() * 255).round().astype('uint8')

                # write JPEG with MOT naming
                cv2.imwrite(os.path.join(out_dir_img1, f"{idx+1:06d}.jpg"),
                            cv2.cvtColor(rgb8, cv2.COLOR_RGB2BGR),
                            [cv2.IMWRITE_JPEG_QUALITY, 95])

            # free GPU mem each GOP
            del clip, recon, strings, shapes
            torch.cuda.empty_cache(); gc.collect()

    else:  # ── image codec
        for idx, fp in enumerate(tqdm(frame_paths, desc="Compressing")):
            img = Image.open(fp).convert("RGB")
            x, hw = pad_to_multiple(to_tensor(img).unsqueeze(0).to(device), PAD_M)
            with torch.no_grad():
                out = model.compress(x)
                # CompressAI image models return a dict from decompress; the
                # reconstruction lives under "x_hat" (indexing with 0 fails).
                x_hat = model.decompress(out["strings"], out["shape"])["x_hat"]

            torch.save({"strings": out["strings"], "shape": out["shape"], "orig_hw": hw},
                       os.path.join(out_dir, f"{idx:06d}.pth"))
            total_bits += bits_in(out["strings"])

            H, W  = hw
            x_hat = x_hat[..., :H, :W].clamp_(0, 1)
            # compare against the un-padded encoder input (same batched shape)
            mse   = F.mse_loss(x_hat, x[..., :H, :W])
            frame_psnr = -10 * torch.log10(mse)
            psnr_sum += frame_psnr.item()
            n_pixels += H * W

    # Per-sequence rate (bits per original pixel) and mean per-frame PSNR.
    bpp  = total_bits / n_pixels
    psnr = psnr_sum / n_frames
    summary.append((seq, n_frames, bpp, psnr))
    print(f"→ {bpp:.4f} bpp   |   {psnr:.2f} dB PSNR")


=== MOT17-02-DPM ===
Frames: 15


  mse   = F.mse_loss(x_hat, x_ref)


→ 1.2291 bpp   |   28.48 dB PSNR

=== MOT17-02-FRCNN ===
Frames: 15
→ 1.2291 bpp   |   31.13 dB PSNR

=== MOT17-02-SDP ===
Frames: 15
→ 1.2291 bpp   |   34.85 dB PSNR

=== MOT17-04-DPM ===
Frames: 15
→ 0.8445 bpp   |   38.70 dB PSNR

=== MOT17-04-FRCNN ===
Frames: 15
→ 0.8445 bpp   |   34.61 dB PSNR

=== MOT17-04-SDP ===
Frames: 15
→ 0.8445 bpp   |   36.38 dB PSNR

=== MOT17-05-DPM ===
Frames: 16


  mse   = F.mse_loss(x_hat, x_ref)


→ 1.1990 bpp   |   42.06 dB PSNR

=== MOT17-05-FRCNN ===
Frames: 16
→ 1.1990 bpp   |   42.06 dB PSNR

=== MOT17-05-SDP ===
Frames: 16
→ 1.1990 bpp   |   42.14 dB PSNR

=== MOT17-09-DPM ===
Frames: 15
→ 1.2788 bpp   |   33.11 dB PSNR

=== MOT17-09-FRCNN ===
Frames: 15
→ 1.2788 bpp   |   35.16 dB PSNR

=== MOT17-09-SDP ===
Frames: 15
→ 1.2788 bpp   |   35.31 dB PSNR

=== MOT17-10-DPM ===
Frames: 16
→ 0.7536 bpp   |   42.64 dB PSNR

=== MOT17-10-FRCNN ===
Frames: 16
→ 0.7536 bpp   |   41.12 dB PSNR

=== MOT17-10-SDP ===
Frames: 16
→ 0.7536 bpp   |   34.08 dB PSNR

=== MOT17-11-DPM ===
Frames: 15
→ 0.8594 bpp   |   42.12 dB PSNR

=== MOT17-11-FRCNN ===
Frames: 15
→ 0.8594 bpp   |   40.59 dB PSNR

=== MOT17-11-SDP ===
Frames: 15
→ 0.8594 bpp   |   38.14 dB PSNR

=== MOT17-13-DPM ===
Frames: 15
→ 0.7954 bpp   |   30.25 dB PSNR

=== MOT17-13-FRCNN ===
Frames: 15
→ 0.7954 bpp   |   34.48 dB PSNR

=== MOT17-13-SDP ===
Frames: 15
→ 0.7954 bpp   |   38.38 dB PSNR


In [None]:
# Per-sequence table followed by frame-weighted overall averages.
print("\n===========  SUMMARY  ===========")
print(f"{'Sequence':20}  Frames   BPP     PSNR")

total_frames = 0
weighted_bpp = 0
weighted_psnr = 0
for name, count, seq_bpp, seq_psnr in summary:
    print(f"{name:20}  {count:6d}   {seq_bpp:5.4f}   {seq_psnr:6.2f}")
    # weight every sequence by the number of frames it contributed
    total_frames += count
    weighted_bpp += seq_bpp * count
    weighted_psnr += seq_psnr * count

overall_bpp = weighted_bpp / total_frames
overall_psnr = weighted_psnr / total_frames
print("----------------------------------------------")
print(f"{'Overall':20}          {overall_bpp:5.4f}   {overall_psnr:6.2f}")


Sequence              Frames   BPP     PSNR
MOT17-02-DPM              15   1.2291    28.48
MOT17-02-FRCNN            15   1.2291    31.13
MOT17-02-SDP              15   1.2291    34.85
MOT17-04-DPM              15   0.8445    38.70
MOT17-04-FRCNN            15   0.8445    34.61
MOT17-04-SDP              15   0.8445    36.38
MOT17-05-DPM              16   1.1990    42.06
MOT17-05-FRCNN            16   1.1990    42.06
MOT17-05-SDP              16   1.1990    42.14
MOT17-09-DPM              15   1.2788    33.11
MOT17-09-FRCNN            15   1.2788    35.16
MOT17-09-SDP              15   1.2788    35.31
MOT17-10-DPM              16   0.7536    42.64
MOT17-10-FRCNN            16   0.7536    41.12
MOT17-10-SDP              16   0.7536    34.08
MOT17-11-DPM              15   0.8594    42.12
MOT17-11-FRCNN            15   0.8594    40.59
MOT17-11-SDP              15   0.8594    38.14
MOT17-13-DPM              15   0.7954    30.25
MOT17-13-FRCNN            15   0.7954    34.48
MOT17-13-SDP   

In [None]:
# Point the input and output roots at the MOT17 train split.
mot17_root = "BoostTrack/data/MOT17/train"
out_root   = "BoostTrack/data/MOT17_processed/train"

In [None]:
# A sequence is any entry of the split directory that contains an `img1`
# frame folder; the result is kept in sorted order.
sequences = sorted(
    entry for entry in os.listdir(mot17_root)
    if os.path.isdir(os.path.join(mot17_root, entry, "img1"))
)
print("Found", len(sequences), "sequences:")
for name in sequences:
    print(" •", name)

Found 21 sequences:
 • MOT17-02-DPM
 • MOT17-02-FRCNN
 • MOT17-02-SDP
 • MOT17-04-DPM
 • MOT17-04-FRCNN
 • MOT17-04-SDP
 • MOT17-05-DPM
 • MOT17-05-FRCNN
 • MOT17-05-SDP
 • MOT17-09-DPM
 • MOT17-09-FRCNN
 • MOT17-09-SDP
 • MOT17-10-DPM
 • MOT17-10-FRCNN
 • MOT17-10-SDP
 • MOT17-11-DPM
 • MOT17-11-FRCNN
 • MOT17-11-SDP
 • MOT17-13-DPM
 • MOT17-13-FRCNN
 • MOT17-13-SDP


In [None]:
# Release cached, currently unused GPU memory before starting the batch run.
torch.cuda.empty_cache()

In [None]:
# Compress a subsample of every MOT17 sequence with `model`, accumulate the
# bitrate (bits per pixel) and reconstruction quality (PSNR), and collect one
# row per sequence in `summary` for the report cell below.
summary = []            # one (seq, n_frames, bpp, mean_psnr) tuple per sequence
SUBSAMPLE_TARGET = 15   # approximate number of evenly spaced frames kept per sequence

for seq in sequences:
    print(f"\n=== {seq} ===")
    img_dir = os.path.join(mot17_root, seq, "img1")
    out_dir = os.path.join(out_root,   seq)
    os.makedirs(out_dir, exist_ok=True)

    out_dir_img1 = os.path.join(out_root, seq, "img1")
    os.makedirs(out_dir_img1, exist_ok=True)

    # Mirror detections and sequence metadata next to the re-encoded frames.
    det_src = os.path.join(mot17_root, seq, "det")
    det_dst = os.path.join(out_root,   seq, "det")
    if not os.path.exists(det_dst):
        shutil.copytree(det_src, det_dst)
    shutil.copy(os.path.join(mot17_root, seq, "seqinfo.ini"),
                os.path.join(out_root,   seq, "seqinfo.ini"))

    frame_paths = sorted(glob.glob(os.path.join(img_dir, "*.jpg")) +
                         glob.glob(os.path.join(img_dir, "*.png")))
    if not frame_paths:
        # Nothing to compress; skipping also avoids dividing by zero below.
        print("Frames: 0 (no images found, skipping)")
        continue

    # Keep roughly SUBSAMPLE_TARGET evenly spaced frames. The step is clamped
    # to 1 because a zero step (fewer frames than the target) raises ValueError.
    step = max(1, len(frame_paths) // SUBSAMPLE_TARGET)
    frame_paths = frame_paths[::step]
    n_frames = len(frame_paths)
    print("Frames:", n_frames)

    total_bits, psnr_sum, n_pixels = 0, 0.0, 0

    if video_code:
        # ── process in chunks (=GOPs) to control memory
        gop = n_frames if GOP_SIZE is None else GOP_SIZE
        for g in range(0, n_frames, gop):
            clip, orig_hws = [], []
            for fp in frame_paths[g:g + gop]:
                img = Image.open(fp).convert("RGB")
                x, hw = pad_to_multiple(to_tensor(img).unsqueeze(0).to(device), PAD_M)
                clip.append(x); orig_hws.append(hw)

            # Both directions are pure inference, so neither needs autograd.
            with torch.no_grad():
                strings, shapes = model.compress(clip)
                recon = model.decompress(strings, shapes)

            # save + metrics
            for i, (st, _shape, hw, x_pad, x_hat) in enumerate(zip(
                    strings, shapes, orig_hws, clip, recon)):
                idx = g + i
                total_bits += bits_in(st)

                H, W = hw
                x_hat = x_hat[..., :H, :W].clamp_(0, 1)
                # pad_to_multiple only pads on the right/bottom, so cropping the
                # padded input recovers the original frame. It also has the same
                # 4-D shape as x_hat, which avoids mse_loss's broadcast warning.
                x_ref = x_pad[..., :H, :W]
                mse   = F.mse_loss(x_hat, x_ref)
                psnr_sum += (-10 * torch.log10(mse)).item()
                n_pixels += H * W

                rgb8 = (x_hat.squeeze(0).permute(1, 2, 0).cpu().numpy() * 255).round().astype('uint8')

                # write JPEG with MOT naming
                # NOTE(review): frames are renumbered consecutively (1..n) after
                # subsampling, while det/ and seqinfo.ini are copied verbatim.
                # Confirm downstream consumers expect this.
                cv2.imwrite(os.path.join(out_dir_img1, f"{idx+1:06d}.jpg"),
                            cv2.cvtColor(rgb8, cv2.COLOR_RGB2BGR),
                            [cv2.IMWRITE_JPEG_QUALITY, 95])

            # free GPU mem each GOP: drop the references, collect them, and only
            # then ask the allocator to release the now-unused blocks
            del clip, recon, strings, shapes
            gc.collect(); torch.cuda.empty_cache()

    else:  # ── image codec
        for idx, fp in enumerate(tqdm(frame_paths, desc="Compressing")):
            img = Image.open(fp).convert("RGB")
            x, hw = pad_to_multiple(to_tensor(img).unsqueeze(0).to(device), PAD_M)
            with torch.no_grad():
                out = model.compress(x)
                # CompressAI image models return a dict from decompress(); the
                # reconstruction is stored under "x_hat".
                x_hat = model.decompress(out["strings"], out["shape"])["x_hat"]

            torch.save({"strings": out["strings"], "shape": out["shape"], "orig_hw": hw},
                       os.path.join(out_dir, f"{idx:06d}.pth"))
            total_bits += bits_in(out["strings"])

            H, W  = hw
            x_hat = x_hat[..., :H, :W].clamp_(0, 1)
            # Compare against the un-padded region of the input (same 4-D shape).
            mse   = F.mse_loss(x_hat, x[..., :H, :W])
            psnr_sum += (-10 * torch.log10(mse)).item()
            n_pixels += H * W

    bpp  = total_bits / n_pixels
    psnr = psnr_sum / n_frames
    summary.append((seq, n_frames, bpp, psnr))
    print(f"→ {bpp:.4f} bpp   |   {psnr:.2f} dB PSNR")


=== MOT17-02-DPM ===
Frames: 15


  mse   = F.mse_loss(x_hat, x_ref)


→ 1.2291 bpp   |   26.53 dB PSNR

=== MOT17-02-FRCNN ===
Frames: 15
→ 1.2291 bpp   |   31.76 dB PSNR

=== MOT17-02-SDP ===
Frames: 15
→ 1.2291 bpp   |   30.92 dB PSNR

=== MOT17-04-DPM ===
Frames: 15
→ 0.8445 bpp   |   36.74 dB PSNR

=== MOT17-04-FRCNN ===
Frames: 15
→ 0.8445 bpp   |   37.40 dB PSNR

=== MOT17-04-SDP ===
Frames: 15
→ 0.8445 bpp   |   38.68 dB PSNR

=== MOT17-05-DPM ===
Frames: 16


  mse   = F.mse_loss(x_hat, x_ref)


→ 1.1990 bpp   |   40.25 dB PSNR

=== MOT17-05-FRCNN ===
Frames: 16
→ 1.1990 bpp   |   42.14 dB PSNR

=== MOT17-05-SDP ===
Frames: 16
→ 1.1990 bpp   |   43.97 dB PSNR

=== MOT17-09-DPM ===
Frames: 15
→ 1.2788 bpp   |   36.21 dB PSNR

=== MOT17-09-FRCNN ===
Frames: 15
→ 1.2788 bpp   |   39.43 dB PSNR

=== MOT17-09-SDP ===
Frames: 15
→ 1.2788 bpp   |   36.87 dB PSNR

=== MOT17-10-DPM ===
Frames: 16
→ 0.7536 bpp   |   35.04 dB PSNR

=== MOT17-10-FRCNN ===
Frames: 16
→ 0.7536 bpp   |   40.78 dB PSNR

=== MOT17-10-SDP ===
Frames: 16
→ 0.7536 bpp   |   42.68 dB PSNR

=== MOT17-11-DPM ===
Frames: 15
→ 0.8594 bpp   |   38.37 dB PSNR

=== MOT17-11-FRCNN ===
Frames: 15
→ 0.8594 bpp   |   35.84 dB PSNR

=== MOT17-11-SDP ===
Frames: 15
→ 0.8594 bpp   |   40.83 dB PSNR

=== MOT17-13-DPM ===
Frames: 15
→ 0.7954 bpp   |   34.28 dB PSNR

=== MOT17-13-FRCNN ===
Frames: 15
→ 0.7954 bpp   |   36.41 dB PSNR

=== MOT17-13-SDP ===
Frames: 15
→ 0.7954 bpp   |   36.50 dB PSNR


In [None]:
# Print the per-sequence results collected in `summary`, then a frame-weighted
# average over all sequences.
print("\n===========  SUMMARY  ===========")
print(f"{'Sequence':20}  Frames   BPP     PSNR")
for seq, n, bpp, psnr in summary:
    print(f"{seq:20}  {n:6d}   {bpp:5.4f}   {psnr:6.2f}")

# Weight each sequence by its frame count. The total is computed once and
# guarded so an empty `summary` prints NaN instead of raising ZeroDivisionError.
total_frames = sum(n for (_, n, _, _) in summary)
if total_frames:
    overall_bpp  = sum(bpp * n for (_, n, bpp, _) in summary) / total_frames
    overall_psnr = sum(psnr * n for (_, n, _, psnr) in summary) / total_frames
else:
    overall_bpp = overall_psnr = float("nan")
print("----------------------------------------------")
print(f"{'Overall':20}          {overall_bpp:5.4f}   {overall_psnr:6.2f}")


Sequence              Frames   BPP     PSNR
MOT17-02-DPM              15   1.2291    26.53
MOT17-02-FRCNN            15   1.2291    31.76
MOT17-02-SDP              15   1.2291    30.92
MOT17-04-DPM              15   0.8445    36.74
MOT17-04-FRCNN            15   0.8445    37.40
MOT17-04-SDP              15   0.8445    38.68
MOT17-05-DPM              16   1.1990    40.25
MOT17-05-FRCNN            16   1.1990    42.14
MOT17-05-SDP              16   1.1990    43.97
MOT17-09-DPM              15   1.2788    36.21
MOT17-09-FRCNN            15   1.2788    39.43
MOT17-09-SDP              15   1.2788    36.87
MOT17-10-DPM              16   0.7536    35.04
MOT17-10-FRCNN            16   0.7536    40.78
MOT17-10-SDP              16   0.7536    42.68
MOT17-11-DPM              15   0.8594    38.37
MOT17-11-FRCNN            15   0.8594    35.84
MOT17-11-SDP              15   0.8594    40.83
MOT17-13-DPM              15   0.7954    34.28
MOT17-13-FRCNN            15   0.7954    36.41
MOT17-13-SDP   