# High-resolution inference

In [1]:
# run in the root of the repository
%load_ext autoreload
%autoreload 2
 
%cd ../..

/private/home/pfz/09-videoseal/videoseal


In [2]:
from videoseal.utils.display import save_img
from videoseal.utils import Timer
from videoseal.evals.full import setup_model_from_checkpoint
from videoseal.evals.metrics import bit_accuracy, psnr, ssim
from videoseal.augmentation import Identity, JPEG
from videoseal.modules.jnd import JND, VarianceBasedJND

import os
import omegaconf
from tqdm import tqdm
import gc
from PIL import Image

import torch
import torchvision

to_tensor = torchvision.transforms.ToTensor()
to_pil = torchvision.transforms.ToPILImage()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# device = "cpu" 

  from .autonotebook import tqdm as notebook_tqdm


In [12]:
# Directory containing videos
num_imgs = 50
# assets_dir = "/private/home/pfz/_images"
# assets_dir = "/checkpoint/pfz/projects/videoseal/assets/imgs"
assets_dir = "/private/home/pfz/_images/synthid"
base_output_dir = "outputs"
os.makedirs(base_output_dir, exist_ok=True)

# Checkpoint
ckpts = {
    # "vs2": "/checkpoint/pfz/2025_logs/0306_vseal_ydisc_release_bis/_nbits=256/checkpoint600.pth",
    "vs3": "/checkpoint/soucek/2025_logs/0926_vseal_aws-dynsize768-aug_size_sweep-discfrom100/_embedder_model=1_augmentation_config=1/checkpoint.pth",
}

for ckpt_name, ckpt_path in ckpts.items():

    output_dir = os.path.join(base_output_dir, ckpt_name)
    os.makedirs(output_dir, exist_ok=True)

    # a timer to measure the time
    timer = Timer()

    # Iterate over all checkpoints
    wam = setup_model_from_checkpoint(ckpt_path)
    wam.eval()
    wam.to(device)

    # wam.blender.scaling_w = 0.2

    # Iterate over all video files in the directory
    files = [f for f in os.listdir(assets_dir) if f.endswith(".png") or f.endswith(".jpg")]
    files = [os.path.join(assets_dir, f) for f in files]
    files = files[:num_imgs]

    for file in tqdm(files, desc=f"Processing Images"):
        # load image
        imgs = Image.open(file, "r").convert("RGB")  # keep only rgb channels
        imgs = to_tensor(imgs).unsqueeze(0).float()

        # Watermark embedding
        timer.start()
        outputs = wam.embed(imgs, is_video=False, lowres_attenuation=False)
        torch.cuda.synchronize()
        # print(f"embedding watermark  - took {timer.stop():.2f}s")

        # compute diff
        imgs_w = outputs["imgs_w"]  # b c h w
        msgs = outputs["msgs"]  # b k
        diff = imgs_w - imgs

        # save
        timer.start()
        base_save_name = os.path.join(output_dir, os.path.basename(file).replace(".png", ""))
        # print(f"saving videos to {base_save_name}")
        save_img(imgs[0], f"{base_save_name}_ori.png")
        save_img(imgs_w[0], f"{base_save_name}_wm.png")
        save_img(20*diff[0].abs(), f"{base_save_name}_diff.png")

        # Compute min and max values, reshape, and normalize
        min_vals = diff.view(imgs.shape[0], imgs.shape[1], -1).min(dim=2, keepdim=True)[0].view(imgs.shape[0], imgs.shape[1], 1, 1)
        max_vals = diff.view(imgs.shape[0], imgs.shape[1], -1).max(dim=2, keepdim=True)[0].view(imgs.shape[0], imgs.shape[1], 1, 1)
        normalized_images = (diff - min_vals) / (max_vals - min_vals)

        # Save the normalized video
        save_img(normalized_images[0], f"{base_save_name}_diff_norm.png")
        # print(f"saving videos - took {timer.stop():.2f}s")

        # Metrics
        imgs_aug = imgs_w
        outputs = wam.detect(imgs_aug, is_video=False)
        metrics = {
            "file": file,
            "bit_accuracy": bit_accuracy(
                outputs["preds"][:, 1:],
                msgs
            ).nanmean().item(),
            "psnr": psnr(imgs_w, imgs).item(),
            "ssim": ssim(imgs_w, imgs).item()
        }

        # Augment video
        # print(f"compressing and detecting watermarks")
        for qf in [80, 40]:
            imgs_aug, _ = JPEG()(imgs_w, None,qf)

            # detect
            timer.start()
            outputs = wam.detect(imgs_aug, is_video=True)
            preds = outputs["preds"]
            # print(preds)
            bit_preds = preds[:, 1:]  # b k ...
            bit_accuracy_ = bit_accuracy(
                bit_preds,
                msgs
            ).nanmean().item()
            
            metrics[f"bit_accuracy_qf{qf}"] = bit_accuracy_

        print(metrics)

        del outputs, imgs, imgs_w, diff, min_vals, max_vals, normalized_images

    # Free model from GPU
    del wam
    torch.cuda.empty_cache()

Model loaded successfully from /checkpoint/soucek/2025_logs/0926_vseal_aws-dynsize768-aug_size_sweep-discfrom100/_embedder_model=1_augmentation_config=1/checkpoint.pth with message: <All keys matched successfully>


Processing Images:   4%|▍         | 1/25 [00:01<00:26,  1.10s/it]

{'file': '/private/home/pfz/_images/synthid/animal_painting.png', 'bit_accuracy': 1.0, 'psnr': 46.76768112182617, 'ssim': 0.995708167552948, 'bit_accuracy_qf80': 1.0, 'bit_accuracy_qf40': 0.9921875}


Processing Images:   8%|▊         | 2/25 [00:02<00:24,  1.08s/it]

{'file': '/private/home/pfz/_images/synthid/abstract_art.png', 'bit_accuracy': 0.9921875, 'psnr': 48.17350387573242, 'ssim': 0.9983698725700378, 'bit_accuracy_qf80': 0.9921875, 'bit_accuracy_qf40': 0.984375}


Processing Images:  12%|█▏        | 3/25 [00:02<00:20,  1.05it/s]

{'file': '/private/home/pfz/_images/synthid/3d_rendering.png', 'bit_accuracy': 1.0, 'psnr': 48.47372055053711, 'ssim': 0.9985706210136414, 'bit_accuracy_qf80': 1.0, 'bit_accuracy_qf40': 0.9765625}


Processing Images:  16%|█▌        | 4/25 [00:03<00:19,  1.10it/s]

{'file': '/private/home/pfz/_images/synthid/animal_photo.png', 'bit_accuracy': 1.0, 'psnr': 48.14693832397461, 'ssim': 0.9979478716850281, 'bit_accuracy_qf80': 0.9921875, 'bit_accuracy_qf40': 0.97265625}


Processing Images:  20%|██        | 5/25 [00:04<00:18,  1.09it/s]

{'file': '/private/home/pfz/_images/synthid/cartoon.png', 'bit_accuracy': 1.0, 'psnr': 48.82665252685547, 'ssim': 0.9986483454704285, 'bit_accuracy_qf80': 1.0, 'bit_accuracy_qf40': 0.9921875}


Processing Images:  24%|██▍       | 6/25 [00:05<00:18,  1.05it/s]

{'file': '/private/home/pfz/_images/synthid/defocused_blurry.png', 'bit_accuracy': 1.0, 'psnr': 49.132286071777344, 'ssim': 0.9958239197731018, 'bit_accuracy_qf80': 1.0, 'bit_accuracy_qf40': 0.95703125}


Processing Images:  28%|██▊       | 7/25 [00:06<00:17,  1.02it/s]

{'file': '/private/home/pfz/_images/synthid/fantasy_painting.png', 'bit_accuracy': 1.0, 'psnr': 44.849342346191406, 'ssim': 0.981400191783905, 'bit_accuracy_qf80': 1.0, 'bit_accuracy_qf40': 0.98828125}


Processing Images:  32%|███▏      | 8/25 [00:07<00:16,  1.02it/s]

{'file': '/private/home/pfz/_images/synthid/historic_photo.png', 'bit_accuracy': 1.0, 'psnr': 45.29292678833008, 'ssim': 0.9974324107170105, 'bit_accuracy_qf80': 1.0, 'bit_accuracy_qf40': 1.0}


Processing Images:  36%|███▌      | 9/25 [00:08<00:14,  1.07it/s]

{'file': '/private/home/pfz/_images/synthid/flat_art.png', 'bit_accuracy': 1.0, 'psnr': 48.045814514160156, 'ssim': 0.9980697631835938, 'bit_accuracy_qf80': 1.0, 'bit_accuracy_qf40': 0.984375}


Processing Images:  40%|████      | 10/25 [00:09<00:15,  1.01s/it]

{'file': '/private/home/pfz/_images/synthid/landscape_painting.png', 'bit_accuracy': 1.0, 'psnr': 46.76456069946289, 'ssim': 0.9979455471038818, 'bit_accuracy_qf80': 0.99609375, 'bit_accuracy_qf40': 0.99609375}


Processing Images:  44%|████▍     | 11/25 [00:10<00:14,  1.02s/it]

{'file': '/private/home/pfz/_images/synthid/landscape_photo.png', 'bit_accuracy': 1.0, 'psnr': 45.91544723510742, 'ssim': 0.9981935024261475, 'bit_accuracy_qf80': 0.9921875, 'bit_accuracy_qf40': 0.97265625}


Processing Images:  48%|████▊     | 12/25 [00:11<00:12,  1.01it/s]

{'file': '/private/home/pfz/_images/synthid/object_painting.png', 'bit_accuracy': 1.0, 'psnr': 48.02302169799805, 'ssim': 0.9923414587974548, 'bit_accuracy_qf80': 1.0, 'bit_accuracy_qf40': 0.98828125}


Processing Images:  52%|█████▏    | 13/25 [00:12<00:10,  1.16it/s]

{'file': '/private/home/pfz/_images/synthid/line_drawing.png', 'bit_accuracy': 0.99609375, 'psnr': 48.19952392578125, 'ssim': 0.9994950890541077, 'bit_accuracy_qf80': 0.984375, 'bit_accuracy_qf40': 0.95703125}


Processing Images:  56%|█████▌    | 14/25 [00:12<00:08,  1.30it/s]

{'file': '/private/home/pfz/_images/synthid/logo.png', 'bit_accuracy': 0.984375, 'psnr': 50.58726501464844, 'ssim': 0.999322235584259, 'bit_accuracy_qf80': 0.9609375, 'bit_accuracy_qf40': 0.921875}


Processing Images:  60%|██████    | 15/25 [00:13<00:08,  1.15it/s]

{'file': '/private/home/pfz/_images/synthid/pixel_art.png', 'bit_accuracy': 0.99609375, 'psnr': 48.413631439208984, 'ssim': 0.993805468082428, 'bit_accuracy_qf80': 0.99609375, 'bit_accuracy_qf40': 0.9453125}


Processing Images:  64%|██████▍   | 16/25 [00:14<00:07,  1.15it/s]

{'file': '/private/home/pfz/_images/synthid/object_photo.png', 'bit_accuracy': 1.0, 'psnr': 46.56950759887695, 'ssim': 0.9533246159553528, 'bit_accuracy_qf80': 0.99609375, 'bit_accuracy_qf40': 0.98046875}


Processing Images:  68%|██████▊   | 17/25 [00:15<00:06,  1.20it/s]

{'file': '/private/home/pfz/_images/synthid/pencil_drawing.png', 'bit_accuracy': 1.0, 'psnr': 45.39335250854492, 'ssim': 0.998906672000885, 'bit_accuracy_qf80': 0.99609375, 'bit_accuracy_qf40': 0.96875}


Processing Images:  72%|███████▏  | 18/25 [00:16<00:06,  1.13it/s]

{'file': '/private/home/pfz/_images/synthid/scifi_photo.png', 'bit_accuracy': 1.0, 'psnr': 46.694278717041016, 'ssim': 0.9836916327476501, 'bit_accuracy_qf80': 1.0, 'bit_accuracy_qf40': 0.953125}


Processing Images:  76%|███████▌  | 19/25 [00:17<00:05,  1.04it/s]

{'file': '/private/home/pfz/_images/synthid/portrait_painting.png', 'bit_accuracy': 1.0, 'psnr': 47.40938949584961, 'ssim': 0.997018575668335, 'bit_accuracy_qf80': 1.0, 'bit_accuracy_qf40': 0.9765625}


Processing Images:  80%|████████  | 20/25 [00:18<00:04,  1.03it/s]

{'file': '/private/home/pfz/_images/synthid/poster_art.png', 'bit_accuracy': 1.0, 'psnr': 46.67652893066406, 'ssim': 0.9968881607055664, 'bit_accuracy_qf80': 1.0, 'bit_accuracy_qf40': 0.9765625}


Processing Images:  84%|████████▍ | 21/25 [00:19<00:03,  1.19it/s]

{'file': '/private/home/pfz/_images/synthid/text.png', 'bit_accuracy': 1.0, 'psnr': 49.86194610595703, 'ssim': 0.9983680248260498, 'bit_accuracy_qf80': 1.0, 'bit_accuracy_qf40': 0.9375}


Processing Images:  88%|████████▊ | 22/25 [00:20<00:02,  1.11it/s]

{'file': '/private/home/pfz/_images/synthid/portrait_photo.png', 'bit_accuracy': 1.0, 'psnr': 46.46059799194336, 'ssim': 0.9963210225105286, 'bit_accuracy_qf80': 1.0, 'bit_accuracy_qf40': 0.99609375}


Processing Images:  92%|█████████▏| 23/25 [00:21<00:01,  1.14it/s]

{'file': '/private/home/pfz/_images/synthid/stock_photo.png', 'bit_accuracy': 1.0, 'psnr': 48.990150451660156, 'ssim': 0.9960353970527649, 'bit_accuracy_qf80': 1.0, 'bit_accuracy_qf40': 0.9921875}


Processing Images:  96%|█████████▌| 24/25 [00:21<00:00,  1.19it/s]

{'file': '/private/home/pfz/_images/synthid/shapes.png', 'bit_accuracy': 1.0, 'psnr': 47.42558670043945, 'ssim': 0.9958243370056152, 'bit_accuracy_qf80': 1.0, 'bit_accuracy_qf40': 1.0}


Processing Images: 100%|██████████| 25/25 [00:22<00:00,  1.09it/s]

{'file': '/private/home/pfz/_images/synthid/wallpaper.png', 'bit_accuracy': 1.0, 'psnr': 48.83747100830078, 'ssim': 0.9984374642372131, 'bit_accuracy_qf80': 1.0, 'bit_accuracy_qf40': 0.97265625}





AttributeError: module 'pickle' has no attribute '__version__'

## With attenuation

In [1]:
# Directory containing videos
assets_dir = "/checkpoint/pfz/projects/videoseal/assets/imgs"
output_dir = "outputs"
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# Checkpoint
ckpt_path = "videoseal"
# ckpt_path = "/checkpoint/pfz/2025_logs/0115_vseal_rgb_96bits_nopercep_yuv/_scaling_w=0.05_lambda_d=0.5_extractor_model=sam_small/checkpoint.pth"

# a timer to measure the time
timer = Timer()

# Iterate over all checkpoints
wam = setup_model_from_checkpoint(ckpt_path)
wam.eval()
wam.to(device)

# create attenuation
attenuation = VarianceBasedJND(
    mode="variance",
    max_variance_value_for_clipping=300,
    min_heatmap_value=0.1,
    avg_pool_kernel_size=3
)
wam.attenuation = attenuation
wam.blender.scaling_w = 20.0

# Iterate over all video files in the directory
files = [f for f in os.listdir(assets_dir) if f.endswith(".png")]
files = [os.path.join(assets_dir, f) for f in files]

for file in tqdm(files, desc=f"Processing Images"):
    # load image
    imgs = Image.open(file, "r").convert("RGB")  # keep only rgb channels
    imgs = to_tensor(imgs).unsqueeze(0).float()

    # Watermark embedding
    timer.start()
    outputs = wam.embed(imgs, is_video=False)
    # torch.cuda.synchronize()
    # print(f"embedding watermark  - took {timer.stop():.2f}s")

    # compute diff
    imgs_w = outputs["imgs_w"]  # b c h w
    msgs = outputs["msgs"]  # b k
    diff = imgs_w - imgs

    # save
    timer.start()
    base_save_name = os.path.join(output_dir, os.path.basename(file).replace(".png", ""))
    save_img(imgs[0], f"{base_save_name}_ori.png")
    save_img(imgs_w[0], f"{base_save_name}_wm.png")
    save_img(diff[0], f"{base_save_name}_diff.png")

    # Compute min and max values, reshape, and normalize
    min_vals = diff.view(imgs.shape[0], imgs.shape[1], -1).min(dim=2, keepdim=True)[0].view(imgs.shape[0], imgs.shape[1], 1, 1)
    max_vals = diff.view(imgs.shape[0], imgs.shape[1], -1).max(dim=2, keepdim=True)[0].view(imgs.shape[0], imgs.shape[1], 1, 1)
    normalized_images = (diff - min_vals) / (max_vals - min_vals)

    # Save the normalized video
    save_img(normalized_images[0], f"{base_save_name}_diff_norm.png")

    # Metrics
    imgs_aug = imgs_w
    outputs = wam.detect(imgs_aug, is_video=False)
    metrics = {
        "bit_accuracy": bit_accuracy(
            outputs["preds"][:, 1:],
            msgs
        ).nanmean().item(),
        "psnr": psnr(imgs_w, imgs).item(),
        "ssim": ssim(imgs_w, imgs).item()
    }

    # Augment video
    for qf in [80, 40]:
        imgs_aug, _ = JPEG()(imgs_w, None,qf)

        # detect
        timer.start()
        outputs = wam.detect(imgs_aug, is_video=False)
        preds = outputs["preds"]
        bit_preds = preds[:, 1:]  # b k ...
        bit_accuracy_ = bit_accuracy(
            bit_preds,
            msgs
        ).nanmean().item()
        metrics[f"bit_accuracy_qf_{qf}"] = bit_accuracy_
    
    print(metrics)
    del outputs, imgs, imgs_w, diff, min_vals, max_vals, normalized_images

# Free model from GPU
del wam
torch.cuda.empty_cache()

NameError: name 'os' is not defined