# High Resolution inference 

In [1]:
# run in the root of the repository
%load_ext autoreload
%autoreload 2
 
%cd ../..

/private/home/pfz/09-videoseal/archs


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [18]:
from videoseal.utils.display import save_img
from videoseal.utils import Timer
from videoseal.evals.full import setup_model_from_checkpoint
from videoseal.evals.metrics import bit_accuracy
from videoseal.augmentation import Identity, JPEG
from videoseal.modules.jnd import JND

import os
import omegaconf
from tqdm import tqdm
import gc
from PIL import Image

import torch
import torchvision

to_tensor = torchvision.transforms.ToTensor()
to_pil = torchvision.transforms.ToPILImage()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# device = "cpu" 

In [15]:
# Directory containing videos
assets_dir = "assets/imgs"
output_dir = "output"

# Checkpoint
ckpt_path = "/checkpoint/pfz/2024_logs/1028_vseal_long/_seed=3_optimizer=AdamW,lr=1e-4_embedder_model=unet_small2_yuv/checkpoint600.pth"

# a timer to measure the time
timer = Timer()

# Iterate over all checkpoints
wam = setup_model_from_checkpoint(ckpt_path)
wam.eval()
wam.to(device)

# Iterate over all video files in the directory
files = [f for f in os.listdir(assets_dir) if f.endswith(".png")]
files = [os.path.join(assets_dir, f) for f in files]

for file in tqdm(files, desc=f"Processing Videos"):
    # load image
    imgs = Image.open(file, "r").convert("RGB")  # keep only rgb channels
    imgs = to_tensor(imgs).unsqueeze(0).float()

    # Watermark embedding
    timer.start()
    outputs = wam.embed(imgs, is_video=False)
    torch.cuda.synchronize()
    print(f"embedding watermark  - took {timer.stop():.2f}s")

    # compute diff
    imgs_w = outputs["imgs_w"]  # b c h w
    msgs = outputs["msgs"]  # b k
    diff = imgs_w - imgs

    # save
    timer.start()
    base_save_name = os.path.join(output_dir, os.path.basename(file).replace(".png", ""))
    print(f"saving videos to {base_save_name}")
    save_img(imgs[0], f"{base_save_name}_ori.png")
    save_img(imgs_w[0], f"{base_save_name}_wm.png")
    save_img(diff[0], f"{base_save_name}_diff.png")

    # Compute min and max values, reshape, and normalize
    min_vals = diff.view(imgs.shape[0], imgs.shape[1], -1).min(dim=2, keepdim=True)[0].view(imgs.shape[0], imgs.shape[1], 1, 1)
    max_vals = diff.view(imgs.shape[0], imgs.shape[1], -1).max(dim=2, keepdim=True)[0].view(imgs.shape[0], imgs.shape[1], 1, 1)
    normalized_images = (diff - min_vals) / (max_vals - min_vals)

    # Save the normalized video
    save_img(normalized_images[0], f"{base_save_name}_diff_norm.png")
    print(f"saving videos - took {timer.stop():.2f}s")

    # Augment video
    print(f"compressing and detecting watermarks")
    for qf in [80, 40]:
        imgs_aug, _ = JPEG()(imgs_w, None,qf)

        # detect
        timer.start()
        outputs = wam.detect(imgs_aug, is_video=True)
        preds = outputs["preds"]
        bit_preds = preds[:, 1:]  # b k ...
        bit_accuracy_ = bit_accuracy(
            bit_preds,
            msgs
        ).nanmean().item()
        print(f"bit accuracy at JPEG {qf} is {bit_accuracy_:.2f} - took {timer.stop():.2f}s")

    del outputs, imgs, imgs_w, diff, min_vals, max_vals, normalized_images

# Free model from GPU
del wam
torch.cuda.empty_cache()

Model loaded successfully from /checkpoint/pfz/2024_logs/1028_vseal_long/_seed=3_optimizer=AdamW,lr=1e-4_embedder_model=unet_small2_yuv/checkpoint600.pth with message: <All keys matched successfully>


Processing Videos:   0%|          | 0/4 [00:00<?, ?it/s]

embedding watermark  - took 0.01s
saving videos to output/01
tensor(0.) tensor(1.)


Processing Videos:  25%|██▌       | 1/4 [00:00<00:00,  3.12it/s]

saving videos - took 0.23s
compressing and detecting watermarks
bit accuracy at JPEG 80 is 1.00 - took 0.02s
bit accuracy at JPEG 40 is 1.00 - took 0.02s
embedding watermark  - took 0.01s
saving videos to output/00
tensor(0.) tensor(1.)


Processing Videos:  50%|█████     | 2/4 [00:01<00:01,  1.35it/s]

saving videos - took 0.84s
compressing and detecting watermarks
bit accuracy at JPEG 80 is 1.00 - took 0.01s
bit accuracy at JPEG 40 is 1.00 - took 0.02s
embedding watermark  - took 0.02s
saving videos to output/02
tensor(0.) tensor(1.)
saving videos - took 3.10s
compressing and detecting watermarks
bit accuracy at JPEG 80 is 1.00 - took 0.02s


Processing Videos:  75%|███████▌  | 3/4 [00:04<00:02,  2.06s/it]

bit accuracy at JPEG 40 is 1.00 - took 0.02s
embedding watermark  - took 0.01s
saving videos to output/03
tensor(0.) tensor(1.)


Processing Videos: 100%|██████████| 4/4 [00:05<00:00,  1.37s/it]

saving videos - took 0.38s
compressing and detecting watermarks
bit accuracy at JPEG 80 is 1.00 - took 0.01s
bit accuracy at JPEG 40 is 1.00 - took 0.01s





## With attenuation

In [27]:
# Directory containing videos
assets_dir = "assets/imgs"
output_dir = "output"
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# Checkpoint
ckpt_path = "/checkpoint/pfz/2024_logs/1028_vseal_long/_seed=3_optimizer=AdamW,lr=1e-4_embedder_model=unet_small2_yuv/checkpoint600.pth"

# a timer to measure the time
timer = Timer()

# Iterate over all checkpoints
wam = setup_model_from_checkpoint(ckpt_path)
wam.eval()
wam.to(device)

# create attenuation
attenuation_cfg = "configs/attenuation.yaml"
attenuation = "jnd_1_1"
attenuation_cfg = omegaconf.OmegaConf.load(attenuation_cfg)[attenuation]
attenuation = JND(**attenuation_cfg)
wam.attenuation = attenuation
wam.scaling_w = 0.2

# Iterate over all video files in the directory
files = [f for f in os.listdir(assets_dir) if f.endswith(".png")]
files = [os.path.join(assets_dir, f) for f in files]

for file in tqdm(files, desc=f"Processing Videos"):
    # load image
    imgs = Image.open(file, "r").convert("RGB")  # keep only rgb channels
    imgs = to_tensor(imgs).unsqueeze(0).float()

    # Watermark embedding
    timer.start()
    outputs = wam.embed(imgs, is_video=False)
    # torch.cuda.synchronize()
    # print(f"embedding watermark  - took {timer.stop():.2f}s")

    # compute diff
    imgs_w = outputs["imgs_w"]  # b c h w
    msgs = outputs["msgs"]  # b k
    diff = imgs_w - imgs

    # save
    timer.start()
    base_save_name = os.path.join(output_dir, os.path.basename(file).replace(".png", ""))
    save_img(imgs[0], f"{base_save_name}_ori.png")
    save_img(imgs_w[0], f"{base_save_name}_wm.png")
    save_img(diff[0], f"{base_save_name}_diff.png")

    # Compute min and max values, reshape, and normalize
    min_vals = diff.view(imgs.shape[0], imgs.shape[1], -1).min(dim=2, keepdim=True)[0].view(imgs.shape[0], imgs.shape[1], 1, 1)
    max_vals = diff.view(imgs.shape[0], imgs.shape[1], -1).max(dim=2, keepdim=True)[0].view(imgs.shape[0], imgs.shape[1], 1, 1)
    normalized_images = (diff - min_vals) / (max_vals - min_vals)

    # Save the normalized video
    save_img(normalized_images[0], f"{base_save_name}_diff_norm.png")

    # Augment video
    for qf in [80, 40]:
        imgs_aug, _ = JPEG()(imgs_w, None,qf)

        # detect
        timer.start()
        outputs = wam.detect(imgs_aug, is_video=True)
        preds = outputs["preds"]
        bit_preds = preds[:, 1:]  # b k ...
        bit_accuracy_ = bit_accuracy(
            bit_preds,
            msgs
        ).nanmean().item()
        print(f"bit accuracy at JPEG {qf} is {bit_accuracy_:.2f} - took {timer.stop():.2f}s")

    del outputs, imgs, imgs_w, diff, min_vals, max_vals, normalized_images

# Free model from GPU
del wam
torch.cuda.empty_cache()

Model loaded successfully from /checkpoint/pfz/2024_logs/1028_vseal_long/_seed=3_optimizer=AdamW,lr=1e-4_embedder_model=unet_small2_yuv/checkpoint600.pth with message: <All keys matched successfully>


Processing Videos:  25%|██▌       | 1/4 [00:00<00:00,  3.38it/s]

bit accuracy at JPEG 80 is 0.88 - took 0.02s
bit accuracy at JPEG 40 is 0.73 - took 0.01s


Processing Videos:  50%|█████     | 2/4 [00:01<00:01,  1.24it/s]

bit accuracy at JPEG 80 is 0.87 - took 0.02s
bit accuracy at JPEG 40 is 0.86 - took 0.01s


Processing Videos:  75%|███████▌  | 3/4 [00:05<00:02,  2.19s/it]

bit accuracy at JPEG 80 is 0.84 - took 0.02s
bit accuracy at JPEG 40 is 0.82 - took 0.02s


Processing Videos: 100%|██████████| 4/4 [00:05<00:00,  1.45s/it]

bit accuracy at JPEG 80 is 0.82 - took 0.01s
bit accuracy at JPEG 40 is 0.80 - took 0.01s



