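# High-resolution inference

Embed a watermark into videos with several checkpoints, save the original, watermarked and difference videos, and measure watermark detection before and after H.264 compression.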

In [1]:
# Notebook setup: enable live reloading of imported modules and change the
# working directory so the rest of the notebook runs from the repository root.
# `%autoreload 2` re-imports modules before each cell executes, so edits to
# the project source are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
 
# Move two directories up from the notebook's folder (expected to be the
# repository root — confirm for your checkout).
# NOTE: the path is relative, so re-running this cell moves up two more levels.
%cd ../..

/private/home/pfz/09-videoseal/baselines


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [2]:
from videoseal.utils.display import save_vid
from videoseal.utils import Timer
from videoseal.evals.full import setup_model_from_checkpoint
from videoseal.evals.metrics import bit_accuracy, pvalue, capacity, psnr
from videoseal.data.datasets import VideoDataset
from videoseal.augmentation import Identity, H264

import os
from tqdm import tqdm
import torch
import gc

# Inference device. It is pinned to the CPU here; the commented line below
# would instead select CUDA whenever it is available.
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = "cpu" 

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
import os
from tqdm import tqdm
import torch

# Directory containing the input videos, and the folder that receives every
# rendered result (original, watermarked, difference and normalized videos).
video_dir = "/checkpoint/pfz/projects/videoseal/assets/videos/metamoviegen_3s"
base_output_folder = "outputs"
# exist_ok=True creates the folder when it is missing without a separate
# (racy) existence check, and raises if the path exists but is not a directory
# instead of failing later when the first video is written.
os.makedirs(base_output_folder, exist_ok=True)

# Models to evaluate: display name -> checkpoint location. The name is used in
# progress messages and output file names; the value is handed to
# `setup_model_from_checkpoint` in the loop below.
# Commented-out entries are earlier experiments kept for reference.
# NOTE(review): "baseline/trustmark" is not a .pth path — presumably a baseline
# identifier resolved by `setup_model_from_checkpoint`; confirm.
ckpts = {
    # "hidden": '/private/home/hadyelsahar/work/code/videoseal/2024_logs/videoseal0.1/_lambda_d=0.5_lambda_i=0.0_optimizer=AdamW,lr=1e-4_videowam_step_size=4_video_start=500_embedder_model=hidden/checkpoint.pth',
    # "unet": '/private/home/hadyelsahar/work/code/videoseal/2024_logs/videoseal0.1/_lambda_d=0.5_lambda_i=0.5_optimizer=AdamW,lr=1e-4_videowam_step_size=4_video_start=500_embedder_model=unet_small2/checkpoint.pth',
    # "scaling_laws_smalldetector_tinyembedder":"/private/home/hadyelsahar/work/code/videoseal/2024_logs_large-exp/1105-videoseal0.2-scalinglaws/_lambda_d=0.0_extractor_model=sam_small_embedder_model=0/checkpoint.pth",
    # "scaling_laws_tinydetector_tinyembedder":"/private/home/hadyelsahar/work/code/videoseal/2024_logs_large-exp/1105-videoseal0.2-scalinglaws/_lambda_d=0.0_extractor_model=sam_tiny_embedder_model=0/checkpoint.pth",
    # "JND_fix_discloss":"/private/home/hadyelsahar/work/code/videoseal/2024_logs_large-exp/1108-videoseal0.2-discloss-fix-removeunused-params/_attenuation=jnd_3_3_nbits=64_lambda_d=0.5_video_start=100/checkpoint.pth",
    # "1111_discloss_sleepwake_highssim":"/private/home/hadyelsahar/work/code/videoseal/2024_logs_large-exp/1109-videoseal0.2-discloss-fix-hing-sleepwake-4nodes/_scaling_w=0.1_lambda_i=0.25_disc_hinge_on_logits_fake=False_sleepwake=True_video_start=500/checkpoint.pth"
    # "1111-finetuned":"/private/home/hadyelsahar/work/code/videoseal/2024_logs_large-exp/1111-videoseal0.2-sleepwake-resume/_attenuation=jnd_1_1/checkpoint.pth"
    # "1112-videoseal0.2":"/private/home/hadyelsahar/work/code/videoseal/2024_logs_large-exp/1111-videoseal0.2-archsearch-4nodes/_attenuation=None_nbits=64_finetune_detector_start=1000_embedder_model=unet_small2_quant/checkpoint.pth",
    # "1118-yuv-400":"/checkpoint/pfz/2024_logs/1118_vseal_long_sab/_scheduler=0_optimizer=adopt,lr=1e-5/checkpoint400.pth",
    # "1118-yuv-800":"/checkpoint/pfz/2024_logs/1118_vseal_long_sab/_scheduler=1_optimizer=AdamW,lr=1e-5/checkpoint.pth",
    "trustmark": "baseline/trustmark",
    "videoseal0.2b": "/private/home/hadyelsahar/work/code/videoseal/2024_logs_large-exp/1111-videoseal0.2-archsearch-4nodes/_attenuation=None_nbits=64_finetune_detector_start=800_embedder_model=unet_small2_quant/checkpoint.pth",
    "videoseal0.2a": "/private/home/hadyelsahar/work/code/videoseal/2024_logs_large-exp/1109-videoseal0.2-discloss-fix-hing-sleepwake-4nodes/_scaling_w=0.5_lambda_i=0.5_disc_hinge_on_logits_fake=True_sleepwake=False_video_start=500/checkpoint.pth",
}

# Frame rate (frames per second) used when writing every output video.
fps = 24

# Shared stopwatch: each stage calls start() and then reads stop() to report
# how long it took.
timer = Timer()

# Floor for the per-channel residual range during normalization; prevents a
# 0/0 division (NaN frames) when a channel's residual is constant.
norm_eps = 1e-8

# The video list does not depend on the model, so it is built once.
# os.listdir returns entries in arbitrary order; sorting makes the selected
# clip reproducible. Only the first clip is processed ([:1]).
video_files = sorted(f for f in os.listdir(video_dir) if f.endswith(".mp4"))[:1]

# Iterate over all checkpoints
for model_name, ckpt in ckpts.items():
    wam = setup_model_from_checkpoint(ckpt)
    wam.eval()
    wam.to(device)

    # NOTE(review): presumably the number of frames processed per chunk and
    # the frame stride used when embedding — confirm against the model class.
    wam.chunk_size = 64
    wam.step_size = 4

    for video_file in tqdm(video_files, desc=f"Processing Videos for {model_name}"):
        video_path = os.path.join(video_dir, video_file)
        base_name = os.path.splitext(video_file)[0]
        out_prefix = f"{base_output_folder}/{model_name}_{base_name}"

        # Evaluation only: disable autograd so no graph is kept in memory.
        with torch.no_grad():
            # Load the whole clip via VideoDataset.load_full_video_decord
            timer.start()
            vid, mask = VideoDataset.load_full_video_decord(video_path)
            print(f"loading video {video_path} - took {timer.stop():.2f}s")

            # Watermark embedding
            timer.start()
            outputs = wam.embed(vid, is_video=True)
            print(f"embedding watermark  - took {timer.stop():.2f}s")

            # Residual between watermarked and original frames
            imgs = vid  # b c h w
            imgs_w = outputs["imgs_w"]  # b c h w
            msgs = outputs["msgs"]  # b k
            diff = imgs_w - imgs

            # Save original, watermarked and (10x amplified) absolute residual
            timer.start()
            save_vid(imgs, f"{out_prefix}_ori.mp4", fps)
            save_vid(imgs_w, f"{out_prefix}_wm.mp4", fps)
            save_vid(10*diff.abs(), f"{out_prefix}_diff.mp4", fps)

            # psnr
            psnr_score = psnr(imgs, imgs_w, is_video=True).mean().item()
            print(f"PSNR: {psnr_score:.3f}")

            # Min-max normalize the residual per frame and per channel.
            # amin/amax reduce over the spatial dims directly, so no reshape
            # (and no contiguity requirement) is needed.
            min_vals = diff.amin(dim=(2, 3), keepdim=True)
            max_vals = diff.amax(dim=(2, 3), keepdim=True)
            range_vals = (max_vals - min_vals).clamp_min(norm_eps)
            normalized_images = (diff - min_vals) / range_vals

            # Save the normalized video
            save_vid(normalized_images, f"{out_prefix}_wm_normalized.mp4", fps)
            print(f"saving videos - took {timer.stop():.2f}s")

            # The metrics below compare against the first message row only.
            # NOTE(review): presumably detect_and_aggregate returns a single
            # aggregated prediction for the whole video — confirm.
            target_msg = msgs[:1]

            # Detect on the clean watermarked video (crf == -1) and after
            # H.264 compression at increasing CRF values.
            print("compressing and detecting watermarks")
            for crf in [-1, 23, 40]:
                if crf == -1:
                    imgs_aug = imgs_w
                else:
                    imgs_aug, _ = H264()(imgs_w, crf=crf)

                # The timer starts after compression, so the reported time
                # covers detection and metric computation only.
                timer.start()
                bit_preds = wam.detect_and_aggregate(imgs_aug)
                bit_accuracy_ = bit_accuracy(bit_preds, target_msg).nanmean().item()
                pvalue_ = pvalue(bit_preds, target_msg).nanmean().item()
                capacity_ = capacity(bit_preds, target_msg).nanmean().item()
                print(f"CRF={crf} - Bit Accuracy: {bit_accuracy_:.3f} - P-Value: {pvalue_:0.2e} - Capacity: {capacity_:.3f} - took {timer.stop():.2f}s")

            # Drop the large per-video tensors before the next clip is loaded.
            del vid, outputs, imgs, imgs_w, imgs_aug, msgs, target_msg, bit_preds
            del diff, min_vals, max_vals, range_vals, normalized_images

    # Release the model before the next checkpoint is loaded.
    del wam
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

Processing Videos for trustmark:   0%|          | 0/1 [00:00<?, ?it/s]

loading video /checkpoint/pfz/projects/videoseal/assets/videos/metamoviegen_3s/01.mp4 - took 2.28s
embedding watermark  - took 3.07s
PSNR: 42.851
saving videos - took 11.65s
compressing and detecting watermarks
CRF=-1 - Bit Accuracy: 1.000 - P-Value: 7.89e-31 - Capacity: 100.000 - took 1.69s
CRF=23 - Bit Accuracy: 1.000 - P-Value: 7.89e-31 - Capacity: 100.000 - took 1.41s
CRF=40 - Bit Accuracy: 1.000 - P-Value: 7.89e-31 - Capacity: 100.000 - took 1.43s


Processing Videos for trustmark: 100%|██████████| 1/1 [00:47<00:00, 47.94s/it]


Model loaded successfully from /private/home/hadyelsahar/work/code/videoseal/2024_logs_large-exp/1111-videoseal0.2-archsearch-4nodes/_attenuation=None_nbits=64_finetune_detector_start=800_embedder_model=unet_small2_quant/checkpoint.pth with message: <All keys matched successfully>


Processing Videos for videoseal0.2b:   0%|          | 0/1 [00:00<?, ?it/s]

loading video /checkpoint/pfz/projects/videoseal/assets/videos/metamoviegen_3s/01.mp4 - took 2.28s
embedding watermark  - took 3.25s
PSNR: 44.760
saving videos - took 12.05s
compressing and detecting watermarks
CRF=-1 - Bit Accuracy: 1.000 - P-Value: 5.42e-20 - Capacity: 64.000 - took 0.93s
CRF=23 - Bit Accuracy: 1.000 - P-Value: 5.42e-20 - Capacity: 64.000 - took 0.99s
CRF=40 - Bit Accuracy: 1.000 - P-Value: 5.42e-20 - Capacity: 64.000 - took 0.97s


Processing Videos for videoseal0.2b: 100%|██████████| 1/1 [00:46<00:00, 46.22s/it]


Model loaded successfully from /private/home/hadyelsahar/work/code/videoseal/2024_logs_large-exp/1109-videoseal0.2-discloss-fix-hing-sleepwake-4nodes/_scaling_w=0.5_lambda_i=0.5_disc_hinge_on_logits_fake=True_sleepwake=False_video_start=500/checkpoint.pth with message: <All keys matched successfully>


Processing Videos for videoseal0.2a:   0%|          | 0/1 [00:00<?, ?it/s]

loading video /checkpoint/pfz/projects/videoseal/assets/videos/metamoviegen_3s/01.mp4 - took 2.25s
embedding watermark  - took 3.17s
PSNR: 37.318
saving videos - took 12.48s
compressing and detecting watermarks
CRF=-1 - Bit Accuracy: 1.000 - P-Value: 5.42e-20 - Capacity: 64.000 - took 0.87s
CRF=23 - Bit Accuracy: 1.000 - P-Value: 5.42e-20 - Capacity: 64.000 - took 0.91s
CRF=40 - Bit Accuracy: 1.000 - P-Value: 5.42e-20 - Capacity: 64.000 - took 0.98s


Processing Videos for videoseal0.2a: 100%|██████████| 1/1 [00:48<00:00, 48.24s/it]
