# Baselines

In [1]:
# run in the root of the repository
%load_ext autoreload
%autoreload 2
 
%cd ../..

/private/home/pfz/09-videoseal/baselines


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [None]:
from videoseal.utils.display import save_vid
from videoseal.utils import Timer
from videoseal.evals.full import setup_model_from_checkpoint
from videoseal.evals.metrics import bit_accuracy, psnr, pvalue, normalized_bit_accuracy
from videoseal.data.datasets import VideoDataset
from videoseal.augmentation import Identity, H264, Crop

import os
from tqdm import tqdm
import torch

# Device used by the cells below. CUDA auto-detection is currently disabled
# (commented-out line) and every run is pinned to the CPU; re-enable the line
# below to use a GPU when one is available.
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = "cpu" 

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
from videoseal.models.baselines import build_baseline

# ---------------------------------------------------------------------------
# For each baseline method: embed a watermark into every selected video, save
# the original / watermarked / difference videos, then run detection on the
# watermarked video as-is (crf == -1) and after H.264 compression.
# ---------------------------------------------------------------------------

# Directory containing videos
video_dir = "/checkpoint/pfz/projects/videoseal/assets/videos/metamoviegen/"
save_dir = "outputs/"
num_vids = 1
fps = 24 // 1
os.makedirs(save_dir, exist_ok=True)
# frames_per_clip = fps * 3  # 3s
# frame_step = 1

# a timer to measure the time
timer = Timer()

# Sort the listing: os.listdir() returns entries in arbitrary order, so without
# sorting the `[:num_vids]` subset could differ between runs or machines.
video_files = sorted(f for f in os.listdir(video_dir) if f.endswith(".mp4"))[:num_vids]

# Build the compression augmentation once instead of once per CRF value.
h264 = H264()

# Guards the min-max normalisation against a zero range (constant channel).
eps = 1e-8

# for method in ["cin", "hidden", "mbrs", "wam"]:
for method in ["wam_noattenuation"]:
    wam = build_baseline(
        method,
        step_size=16,
        chunk_size=64,
    )
    wam.eval()
    wam.embedder.to(device)
    wam.detector.to(device)

    for video_file in tqdm(video_files, desc=f"Processing Videos for {method}"):
        video_path = os.path.join(video_dir, video_file)
        base_name = os.path.splitext(video_file)[0]
        # Common prefix of every file written for this (method, video) pair.
        # os.path.join avoids the doubled separator that "outputs/" + "/" gave.
        out_prefix = os.path.join(save_dir, f"{method}_{base_name}")

        # Load the full video
        timer.start()
        vid, mask = VideoDataset.load_full_video_decord(video_path)
        print(f"loading video {video_path} - took {timer.stop():.2f}s")

        # Watermark embedding
        timer.start()
        outputs = wam.embed(vid, is_video=True)
        print(f"embedding watermark  - took {timer.stop():.2f}s")

        # compute diff
        imgs = vid  # b c h w
        imgs_w = outputs["imgs_w"]  # b c h w
        msgs = outputs["msgs"]  # b k
        diff = imgs_w - imgs

        # psnr
        psnr_score = psnr(imgs, imgs_w).mean().item()
        print(f"PSNR: {psnr_score:.3f}")

        # save
        timer.start()
        save_vid(imgs, f"{out_prefix}_ori.mp4", fps)
        save_vid(imgs_w, f"{out_prefix}_wm.mp4", fps)
        # The signed difference is small; save its magnitude amplified 10x.
        save_vid(10 * diff.abs(), f"{out_prefix}_diff.mp4", fps)

        # Per-frame, per-channel min-max normalisation of the difference.
        # Reducing over the two spatial axes with keepdim=True yields
        # (b, c, 1, 1) tensors that broadcast against `diff`.
        min_vals = diff.amin(dim=(-2, -1), keepdim=True)
        max_vals = diff.amax(dim=(-2, -1), keepdim=True)
        # Clamp the range so a constant channel maps to 0 instead of NaN (0 / 0).
        normalized_images = (diff - min_vals) / (max_vals - min_vals).clamp_min(eps)

        # Save the normalized video
        save_vid(normalized_images, f"{out_prefix}_diff_normalized.mp4", fps)
        print(f"saving videos - took {timer.stop():.2f}s")

        # Augment video
        print("compressing and detecting watermarks")
        # Metrics below compare against the first message only.
        # NOTE(review): presumably detect_and_aggregate() returns a single
        # prediction aggregated over the whole video - confirm.
        msg_ref = msgs[:1]
        for crf in [-1, 23, 40]:
            # crf == -1 means "no compression": evaluate the watermarked video directly.
            if crf == -1:
                imgs_aug = imgs_w
            else:
                imgs_aug, _ = h264(imgs_w, crf=crf)

            # detect
            timer.start()
            bit_preds = wam.detect_and_aggregate(imgs_aug)
            bit_accuracy_ = bit_accuracy(bit_preds, msg_ref).nanmean().item()
            pvalue_ = pvalue(bit_preds, msg_ref).nanmean().item()
            normalized_bit_accuracy_ = normalized_bit_accuracy(bit_preds, msg_ref).nanmean().item()
            print(f"CRF={crf} - Bit Accuracy: {bit_accuracy_:.3f} - P-Value: {pvalue_:.3f} - Normalized Bit Accuracy: {normalized_bit_accuracy_:.3f} - took {timer.stop():.2f}s")

        # Drop the large per-video tensors before loading the next video.
        del vid, mask, outputs, imgs, imgs_w, diff, min_vals, max_vals, normalized_images

    # Free model from GPU
    del wam
    torch.cuda.empty_cache()

Processing Videos for wam_noattenuation:   0%|          | 0/1 [00:06<?, ?it/s]


KeyboardInterrupt: 

In [None]:
from videoseal.models.baselines import build_baseline

# ---------------------------------------------------------------------------
# For each interpolation setting and each baseline method: embed a watermark
# into every selected video, save the original / watermarked / difference
# videos, then run detection on cropped versions of the watermarked video.
# ---------------------------------------------------------------------------

# Directory containing videos
video_dir = "/checkpoint/pfz/projects/videoseal/assets/videos/metamoviegen/"
save_dir = "outputs/"
num_vids = 1
fps = 24 // 1
os.makedirs(save_dir, exist_ok=True)
# frames_per_clip = fps * 3  # 3s
# frame_step = 1

# a timer to measure the time
timer = Timer()

# Sort the listing: os.listdir() returns entries in arbitrary order, so without
# sorting the `[:num_vids]` subset could differ between runs or machines.
video_files = sorted(f for f in os.listdir(video_dir) if f.endswith(".mp4"))[:num_vids]

# Build the crop augmentation once instead of once per crop ratio.
crop_aug = Crop()

# Guards the min-max normalisation against a zero range (constant channel).
eps = 1e-8

# Interpolation settings to sweep over; each dict is passed to embed() as `interpolation`.
# NOTE(review): the output file names below do not encode the interpolation,
# so enabling several entries makes later runs overwrite earlier videos.
interpolations = [
    # {"mode": "bilinear", "align_corners": False, "antialias": False},
    {"mode": "bilinear", "align_corners": False, "antialias": True},
    # {"mode": "bicubic", "align_corners": False, "antialias": False},
    # {"mode": "bicubic", "align_corners": False, "antialias": True},
    # {"mode": "bilinear", "align_corners": True, "antialias": False},
    # {"mode": "bilinear", "align_corners": True, "antialias": True},
    # {"mode": "bicubic", "align_corners": True, "antialias": False},
    # {"mode": "bicubic", "align_corners": True, "antialias": True},
]
for interpolation in interpolations:
    print(f"Interpolation: {interpolation}")
    for method in ["cin", "hidden", "mbrs"]:
        wam = build_baseline(method)
        wam.eval()

        for video_file in tqdm(video_files, desc=f"Processing Videos for {method}"):
            video_path = os.path.join(video_dir, video_file)
            base_name = os.path.splitext(video_file)[0]
            # Common prefix of every file written for this (method, video) pair.
            # os.path.join avoids the doubled separator that "outputs/" + "/" gave.
            out_prefix = os.path.join(save_dir, f"{method}_{base_name}")

            # Load the full video
            timer.start()
            vid, mask = VideoDataset.load_full_video_decord(video_path)
            print(f"loading video {video_path} - took {timer.stop():.2f}s")

            # Watermark embedding
            timer.start()
            outputs = wam.embed(vid, is_video=True, interpolation=interpolation)
            print(f"embedding watermark  - took {timer.stop():.2f}s")

            # compute diff
            imgs = vid  # b c h w
            imgs_w = outputs["imgs_w"]  # b c h w
            msgs = outputs["msgs"]  # b k
            diff = imgs_w - imgs

            # psnr
            psnr_score = psnr(imgs, imgs_w).mean().item()
            print(f"PSNR: {psnr_score:.3f}")

            # save
            timer.start()
            save_vid(imgs, f"{out_prefix}_ori.mp4", fps)
            save_vid(imgs_w, f"{out_prefix}_wm.mp4", fps)
            # Save the amplified magnitude rather than the raw signed difference,
            # matching the `10*diff.abs()` visualisation of the WAM cell above.
            save_vid(10 * diff.abs(), f"{out_prefix}_diff.mp4", fps)

            # Per-frame, per-channel min-max normalisation of the difference.
            # Reducing over the two spatial axes with keepdim=True yields
            # (b, c, 1, 1) tensors that broadcast against `diff`.
            min_vals = diff.amin(dim=(-2, -1), keepdim=True)
            max_vals = diff.amax(dim=(-2, -1), keepdim=True)
            # Clamp the range so a constant channel maps to 0 instead of NaN (0 / 0).
            normalized_images = (diff - min_vals) / (max_vals - min_vals).clamp_min(eps)

            # Save the normalized video
            save_vid(normalized_images, f"{out_prefix}_diff_normalized.mp4", fps)
            print(f"saving videos - took {timer.stop():.2f}s")

            # Augment video
            print("cropping and detecting watermarks")
            # All-ones mask passed alongside the video to the crop augmentation;
            # it does not depend on the crop ratio, so build it once.
            masks = torch.ones_like(imgs)
            for crop in [0.8, 0.9, 0.98, 1.0]:
                imgs_aug, _ = crop_aug(imgs_w, masks, crop)

                # detect
                timer.start()
                det_outputs = wam.detect(imgs_aug, is_video=True)
                preds = det_outputs["preds"]
                # Skip channel 0 of the predictions; the remaining channels are the message bits.
                bit_preds = preds[:, 1:]  # b k ...
                bit_accuracy_ = bit_accuracy(
                    bit_preds,
                    msgs
                ).nanmean().item()
                print(f"crop={crop} Bit Accuracy: {bit_accuracy_:.3f} - detection took {timer.stop():.2f}s")

            # Drop the large per-video tensors before loading the next video.
            del vid, mask, outputs, det_outputs, masks, imgs, imgs_w, diff, min_vals, max_vals, normalized_images

        # Free model from GPU
        del wam
        torch.cuda.empty_cache()