In [1]:
import ffmpeg
import numpy as np
from PIL import Image, ImageFont, ImageDraw
import glob
import os
import tqdm

# RGB accent colour stored as uint8 so it can be added directly to uint8 image
# arrays; process_frame uses a quarter-intensity version as a panel background.
metablue = np.array((0, 100, 224), dtype=np.uint8)


def get_video_frames(fn, output_resolution=None):
    """Decode every frame of a video into one RGB array via ffmpeg.

    Args:
        fn: Path of the video file to probe and decode.
        output_resolution: Optional target length, in pixels, of the shorter
            side. The aspect ratio is kept and each side is snapped to the
            nearest even number (odd sizes make ffmpeg fail). ``None`` keeps
            the size reported by the probe.

    Returns:
        ``np.ndarray`` of dtype ``uint8`` with shape
        ``(num_frames, height, width, 3)``, built with ``np.frombuffer`` from
        the raw ``rgb24`` bytes that ffmpeg writes to stdout.

    Raises:
        ValueError: If ``fn`` contains no video stream, or if
            ``output_resolution`` is not positive.
    """
    video_streams = [s for s in ffmpeg.probe(fn)["streams"] if s["codec_type"] == "video"]
    if not video_streams:
        raise ValueError(f"no video stream found in {fn!r}")
    info = video_streams[0]
    width, height = info["width"], info["height"]

    if output_resolution is not None:
        if output_resolution <= 0:
            raise ValueError(f"output_resolution must be positive, got {output_resolution!r}")
        short_side = min(width, height)

        def _nearest_even(dim):
            # Multiply before dividing so the short side maps exactly onto
            # output_resolution, and round only once: truncating first and
            # rounding afterwards could land 2 px below the nearest even size.
            return max(2, 2 * round(dim * output_resolution / short_side / 2))

        width, height = _nearest_even(width), _nearest_even(height)

    # The scale filter is applied even without resizing, so the reshape below
    # always uses exactly the size that ffmpeg was asked to produce.
    out, _ = (
        ffmpeg.input(fn)
        .filter('scale', width, height)
        .output('pipe:', format='rawvideo', pix_fmt='rgb24')
        .run(capture_stdout=True, quiet=True)
    )
    return np.frombuffer(out, np.uint8).reshape([-1, height, width, 3])

In [2]:
# dir_ = "/private/home/soucek/videoseal/results/original-final-ft_fixed_5e6-ss4"
# original_ = f"{dir_}/009_val_0_ori.mp4"
# ours_ = f"{dir_}/009_val_1_wm.mp4"
# ours_diff_ = f"{dir_}/009_val_2_diff.mp4"

# dir_ = "/private/home/soucek/videoseal/results/scalew1.0-trustmark-original-stepsize1"
# other_met_ = [f"{dir_}/009_val_1_wm.mp4"]
# other_met_diff_ = [f"{dir_}/009_val_2_diff.mp4"]
# other_met_names_ = ["TrustMark"]

# dir_ = "/private/home/soucek/videoseal/results/original-scalew2.0-VIDEOSEAL_PAPER"
# other_met_ += [f"{dir_}/009_val_1_wm.mp4"]
# other_met_diff_ += [f"{dir_}/009_val_2_diff.mp4"]
# other_met_names_ += ["Old VS"]

In [3]:
# frames = get_video_frames(original_, output_resolution=450)
# frames_ours = get_video_frames(ours_, output_resolution=450)
# frames_ours_diff = get_video_frames(ours_diff_, output_resolution=450)

# frames_met1 = get_video_frames(other_met_[0], output_resolution=450)
# frames_met1_diff = get_video_frames(other_met_diff_[0], output_resolution=450)
# frames_met2 = get_video_frames(other_met_[1], output_resolution=450)
# frames_met2_diff = get_video_frames(other_met_diff_[1], output_resolution=450) * 6

In [4]:
# h, w = frames[0].shape[:2]
# font = ImageFont.FreeTypeFont("./Optimistic.ttf", size=60, index=0)

# os.makedirs("tmp_", exist_ok=True)

# for frame_idx in tqdm.tqdm(range(len(frames))):
#     output_frame = Image.fromarray(np.concatenate([np.zeros([1080, 1920//4, 3], dtype=np.uint8), np.zeros([1080, 1920//4, 3], dtype=np.uint8) + 20, np.zeros([1080, 1920//4, 3], dtype=np.uint8) + 20, np.zeros([1080, 1920//4, 3], dtype=np.uint8) + metablue // 4], 1))
#     draw = ImageDraw.Draw(output_frame)

#     output_frame.paste(
#         Image.fromarray(frames[frame_idx][(max(h, w) - min(h, w))//2:][:min(h, w)]),
#         (15, 90*2-30)
#     )
#     draw.text((1920 // 4 * 0.5, 15 + (180-45) // 2), "Original", font=font, anchor="mm", fill=(230,230,230))

#     output_frame.paste(
#         Image.fromarray(frames_met1[frame_idx][(max(h, w) - min(h, w))//2:][:min(h, w)]),
#         (1920 // 4 * 1 + 15, 90*2-30)
#     )
#     output_frame.paste(
#         Image.fromarray(frames_met1_diff[frame_idx][(max(h, w) - min(h, w))//2:][:min(h, w)]),
#         (1920 // 4 * 1 + 15, 1080 // 2 + 90-15)
#     )
#     draw.text((1920 // 4 * 1.5, 15 + (180-45) // 2), other_met_names_[0], font=font, anchor="mm", fill=(128,128,128))

#     output_frame.paste(
#         Image.fromarray(frames_met2[frame_idx][(max(h, w) - min(h, w))//2:][:min(h, w)]),
#         (1920 // 4 * 2 + 15, 90*2-30)
#     )
#     output_frame.paste(
#         Image.fromarray(frames_met2_diff[frame_idx][(max(h, w) - min(h, w))//2:][:min(h, w)]),
#         (1920 // 4 * 2 + 15, 1080 // 2 + 90-15)
#     )
#     draw.text((1920 // 4 * 2.5, 15 + (180-45) // 2), other_met_names_[1], font=font, anchor="mm", fill=(128,128,128))


#     output_frame.paste(
#         Image.fromarray(frames_ours[frame_idx][(max(h, w) - min(h, w))//2:][:min(h, w)]),
#         (1920 // 4 * 3 + 15, 90*2-30)
#     )
#     output_frame.paste(
#         Image.fromarray(frames_ours_diff[frame_idx][(max(h, w) - min(h, w))//2:][:min(h, w)]),
#         (1920 // 4 * 3 + 15, 1080 // 2 + 90-15)
#     )
#     draw.text((1920 // 4 * 3.5, 15 + (180-45) // 2), "Video Seal", font=font, anchor="mm", fill=(230,230,230))#, fill=(0, round(100*1.), round(224*1.)))

#     output_frame.save(f"tmp_/{frame_idx:06d}.png")

In [None]:
# Typeface used for the column captions drawn by process_frame.
FONT = ImageFont.FreeTypeFont("./Optimistic.ttf", size=60, index=0)
# Number of dimmed frames written for each fade-in and each fade-out.
FADE_OUT = 4

# Result directory of our method; run_on_video reads "<video_id>_val_*.mp4" from it.
ours_dir = "/private/home/soucek/videoseal/results/original-0228_vseal_128bits_jnd_ftvid_complete-lr5-scaling_w0.2-stepsize1-v2"

# Result directories of the baselines. Order must match `baseline_names` below.
baselines = [
    # "/private/home/soucek/videoseal/results/baseline-hidden-ss1",
    "/private/home/soucek/videoseal/results/baseline-mbrs-ss1",
    "/private/home/soucek/videoseal/results/baseline-cin-ss1",
    "/private/home/soucek/videoseal/results/baseline-trustmark-ss1",
    "/private/home/soucek/videoseal/results/baseline-wam-ss1",
]

# Caption shown above each baseline column, index-aligned with `baselines`.
baseline_names = [
    # "HIDDEN",
    "MBRS",
    "CIN",
    "TrustMark",
    "WAM",
]

# NOTE(review): not read by the visible cells; run_on_video takes its own video_id argument.
video_id = "000"

In [None]:
def process_frame(frames_ori, frames_ours, frames_ours_diff, frames_baselines, frames_baselines_diff, baseline_names, frame_idx):
    """Compose one 1920x1080 comparison frame with one column per method.

    Columns are laid out left to right as "Original", the baselines, then
    "Video Seal". Each column shows its frame in the upper half and, when
    available, its difference frame in the lower half, under a caption.

    The column count follows the number of baselines instead of being fixed at
    four, so the last column is always the highlighted "Video Seal" one. With
    two baselines the result is identical to the former fixed layout. The paste
    margins are tuned for 450 px wide frames in 480 px columns; more than two
    baselines make the columns narrower than that.

    Args:
        frames_ori: Indexable sequence of original frames (uint8 RGB arrays).
        frames_ours: Frames watermarked by our method.
        frames_ours_diff: Difference frames of our method, or ``None``.
        frames_baselines: One frame sequence per baseline.
        frames_baselines_diff: One difference sequence (or ``None``) per baseline.
        baseline_names: Caption for each baseline, same order as the frames.
        frame_idx: Index of the frame to render from every sequence.

    Returns:
        The composed ``PIL.Image.Image``.

    Raises:
        ValueError: If the three baseline arguments differ in length (they
            used to be truncated silently by ``zip``).
    """
    if not (len(frames_baselines) == len(frames_baselines_diff) == len(baseline_names)):
        raise ValueError(
            "frames_baselines, frames_baselines_diff and baseline_names must have the same length"
        )

    canvas_w, canvas_h = 1920, 1080
    margin_x = 15
    frame_y = 90 * 2 - 30              # top edge of the watermarked frame
    diff_y = canvas_h // 2 + 90 - 15   # top edge of the difference frame
    caption_y = 15 + (180 - 45) // 2   # vertical centre of the caption

    names = ["Original"] + list(baseline_names) + ["Video Seal"]
    frames_list = [frames_ori] + list(frames_baselines) + [frames_ours]
    diff_list = [None] + list(frames_baselines_diff) + [frames_ours_diff]

    n_cols = len(names)
    last = n_cols - 1
    col_w = canvas_w // n_cols

    # Background: black behind the original, dark grey behind the baselines,
    # dimmed accent colour behind our method.
    columns = []
    for i in range(n_cols):
        # The last column absorbs any remainder so the canvas stays 1920 wide.
        width = col_w if i < last else canvas_w - col_w * last
        column = np.zeros([canvas_h, width, 3], dtype=np.uint8)
        if i == last:
            column = column + metablue // 4
        elif i > 0:
            column = column + 20
        columns.append(column)

    output_frame = Image.fromarray(np.concatenate(columns, 1))
    draw = ImageDraw.Draw(output_frame)

    for i, (name, frames, diff) in enumerate(zip(names, frames_list, diff_list)):
        # Baseline captions are greyed out; the first and last columns stay bright.
        fillcolor = (128, 128, 128) if 0 < i < last else (230, 230, 230)
        x = col_w * i + margin_x

        output_frame.paste(Image.fromarray(frames[frame_idx]), (x, frame_y))
        if diff is not None:
            output_frame.paste(Image.fromarray(diff[frame_idx]), (x, diff_y))
        draw.text((col_w * (i + 0.5), caption_y), name, font=FONT, anchor="mm", fill=fillcolor)

    return output_frame

def _save_dimmed(frame, level, path):
    """Save ``frame`` at ``level / FADE_OUT`` brightness (integer math) to ``path``."""
    Image.fromarray(np.array(frame) // FADE_OUT * level).save(path)


def _render_segment(frames_ori, frames_ours, frames_ours_diff, seg_frames, seg_diffs, seg_names, frame_prefix):
    """Write fade-in, all comparison frames and fade-out for one group of baselines.

    Returns the first frame number that is still unused afterwards.
    """
    n_frames = len(frames_ori)
    output_frame = None

    for frame_idx in tqdm.tqdm(range(n_frames)):
        output_frame = process_frame(frames_ori, frames_ours, frames_ours_diff, seg_frames, seg_diffs, seg_names, frame_idx)

        if frame_idx == 0:
            # Fade in from black using dimmed copies of the first frame.
            for level in range(FADE_OUT):
                _save_dimmed(output_frame, level, f"tmp_/{frame_prefix:06d}.png")
                frame_prefix += 1

        output_frame.save(f"tmp_/{frame_prefix + frame_idx:06d}.png")

    frame_prefix += n_frames

    if output_frame is not None:
        # Fade out to black using dimmed copies of the last frame.
        for level in reversed(range(FADE_OUT)):
            _save_dimmed(output_frame, level, f"tmp_/{frame_prefix:06d}.png")
            frame_prefix += 1

    return frame_prefix


def run_on_video(video_id, frame_prefix=0, baselines_per_segment=2):
    """Render the comparison frames of one video into ``tmp_/`` as numbered PNGs.

    The original, our watermarked/difference videos and every baseline's
    videos are decoded at a short side of 450 px and centre-cropped to a
    square. The baselines are then shown ``baselines_per_segment`` at a time:
    each segment replays the whole clip with a fade-in before and a fade-out
    after it. With the default of 2 and four configured baselines this yields
    the same two segments as before.

    Args:
        video_id: Identifier prefix of the ``<id>_val_*.mp4`` files to read
            from ``ours_dir`` and from each directory in ``baselines``.
        frame_prefix: Number of the first PNG to write.
        baselines_per_segment: How many baselines share the screen at once.

    Returns:
        The next unused frame number, to pass to a following call.

    Raises:
        ValueError: If ``baselines_per_segment`` is smaller than 1 or the
            original video decodes to zero frames.
    """
    if baselines_per_segment < 1:
        raise ValueError(f"baselines_per_segment must be >= 1, got {baselines_per_segment!r}")

    def _load(path):
        return get_video_frames(path, output_resolution=450)

    frames_ori = _load(f"{ours_dir}/{video_id}_val_0_ori.mp4")
    if len(frames_ori) == 0:
        raise ValueError(f"video {video_id!r} decoded to zero frames")

    # Centre crop to a square. The end bound is start + side, so the crop is
    # square even when the two side lengths differ by an odd number.
    h, w = frames_ori.shape[1:3]
    side = min(h, w)
    y_from = (h - side) // 2
    x_from = (w - side) // 2
    crop = (slice(None), slice(y_from, y_from + side), slice(x_from, x_from + side))

    frames_ori = frames_ori[crop]
    frames_ours = _load(f"{ours_dir}/{video_id}_val_1_wm.mp4")[crop]
    frames_ours_diff = _load(f"{ours_dir}/{video_id}_val_2_diff.mp4")[crop]

    frames_baselines = []
    frames_baselines_diff = []
    for dir_ in baselines:
        frames_baselines.append(_load(f"{dir_}/{video_id}_val_1_wm.mp4")[crop])
        frames_baselines_diff.append(_load(f"{dir_}/{video_id}_val_2_diff.mp4")[crop])

    os.makedirs("tmp_", exist_ok=True)

    # max(..., 1) keeps one segment (original vs. ours) when no baseline is configured.
    for start in range(0, max(len(frames_baselines), 1), baselines_per_segment):
        stop = start + baselines_per_segment
        frame_prefix = _render_segment(
            frames_ori,
            frames_ours,
            frames_ours_diff,
            frames_baselines[start:stop],
            frames_baselines_diff[start:stop],
            baseline_names[start:stop],
            frame_prefix,
        )

    return frame_prefix

In [7]:
# Render the selected clips back to back; every call continues the frame
# numbering where the previous clip stopped.
frame_prefix = 0
for _clip_id in ("003", "004", "011", "008"):
    frame_prefix = run_on_video(_clip_id, frame_prefix)

100%|██████████| 72/72 [00:43<00:00,  1.66it/s]
100%|██████████| 72/72 [00:41<00:00,  1.75it/s]
100%|██████████| 72/72 [00:46<00:00,  1.55it/s]
100%|██████████| 72/72 [00:44<00:00,  1.63it/s]
100%|██████████| 72/72 [00:39<00:00,  1.83it/s]
100%|██████████| 72/72 [00:36<00:00,  1.95it/s]
100%|██████████| 72/72 [00:46<00:00,  1.54it/s]
100%|██████████| 72/72 [00:44<00:00,  1.60it/s]


In [None]:
# ./ffmpeg-7.0.2-amd64-static/ffmpeg -framerate 15 -pattern_type glob -i 'tmp_/*.png' -c:v libx265 -pix_fmt yuv420p -preset veryslow -crf 23 out.mp4