In [5]:
import cv2
import numpy as np
import os
from scipy.ndimage import label as scipy_label

# 3_pexels-cottonbro-5329478_2160p

In [6]:
def resize_padding(image, size=(1920,1080), color=(0, 0, 0)):
    """
    Fit an image inside a canvas of the given size and pad the remainder.

    The image is scaled by a single factor (aspect ratio preserved) so that it
    fits inside ``size``, then a constant-colour border is added so the result
    has the requested width and height.

    Args:
        image: array whose first two axes are (height, width).
        size: target ``(width, height)`` in pixels.
        color: fill value for the border, passed to ``cv2.copyMakeBorder``.
            Defaults to black, which is what this function always used.

    Returns:
        The resized and padded image.
    """
    h, w = image.shape[:2]
    ratio = min(size[1] / h, size[0] / w)
    # int() truncates; clamp to 1 so a very thin input never asks cv2.resize
    # for a zero-sized output.
    new_h = max(1, int(h * ratio))
    new_w = max(1, int(w * ratio))
    image = cv2.resize(image, (new_w, new_h))

    # Split the leftover space as evenly as possible; the odd pixel (if any)
    # goes to the bottom / right side.
    delta_w = size[0] - new_w
    delta_h = size[1] - new_h
    top = delta_h // 2
    bottom = delta_h - top
    left = delta_w // 2
    right = delta_w - left

    return cv2.copyMakeBorder(image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)

def postprocess(mask):
    """
    Remove small spurious blobs from a soft mask, in place.

    Pixels above 0.1 are grouped into connected components (scipy's default
    connectivity). When there is more than one component:

    * if the largest component covers less than 1% of ``mask.size``, the whole
      mask is zeroed;
    * otherwise every component smaller than 10% of the largest one is zeroed.

    Pixels at or below 0.1 are never touched, except when the whole mask is
    cleared.

    Args:
        mask: numeric array, modified in place.

    Returns:
        None. The result is written into ``mask``.
    """
    regions, n_regions = scipy_label(mask > 0.1)
    if n_regions <= 1:
        # NOTE(review): a single component is kept even when it is tiny; the
        # 1% rule below only applies when several components exist. Confirm
        # this is intended.
        return

    # One pass over the label image instead of one full scan per region.
    areas = np.bincount(regions.ravel(), minlength=n_regions + 1)
    areas[0] = 0  # label 0 is background and must never win the argmax
    max_area_id = areas.argmax()
    max_area = areas[max_area_id]

    if max_area * 1.0 / mask.size < 0.01:
        mask[:] = 0
        return

    # Look-up table: label -> "is noise". Background is excluded explicitly,
    # because its area entry was forced to 0 above.
    is_noise = areas < max_area * 0.1
    is_noise[0] = False
    mask[is_noise[regions]] = 0

In [4]:
# Render per-mask colour overlays for one clip: for every selected frame, each
# mask gets its own tinted copy of the frame and the copies are tiled side by side.
# The names assigned here (video_name, data_dir, outdir, frame_names) are
# notebook globals that the following cells read.
video_name = "3_pexels-cottonbro-5329478_2160p"
data_dir = "/home/chuongh/vm2m/data/vis/real_qual"
outdir = "/home/chuongh/vm2m/data/vis/website"

# Frame file names in sorted (lexicographic) order.
frame_names = sorted(os.listdir(os.path.join(data_dir, "images", video_name)))

# One tint per mask index. Presumably BGR, since the values are blended straight
# into cv2.imread output -- TODO confirm. Only four entries: a fifth mask would
# raise IndexError at colors[i].
colors = [(255, 0, 255), (255, 255, 0), (0, 165, 255), (255, 255, 255)]
for frame_name in frame_names:
    # Keep only frames whose numeric file stem lies in [130, 270].
    if int(frame_name.replace(".jpg", "")) < 130 or int(frame_name.replace(".jpg", "")) > 270:
        continue
    frame = cv2.imread(os.path.join(data_dir, "images", video_name, frame_name))
    # Masks for this frame live in a folder named after the frame stem.
    mask_names = os.listdir(os.path.join(data_dir, "masks", video_name, frame_name.replace(".jpg", "")))
    mask_names = sorted(mask_names)
    all_overlays = []
    
    for i, mask_name in enumerate(mask_names):
        mask = cv2.imread(os.path.join(data_dir, "masks", video_name, frame_name.replace(".jpg", ""), mask_name), cv2.IMREAD_GRAYSCALE)
        color = colors[i]
        color = np.array(color)[None, None]
        mask = mask[:, :, None]
        # Broadcast to H x W x 3: the tint where the mask is non-zero, 0 elsewhere.
        mask = (mask > 0) * color
        overlay = frame.copy()
        # 50/50 blend. `mask > 0` is evaluated per channel, so a channel whose
        # tint component is 0 keeps the original frame value.
        overlay[mask > 0] = mask[mask > 0] * 0.5 + overlay[mask > 0] * 0.5
        all_overlays.append(overlay)
    # Tile the overlays horizontally, then fit them to resize_padding's default canvas.
    all_overlays = np.concatenate(all_overlays, axis=1)
    all_overlays = resize_padding(all_overlays)

    out_path = os.path.join(outdir, video_name, "video", frame_name)
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    cv2.imwrite(out_path, all_overlays)

In [11]:
def process_frames(frame_names, video_name, lower, upper, subdir, outdir, data_root=None):
    """
    Composite each mask of the selected frames over a green background and save the tiled result.

    For every frame whose numeric file stem lies in ``[lower, upper]``, each mask
    image found in ``<root>/<subdir>/<video_name>/<frame stem>/`` is scaled to
    [0, 1], cleaned in place by ``postprocess`` and used as a blend weight:
    ``frame * mask + green * (1 - mask)``. The composites are concatenated
    horizontally, passed through ``resize_padding`` and written to
    ``<outdir>/<video_name>/<subdir>/<frame_name>``.

    Args:
        frame_names: frame file names of the form ``"<number>.jpg"``.
        video_name: clip folder name under the data root.
        lower: first frame number to process (inclusive).
        upper: last frame number to process (inclusive).
        subdir: folder under the data root that holds the masks; also used as
            the output sub-folder.
        outdir: root folder for the written images.
        data_root: dataset root. When None, the notebook-level ``data_dir``
            is used, as before.

    Raises:
        IOError: if a frame or mask image cannot be read.
    """
    root = data_dir if data_root is None else data_root

    for frame_name in frame_names:
        frame_id = frame_name.replace(".jpg", "")
        if not lower <= int(frame_id) <= upper:
            continue

        frame_path = os.path.join(root, "images", video_name, frame_name)
        frame = cv2.imread(frame_path)
        if frame is None:
            raise IOError("could not read frame: " + frame_path)

        # The background is the same for every mask of this frame; build it once.
        green = np.zeros_like(frame)
        green[:, :, 1] = 255

        mask_dir = os.path.join(root, subdir, video_name, frame_id)
        all_overlays = []
        for mask_name in sorted(os.listdir(mask_dir)):
            mask_path = os.path.join(mask_dir, mask_name)
            mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
            if mask is None:
                raise IOError("could not read mask: " + mask_path)
            mask = mask / 255.0
            postprocess(mask)  # edits `mask` in place
            mask = mask[:, :, None]
            all_overlays.append(frame * mask + green * (1.0 - mask))

        all_overlays = np.concatenate(all_overlays, axis=1)
        all_overlays = resize_padding(all_overlays)

        out_path = os.path.join(outdir, video_name, subdir, frame_name)
        os.makedirs(os.path.dirname(out_path), exist_ok=True)
        cv2.imwrite(out_path, all_overlays)
# InstMatt


In [12]:
# Green-background composites from the masks under "instmatt", frames 130-270.
process_frames(frame_names, video_name, 130, 270, "instmatt", outdir)

In [13]:
# Green-background composites from the masks under "sparsemat_hr", frames 130-270.
process_frames(frame_names, video_name, 130, 270, "sparsemat_hr", outdir)

In [14]:
# Green-background composites from the masks under "mgm_single_tcvom", frames 130-270.
process_frames(frame_names, video_name, 130, 270, "mgm_single_tcvom", outdir)

In [15]:
# Green-background composites from the masks under "mgm_stacked_tcvom", frames 130-270.
process_frames(frame_names, video_name, 130, 270, "mgm_stacked_tcvom", outdir)

In [16]:
# Green-background composites from the masks under "ours_tokens", frames 130-270.
process_frames(frame_names, video_name, 130, 270, "ours_tokens", outdir)

# 2_pexels-artem-podrez-6003997_2160p

In [20]:
# Render per-mask colour overlays for one clip: for every selected frame, each
# mask gets its own tinted copy of the frame and the copies are tiled side by side.
# The names assigned here (video_name, data_dir, outdir, frame_names) are
# notebook globals that the following cells read.
video_name = "2_pexels-artem-podrez-6003997_2160p"
data_dir = "/home/chuongh/vm2m/data/vis/real_qual"
outdir = "/home/chuongh/vm2m/data/vis/website"

# Frame file names in sorted (lexicographic) order.
frame_names = sorted(os.listdir(os.path.join(data_dir, "images", video_name)))

# One tint per mask index. Presumably BGR, since the values are blended straight
# into cv2.imread output -- TODO confirm. Only four entries: a fifth mask would
# raise IndexError at colors[i].
colors = [(255, 0, 255), (255, 255, 0), (0, 165, 255), (255, 255, 255)]
for frame_name in frame_names:
    # Keep only frames whose numeric file stem lies in [0, 175].
    if int(frame_name.replace(".jpg", "")) < 0 or int(frame_name.replace(".jpg", "")) > 175:
        continue
    frame = cv2.imread(os.path.join(data_dir, "images", video_name, frame_name))
    # Masks for this frame live in a folder named after the frame stem.
    mask_names = os.listdir(os.path.join(data_dir, "masks", video_name, frame_name.replace(".jpg", "")))
    mask_names = sorted(mask_names)
    all_overlays = []
    
    for i, mask_name in enumerate(mask_names):
        mask = cv2.imread(os.path.join(data_dir, "masks", video_name, frame_name.replace(".jpg", ""), mask_name), cv2.IMREAD_GRAYSCALE)
        color = colors[i]
        color = np.array(color)[None, None]
        mask = mask[:, :, None]
        # Broadcast to H x W x 3: the tint where the mask is non-zero, 0 elsewhere.
        mask = (mask > 0) * color
        overlay = frame.copy()
        # 70% tint / 30% frame. `mask > 0` is evaluated per channel, so a channel
        # whose tint component is 0 keeps the original frame value.
        overlay[mask > 0] = mask[mask > 0] * 0.7 + overlay[mask > 0] * 0.3
        all_overlays.append(overlay)
    # Tile the overlays horizontally, then fit them to resize_padding's default canvas.
    all_overlays = np.concatenate(all_overlays, axis=1)
    all_overlays = resize_padding(all_overlays)

    out_path = os.path.join(outdir, video_name, "video", frame_name)
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    cv2.imwrite(out_path, all_overlays)

In [19]:
# Green-background composites from the masks under "instmatt", frames 0-175.
process_frames(frame_names, video_name, 0, 175, "instmatt", outdir)

In [21]:
# Green-background composites from the masks under "sparsemat_hr", frames 0-175.
process_frames(frame_names, video_name, 0, 175, "sparsemat_hr", outdir)

In [22]:
# Green-background composites from the masks under "mgm_single_tcvom", frames 0-175.
process_frames(frame_names, video_name, 0, 175, "mgm_single_tcvom", outdir)

In [24]:
# Green-background composites from the masks under "mgm_stacked_tcvom", frames 0-175.
process_frames(frame_names, video_name, 0, 175, "mgm_stacked_tcvom", outdir)

In [27]:
# Green-background composites from the masks under "ours_tokens", frames 0-175.
process_frames(frame_names, video_name, 0, 175, "ours_tokens", outdir)

In [None]:
for subdir in ['video', 'instmatt', 'sparsemat_hr', 'mgm_single_tcvom', 'mgm_stacked_tcvom', 'ours_tokens']:
    input = os.path.join(outdir, video_name, subdir)
    !ffmpeg -y -framerate 3 -pattern_type glob -i "{input}/*.jpg" -c:v libx264 -vf "pad=ceil(iw/2)*2:ceil(ih/2)*2" -pix_fmt yuv420p {input}.mp4

# 3_production_id_4122569_2160p

In [44]:
# Render one overlay image per selected frame: every mask is tinted onto the
# same frame (no side-by-side tiling in this cell).
# The names assigned here (video_name, data_dir, outdir, frame_names) are
# notebook globals that the following cells read.
video_name = "3_production_id_4122569_2160p"
data_dir = "/home/chuongh/vm2m/data/vis/real_qual"
outdir = "/home/chuongh/vm2m/data/vis/website"

# Frame file names in sorted (lexicographic) order.
frame_names = sorted(os.listdir(os.path.join(data_dir, "images", video_name)))

# One tint per mask index. Presumably BGR, since the values are blended straight
# into cv2.imread output -- TODO confirm. Only four entries: a fifth mask would
# raise IndexError at colors[i].
colors = [(255, 0, 255), (255, 255, 0), (0, 165, 255), (255, 255, 255)]
for frame_name in frame_names:
    # Keep only frames whose numeric file stem lies in [0, 120].
    if int(frame_name.replace(".jpg", "")) < 0 or int(frame_name.replace(".jpg", "")) > 120:
        continue
    frame = cv2.imread(os.path.join(data_dir, "images", video_name, frame_name))
    # Masks for this frame live in a folder named after the frame stem.
    mask_names = os.listdir(os.path.join(data_dir, "masks", video_name, frame_name.replace(".jpg", "")))
    mask_names = sorted(mask_names)
    # This empty list is never filled; the name is rebound to the padded frame below.
    all_overlays = []
    
    for i, mask_name in enumerate(mask_names):
        mask = cv2.imread(os.path.join(data_dir, "masks", video_name, frame_name.replace(".jpg", ""), mask_name), cv2.IMREAD_GRAYSCALE)
        color = colors[i]
        color = np.array(color)[None, None]
        mask = mask[:, :, None]
        # Broadcast to H x W x 3: the tint where the mask is non-zero, 0 elsewhere.
        mask = (mask > 0) * color
        # Tint `frame` itself (no copy), so successive masks accumulate on one
        # image. 70% tint / 30% frame, applied per channel where the tint is non-zero.
        frame[mask > 0] = mask[mask > 0] * 0.7 + frame[mask > 0] * 0.3
    #     all_overlays.append(overlay)
    # all_overlays = np.concatenate(all_overlays, axis=1)
    all_overlays = resize_padding(frame)

    out_path = os.path.join(outdir, video_name, "video", frame_name)
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    cv2.imwrite(out_path, all_overlays)

In [49]:
# Per-mask [start, end) column ranges, applied after each composite has been
# fitted to the resize_padding canvas.
crop_sizes = [[800, 1770], [200, 1080], [800, 1920]]
def process_frames_with_crops(frame_names, video_name, lower, upper, subdir, outdir, mask_names, crop_sizes, left_crop=590, data_root=None):
    """
    Composite the given masks over green, crop each composite to its own column range, and tile them.

    For every frame whose numeric file stem lies in ``[lower, upper]``:
    the frame and each mask drop their first ``left_crop`` columns; each mask is
    scaled to [0, 1], cleaned in place by ``postprocess`` and used as a blend
    weight (``frame * mask + green * (1 - mask)``); the composite goes through
    ``resize_padding`` and is cut to ``crop_sizes[i]`` columns. The pieces are
    concatenated horizontally, passed through ``resize_padding`` again and
    written to ``<outdir>/<video_name>/<subdir>/<frame_name>``.

    Args:
        frame_names: frame file names of the form ``"<number>.jpg"``.
        video_name: clip folder name under the data root.
        lower: first frame number to process (inclusive).
        upper: last frame number to process (inclusive).
        subdir: folder under the data root that holds the masks; also used as
            the output sub-folder.
        outdir: root folder for the written images.
        mask_names: mask file names to load for every frame, in tile order.
        crop_sizes: one ``[start, end]`` column range per entry of ``mask_names``.
        left_crop: number of leading columns removed from frame and masks
            (default 590, the value previously hard-coded).
        data_root: dataset root. When None, the notebook-level ``data_dir``
            is used, as before.

    Raises:
        IOError: if a frame or mask image cannot be read.
        IndexError: if ``crop_sizes`` has fewer entries than ``mask_names``.
    """
    root = data_dir if data_root is None else data_root

    for frame_name in frame_names:
        frame_id = frame_name.replace(".jpg", "")
        if not lower <= int(frame_id) <= upper:
            continue

        frame_path = os.path.join(root, "images", video_name, frame_name)
        frame = cv2.imread(frame_path)
        if frame is None:
            raise IOError("could not read frame: " + frame_path)
        frame = frame[:, left_crop:]

        # The background is the same for every mask of this frame; build it once.
        green = np.zeros_like(frame)
        green[:, :, 1] = 255

        mask_dir = os.path.join(root, subdir, video_name, frame_id)
        all_overlays = []
        for i, mask_name in enumerate(mask_names):
            mask_path = os.path.join(mask_dir, mask_name)
            mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
            if mask is None:
                raise IOError("could not read mask: " + mask_path)
            mask = mask / 255.0
            mask = mask[:, left_crop:]
            postprocess(mask)  # edits `mask` in place
            mask = mask[:, :, None]
            overlay = frame * mask + green * (1.0 - mask)
            overlay = resize_padding(overlay)
            col_start, col_end = crop_sizes[i][0], crop_sizes[i][1]
            all_overlays.append(overlay[:, col_start:col_end])

        all_overlays = np.concatenate(all_overlays, axis=1)
        all_overlays = resize_padding(all_overlays)

        out_path = os.path.join(outdir, video_name, subdir, frame_name)
        os.makedirs(os.path.dirname(out_path), exist_ok=True)
        cv2.imwrite(out_path, all_overlays)
# InstMatt


In [34]:
# Cropped green-background composites from the masks under "instmatt", frames 0-120, three fixed mask files.
process_frames_with_crops(frame_names, video_name, 0, 120, "instmatt", outdir, ["00.png", "01.png", "02.png"], crop_sizes)

In [35]:
# Cropped green-background composites from the masks under "sparsemat_hr", frames 0-120, three fixed mask files.
process_frames_with_crops(frame_names, video_name, 0, 120, "sparsemat_hr", outdir, ["00.png", "01.png", "02.png"], crop_sizes)

In [36]:
# Cropped green-background composites from the masks under "mgm_single_tcvom", frames 0-120, three fixed mask files.
process_frames_with_crops(frame_names, video_name, 0, 120, "mgm_single_tcvom", outdir, ["00.png", "01.png", "02.png"], crop_sizes)

In [37]:
# Cropped green-background composites from the masks under "mgm_stacked_tcvom", frames 0-120, three fixed mask files.
process_frames_with_crops(frame_names, video_name, 0, 120, "mgm_stacked_tcvom", outdir, ["00.png", "01.png", "02.png"], crop_sizes)

In [41]:
# Cropped green-background composites from the masks under "ours_ss", frames 0-120, three fixed mask files.
process_frames_with_crops(frame_names, video_name, 0, 120, "ours_ss", outdir, ["00.png", "01.png", "02.png"], crop_sizes)

In [45]:
for subdir in ['video', 'instmatt', 'sparsemat_hr', 'mgm_single_tcvom', 'mgm_stacked_tcvom', 'ours_ss']:
    input = os.path.join(outdir, video_name, subdir)
    !ffmpeg -y -framerate 3 -pattern_type glob -i "{input}/*.jpg" -c:v libx264 -vf "pad=ceil(iw/2)*2:ceil(ih/2)*2" -pix_fmt yuv420p {input}.mp4

ffmpeg version 4.2.2 Copyright (c) 2000-2019 the FFmpeg developers
  built with gcc 7.3.0 (crosstool-NG 1.23.0.449-a04d0)
  configuration: --prefix=/tmp/build/80754af9/ffmpeg_1587154242452/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placeho --cc=/tmp/build/80754af9/ffmpeg_1587154242452/_build_env/bin/x86_64-conda_cos6-linux-gnu-cc --disable-doc --enable-avresample --enable-gmp --enable-hardcoded-tables --enable-libfreetype --enable-libvpx --enable-pthreads --enable-libopus --enable-postproc --enable-pic --enable-pthreads --enable-shared --enable-static --enable-version3 --enable-zlib --enable-libmp3lame --disable-nonfree --enable-gpl --enable-gnutls --disable-openssl --enable-libopenh264 --enable-libx264
  libavutil      56. 31.100 / 56. 31.100
  libavcodec     58. 54.100 / 58. 54.100
  libavformat    58. 29.100 / 58. 29.100


frame=   25 fps=0.0 q=-1.0 Lsize=    3038kB time=00:00:07.33 bitrate=3393.4kbits/s speed=7.57x    
video:3037kB audio:0kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.038399%
[1;36m[libx264 @ 0x5642034cc900] [0mframe I:1     Avg QP:16.97  size:309926
[1;36m[libx264 @ 0x5642034cc900] [0mframe P:7     Avg QP:17.36  size:180669
[1;36m[libx264 @ 0x5642034cc900] [0mframe B:17    Avg QP:18.98  size: 90246
[1;36m[libx264 @ 0x5642034cc900] [0mconsecutive B-frames:  8.0%  0.0% 12.0% 80.0%
[1;36m[libx264 @ 0x5642034cc900] [0mmb I  I16..4:  1.5% 86.4% 12.1%
[1;36m[libx264 @ 0x5642034cc900] [0mmb P  I16..4:  1.0% 31.0%  4.8%  P16..4: 25.3% 24.5% 12.6%  0.0%  0.0%    skip: 0.8%
[1;36m[libx264 @ 0x5642034cc900] [0mmb B  I16..4:  0.1%  3.1%  1.1%  B16..8: 38.4% 20.7%  7.2%  direct:12.9%  skip:16.3%  L0:43.3% L1:38.3% BI:18.4%
[1;36m[libx264 @ 0x5642034cc900] [0m8x8 transform intra:82.6% inter:64.4%
[1;36m[libx264 @ 0x5642034cc900] [0mcoded y,uvDC,uvAC intra: 

# 6_production_id_4880458_2160p

In [47]:
# Render one overlay image per selected frame: a fixed list of masks is tinted
# onto the same frame, after dropping the first 590 columns of frame and masks.
# The names assigned here (video_name, data_dir, outdir, mask_names,
# frame_names) are notebook globals that the following cells read.
video_name = "6_production_id_4880458_2160p"
data_dir = "/home/chuongh/vm2m/data/vis/real_qual"
outdir = "/home/chuongh/vm2m/data/vis/website"
# Explicit mask files, in the order used for tinting here and for tiling in the cells below.
mask_names = ["00.png", "02.png", "05.png", "01.png"]

# Frame file names in sorted (lexicographic) order.
frame_names = sorted(os.listdir(os.path.join(data_dir, "images", video_name)))

# One tint per mask index. Presumably BGR, since the values are blended straight
# into cv2.imread output -- TODO confirm.
colors = [(255, 0, 255), (255, 255, 0), (0, 165, 255), (255, 255, 255)]
for frame_name in frame_names:
    # Keep only frames whose numeric file stem lies in [0, 120].
    if int(frame_name.replace(".jpg", "")) < 0 or int(frame_name.replace(".jpg", "")) > 120:
        continue
    frame = cv2.imread(os.path.join(data_dir, "images", video_name, frame_name))
    # Drop the first 590 columns; the masks get the same slice below.
    frame = frame[:, 590:]
    # This empty list is never filled; the name is rebound to the padded frame below.
    all_overlays = []
    
    for i, mask_name in enumerate(mask_names):
        mask = cv2.imread(os.path.join(data_dir, "masks", video_name, frame_name.replace(".jpg", ""), mask_name), cv2.IMREAD_GRAYSCALE)
        color = colors[i]
        color = np.array(color)[None, None]
        mask = mask[:, 590:, None]
        # Broadcast to H x W x 3: the tint where the mask is non-zero, 0 elsewhere.
        mask = (mask > 0) * color
        # Tint `frame` itself (no copy), so successive masks accumulate on one
        # image. 70% tint / 30% frame, applied per channel where the tint is non-zero.
        frame[mask > 0] = mask[mask > 0] * 0.7 + frame[mask > 0] * 0.3
    #     all_overlays.append(overlay)
    # all_overlays = np.concatenate(all_overlays, axis=1)
    all_overlays = resize_padding(frame)

    out_path = os.path.join(outdir, video_name, "video", frame_name)
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    cv2.imwrite(out_path, all_overlays)

In [50]:
# Rebind the global crop table for this clip: one [start, end] column range per entry of mask_names.
crop_sizes = [[250, 555], [435, 915], [755, 1285], [1120, 1672]]
# Cropped green-background composites from the masks under "instmatt", frames 0-120.
process_frames_with_crops(frame_names, video_name, 0, 120, "instmatt", outdir, mask_names, crop_sizes)

In [51]:
# Cropped green-background composites from the masks under "sparsemat_hr", frames 0-120.
process_frames_with_crops(frame_names, video_name, 0, 120, "sparsemat_hr", outdir, mask_names, crop_sizes)

In [52]:
# Cropped green-background composites from the masks under "mgm_single_tcvom", frames 0-120.
process_frames_with_crops(frame_names, video_name, 0, 120, "mgm_single_tcvom", outdir, mask_names, crop_sizes)

In [53]:
# Cropped green-background composites from the masks under "mgm_stacked_tcvom", frames 0-120.
process_frames_with_crops(frame_names, video_name, 0, 120, "mgm_stacked_tcvom", outdir, mask_names, crop_sizes)

In [54]:
# Cropped green-background composites from the masks under "ours_tokens", frames 0-120.
process_frames_with_crops(frame_names, video_name, 0, 120, "ours_tokens", outdir, mask_names, crop_sizes)

In [55]:
for subdir in ['video', 'instmatt', 'sparsemat_hr', 'mgm_single_tcvom', 'mgm_stacked_tcvom', 'ours_tokens']:
    input = os.path.join(outdir, video_name, subdir)
    !ffmpeg -y -framerate 3 -pattern_type glob -i "{input}/*.jpg" -c:v libx264 -vf "pad=ceil(iw/2)*2:ceil(ih/2)*2" -pix_fmt yuv420p {input}.mp4

ffmpeg version 4.2.2 Copyright (c) 2000-2019 the FFmpeg developers
  built with gcc 7.3.0 (crosstool-NG 1.23.0.449-a04d0)
  configuration: --prefix=/tmp/build/80754af9/ffmpeg_1587154242452/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placeho --cc=/tmp/build/80754af9/ffmpeg_1587154242452/_build_env/bin/x86_64-conda_cos6-linux-gnu-cc --disable-doc --enable-avresample --enable-gmp --enable-hardcoded-tables --enable-libfreetype --enable-libvpx --enable-pthreads --enable-libopus --enable-postproc --enable-pic --enable-pthreads --enable-shared --enable-static --enable-version3 --enable-zlib --enable-libmp3lame --disable-nonfree --enable-gpl --enable-gnutls --disable-openssl --enable-libopenh264 --enable-libx264
  libavutil      56. 31.100 / 56. 31.100
  libavcodec     58. 54.100 / 58. 54.100
  libavformat    58. 29.100 / 58. 29.100
