# take video and split it into scenes

In [29]:
import cv2
import os
import shutil
import ffmpeg
import numpy as np
import subprocess
from typing import Union
import PIL

# TODO: I don;t like having functions that return nothing. Can they be included in other functions, or written outside of a function?
    
def save_scene(video_path: str, output_folder: str, start_time: int, end_time: int, scene_number: int) -> None:
    output_file = os.path.join(output_folder, f"scene_{scene_number}.avi")
    ffmpeg.input(video_path, ss=start_time/1000, to=end_time/1000).output(output_file, vcodec='huffyuv').run()

# TODO: add overwrite and simplify by using ffmpeg filter scene, also goes in error at the last scene, irrelevant for now but maybe I can fix it
def split_video_into_scenes(video_path: str, output_folder: str, threshold: int = 100) -> None:

# watch the video, identify scene changes, set threshold accordingly 
        
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print("Error: Could not open video.")
        return

    prev_frame = None
    scene_start = 0
    scene_number = 1

    # parse the video frame by frame
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        if prev_frame is not None:
            # if frame is different enough from previous, consider it a scene change
            diff = cv2.absdiff(prev_frame, frame)
            mean_diff = diff.mean()
            if mean_diff > threshold:
                # get times of scene start and end, save new video between those times
                scene_end = cap.get(cv2.CAP_PROP_POS_MSEC)
                save_scene(video_path, output_folder, scene_start, scene_end, scene_number)
                scene_start = scene_end
                scene_number += 1

        prev_frame = frame.copy()

    # Save the last scene
    save_scene(video_path, output_folder, scene_start, cap.get(cv2.CAP_PROP_POS_MSEC), scene_number)

    cap.release()

unprocessed_video_path = "Tears_of_Steel_1080p.mov"
parent_video_folder = "Tears_of_Steel_1080p"
os.makedirs(parent_video_folder, exist_ok=True)

# split_video_into_scenes(unprocessed_video_path, parent_video_folder, threshold=45)

In [2]:
scene_path = 'Tears_of_Steel_1080p/scene_5.avi'

def split_video_into_frames(video_path: str) -> None:
    video_name, _ = video_path.rsplit('.', 1)
    # if folder exists, delete it, then create it
    if os.path.exists(video_name):
        shutil.rmtree(video_name)
    os.mkdir(video_name)
    subprocess.run(f'ffmpeg -i {video_path} {video_name}/frame%04d.png', shell=True)

# split_video_into_frames(scene_path)

In [46]:
def crop_image(image: np.array, width: int, height: int) -> np.array:
    """
    Crop the input image to the specified size centered around its center.
    
    Parameters:
        image (numpy.ndarray): Input image represented as a numpy array.
        width (int): Width of the region to be cropped.
        height (int): Height of the region to be cropped.
    
    Returns:
        numpy.ndarray: Cropped image.
    """
    # Calculate coordinates of the top-left corner for cropping
    center_x = image.shape[1] // 2
    center_y = image.shape[0] // 2
    x = max(0, center_x - width // 2)
    y = max(0, center_y - height // 2)
    
    # Crop the image
    cropped_image = image[y:y+height, x:x+width]
    
    return cropped_image

def split_image_into_squares(image: np.array, l: int) -> np.array:
    """
    Split an image into squares of a specific size.

    Args:
    - image: numpy array representing the image with shape [n, m, c]
    - l: integer representing the side length of each square

    Returns:
    - numpy array with shape [n//l, m//l, l, l, c] containing the squares
    """
    n, m, c = image.shape
    num_rows = n // l
    num_cols = m // l
    squares = np.zeros((num_rows, num_cols, l, l, c), dtype=image.dtype)
    for i in range(num_rows):
        for j in range(num_cols):
            squares[i, j] = image[i*l:(i+1)*l, j*l:(j+1)*l, :]
    return squares

def filter_squares(squares: np.array, filter_factor: int, inpaint_white: bool = True) -> Union[np.array, np.array]:
    """
    Filter squares based on their indices.

    Args:
    - squares: numpy array with shape [n, m, l, l, c] containing the squares

    Returns:
    - filtered_squares: numpy array with shape [n_filtered, m_filtered, l, l, c] containing the filtered squares
    - filter_mask: numpy array with shape [n, m] indicating which blocks were kept (1) or filtered out (0)
    """
    n, m, _, _, _ = squares.shape
    if inpaint_white:
        filter_mask = np.ones(squares.shape, dtype=int) * 255
        filter_mask[::filter_factor, ::filter_factor] = 0
    else:
        filter_mask = np.zeros(squares.shape, dtype=int)
        filter_mask[::filter_factor, ::filter_factor] = 255

    filtered_squares = squares[::filter_factor, ::filter_factor]

    return filtered_squares, filter_mask

def flatten_squares_into_image(squares: np.array) -> np.array:
    """
    Reconstruct the original image from split squares.

    Args:
    - squares: numpy array with shape [n, m, l, l, c] containing the split squares

    Returns:
    - numpy array representing the reconstructed image
    """
    n, m, l, _, c = squares.shape
    num_rows = n * l
    num_cols = m * l
    image = np.zeros((num_rows, num_cols, c), dtype=squares.dtype)
    for i in range(n):
        for j in range(m):
            image[i*l:(i+1)*l, j*l:(j+1)*l, :] = squares[i, j]
    return image

# TODO: this is fundamentally only one command, and returns None, probably better to remove the function and just call the command in the loop
def save_image(frame: np.array, output_folder: str, file_name: str) -> None:

    # Create the output directory if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)
    # save filtered frame, overwrite if it exists
    cv2.imwrite(output_folder + '/' + file_name, frame, [int(cv2.IMWRITE_PNG_COMPRESSION), 0])

square_size = 16
filter_factor = 2
width, height = (512, 512)
scene_name, _ = scene_path.rsplit('.', 1)
for frame_name in os.listdir(scene_name):
    if frame_name.endswith('.png'):
        frame = cv2.imread(scene_name + '/' + frame_name)
        frame = crop_image(frame, width, height)
        save_image(frame, scene_name + '/' + 'cropped_original', frame_name)
        squared_frame = split_image_into_squares(frame, square_size)
        filtered_squares, mask_squares = filter_squares(squared_frame, filter_factor)
        filtered_flattened = flatten_squares_into_image(filtered_squares)
        mask_flattened = flatten_squares_into_image(mask_squares)
        save_image(filtered_flattened, scene_name + '/' + 'reconstructed_filtered', frame_name)
        save_image(mask_flattened, scene_name + '/' + 'reconstructed_filtered_masks', frame_name)

In [47]:
# take all frames and recreate video

def get_frame_rate(video_path: str) -> int:
    """
    Get the frame rate of a video file using OpenCV.

    Args:
    - video_path: Path to the video file.

    Returns:
    - Frame rate of the video file.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print("Error: Couldn't open the video file.")
        return None

    frame_rate = cap.get(cv2.CAP_PROP_FPS)
    cap.release()
    return frame_rate

def reconstruct_video_from_frames(input_folder: str, frame_rate: int) -> None:
    """
    Create a lossless video from frames in a folder using OpenCV with FFV1 codec.

    Args:
    - input_folder: Path to the folder containing the frames.
    - frame_rate: Frame rate of the output video.

    Returns:
    - None
    """
    frame_files = sorted(os.listdir(input_folder))
    output_video_path = input_folder + '.avi'

    # If the video file exists, delete it
    if os.path.exists(output_video_path):
        os.remove(output_video_path)

    # Get the first frame to obtain its dimensions
    first_frame_path = os.path.join(input_folder, frame_files[0])
    first_frame = cv2.imread(first_frame_path)
    frame_height, frame_width, _ = first_frame.shape

    # Initialize VideoWriter object with FFV1 codec for lossless compression
    fourcc = cv2.VideoWriter_fourcc(*'FFV1')  # FFV1 codec for lossless compression
    out = cv2.VideoWriter(output_video_path, fourcc, frame_rate, (frame_width, frame_height))

    # Iterate through each frame file and write it to the video
    for frame_file in frame_files:
        frame_path = os.path.join(input_folder, frame_file)
        frame = cv2.imread(frame_path)
        out.write(frame)

    # Release the VideoWriter object
    out.release()

frame_rate = get_frame_rate(scene_path)
reconstruct_video_from_frames(scene_name + '/cropped_original', frame_rate)
reconstruct_video_from_frames(scene_name + '/reconstructed_filtered', frame_rate)


# encode video and check size reduction from encoded original


In [48]:
def encode_video(input_video_path: str, output_video_path: str, codec: str, bitrate: str = None, crf: int = None) -> Union[str, None]:
    """
    Encode a video file with the specified codec and bitrate.

    Args:
    - input_video_path: Path to the input video file.
    - output_video_path: Path to save the output video.
    - codec: Codec to use for encoding (default: libx264).
    - bitrate: Bitrate for the output video (default: '10M' for 10 Mbps).

    Returns:
    - A warning string if the parameters are not correctly selected
    - None otherwise
    """

    # If the video file exists, delete it
    if os.path.exists(output_video_path):
        os.remove(output_video_path)
    
    if bitrate:
        command = ['ffmpeg', '-y', '-i', input_video_path, '-c:v', codec, '-b:v', bitrate, output_video_path]
    elif crf:
        command = ['ffmpeg', '-y', '-i', input_video_path, '-c:v', codec, '-crf', str(crf), output_video_path]
    else:
        return 'Please specify either a target bitrate or a compression rate factor'
    subprocess.run(command)

# constants
codec = 'libx264'
bitrate = '1M'
crf = 23

# encode original
original = scene_name + '/cropped_original.avi'
encoded_original = scene_name + '/encoded_original.mp4'
encode_video(original, encoded_original, codec, crf=20)

# encode filtered
filtered = scene_name + '/reconstructed_filtered.avi'
encoded_filtered = scene_name + '/encoded_filtered.mp4'
encode_video(filtered, encoded_filtered, codec, crf=20)

ffmpeg version 6.1.1 Copyright (c) 2000-2023 the FFmpeg developers
  built with gcc 12.3.0 (conda-forge gcc 12.3.0-5)
  configuration: --prefix=/home/itec/emanuele/.conda/envs/inpainting --cc=/home/conda/feedstock_root/build_artifacts/ffmpeg_1706918361713/_build_env/bin/x86_64-conda-linux-gnu-cc --cxx=/home/conda/feedstock_root/build_artifacts/ffmpeg_1706918361713/_build_env/bin/x86_64-conda-linux-gnu-c++ --nm=/home/conda/feedstock_root/build_artifacts/ffmpeg_1706918361713/_build_env/bin/x86_64-conda-linux-gnu-nm --ar=/home/conda/feedstock_root/build_artifacts/ffmpeg_1706918361713/_build_env/bin/x86_64-conda-linux-gnu-ar --disable-doc --disable-openssl --enable-demuxer=dash --enable-hardcoded-tables --enable-libfreetype --enable-libharfbuzz --enable-libfontconfig --enable-libopenh264 --enable-libdav1d --enable-gnutls --enable-libmp3lame --enable-libvpx --enable-libass --enable-pthreads --enable-vaapi --enable-libopenvino --enable-gpl --enable-libx264 --enable-libx265 --enable-libaom --

In [49]:
# compare sizes
encoded_original_size = os.stat(encoded_original).st_size
encoded_filtered_size = os.stat(encoded_filtered).st_size

print(f'encoded original: {encoded_original_size}')
print(f'encoded filtered size reduction: {1 - encoded_filtered_size/encoded_original_size}%')

# TODO: split into blocks, calculate quality of each block against the uncompressed, average them

encoded original: 3451699
encoded filtered size reduction: 0.6223086080217308%


# Client side

In [50]:
def expand_frame(frame: np.array, mask: np.array) -> np.array:
    
    '''
    Arguments:
    - frame: numpy array with shape [n, m, l, l, c] containing the squares
    - mask: numpy array with shape [N, M, l, l, c] containing [n//l, m//l] white squares and [N - n//l, M - m//l] black squares
    - l: an integer specifying the side in pixel of a square

    Returns:
    - expanded_frame: numpy array with shape [N, M, l, l, c] where the squares of frame are substituted to the white squares of mask
    '''

    # Get dimensions
    n, m, _, _, _ = frame.shape
    N, _, _, _, _ = mask.shape
    filter_factor = N / n

    expanded_frame = np.copy(mask)

    for i in range(n):
        for j in range(m):
            # iterate through each frame block
            frame_block = frame[i, j, :, :, :]
            # put it into the corresponding mask white block by multiplying its indices by the filter factor
            expanded_frame[int(i * filter_factor), int(j * filter_factor), :, :, :] = frame_block
    return expanded_frame

# split encoded video into frames
encoded_filtered_path = scene_name + '/encoded_filtered.mp4'
encoded_filtered_name, _ = encoded_filtered_path.rsplit('.', 1)
split_video_into_frames(encoded_filtered_path)

# TODO: now we're passing a predetermined mask, needs a function that takes as input one or more masks, and yields one mask after another then loops back from the first, each time it's called
mask_frame_path = scene_name + '/reconstructed_filtered_masks/frame0001.png'
mask_frame = cv2.imread(mask_frame_path)
mask_squares = split_image_into_squares(mask_frame, square_size)

# TODO: instead of filtered, shrunk is a more telling name (I put the comment here but it applies to anything I called filtered)
encoded_expanded_folder = scene_name + '/encoded_expanded'

# iterate through frames, expand them, save them
for frame_name in os.listdir(encoded_filtered_name):
    if frame_name.endswith('.png'):
        encoded_filtered_frame_path = encoded_filtered_name + '/' + frame_name
        encoded_filtered_frame = cv2.imread(encoded_filtered_frame_path)
        encoded_filtered_squares = split_image_into_squares(encoded_filtered_frame, square_size)
        expanded_squares = expand_frame(encoded_filtered_squares, mask_squares)
        expanded_frame = flatten_squares_into_image(expanded_squares)
        save_image(expanded_frame, encoded_expanded_folder, frame_name)

reconstruct_video_from_frames(encoded_expanded_folder, frame_rate)

ffmpeg version 6.1.1 Copyright (c) 2000-2023 the FFmpeg developers
  built with gcc 12.3.0 (conda-forge gcc 12.3.0-5)
  configuration: --prefix=/home/itec/emanuele/.conda/envs/inpainting --cc=/home/conda/feedstock_root/build_artifacts/ffmpeg_1706918361713/_build_env/bin/x86_64-conda-linux-gnu-cc --cxx=/home/conda/feedstock_root/build_artifacts/ffmpeg_1706918361713/_build_env/bin/x86_64-conda-linux-gnu-c++ --nm=/home/conda/feedstock_root/build_artifacts/ffmpeg_1706918361713/_build_env/bin/x86_64-conda-linux-gnu-nm --ar=/home/conda/feedstock_root/build_artifacts/ffmpeg_1706918361713/_build_env/bin/x86_64-conda-linux-gnu-ar --disable-doc --disable-openssl --enable-demuxer=dash --enable-hardcoded-tables --enable-libfreetype --enable-libharfbuzz --enable-libfontconfig --enable-libopenh264 --enable-libdav1d --enable-gnutls --enable-libmp3lame --enable-libvpx --enable-libass --enable-pthreads --enable-vaapi --enable-libopenvino --enable-gpl --enable-libx264 --enable-libx265 --enable-libaom --

In [None]:
# inpaint video
import torch
from diffusers import StableDiffusionInpaintPipeline
pipe = StableDiffusionInpaintPipeline.from_pretrained(
    "runwayml/stable-diffusion-inpainting",
    variant='fp16',
    torch_dtype=torch.float16,
)
pipe = pipe.to("cuda")
prompt = ''

mask_frame_path = scene_name + '/reconstructed_filtered_masks/frame0001.png'
mask_frame = PIL.Image.open(mask_frame_path)

inpainted_folder = scene_name + '/inpainted'
os.makedirs(inpainted_folder, exist_ok=True)

for frame_name in os.listdir(encoded_expanded_folder):
    if frame_name.endswith('.png'):
        print(frame_name)
        frame_path = encoded_expanded_folder + '/' + frame_name
        # image and mask_image should be PIL images
        frame = PIL.Image.open(frame_path)
        # The mask structure is white for inpainting and black for keeping as is
        output_image = pipe(prompt=prompt, image=frame, mask_image=mask_frame).images[0]
        output_image.save(inpainted_folder + '/' + frame_name)

reconstruct_video_from_frames(inpainted_folder, frame_rate)

In [52]:
# TODO: check similarity between inpainted video and original