In [1]:
import cv2
import os
import shutil
import ffmpeg
import numpy as np
import subprocess
from typing import Union
import PIL
from skimage.metrics import structural_similarity as ssim
from skimage.transform import resize
import json
from concurrent.futures import ProcessPoolExecutor

In [2]:
unprocessed_video_path = 'Tears_of_Steel_1080p.mov'

def get_video_info(video_path):
    # FFprobe command to get specific video information in JSON format
    ffprobe_cmd = [
        'ffprobe', '-v', 'error', '-print_format', 'json', '-show_format',
        '-show_streams', '-select_streams', 'v:0', video_path
    ]

    # Execute FFprobe command and capture output
    result = subprocess.run(ffprobe_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    # Check if FFprobe command was successful
    if result.returncode != 0:
        print("Error: Failed to execute FFprobe command")
        return None

    # Parse JSON output
    video_info = json.loads(result.stdout)

    # Extract specific information
    info_dict = {
        'codec_name': video_info['streams'][0]['codec_name'],
        'codec_tag_string': video_info['streams'][0]['codec_tag_string'],
        'width': video_info['streams'][0]['width'],
        'height': video_info['streams'][0]['height'],
        'average_frame_rate': video_info['streams'][0]['avg_frame_rate'],
        'duration': video_info['format']['duration'],
        'bit_rate': video_info['format']['bit_rate'],
        'bits_per_raw_sample': video_info['streams'][0].get('bits_per_raw_sample', None),
        'encoder': video_info['streams'][0].get('encoder', None),
        'file_name': video_info['format']['filename'],
        'format_name': video_info['format']['format_name'],
        'size': video_info['format']['size']
    }

    return info_dict

unprocessed_video_info = get_video_info(unprocessed_video_path)
unprocessed_video_info

{'codec_name': 'h264',
 'codec_tag_string': 'avc1',
 'width': 1920,
 'height': 800,
 'average_frame_rate': '24/1',
 'duration': '734.166667',
 'bit_rate': '6361215',
 'bits_per_raw_sample': '8',
 'encoder': None,
 'file_name': 'Tears_of_Steel_1080p.mov',
 'format_name': 'mov,mp4,m4a,3gp,3g2,mj2',
 'size': '583774083'}

In [3]:
# TODO: simplify by using ffmpeg filter scene, also does not save last scene, irrelevant for now but maybe I can fix it
def split_video_into_scenes(video_path: str, threshold: int = 100, output_format: str = 'avi', vcodec: str = 'huffyuv', max_scenes: int = None) -> str:
    """
    Split a video into scenes based on the difference between consecutive frames.

    Args:
    - video_path (str): Path to the input video file.
    - threshold (int): Threshold value for detecting scene changes. Default is 100.
    - output_format (str): Output video format for the scenes. Default is 'avi'.
    - vcodec (str): Video codec for the output videos. Default is 'huffyuv'.
    - max_scenes (int): Maximum number of scenes to extract. Default is None (extract all scenes).

    Returns:
    - str: Path to the folder containing the extracted scenes.

    This function reads a video file frame by frame, calculates the absolute difference between consecutive frames,
    and detects scene changes based on the mean difference exceeding a threshold. It saves each scene as a separate
    video file in a folder named after the input video file.

    """

    # Extract folder path and create the folder if it doesn't exist
    folder_path, _ = video_path.rsplit('.', 1)
    if os.path.exists(folder_path):
        shutil.rmtree(folder_path)
    os.mkdir(folder_path)

    # Open the video file
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print("Error: Could not open video.")
        return

    prev_frame = None
    scene_start = 0
    scene_number = 1
    scenes_extracted = 0

    # Parse the video frame by frame
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        if prev_frame is not None:
            # Calculate absolute difference between frames
            diff = cv2.absdiff(prev_frame, frame)
            mean_diff = diff.mean()
            # Detect scene changes based on mean difference exceeding the threshold
            if mean_diff > threshold:
                # Get times of scene start and end, and save new video between those times
                scene_end = cap.get(cv2.CAP_PROP_POS_MSEC)
                # Save scene using ffmpeg
                output_file = os.path.join(folder_path, f"scene_{scene_number}.{output_format}")
                ffmpeg.input(video_path, ss=scene_start/1000, to=scene_end/1000).output(output_file, vcodec=vcodec).run()
                scene_start = scene_end
                scene_number += 1
                scenes_extracted += 1
                if max_scenes is not None and scenes_extracted >= max_scenes:
                    break

        prev_frame = frame.copy()

    cap.release()

    return folder_path

scene_format = 'avi'
scene_codec = 'huffyuv'
max_scenes = 5
scene_similarity_threshold = 45

video_folder = split_video_into_scenes(
    unprocessed_video_path, 
    scene_similarity_threshold, 
    scene_format, 
    scene_codec,
    max_scenes
)

ffmpeg version 6.1.1 Copyright (c) 2000-2023 the FFmpeg developers
  built with gcc 12.3.0 (conda-forge gcc 12.3.0-5)
  configuration: --prefix=/home/itec/emanuele/.conda/envs/inpainting --cc=/home/conda/feedstock_root/build_artifacts/ffmpeg_1706918361713/_build_env/bin/x86_64-conda-linux-gnu-cc --cxx=/home/conda/feedstock_root/build_artifacts/ffmpeg_1706918361713/_build_env/bin/x86_64-conda-linux-gnu-c++ --nm=/home/conda/feedstock_root/build_artifacts/ffmpeg_1706918361713/_build_env/bin/x86_64-conda-linux-gnu-nm --ar=/home/conda/feedstock_root/build_artifacts/ffmpeg_1706918361713/_build_env/bin/x86_64-conda-linux-gnu-ar --disable-doc --disable-openssl --enable-demuxer=dash --enable-hardcoded-tables --enable-libfreetype --enable-libharfbuzz --enable-libfontconfig --enable-libopenh264 --enable-libdav1d --enable-gnutls --enable-libmp3lame --enable-libvpx --enable-libass --enable-pthreads --enable-vaapi --enable-libopenvino --enable-gpl --enable-libx264 --enable-libx265 --enable-libaom --

In [4]:
def encode_video(input_file, output_file, vcodec='libx265', resolution=None, preset=None, bitrate=None, crf=None):
    
    # delete folder is if exists, then create it
    output_folder, _ = output_file.rsplit('/', 1)
    if os.path.exists(output_folder):
        shutil.rmtree(output_folder)
    os.mkdir(output_folder)
    
    try:
        command = ['ffmpeg', '-y', '-i', input_file, '-c:v', vcodec]

        if resolution:
            resolution_str = f'{resolution[0]}:{resolution[1]}'
            command.extend(['-vf', f'scale={resolution_str}:force_original_aspect_ratio=increase,crop={resolution_str}'])

        if crf:
            command.extend(['-crf', crf, output_file])
        elif preset:
            command.extend(['-preset', preset, output_file])
        elif bitrate:
            command.extend(['-b:v', bitrate, output_file])
        
        subprocess.run(command)
        print("Video encoding successful!")
    except subprocess.CalledProcessError as e:
        print(f"Video encoding failed: {e}")

scene_number = 5
encoded_format = 'mp4'
resolution = (800, 600)
unprocessed_video_path = f'{video_folder}/scene_{scene_number}.{scene_format}'
video_folder = f'{video_folder}/scene_{scene_number}'
encoded_video_path = f'{video_folder}/encoded.{encoded_format}'

encode_video(
    unprocessed_video_path, 
    encoded_video_path, 
    vcodec = 'libx264', 
    resolution = resolution, 
    crf='0'
)

encoded_video_info = get_video_info(encoded_video_path)
encoded_video_info

ffmpeg version 6.1.1 Copyright (c) 2000-2023 the FFmpeg developers
  built with gcc 12.3.0 (conda-forge gcc 12.3.0-5)
  configuration: --prefix=/home/itec/emanuele/.conda/envs/inpainting --cc=/home/conda/feedstock_root/build_artifacts/ffmpeg_1706918361713/_build_env/bin/x86_64-conda-linux-gnu-cc --cxx=/home/conda/feedstock_root/build_artifacts/ffmpeg_1706918361713/_build_env/bin/x86_64-conda-linux-gnu-c++ --nm=/home/conda/feedstock_root/build_artifacts/ffmpeg_1706918361713/_build_env/bin/x86_64-conda-linux-gnu-nm --ar=/home/conda/feedstock_root/build_artifacts/ffmpeg_1706918361713/_build_env/bin/x86_64-conda-linux-gnu-ar --disable-doc --disable-openssl --enable-demuxer=dash --enable-hardcoded-tables --enable-libfreetype --enable-libharfbuzz --enable-libfontconfig --enable-libopenh264 --enable-libdav1d --enable-gnutls --enable-libmp3lame --enable-libvpx --enable-libass --enable-pthreads --enable-vaapi --enable-libopenvino --enable-gpl --enable-libx264 --enable-libx265 --enable-libaom --

Video encoding successful!


[out#0/mp4 @ 0x56475da79f40] video:58950kB audio:243kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.016966%
frame=  367 fps= 79 q=-1.0 Lsize=   59203kB time=00:00:15.27 bitrate=31743.0kbits/s dup=1 drop=0 speed=3.29x    
[libx264 @ 0x56475da64300] frame I:2     Avg QP: 0.00  size:270590
[libx264 @ 0x56475da64300] frame P:365   Avg QP: 0.00  size:163900
[libx264 @ 0x56475da64300] mb I  I16..4: 23.1%  5.0% 71.8%
[libx264 @ 0x56475da64300] mb P  I16..4:  2.2%  3.6%  6.0%  P16..4: 34.1% 23.4% 26.0%  0.0%  0.0%    skip: 4.6%
[libx264 @ 0x56475da64300] 8x8 transform intra:29.4% inter:53.6%
[libx264 @ 0x56475da64300] coded y,uvDC,uvAC intra: 97.0% 94.0% 93.9% inter: 90.1% 81.5% 81.2%
[libx264 @ 0x56475da64300] i16 v,h,dc,p: 52% 47%  1%  0%
[libx264 @ 0x56475da64300] i8 v,h,dc,ddl,ddr,vr,hd,vl,hu: 54% 40%  2%  0%  1%  1%  1%  1%  1%
[libx264 @ 0x56475da64300] i4 v,h,dc,ddl,ddr,vr,hd,vl,hu: 44% 34%  4%  2%  4%  4%  3%  2%  2%
[libx264 @ 0x56475da64300] i8c dc,h,v,p:  4% 

{'codec_name': 'h264',
 'codec_tag_string': 'avc1',
 'width': 800,
 'height': 600,
 'average_frame_rate': '24/1',
 'duration': '15.291667',
 'bit_rate': '31716185',
 'bits_per_raw_sample': '8',
 'encoder': None,
 'file_name': 'Tears_of_Steel_1080p/scene_5/encoded.mp4',
 'format_name': 'mov,mp4,m4a,3gp,3g2,mj2',
 'size': '60624169'}

In [16]:
def split_video_into_frames(video_path: str, frames_folder: str) -> str:
    
    # delete folder is if exists, then create it
    if os.path.exists(frames_folder):
        shutil.rmtree(frames_folder)
    os.mkdir(frames_folder)

    subprocess.run(f'ffmpeg -i {video_path} {frames_folder}/frame%04d.png', shell=True)
    return frames_folder

def split_image_into_squares(image: np.array, l: int) -> np.array:
    """
    Split an image into squares of a specific size.

    Args:
    - image: numpy array representing the image with shape [n, m, c]
    - l: integer representing the side length of each square

    Returns:
    - numpy array with shape [n//l, m//l, l, l, c] containing the squares
    """
    n, m, c = image.shape
    num_rows = n // l
    num_cols = m // l
    squares = np.zeros((num_rows, num_cols, l, l, c), dtype=image.dtype)
    for i in range(num_rows):
        for j in range(num_cols):
            squares[i, j] = image[i*l:(i+1)*l, j*l:(j+1)*l, :]
    return squares

def filter_squares(squares: np.array, filter_factor: int, inpaint_white: bool = True) -> Union[np.array, np.array]:
    """
    Filter squares based on their indices.

    Args:
    - squares: numpy array with shape [n, m, l, l, c] containing the squares

    Returns:
    - filtered_squares: numpy array with shape [n_filtered, m_filtered, l, l, c] containing the filtered squares
    - filter_mask: numpy array with shape [n, m] indicating which blocks were kept (1) or filtered out (0)
    """
    n, m, _, _, _ = squares.shape
    if inpaint_white:
        filter_mask = np.ones(squares.shape, dtype=int) * 255
        filter_mask[::filter_factor, ::filter_factor] = 0
    else:
        filter_mask = np.zeros(squares.shape, dtype=int)
        filter_mask[::filter_factor, ::filter_factor] = 255

    filtered_squares = squares[::filter_factor, ::filter_factor]

    return filtered_squares, filter_mask

def flatten_squares_into_image(squares: np.array) -> np.array:
    """
    Reconstruct the original image from split squares.

    Args:
    - squares: numpy array with shape [n, m, l, l, c] containing the split squares

    Returns:
    - numpy array representing the reconstructed image
    """
    n, m, l, _, c = squares.shape
    num_rows = n * l
    num_cols = m * l
    image = np.zeros((num_rows, num_cols, c), dtype=squares.dtype)
    for i in range(n):
        for j in range(m):
            image[i*l:(i+1)*l, j*l:(j+1)*l, :] = squares[i, j]
    return image

def save_image(frame: np.array, output_folder: str, file_name: str) -> None:

    # Create the output directory if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)
    # save filtered frame, overwrite if it exists
    cv2.imwrite(output_folder + '/' + file_name, frame, [int(cv2.IMWRITE_PNG_COMPRESSION), 0])

def process_frame_server_side(frame_name, frames_folder, square_size, filter_factor):
    frame = cv2.imread(frames_folder + '/' + frame_name)
    squared_frame = split_image_into_squares(frame, square_size)
    filtered_squares, mask_squares = filter_squares(squared_frame, filter_factor)
    filtered_flattened = flatten_squares_into_image(filtered_squares)
    mask_flattened = flatten_squares_into_image(mask_squares)
    parent_folder, _ = frames_folder.rsplit('/', 1)
    save_image(filtered_flattened, parent_folder + '/' + 'shrunk', frame_name)
    save_image(mask_flattened, parent_folder + '/' + 'masks', frame_name)
    return frame_name

def process_frames_in_parallel(frames_folder, processing_function, num_processes, **kwargs):
    frame_names = [frame_name for frame_name in os.listdir(frames_folder) if frame_name.endswith('.png')]
    
    with ProcessPoolExecutor(max_workers=num_processes) as executor:
        results = []
        for frame_name in frame_names:
            results.append(executor.submit(processing_function, frame_name, frames_folder, **kwargs))
        
        # Retrieve results
        processed_frame_names = [future.result() for future in results]

    return processed_frame_names

def reconstruct_video_from_frames(frames_folder, output_video_path, frame_rate=30):

    # If the video file exists, delete it
    if os.path.exists(output_video_path):
        os.remove(output_video_path)

    # Construct ffmpeg command
    cmd = ['ffmpeg', '-framerate', str(frame_rate), '-pattern_type', 'glob', '-i', f'{frames_folder}/*.png',
                  '-c:v', 'libx264', '-pix_fmt', 'yuv420p', output_video_path]

    # Run ffmpeg command
    subprocess.run(cmd)

encoded_frames_folder, _ = encoded_video_path.rsplit('.', 1)
split_video_into_frames(encoded_video_path, encoded_frames_folder)
square_size = 20
filter_factor = 2
num_processes = 16
processed_frame_names = process_frames_in_parallel(
    encoded_frames_folder, 
    process_frame_server_side, 
    num_processes,
    square_size=square_size, 
    filter_factor=filter_factor
)
frame_rate = eval(unprocessed_video_info['average_frame_rate'])
reconstruct_video_from_frames(video_folder + '/shrunk', video_folder + '/shrunk.mp4', frame_rate)

ffmpeg version 6.1.1 Copyright (c) 2000-2023 the FFmpeg developers
  built with gcc 12.3.0 (conda-forge gcc 12.3.0-5)
  configuration: --prefix=/home/itec/emanuele/.conda/envs/inpainting --cc=/home/conda/feedstock_root/build_artifacts/ffmpeg_1706918361713/_build_env/bin/x86_64-conda-linux-gnu-cc --cxx=/home/conda/feedstock_root/build_artifacts/ffmpeg_1706918361713/_build_env/bin/x86_64-conda-linux-gnu-c++ --nm=/home/conda/feedstock_root/build_artifacts/ffmpeg_1706918361713/_build_env/bin/x86_64-conda-linux-gnu-nm --ar=/home/conda/feedstock_root/build_artifacts/ffmpeg_1706918361713/_build_env/bin/x86_64-conda-linux-gnu-ar --disable-doc --disable-openssl --enable-demuxer=dash --enable-hardcoded-tables --enable-libfreetype --enable-libharfbuzz --enable-libfontconfig --enable-libopenh264 --enable-libdav1d --enable-gnutls --enable-libmp3lame --enable-libvpx --enable-libass --enable-pthreads --enable-vaapi --enable-libopenvino --enable-gpl --enable-libx264 --enable-libx265 --enable-libaom --

In [6]:
def encode_video(input_video_path: str, output_video_path: str, codec: str, bitrate: str = None, crf: int = None) -> Union[str, None]:
    """
    Encode a video file with the specified codec and bitrate.

    Args:
    - input_video_path: Path to the input video file.
    - output_video_path: Path to save the output video.
    - codec: Codec to use for encoding (default: libx264).
    - bitrate: Bitrate for the output video (default: '10M' for 10 Mbps).

    Returns:
    - A warning string if the parameters are not correctly selected
    - None otherwise
    """

    # If the video file exists, delete it
    if os.path.exists(output_video_path):
        os.remove(output_video_path)
    
    if bitrate:
        command = ['ffmpeg', '-y', '-i', input_video_path, '-c:v', codec, '-b:v', bitrate, output_video_path]
    elif crf:
        command = ['ffmpeg', '-y', '-i', input_video_path, '-c:v', codec, '-crf', str(crf), output_video_path]
    else:
        return 'Please specify either a target bitrate or a compression rate factor'
    subprocess.run(command)

def calculate_mse(img1, img2):
    err = np.sum((img1.astype("float") - img2.astype("float")) ** 2)
    err /= float(img1.shape[0] * img1.shape[1])
    return err

def calculate_psnr(img1, img2):
    mse = calculate_mse(img1, img2)
    if mse == 0:
        return float('inf')
    PIXEL_MAX = 255.0
    return 20 * np.log10(PIXEL_MAX / np.sqrt(mse))

def calculate_ssim(img1, img2):
    img1_gray = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
    img2_gray = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
    return ssim(img1_gray, img2_gray)

def calculate_vmaf(distorted_video_path, original_video_path, model_path):
    command = [
        'ffmpeg',
        '-i', distorted_video_path,
        '-i', original_video_path,
        '-filter_complex libvmaf -f null -'
    ]
    result = subprocess.run(' '.join(command), shell=True, capture_output=True, text=True)
    print(result)
    output = result.stdout
    print(output)
    vmaf_score = json.loads(output)['aggregate']['VMAF_score']
    return vmaf_score

def compare_videos(video1_path, video2_path, model_path):
    cap1 = cv2.VideoCapture(video1_path)
    cap2 = cv2.VideoCapture(video2_path)
    frame_count = min(int(cap1.get(cv2.CAP_PROP_FRAME_COUNT)), int(cap2.get(cv2.CAP_PROP_FRAME_COUNT)))
    mse_total = 0
    psnr_total = 0
    ssim_total = 0
    # vmaf_score = calculate_vmaf(video1_path, video2_path, model_path)
    vmaf_score = 0
    for _ in range(frame_count):
        ret1, frame1 = cap1.read()
        ret2, frame2 = cap2.read()
        if not (ret1 and ret2):
            break
        mse_total += calculate_mse(frame1, frame2)
        psnr_total += calculate_psnr(frame1, frame2)
        ssim_total += calculate_ssim(frame1, frame2)
    cap1.release()
    cap2.release()
    mse_avg = mse_total / frame_count
    psnr_avg = psnr_total / frame_count
    ssim_avg = ssim_total / frame_count
    return mse_avg, psnr_avg, ssim_avg, vmaf_score

In [17]:
# CLIENT SIDE

def stretch_frame(frame: np.array, mask: np.array, filter_factor: int) -> np.array:
    
    '''
    Arguments:
    - frame: numpy array with shape [n, m, l, l, c] containing the squares
    - mask: numpy array with shape [N, M, l, l, c] containing [n//l, m//l] white squares and [N - n//l, M - m//l] black squares
    - l: an integer specifying the side in pixel of a square

    Returns:
    - stretched_frame: numpy array with shape [N, M, l, l, c] where the squares of frame are substituted to the white squares of mask
    '''

    # Get dimensions
    n, m, _, _, _ = frame.shape
    N, _, _, _, _ = mask.shape

    stretched_frame = np.copy(mask)

    for i in range(n):
        for j in range(m):
            # iterate through each frame block
            frame_block = frame[i, j, :, :, :]
            # put it into the corresponding mask white block by multiplying its indices by the filter factor
            stretched_frame[int(i * filter_factor), int(j * filter_factor), :, :, :] = frame_block
    return stretched_frame

def process_frame_client_side(frame_name, frames_folder, square_size, mask_squares, filter_factor):
    frame = cv2.imread(frames_folder + '/' + frame_name)
    squares = split_image_into_squares(frame, square_size)
    stretched_squares = stretch_frame(squares, mask_squares, filter_factor)
    stretched_flattened = flatten_squares_into_image(stretched_squares)
    parent_folder, _ = frames_folder.rsplit('/', 1)
    save_image(stretched_flattened, parent_folder + '/stretched', frame_name)
    return frame_name

square_size = 20
filter_factor = 2
num_processes = 16

# TODO: now we're passing a predetermined mask, needs a function that takes as input one or more masks, 
# and yields one mask after another then loops back from the first, each time it's called
mask_frame_path = video_folder + '/masks/frame0001.png'
mask_frame = cv2.imread(mask_frame_path)
mask_squares = split_image_into_squares(mask_frame, square_size)
shrunk_frames_folder = video_folder + '/shrunk'
processed_frame_names = process_frames_in_parallel(
    shrunk_frames_folder, 
    process_frame_client_side, 
    num_processes,
    square_size=square_size, 
    mask_squares=mask_squares, 
    filter_factor=filter_factor
)
reconstruct_video_from_frames(video_folder + '/stretched', video_folder + '/stretched.mp4', frame_rate)

ffmpeg version 6.1.1 Copyright (c) 2000-2023 the FFmpeg developers
  built with gcc 12.3.0 (conda-forge gcc 12.3.0-5)
  configuration: --prefix=/home/itec/emanuele/.conda/envs/inpainting --cc=/home/conda/feedstock_root/build_artifacts/ffmpeg_1706918361713/_build_env/bin/x86_64-conda-linux-gnu-cc --cxx=/home/conda/feedstock_root/build_artifacts/ffmpeg_1706918361713/_build_env/bin/x86_64-conda-linux-gnu-c++ --nm=/home/conda/feedstock_root/build_artifacts/ffmpeg_1706918361713/_build_env/bin/x86_64-conda-linux-gnu-nm --ar=/home/conda/feedstock_root/build_artifacts/ffmpeg_1706918361713/_build_env/bin/x86_64-conda-linux-gnu-ar --disable-doc --disable-openssl --enable-demuxer=dash --enable-hardcoded-tables --enable-libfreetype --enable-libharfbuzz --enable-libfontconfig --enable-libopenh264 --enable-libdav1d --enable-gnutls --enable-libmp3lame --enable-libvpx --enable-libass --enable-pthreads --enable-vaapi --enable-libopenvino --enable-gpl --enable-libx264 --enable-libx265 --enable-libaom --

In [None]:







# inpaint with ProPainter

# def inpaint_with_Propainter(
#         input_path: str, 
#         mask_path: str, 
#         neighbor_length: int = 10, 
#         ref_stride: int = 10, 
#         resize_ratio: float = 1.0, 
#         output_height: int = None, 
#         output_width: int = None
# )

propainter_input_video = scene_name + 'stretched.mp4'
propainter_input_mask = mask_frame_path
# TODO: make into function with optional parameters mentioned in repo readme
cmd = f'python inference_propainter.py --video {propainter_input_video} --mask {propainter_input_mask} --height {height} --width {width}'

# compare inpainting with original

# Compare videos
inpainted = '/home/itec/emanuele/ProPainter/results/stretched/inpaint_out.mp4'
# TODO: instead of inpainting from original, we need to inpaint from the encoded version the client receives to be precise
original = '/home/itec/emanuele/ProPainter/inputs/video_completion/stretched.mp4'
model_path = '/home/shared/athena/vmaf/model/vmaf_v0.6.1.pkl'
mse_avg, psnr_avg, ssim_avg, vmaf_score = compare_videos(original, inpainted, model_path)
print("Average MSE:", mse_avg)
print("Average PSNR:", psnr_avg)
print("Average SSIM:", ssim_avg)
print("VMAF Score:", vmaf_score) # TODO: vmaf still does not work

# TODO: either merge the requirements of each inpainting model, or move this code to a script 
#       so that the environment can be changed during execution (probably a mess to implement)

# TODO: fix the results of this model, reimplement old one, find new ones, especially for video inpainting.

# TODO: develop a model that trains on videos with masked portions, and outputs the goodness of fit to the original. 
#       So it learns to recognize which masks would allow a certain video to be inpainted with minimal error.
#       The step further would be to have another model training on videos, and outputting the best mask for them.

# TODO: find or create metric that measures how "generated" an image or video looks. 
#       It should realize which artefacts are typical of GenAI and penalize them, or in a simpler form, 
#       penalize also cartoons and such, but then it can only be applied to real videos.

# TODO: we can use animation videos to improve quality and train our own network with only a few animation videos 
#       to showcase the potential of the approach, and then say that a bigger, more general model can reach this results on any video

# TODO: I don't like having functions that return nothing. Can they be included in other functions, or written outside of a function?

# TODO: recognize objects, generate masks that follow them, attribute a fidelity coefficient to each object, 
#       inpaint each object at different qualities, based on their fidelity coefficients. 
#       To do this with current model, inpaint the video once per object, then patch together all inpaintings. 
#       This is slow, but will offer the same result as a single, multi-inpainting model.

# TODO: split into blocks, calculate quality of each block against the uncompressed, average them

# TODO: multithread the processing to speed up