In [None]:
import os
import glob
import cv2
import numpy as np
import matplotlib.pyplot as plt
from moviepy.editor import VideoFileClip, concatenate_videoclips
from typing import Dict, Union
import ast
import operator as op
import ffmpeg

In [None]:
# Project root is taken to be the parent of the notebook's working directory.
ROOT_DIR = os.path.dirname(os.getcwd())
DATA_FOLDER = os.path.join(ROOT_DIR, "data")
# Folder holding the source clips. The default is a machine-specific mount;
# set the QUMRAN_FOLDER environment variable to run the notebook elsewhere.
QUMRAN_FOLDER = os.environ.get(
    "QUMRAN_FOLDER",
    "/media/jakki/Qumran/Videos/meow_tmp/example_matsi/left",
)

In [None]:
# Collect every .MP4 clip in the source folder, ordered by path.
# (Alternative source: the "example_videos_left" folder under DATA_FOLDER, with *.mp4.)
video_pattern = f'{QUMRAN_FOLDER}/*.MP4'
files = glob.glob(video_pattern)
files.sort()

In [None]:
# Show the sorted list of discovered video paths.
files

In [None]:
# Sanity check: container-level duration of the first clip as reported by ffprobe
# (raises IndexError if no files were found above).
float(ffmpeg.probe(files[0])["format"]["duration"])

In [None]:
def frame_similarity(frame1, frame2, pixel_threshold=50, verbose=True):
    """Score how different two frames are (lower means more similar).

    The absolute per-channel difference of the two frames is binarised: every
    channel value that differs by more than ``pixel_threshold`` becomes 255,
    everything else 0. The binarised values are summed and divided by the
    number of pixels (height * width).

    Note that the difference is taken on the frames as passed in, so for
    3-channel frames the score ranges from 0 to 3 * 255. It is NOT a
    percentage.
    NOTE(review): the original code converted both frames to grayscale but never
    used the result for the diff; the metric is kept as-is because downstream
    thresholds were chosen against it — confirm whether a grayscale diff was intended.

    Args:
        frame1: First frame as an array with at least (height, width) dimensions.
        frame2: Second frame, same shape as ``frame1``.
        pixel_threshold: Minimum absolute channel difference counted as "changed".
        verbose: When True, print the raw and per-pixel difference.

    Returns:
        The summed thresholded difference divided by the pixel count.

    Raises:
        ValueError: If either frame is None or the shapes differ.
    """
    if frame1 is None or frame2 is None:
        raise ValueError("frame_similarity requires two frames, got None")
    if frame1.shape != frame2.shape:
        raise ValueError(f"Frame shapes differ: {frame1.shape} vs {frame2.shape}")

    frame_height, frame_width = frame1.shape[:2]
    n_pixels = frame_height * frame_width

    diff_frame = cv2.absdiff(frame1, frame2)

    # cv2.threshold returns (retval, image); only the binarised image is needed.
    thresh_frame = cv2.threshold(src=diff_frame, thresh=pixel_threshold, maxval=255, type=cv2.THRESH_BINARY)[1]

    difference = np.sum(thresh_frame)
    difference_per_pixel = difference / n_pixels
    if verbose:
        print(difference)
        print(f"Difference per pixel: {difference_per_pixel}")

    return difference_per_pixel

In [None]:
# Whitelist of AST operator node types and the functions that implement them.
# Note that '^' is bitwise XOR (as in Python), not exponentiation.
allowed_operators = {ast.Add: op.add, ast.Sub: op.sub, ast.Mult: op.mul,
                     ast.Div: op.truediv, ast.Pow: op.pow, ast.BitXor: op.xor,
                     ast.USub: op.neg}

def eval_expr(expr):
    """Safely evaluate a purely arithmetic expression given as a string.

    Only numeric literals and the operators in ``allowed_operators`` are
    accepted; anything else (names, calls, strings, ...) raises ``TypeError``.

    >>> eval_expr('2^6')
    4
    >>> eval_expr('2**6')
    64
    >>> eval_expr('1 + 2*3**(4^5) / (6 + -7)')
    -5.0
    """
    return eval_(ast.parse(expr, mode='eval').body)

def eval_(node):
    """Recursively evaluate one AST node of an arithmetic expression.

    Raises:
        TypeError: If the node is not a numeric literal or uses an operator
            that is not whitelisted in ``allowed_operators``.
    """
    # ast.Num is deprecated since Python 3.8 (literals parse as ast.Constant),
    # so check ast.Constant and filter to real numbers explicitly.
    if isinstance(node, ast.Constant):  # <number>
        value = node.value
        # bool is a subclass of int but is not a numeric literal here.
        if isinstance(value, bool) or not isinstance(value, (int, float, complex)):
            raise TypeError(node)
        return value
    if isinstance(node, (ast.BinOp, ast.UnaryOp)):
        operator_fn = allowed_operators.get(type(node.op))
        if operator_fn is None:
            # Reject non-whitelisted operators the same way as other unsupported nodes.
            raise TypeError(node)
        if isinstance(node, ast.BinOp):  # <left> <operator> <right>
            return operator_fn(eval_(node.left), eval_(node.right))
        return operator_fn(eval_(node.operand))  # <operator> <operand> e.g., -1
    raise TypeError(node)

def get_video_info(video_path) -> Dict[str, Union[int, float, str]]:
    """Probe a video file with ffprobe and return its basic properties.

    Args:
        video_path: Path to the video file.

    Returns:
        A dict with ``file_path`` (str), ``frame_height`` and ``frame_width``
        (int), ``frame_rate`` (int, the average frame rate
        rounded to the nearest integer) and ``duration`` (float, taken from the
        container-level "format" section).

    Raises:
        ValueError: If the file contains no video stream.
        ZeroDivisionError: If the reported average frame rate has a zero
            denominator (e.g. "0/0").
    """
    probe = ffmpeg.probe(video_path)
    file_path = str(probe['format']['filename'])
    video_stream = next(
        (stream for stream in probe['streams'] if stream.get('codec_type') == 'video'),
        None,
    )
    if video_stream is None:
        raise ValueError(f"No video stream found in {video_path}")

    width = int(video_stream['width'])
    height = int(video_stream['height'])
    duration = float(probe['format']['duration'])
    # avg_frame_rate is a fraction string such as "30000/1001"; evaluate it
    # with the restricted arithmetic evaluator instead of eval().
    frame_rate = round(eval_expr(video_stream['avg_frame_rate']))

    return {
        "file_path" : file_path,
        "frame_height": height,
        "frame_width": width,
        "frame_rate": frame_rate,
        "duration": duration
    }

In [None]:
def get_last_frame(capture, duration, tail_seconds=3):
    """Return the last frame that can be decoded from an open capture.

    To avoid decoding the whole file, the capture is first positioned
    ``tail_seconds`` before the end and then read until no more frames come out.

    Args:
        capture: An opened ``cv2.VideoCapture``-like object (needs ``set`` and ``read``).
        duration: Video duration in seconds.
        tail_seconds: How far before the end to seek before reading.

    Returns:
        ``(success, frame)`` where ``success`` is False and ``frame`` is None
        when no frame could be read after seeking.
    """
    # Clamp so that clips shorter than the tail window seek to the start
    # instead of a negative timestamp.
    seek_ms = max(duration - tail_seconds, 0) * 1000
    capture.set(cv2.CAP_PROP_POS_MSEC, seek_ms)

    last_frame = None
    while True:
        ret, frame = capture.read()
        if not ret:
            break
        last_frame = frame

    return last_frame is not None, last_frame

In [None]:
# Grab the first and last frame of every video; these are compared later to
# work out which clip continues which.
frames = {}

for file_path in files:

    video_info = get_video_info(file_path)
    duration = video_info['duration']
    print(duration)

    capture = cv2.VideoCapture(file_path)
    try:
        # Read first frame
        capture.set(cv2.CAP_PROP_POS_FRAMES, 0)
        ret_first, first_frame = capture.read()

        # Read last frame
        ret_last, last_frame = get_last_frame(capture, duration)
    finally:
        # Always free the decoder, even if reading raised.
        capture.release()

    if not (ret_first and ret_last):
        print(f"Failed to read frame from video {file_path}, first frame: {ret_first}, last frame: {ret_last}")
        # Skip this video: storing None frames would crash the plotting and
        # comparison cells further down.
        continue

    frames[file_path] = {
        'first_frame': first_frame,
        'last_frame': last_frame
    }

In [None]:
# Preview the first and last frame of every video.
for video_path, video_frames in frames.items():
    for frame_label in ('first_frame', 'last_frame'):
        frame = video_frames[frame_label]
        if frame is None:
            print(f"No {frame_label} available for {video_path}")
            continue
        # OpenCV decodes frames as BGR while matplotlib expects RGB; convert so
        # the preview is not shown with red and blue swapped.
        plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        plt.title(f"{os.path.basename(video_path)}: {frame_label}")
        plt.show()

In [None]:
# Score every unordered pair of videos in both directions:
# (first frame of A vs last frame of B) and (last frame of A vs first frame of B).
n_files = len(frames)
similarity_matrix = {}

frame_items = list(frames.items())
for i in range(n_files):
    key1, values1 = frame_items[i]
    # Only pairs with i < j are scored; each pair stores both directions.
    for j in range(i + 1, n_files):
        key2, values2 = frame_items[j]

        first_vs_last = frame_similarity(values1['first_frame'], values2['last_frame'])
        last_vs_first = frame_similarity(values1['last_frame'], values2['first_frame'])

        similarity_matrix[(key1, key2)] = [first_vs_last, last_vs_first]

print(similarity_matrix)

In [None]:
# Flatten all pairwise scores into one ascending list for inspection.
similarity_values = []
for pair_scores in similarity_matrix.values():
    similarity_values.extend(pair_scores)
similarity_values.sort()
print(similarity_values)

In [None]:
# Histogram of all pairwise scores, used to eyeball a cut-off between
# matching and non-matching frame pairs.
fig, ax = plt.subplots(figsize=(16, 16))
ax.hist(similarity_values, bins=400)
plt.show()

In [None]:
# Number of frame pairs scoring below 5.
print(sum(1 for score in similarity_values if score < 5))

In [None]:
def add_edge(mapping, node1, node2):
    """Record a directed edge ``node1 -> node2`` in ``mapping`` (mutated in place).

    ``mapping`` maps each node to the list of its successors; the list is
    created the first time ``node1`` is seen.
    """
    mapping.setdefault(node1, []).append(node2)
        
def find_linking(mapping):
    """Turn a node -> [successor] mapping into an ordered list of nodes.

    The mapping is validated with ``check_linking_valid`` first. If that
    reports False, a message is printed and None is returned. Otherwise the
    chain is followed from the root (the key that is nobody's first successor)
    for ``len(mapping)`` steps, giving ``len(mapping) + 1`` nodes.
    """
    if check_linking_valid(mapping) is False:
        print("Cannot find linked list")
        return None

    first_children = {successors[0] for successors in mapping.values()}
    root_node = list(set(mapping.keys()) - first_children)[0]

    order = [root_node]
    current = root_node
    for _ in range(len(mapping)):
        current = mapping[current][0]
        order.append(current)

    return order
        
def check_linking_valid(mapping):
    """Return True only if ``mapping`` describes one unbroken chain of nodes.

    ``mapping`` maps each node to the list of its successors. The checks are:
        a) there is only one root node (a key that is nobody's successor)
        b) the chain is not broken, i.e. walking from the root visits every key
        c) there is only one leaf (a successor that is not itself a key)
        d) each node has exactly one child and no node has two parents

    Any violation yields False rather than an exception, so disconnected
    cycles, branching nodes, and empty successor lists are all rejected cleanly.
    """
    if not mapping:
        return False

    # d) exactly one successor per node, otherwise the "chain" branches or dangles.
    if any(len(successors) != 1 for successors in mapping.values()):
        return False

    childs = [successors[0] for successors in mapping.values()]
    keys = list(mapping.keys())

    # d) no node may be the successor of two different nodes.
    if len(set(childs)) != len(childs):
        return False

    # a)
    root_nodes = set(keys) - set(childs)
    if len(root_nodes) != 1:
        return False

    # c)
    leafs = set(childs) - set(keys)
    if len(leafs) != 1:
        return False

    # b) follow the chain from the root until the leaf; every key must be
    # visited exactly once, which rules out cycles detached from the chain.
    visited_nodes = set()
    node = next(iter(root_nodes))
    while node in mapping:
        if node in visited_nodes:
            return False
        visited_nodes.add(node)
        node = mapping[node][0]

    return len(visited_nodes) == len(mapping)

In [None]:
# Scores below this value are treated as "these two frames match".
threshold = 10

# Directed graph: predecessor video -> [videos that continue it].
video_linking = {}

for (video_a, video_b), (a_first_vs_b_last, a_last_vs_b_first) in similarity_matrix.items():
    # A's first frame matches B's last frame, so B comes right before A.
    if a_first_vs_b_last < threshold:
        add_edge(video_linking, video_b, video_a)
    # A's last frame matches B's first frame, so A comes right before B.
    if a_last_vs_b_first < threshold:
        add_edge(video_linking, video_a, video_b)

In [None]:
# Inspect the predecessor -> successors graph built from the thresholded scores.
video_linking

In [None]:
# Resolve the graph into a single ordered list of video paths.
# find_linking returns None (after printing a message) when no valid chain exists.
correct_linking = find_linking(video_linking)
print(correct_linking)

In [None]:
def concatenate_video_clips(video_file_paths):
    """Load the given video files and join them into one clip, in order.

    Args:
        video_file_paths: Ordered, non-empty sequence of video file paths.

    Returns:
        The clip produced by moviepy's ``concatenate_videoclips``.

    Raises:
        ValueError: If ``video_file_paths`` is None or empty, e.g. because no
            valid ordering of the videos was found.
    """
    if not video_file_paths:
        raise ValueError("No video files to concatenate")

    clips = [VideoFileClip(path) for path in video_file_paths]
    return concatenate_videoclips(clips)

In [None]:
# Join the clips in the detected order and report the resulting frame rate.
concatenated_clip = concatenate_video_clips(correct_linking)
print(concatenated_clip.fps)

In [None]:
# Convert the concatenated clip's audio track to an array.
# NOTE(review): presumably a check that audio survived concatenation; this fails if
# the clip has no audio (audio is None) — confirm.
concatenated_clip.audio.to_soundarray()