In [None]:
import os
import math
import numpy as np
import ffmpeg
import cv2
import imutils
from imutils.video import count_frames
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

In [None]:
# Project layout: the data folder sits next to the directory the notebook
# is executed from, i.e. under the parent of the current working directory.
ROOT_DIR = os.path.split(os.getcwd())[0]
DATA_FOLDER = os.path.join(ROOT_DIR, "data")

In [None]:
# Paths of the two input clips (left and right camera) inside DATA_FOLDER.
video_L_path, video_R_path = (
    os.path.join(DATA_FOLDER, clip_name)
    for clip_name in (
        "keparoicam_clipL_synchronized.mp4",
        "keparoicam_clipR_synchronized.mp4",
    )
)

In [None]:
# Open one capture per camera clip.
video_left_capture = cv2.VideoCapture(video_L_path)
video_right_capture = cv2.VideoCapture(video_R_path)

# cv2.VideoCapture does not raise when a file cannot be opened, so check
# explicitly and fail here rather than silently processing zero frames later.
for capture, clip_path in (
    (video_left_capture, video_L_path),
    (video_right_capture, video_R_path),
):
    if not capture.isOpened():
        raise IOError("Could not open video file: {}".format(clip_path))

In [None]:
# Frame counts reported for each clip. Only the span present in both clips
# is processed, hence the minimum of the two.
left_n_frames, right_n_frames = (
    int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
    for capture in (video_left_capture, video_right_capture)
)
total_frames = min(left_n_frames, right_n_frames)
print(left_n_frames, right_n_frames, total_frames, sep="\n")

# Geometry (truncated to int) and frame rate (kept as float) of the left clip.
left_width, left_height = (
    int(video_left_capture.get(prop))
    for prop in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT)
)
left_fps = video_left_capture.get(cv2.CAP_PROP_FPS)

# Same properties for the right clip.
right_width, right_height = (
    int(video_right_capture.get(prop))
    for prop in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT)
)
right_fps = video_right_capture.get(cv2.CAP_PROP_FPS)

print(left_width, left_height, left_fps, sep="\n")
print(right_width, right_height, right_fps, sep="\n")

In [None]:
# Settings of the generated video: frame rate, frame size and codec.
# NOTE(review): these are hard-coded; presumably they match the input clips'
# resolution and frame rate printed by the metadata cell -- TODO confirm.
final_fps = 60.0
final_width, final_height = 1920, 1080
# Four-character codec code 'MJPG', passed one character per argument.
fourcc = cv2.VideoWriter_fourcc(*"MJPG")

In [None]:
# Destination file and writer for the generated video.
video_path = os.path.join(DATA_FOLDER, "example_keparoiCam_of.avi")
video_output = cv2.VideoWriter(video_path, fourcc, final_fps, (final_width, final_height))

# A writer that failed to open (missing folder, unavailable codec, ...) does
# not raise on construction; check it so frames are not dropped silently.
if not video_output.isOpened():
    raise IOError("Could not open video writer for: {}".format(video_path))

In [None]:
def equalize_histogram(rgb_image):
    """Equalize the histogram of every colour channel independently.

    The image is split into its three channels, ``cv2.equalizeHist`` is
    applied to each one, and the results are merged back in the same order.

    Args:
        rgb_image: three-channel image accepted by ``cv2.split``.

    Returns:
        The merged image built from the three equalized channels.
    """
    first, second, third = cv2.split(rgb_image)
    equalized_channels = [cv2.equalizeHist(channel) for channel in (first, second, third)]
    return cv2.merge(equalized_channels)

In [None]:
# Shared CLAHE operator: clip limit 2.0, applied over an 8x8 grid of tiles.
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))


def apply_clahe(image):
    """Apply the module-level CLAHE operator to the first LAB channel of an image.

    The image is converted RGB -> LAB, channel 0 of the LAB image is replaced
    by its CLAHE-processed version, and the result is converted back
    LAB -> BGR -> RGB.

    Args:
        image: three-channel image, interpreted as RGB by the conversions.

    Returns:
        The contrast-enhanced image in the same channel order as the input.
    """
    lab = cv2.cvtColor(image, cv2.COLOR_RGB2LAB)
    enhanced_channel = clahe.apply(lab[..., 0])
    lab[..., 0] = enhanced_channel

    return cv2.cvtColor(
        cv2.cvtColor(lab, cv2.COLOR_LAB2BGR),
        cv2.COLOR_BGR2RGB,
    )

In [None]:
def preprocess_image(image, equalize_hist=True, clahe=False):
    """Run the optional enhancement steps on a single image.

    Args:
        image: image to process.
        equalize_hist: when true, pass the image through ``equalize_histogram``.
        clahe: when true, pass the (possibly equalized) image through
            ``apply_clahe``. This flag shadows the module-level ``clahe``
            operator inside this function only.

    Returns:
        The processed image, or the input itself when both flags are false.
    """
    result = equalize_histogram(image) if equalize_hist else image
    return apply_clahe(result) if clahe else result

def preprocess_images(images):
    """Apply ``preprocess_image`` with its default options to every image.

    Args:
        images: iterable of images.

    Returns:
        A new list with one processed image per input image, in order.
    """
    return [preprocess_image(single_image) for single_image in images]

In [None]:
def calculate_optical_flow(frame, frame2):
    """Count the pixels that differ noticeably between two BGR frames.

    Despite the name this is a frame-differencing score: both frames are
    converted to grayscale and blurred with a 21x21 Gaussian, their absolute
    difference is binarised at 25 and dilated twice, and the pixels left at
    255 are counted.

    Args:
        frame: first BGR frame.
        frame2: second BGR frame, same size as ``frame``.

    Returns:
        Number of pixels set in the dilated difference mask.
    """
    blurred = []
    # Both inputs get the same treatment; the first one is the ``frame``
    # parameter (the body previously read an undefined name ``frame1``).
    for image in (frame, frame2):
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        blurred.append(cv2.GaussianBlur(gray, (21, 21), 0))

    frame_delta = cv2.absdiff(blurred[0], blurred[1])

    thresh = cv2.threshold(frame_delta, 25, 255, cv2.THRESH_BINARY)[1]
    thresh = cv2.dilate(thresh, None, iterations=2)

    return np.sum(thresh == 255)

def calculate_optical_flow_score_from_mask(mask):
    """Count the pixels of a BGR flow visualisation that exceed the threshold.

    The mask is converted to grayscale and binarised at 25 (values above 25
    become 255, everything else 0); the score is the number of pixels that
    end up at 255.

    Args:
        mask: BGR image, e.g. the colour-coded flow produced by
            ``calculate_dense_optical_flow``.

    Returns:
        Number of pixels whose grayscale value is above the threshold.
    """
    gray_mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
    thresh = cv2.threshold(gray_mask, 25, 255, cv2.THRESH_BINARY)[1]

    # Count on the binarised image, not on the raw grayscale one: comparing
    # the raw values with 255 ignores the threshold computed just above.
    return np.sum(thresh == 255)

def calculate_dense_optical_flow(frame1, frame2):
    """Score the dense (Farneback) optical flow between two frames.

    The flow field is rendered as an HSV image (hue from the flow angle,
    full saturation, value from the min-max normalised flow magnitude),
    converted to BGR and scored with
    ``calculate_optical_flow_score_from_mask``.

    Args:
        frame1: three-channel BGR frame; also fixes the shape and dtype of
            the HSV rendering.
        frame2: BGR frame, or an already single-channel image (used as is
            when it does not have three dimensions).

    Returns:
        The score returned by ``calculate_optical_flow_score_from_mask``.
    """
    gray_first = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)
    if len(frame2.shape) == 3:
        gray_second = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)
    else:
        gray_second = frame2

    flow = cv2.calcOpticalFlowFarneback(
        gray_first, gray_second, None,
        pyr_scale=0.5, levels=3, winsize=15, iterations=3,
        poly_n=5, poly_sigma=1.2, flags=0,
    )
    magnitude, angle = cv2.cartToPolar(flow[..., 0], flow[..., 1])

    hsv = np.zeros_like(frame1)
    # Angle in radians -> degrees, halved before being stored as hue.
    hsv[..., 0] = angle * 180 / np.pi / 2
    hsv[..., 1] = 255
    # Magnitude is rescaled per call so that it spans 0..255.
    hsv[..., 2] = cv2.normalize(magnitude, None, 0, 255, cv2.NORM_MINMAX)

    return calculate_optical_flow_score_from_mask(cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR))
    

def calculate_optical_flow_with_background(frame, background_frame):
    """Count the pixels of a frame that differ noticeably from a background.

    The frame is converted to grayscale and blurred with a 5x5 Gaussian, its
    absolute difference to ``background_frame`` is binarised at 25, and the
    pixels left at 255 are counted. No dilation is applied to the mask.

    Args:
        frame: BGR frame.
        background_frame: single-channel background image, same size as the
            grayscale frame.

    Returns:
        Number of pixels set in the binarised difference mask.
    """
    smoothed_gray = cv2.GaussianBlur(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY), (5, 5), 0)
    difference = cv2.absdiff(smoothed_gray, background_frame)

    _, changed_mask = cv2.threshold(difference, 25, 255, cv2.THRESH_BINARY)

    return np.sum(changed_mask == 255)

def calculate_optical_flow_metric_with_background(frames, background_frame):
    """Sum the dense optical flow score of sampled frames against a background.

    Every fifth frame (indices 0, 5, 10, ...) is compared with
    ``background_frame`` through ``calculate_dense_optical_flow``.

    Args:
        frames: sequence of frames (must support slicing).
        background_frame: background image handed to the flow function as
            its second argument.

    Returns:
        The accumulated score; 0 when ``frames`` is empty.
    """
    return sum(
        calculate_dense_optical_flow(sampled_frame, background_frame)
        for sampled_frame in frames[::5]
    )

def calculate_optical_flow_metric(frames):
    """Sum the dense optical flow score between consecutive sampled frames.

    Every fifth frame (indices 0, 5, 10, ...) is kept; each kept frame after
    the first is compared with the kept frame before it by calling
    ``calculate_dense_optical_flow(current, previous)``.

    Args:
        frames: sequence of frames (must support slicing).

    Returns:
        The accumulated score; 0 when fewer than two frames are sampled.
    """
    sampled = frames[::5]
    return sum(
        calculate_dense_optical_flow(current, previous)
        for previous, current in zip(sampled, sampled[1:])
    )
    
    
def write_frames(output_handle, frames):
    """Write every frame, in order, to ``output_handle``.

    Args:
        output_handle: object exposing ``write(frame)``, such as a
            ``cv2.VideoWriter``.
        frames: iterable of frames.
    """
    emit = output_handle.write
    for single_frame in frames:
        emit(single_frame)

In [None]:
def estimate_background(video_capture, n_samples=200):
    """Estimate a static grayscale background as the median of random frames.

    ``n_samples`` frame positions are drawn uniformly at random from the
    clip, the frames that can be read are stacked, and their per-pixel median
    is converted from BGR to grayscale.

    The capture's read position is moved by the sampling and is not restored.

    Args:
        video_capture: opened ``cv2.VideoCapture``.
        n_samples: number of random frame positions to try.

    Returns:
        Single-channel ``uint8`` median image.

    Raises:
        RuntimeError: if the capture reports no frames or none of the sampled
            frames could be read (e.g. the capture was already released).
    """
    n_frames = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
    if n_frames <= 0:
        raise RuntimeError("Cannot estimate background: the capture reports no frames")

    sampled_frames = []
    for frame_id in np.random.randint(0, n_frames, size=n_samples):
        video_capture.set(cv2.CAP_PROP_POS_FRAMES, int(frame_id))
        ret, frame = video_capture.read()
        # A failed read yields frame=None, which would corrupt the stack below.
        if ret:
            sampled_frames.append(frame)

    if not sampled_frames:
        raise RuntimeError("Cannot estimate background: no sampled frame could be read")

    sampled_frames_np = np.array(sampled_frames)

    print(sampled_frames_np.shape)

    median_frame = np.median(sampled_frames_np, axis=0).astype(dtype=np.uint8)

    return cv2.cvtColor(median_frame, cv2.COLOR_BGR2GRAY)


In [None]:
# Without background estimation
#
# Both clips are walked in windows of final_fps / 2 frames. For each window
# the frame-to-frame optical flow metric of both cameras is compared and the
# preprocessed window of the camera with the larger score is written to the
# output (the right camera wins ties).
# NOTE: the captures and the writer are released at the end of this cell.

optical_flow_window_length = int(math.floor(final_fps / 2))
n_windows = math.floor(total_frames/optical_flow_window_length)

for i in tqdm(range(n_windows)):
    window_start = i * optical_flow_window_length

    left_frames = []
    right_frames = []

    for capture, window_frames in (
        (video_left_capture, left_frames),
        (video_right_capture, right_frames),
    ):
        # Seek once to the start of the window; the following read() calls
        # advance frame by frame, so no per-frame seek is needed.
        capture.set(cv2.CAP_PROP_POS_FRAMES, window_start)
        for j in range(optical_flow_window_length):
            res, frame = capture.read()
            if res:
                window_frames.append(frame)
            else:
                print("Error reading frame")

    left_optical_flow = calculate_optical_flow_metric(left_frames)
    right_optical_flow = calculate_optical_flow_metric(right_frames)

    if left_optical_flow > right_optical_flow:
        images_processed = preprocess_images(left_frames)
    else:
        images_processed = preprocess_images(right_frames)

    write_frames(video_output, images_processed)

video_left_capture.release()
video_right_capture.release()
video_output.release()

In [None]:
# The captures opened during setup are released at the end of the first
# experiment cell, so the backgrounds are sampled from fresh, short-lived
# handles that are closed again as soon as the estimate is done.
camera_backgrounds = []
for clip_path in (video_L_path, video_R_path):
    background_capture = cv2.VideoCapture(clip_path)
    try:
        camera_backgrounds.append(estimate_background(background_capture))
    finally:
        background_capture.release()

left_camera_background, right_camera_background = camera_backgrounds

In [None]:
# With background estimation
#
# Same windowed camera selection as the first experiment, but each window is
# scored against the estimated background of its camera, and the selected
# frames are written without preprocessing (the right camera wins ties).

optical_flow_window_length = int(math.floor(final_fps / 2))
n_windows = math.floor(total_frames/optical_flow_window_length)

# Earlier cells release the captures and the writer, so this experiment opens
# its own. Whatever handles are still around are closed first (release() on
# an already released handle is harmless).
# NOTE: the writer is re-opened on video_path, so the output of any
# previously run experiment is overwritten.
for stale_handle in (video_left_capture, video_right_capture, video_output):
    stale_handle.release()

video_left_capture = cv2.VideoCapture(video_L_path)
video_right_capture = cv2.VideoCapture(video_R_path)
video_output = cv2.VideoWriter(video_path, fourcc, final_fps, (final_width, final_height))

for i in tqdm(range(n_windows)):
    window_start = i * optical_flow_window_length

    left_frames = []
    right_frames = []

    for capture, window_frames in (
        (video_left_capture, left_frames),
        (video_right_capture, right_frames),
    ):
        # Seek once to the start of the window; the following read() calls
        # advance frame by frame, so no per-frame seek is needed.
        capture.set(cv2.CAP_PROP_POS_FRAMES, window_start)
        for j in range(optical_flow_window_length):
            res, frame = capture.read()
            if res:
                window_frames.append(frame)
            else:
                print("Error reading frame")

    left_optical_flow = calculate_optical_flow_metric_with_background(left_frames, left_camera_background)
    right_optical_flow = calculate_optical_flow_metric_with_background(right_frames, right_camera_background)

    if left_optical_flow > right_optical_flow:
        images_processed = left_frames
    else:
        images_processed = right_frames

    write_frames(video_output, images_processed)

video_left_capture.release()
video_right_capture.release()
video_output.release()

In [None]:
# HOG descriptor configured with OpenCV's default people-detector SVM.
hog = cv2.HOGDescriptor()
hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())


def calculate_humans_from_frames(frames):
    """Count the people detections over a sample of the given frames.

    Every fifth frame (indices 0, 5, 10, ...) is converted to grayscale and
    run through the module-level HOG people detector; the numbers of
    detection boxes are added up. The same person detected in several
    sampled frames is therefore counted several times.

    Args:
        frames: sequence of BGR frames as returned by ``cv2.VideoCapture.read``.

    Returns:
        Total number of detection boxes over the sampled frames.
    """
    n_of_humans = 0

    for frame in frames[::5]:
        # Frames read through cv2.VideoCapture are in BGR channel order, so
        # use the BGR conversion like the rest of this notebook.
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        boxes, weights = hog.detectMultiScale(gray, winStride=(8, 8))

        n_of_humans += len(boxes)

    return n_of_humans
    

In [None]:
# With human detection
#
# Same windowed camera selection as the other experiments, but each window is
# scored by the number of people detections. On a tie the camera selected for
# the previous window is kept if that was the left one; otherwise (including
# the very first window) the right camera is used.

optical_flow_window_length = int(math.floor(final_fps / 2))
n_windows = math.floor(total_frames/optical_flow_window_length)

# Earlier cells release the captures and the writer, so this experiment opens
# its own. Whatever handles are still around are closed first (release() on
# an already released handle is harmless).
# NOTE: the writer is re-opened on video_path, so the output of any
# previously run experiment is overwritten.
for stale_handle in (video_left_capture, video_right_capture, video_output):
    stale_handle.release()

video_left_capture = cv2.VideoCapture(video_L_path)
video_right_capture = cv2.VideoCapture(video_R_path)
video_output = cv2.VideoWriter(video_path, fourcc, final_fps, (final_width, final_height))

prev = None

for i in tqdm(range(n_windows)):
    window_start = i * optical_flow_window_length

    left_frames = []
    right_frames = []

    for capture, window_frames in (
        (video_left_capture, left_frames),
        (video_right_capture, right_frames),
    ):
        # Seek once to the start of the window; the following read() calls
        # advance frame by frame, so no per-frame seek is needed.
        capture.set(cv2.CAP_PROP_POS_FRAMES, window_start)
        for j in range(optical_flow_window_length):
            res, frame = capture.read()
            if res:
                window_frames.append(frame)
            else:
                print("Error reading frame")

    left_n_of_humans = calculate_humans_from_frames(left_frames)
    right_n_of_humans = calculate_humans_from_frames(right_frames)

    if left_n_of_humans == right_n_of_humans:
        # Tie: stay on the left camera only if it was the previous choice.
        use_left = prev == 'left'
    else:
        use_left = left_n_of_humans > right_n_of_humans

    if use_left:
        images_processed = preprocess_images(left_frames)
        prev = "left"
    else:
        images_processed = preprocess_images(right_frames)
        prev = "right"

    write_frames(video_output, images_processed)

video_left_capture.release()
video_right_capture.release()
video_output.release()
