In [1]:
"""
"Computational Augmentation of Dance Videos through Artistic Renderings" semester project

Author: Irina Serenko

In this project the MediaPipe model was used for pose estimation and human segmentation.

Credits: https://github.com/google/mediapipe
         https://google.github.io/mediapipe/solutions/pose#resources
         https://jaantollander.com/post/noise-filtering-using-one-euro-filter/

"""

import copy
import glob
import math
import os
import warnings
from collections import deque

import cv2
import numpy as np
import pandas as pd

warnings.filterwarnings("ignore")

from drawing_styles_mine import *
from drawing_utils_mine import *
from oneEuro import *

In [2]:
# Tab-separated table of performances; the processing loop reads its
# `edition` and `filename` columns to pick the videos of each edition.
data_editions = pd.read_csv(
    "data/data_performances.csv",
    delimiter="\t",
)

In [3]:
# MediaPipe is imported in its own cell, separately from the import cell at the top.
import mediapipe as mp
# Short alias for the pose solution; the processing loop uses mp_pose.Pose
# and mp_pose.POSE_CONNECTIONS.
mp_pose = mp.solutions.pose
# MediaPipe's stock drawing helpers are left disabled: the notebook star-imports
# drawing_utils_mine / drawing_styles_mine instead.
#mp_drawing = mp.solutions.drawing_utils
#mp_drawing_styles = mp.solutions.drawing_styles

In [4]:
# Root folder of the input videos (scanned below for one sub-folder per edition)
# and root folder that receives every generated output.
DATA_PATH = 'D:/prix_lausanne/videos_cut/'
OUTPUT_PATH = 'results/'

# Alternative locations:
# DATA_PATH = './LausannePrix_newtest/videos_cut/'
# OUTPUT_PATH = './LausannePrix_newtest/Results'

# Number of inpainted frames (alpha parameter should be fine-tuned in case of changing the number of frames)
NUM_INPAINTED_FRAMES_INPAINTING = 19
NUM_INPAINTED_FRAMES_SKELETON = 9

# OneEuro filtering parameters
MIN_CUTOFF = 0.05
BETA = 1.0

# Processing of one video from each edition
testing = False
# Save a video with a skeleton
skeleton = True
# Filter (smoothen) the skeleton movements
lpfilter = True
# Save an inpainting video with a background
inpainting_with_background = True
# Save an inpainting video without background
inpainting_without_background = True
# Save csv with coordinates
csv_coordinates = True

# NOTE: x_track / y_track (the per-keypoint filters) are created by the
# processing loop at module level, so no `global` declaration is needed here.

# Create the output folders. makedirs(exist_ok=True) also creates missing parent
# folders (a nested OUTPUT_PATH would make os.mkdir fail) and is a no-op when
# the folder already exists.
os.makedirs(OUTPUT_PATH, exist_ok=True)
if skeleton:
    os.makedirs(OUTPUT_PATH + '/Skeleton', exist_ok=True)
if inpainting_with_background:
    os.makedirs(OUTPUT_PATH + '/Inpainting', exist_ok=True)
if inpainting_without_background:
    os.makedirs(OUTPUT_PATH + '/Inpainting_no_background', exist_ok=True)
if csv_coordinates:
    os.makedirs(OUTPUT_PATH + '/Landmarks/', exist_ok=True)

# Save list of folder names (editions). The scandir iterator is closed explicitly
# and the names are sorted so the processing order is the same on every run.
with os.scandir(DATA_PATH) as entries:
    editions = sorted(entry.name for entry in entries if entry.is_dir())

In [5]:
# Empty landmarks table (one column per exported field); only needed when the
# CSV export is switched on.
if csv_coordinates:
    column_names = [
        "frame_num",
        "pose_landmarks",
        "pose_world_landmarks",
    ]
    df = pd.DataFrame(columns=column_names)

In [None]:
# For video input we are creating a new video with the pose annotation/segmentation mask.
#
# For every edition folder, each video listed for that edition in `data_editions`
# is read frame by frame, run through MediaPipe Pose, and rendered into up to
# three output videos (skeleton, inpainting with / without background). The
# per-frame landmarks are optionally exported to one CSV file per video.

# Every HISTORY_STRIDE-th frame is remembered and later blended in as a fading echo.
HISTORY_STRIDE = 5
# Opacity increment between consecutive echoes (the oldest echo is the most transparent).
INPAINTING_ALPHA_STEP = 0.04
SKELETON_ALPHA_STEP = 0.08

for edition in editions:
    # Create folders for each edition (no-op when they already exist)
    if skeleton:
        os.makedirs(OUTPUT_PATH + '/Skeleton/' + edition, exist_ok=True)
    if inpainting_with_background:
        os.makedirs(OUTPUT_PATH + '/Inpainting/' + edition, exist_ok=True)
    if inpainting_without_background:
        os.makedirs(OUTPUT_PATH + '/Inpainting_no_background/' + edition, exist_ok=True)
    if csv_coordinates:
        os.makedirs(OUTPUT_PATH + '/Landmarks/' + edition, exist_ok=True)

    # Videos of this edition: the folder name minus the "edition" prefix is
    # matched against the `edition` column of the performances table.
    videos_edition = data_editions[data_editions.edition == int(edition.replace("edition",""))].filename.values
    videos_edition = [DATA_PATH + fp for fp in videos_edition]

    video_count = 0

    for video in videos_edition:

        # Testing (one video from edition)
        if testing and video_count > 0:
            break

        video_name = os.path.basename(video)

        # Paths and names for each new video
        if skeleton:
            video_output_skeleton = OUTPUT_PATH + '/Skeleton/' + edition + '/skeleton_' + video_name
        if inpainting_with_background:
            video_output_inpainting = OUTPUT_PATH + '/Inpainting/' + edition + '/inpainting_' + video_name
        if inpainting_without_background:
            # NOTE(review): unlike the two prefixes above there is no '_' before the
            # video name; kept as is so existing output file names do not change.
            video_output_inpainting_no_background = OUTPUT_PATH + '/Inpainting_no_background/' + edition + '/inpainting_no_background' + video_name

        # Capture the existing input video
        cap = cv2.VideoCapture(video)
        output_skeleton = None
        output_inpainting = None
        output_inpainting_no_background = None

        # Rows of the landmarks table for this video (turned into a DataFrame once, at the end)
        landmark_rows = []

        try:
            # Obtain video information using get() method
            frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            frame_size = (frame_width, frame_height)
            # Keep the frame rate as a float: truncating e.g. 29.97 to 29 would
            # change the duration of the output videos.
            fps = cap.get(cv2.CAP_PROP_FPS)

            # Initialize video writer objects
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            if skeleton:
                output_skeleton = cv2.VideoWriter(video_output_skeleton, fourcc, fps, frame_size)
            if inpainting_with_background:
                output_inpainting = cv2.VideoWriter(video_output_inpainting, fourcc, fps, frame_size)
            if inpainting_without_background:
                output_inpainting_no_background = cv2.VideoWriter(video_output_inpainting_no_background, fourcc, fps, frame_size)

            # Number of the frame that is being processed
            count = 0

            # Histories of the sampled frames/results used for the echoes. Only the
            # last NUM_INPAINTED_FRAMES_* + 1 entries are ever read, so the histories
            # are bounded instead of growing with the length of the video.
            inpainted_frame_results = deque(maxlen=NUM_INPAINTED_FRAMES_INPAINTING + 1)
            inpainted_frames = deque(maxlen=NUM_INPAINTED_FRAMES_INPAINTING + 1)
            inpainted_skeleton_results = deque(maxlen=NUM_INPAINTED_FRAMES_SKELETON + 1)

            # Results of the previous frame (after filtering); None before the first frame
            previous_results_filtered = None

            # MediaPipe model parameters
            with mp_pose.Pose(
                    min_detection_confidence=0.8,
                    min_tracking_confidence=0.8,
                    #model_complexity = 2,
                    enable_segmentation=True) as pose:
                while cap.isOpened():

                    # Read the frame
                    success, image = cap.read()
                    if not success:
                        print(f'Ignoring empty camera frame. Video {video_name} processing finished.')
                        break

                    # To improve performance, optionally mark the image as not writeable to
                    # pass by reference.
                    image.flags.writeable = False
                    # From BGR to RGB
                    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                    # Run the model
                    results = pose.process(image)

                    # INPAINTING
                    if inpainting_with_background or inpainting_without_background:

                        image.flags.writeable = True
                        # From RGB to BGR
                        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                        if inpainting_with_background:
                            annotated_image = image.copy()
                        if inpainting_without_background:
                            annotated_image_no_background = np.zeros(shape=image.shape, dtype=np.uint8)

                        # Save each HISTORY_STRIDE-th frame and its prediction for inpainting
                        # (only when the model produced a segmentation mask)
                        if count % HISTORY_STRIDE == 0 and results.segmentation_mask is not None:
                            inpainted_frame_results.append(results)
                            inpainted_frames.append(image)

                        # Inpainting of the previously saved frames, oldest first, with rising opacity.
                        # alpha advances for every slot, filled or not, so an echo keeps the
                        # same opacity regardless of how many echoes are available yet.
                        alpha = INPAINTING_ALPHA_STEP
                        for num_frame in range(NUM_INPAINTED_FRAMES_INPAINTING, 0, -1):
                            if len(inpainted_frame_results) > num_frame:
                                # Stored results always carry a mask (checked when they were saved)
                                mask_dancer_cur = inpainted_frame_results[-num_frame].segmentation_mask
                                mask_dancer_cur = np.repeat(mask_dancer_cur[..., np.newaxis], 3, axis=2)
                                previous_frame_image = inpainted_frames[-num_frame]
                                mask_background_cur = (1 - mask_dancer_cur)
                                dancer = previous_frame_image * mask_dancer_cur
                                if inpainting_with_background:
                                    background = annotated_image * mask_background_cur
                                    background_behind_dancer = image * mask_dancer_cur
                                    annotated_image = np.uint8(background + dancer * alpha + background_behind_dancer * (1 - alpha))
                                if inpainting_without_background:
                                    background = annotated_image_no_background * mask_background_cur
                                    annotated_image_no_background = np.uint8(background + dancer * alpha)
                            alpha += INPAINTING_ALPHA_STEP

                        # Current frame: inpaint a dancer from the current frame or empty the list of frames to inpaint if the current prediction result is None
                        global_frame_mask = results.segmentation_mask
                        if global_frame_mask is not None:
                            global_frame_mask = np.repeat(global_frame_mask[..., np.newaxis], 3, axis=2)
                            global_background_mask = (1 - global_frame_mask)
                            dancer = image * global_frame_mask
                            if inpainting_with_background:
                                background = annotated_image * global_background_mask
                                annotated_image = np.uint8(background + dancer)
                            if inpainting_without_background:
                                background = annotated_image_no_background * global_background_mask
                                annotated_image_no_background = np.uint8(background + dancer)
                        else:
                            if inpainting_with_background:
                                annotated_image = image
                            if inpainting_without_background:
                                annotated_image_no_background = np.zeros(shape=image.shape, dtype=np.uint8)
                            inpainted_frame_results.clear()
                            inpainted_frames.clear()

                    # Write the resulting frames to video files
                    if inpainting_with_background:
                        output_inpainting.write(annotated_image)
                    if inpainting_without_background:
                        output_inpainting_no_background.write(annotated_image_no_background)
                    # END OF INPAINTING

                    # Save the actual coordinates for all points. The filter below
                    # modifies results.pose_landmarks in place, so a copy is stored;
                    # otherwise the CSV would end up with the filtered x/y values.
                    # Each value is kept as a one-element list, as in the original export format.
                    if csv_coordinates:
                        landmark_rows.append({
                            "frame_num": [count],
                            "pose_landmarks": [copy.deepcopy(results.pose_landmarks)],
                            "pose_world_landmarks": [results.pose_world_landmarks],
                        })

                    ############## Low-pass filter, changing the actual coordinates
                    if lpfilter and results.pose_landmarks is not None:
                        curr_kp = results.pose_landmarks.landmark
                        num_kps = len(curr_kp)
                        if count == 0 or previous_results_filtered.pose_landmarks is None:
                            # First frame, or the pose was lost on the previous frame:
                            # (re)start one filter per keypoint and coordinate from the current values.
                            x_track = [OneEuroFilter(count, curr_kp[k].x, min_cutoff=MIN_CUTOFF, beta=BETA) for k in range(num_kps)]
                            y_track = [OneEuroFilter(count, curr_kp[k].y, min_cutoff=MIN_CUTOFF, beta=BETA) for k in range(num_kps)]
                        else:
                            for i in range(num_kps):
                                ## x coordinate
                                curr_kp[i].x = x_track[i](count, curr_kp[i].x)
                                ## y coordinate
                                curr_kp[i].y = y_track[i](count, curr_kp[i].y)
                    #############

                    # SKELETON PROCESSING
                    if skeleton:
                        # Background
                        image = np.zeros(shape=image.shape, dtype=np.uint8)
                        # Save each HISTORY_STRIDE-th frame results
                        if count % HISTORY_STRIDE == 0:
                            if results is not None:
                                inpainted_skeleton_results.append(results)
                        # Draw the previously saved skeletons, oldest first, with rising opacity
                        alpha = SKELETON_ALPHA_STEP
                        for num_frame in range(NUM_INPAINTED_FRAMES_SKELETON, 0, -1):
                            if len(inpainted_skeleton_results) > num_frame:
                                cur_res = inpainted_skeleton_results[-num_frame]
                                draw_landmarks(
                                    image,
                                    alpha,
                                    cur_res.pose_landmarks,
                                    mp_pose.POSE_CONNECTIONS,
                                    landmark_drawing_spec=get_default_pose_landmarks_style_mine())
                            alpha += SKELETON_ALPHA_STEP

                        # Current skeleton, fully opaque
                        alpha = 1.0
                        draw_landmarks(
                            image,
                            alpha,
                            results.pose_landmarks,
                            mp_pose.POSE_CONNECTIONS,
                            landmark_drawing_spec=get_default_pose_landmarks_style_mine())

                        output_skeleton.write(image)
                    # END OF SKELETON PROCESSING

                    previous_results_filtered = results
                    count += 1
        finally:
            # Release the capture and the writers even if processing is interrupted,
            # so that the output files are finalized and the input is closed.
            cap.release()
            for writer in (output_skeleton, output_inpainting, output_inpainting_no_background):
                if writer is not None:
                    writer.release()

        if csv_coordinates:
            # Save the coordinates to csv file. The table is built in one go:
            # DataFrame.append was removed in pandas 2.0 and copied the whole
            # table on every frame.
            landmarks_df = pd.DataFrame(landmark_rows, columns=column_names)
            landmarks_df.to_csv(OUTPUT_PATH + '/Landmarks/' + edition + '/' + video_name + '.csv', index=False)

        #### FOR TESTING ONLY
        video_count += 1
        ####