# Human Video Segmentation

<span style="font-size: 20px; line-height: 30px;">
<ol>
    <li> Load model </li>
    <li> Load video </li>
    <li> Predict the mask for each frame </li>
    <li> Overlay the predicted mask on each frame and write the frames to the output video </li>
</ol>
</span>

## Import

In [1]:
import os
import time
import numpy as np
import cv2
from glob import glob
from tqdm import tqdm
import tensorflow as tf
from tensorflow.keras.utils import CustomObjectScope

## Seeding

In [2]:
# One seed shared by Python hashing, NumPy and TensorFlow so runs are repeatable.
# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so
# setting it here presumably only reaches processes spawned later - confirm.
SEED = 42
os.environ["PYTHONHASHSEED"] = str(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

## Hyperparameters

In [3]:
# Spatial size every frame is resized to before prediction; it matches the
# (None, 512, 512, 3) input shape reported by model.summary().
height, width = 512, 512

## Path

In [4]:
# Saved Keras model file.
model_path = os.path.join("files", "unet.h5")

# Source clip to segment and destination of the blended result.
input_video_path, output_video_path = (
    os.path.join("test-videos", "inputs", "Video-2.mp4"),
    os.path.join("test-videos", "outputs", "Video-2.avi"),
)

## Load model

In [5]:
def dice_loss(y_true, y_pred):
    """Return the Dice loss (``1 - Dice coefficient``) of a prediction.

    Both tensors are passed through a Keras ``Flatten`` layer and every sum
    runs over all elements, so a single scalar is produced for the whole
    batch rather than one value per sample.

    Args:
        y_true: Ground-truth mask tensor.
        y_pred: Predicted mask tensor with the same shape as ``y_true``.

    Returns:
        Scalar tensor ``1 - (2 * sum(t * p) + eps) / (sum(t) + sum(p) + eps)``.
    """
    # Tiny constant that keeps the ratio defined when both masks sum to zero.
    smooth = 1e-15

    truth = tf.keras.layers.Flatten()(y_true)
    pred = tf.keras.layers.Flatten()(y_pred)

    overlap = tf.reduce_sum(truth * pred)
    total = tf.reduce_sum(truth) + tf.reduce_sum(pred)

    return 1.0 - (2. * overlap + smooth) / (total + smooth)

In [6]:
# Hand dice_loss to the loader under the name 'dice_loss' so Keras can
# resolve that custom object while deserializing the saved model.
model = tf.keras.models.load_model(
    model_path,
    custom_objects={'dice_loss': dice_loss},
)

In [7]:
# Print the layer-by-layer table of the loaded model (names, output shapes,
# parameter counts) as a quick check that loading worked.
model.summary()

Model: "UNET"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 512, 512, 3)]        0         []                            
                                                                                                  
 conv1_pad (ZeroPadding2D)   (None, 518, 518, 3)          0         ['input_1[0][0]']             
                                                                                                  
 conv1_conv (Conv2D)         (None, 256, 256, 64)         9472      ['conv1_pad[0][0]']           
                                                                                                  
 conv1_bn (BatchNormalizati  (None, 256, 256, 64)         256       ['conv1_conv[0][0]']          
 on)                                                                                           

## Load Video and Read First Frame

In [8]:
# Open the clip once only to learn its frame size; the capture is released
# immediately, even when reading fails.
vs = cv2.VideoCapture(input_video_path)
try:
    ret, frame = vs.read()
finally:
    vs.release()

# read() signals failure through its flag (missing file, unsupported codec,
# empty clip); fail here with a clear message instead of crashing on
# `None.shape` below.
if not ret or frame is None:
    raise IOError(f"Could not read a frame from {input_video_path!r}")

# Frame height and width in pixels; reused to size the output video.
H, W = frame.shape[:2]

print(H, W)

2160 3840


## Output Video

In [9]:
fourcc = cv2.VideoWriter_fourcc('M','J','P','G')

# Reuse the source clip's frame rate so the result plays at the original
# speed; fall back to 30 when the container does not report a usable value
# (the `not (fps > 0)` form also catches NaN).
fps_probe = cv2.VideoCapture(input_video_path)
fps = fps_probe.get(cv2.CAP_PROP_FPS)
fps_probe.release()
if not (fps > 0):
    fps = 30

# Create the destination folder first so the writer has somewhere to
# write (skipped when the path has no directory part).
output_dir = os.path.dirname(output_video_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# (W, H) is the frame size measured from the first frame; True = colour output.
out = cv2.VideoWriter(output_video_path, fourcc, fps, (W, H), True)
if not out.isOpened():
    raise IOError(f"Could not open {output_video_path!r} for writing")

## Video Segmentation

In [10]:
cap = cv2.VideoCapture(input_video_path)

# Blend weight of the mask; the original frame gets the remaining 1 - alpha.
alpha = 0.6

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No more frames could be read.
            break

        H, W, _ = frame.shape
        ori_frame = frame

        # Resize to the model input size, scale pixels to [0, 1] and add a
        # batch axis.
        frame = cv2.resize(frame, (width, height))
        frame = frame/255
        frame = np.expand_dims(frame, axis=0)

        # verbose=0 suppresses the per-call progress bar, which would
        # otherwise be printed once for every frame of the video.
        mask = model.predict(frame, verbose=0)[0]

        # Bring the prediction back to the frame size, binarise at 0.5 and
        # replicate it over three channels with values 0 / 255.
        mask = cv2.resize(mask, (W, H))
        mask = mask > 0.5
        mask = np.expand_dims(mask, axis=-1)
        mask = np.concatenate([mask, mask, mask], axis=-1) * 255

        # addWeighted is given two arrays of the same dtype.
        mask = mask.astype(np.float32)
        ori_frame = ori_frame.astype(np.float32)

        output = cv2.addWeighted(mask, alpha, ori_frame, 1-alpha, 0)
        output = output.astype(np.uint8)

        out.write(output)
finally:
    # Always release both handles, including when a frame fails mid-way,
    # so the capture is closed and the output file is finalised.
    cap.release()
    out.release()

print("Process Completed!")

Process Completed!
